nokogiri 1.10.9 → 1.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (230) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +1632 -1022
  4. data/LICENSE.md +1 -1
  5. data/README.md +190 -95
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +34 -66
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +909 -422
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  18. data/ext/nokogiri/nokogiri.c +258 -105
  19. data/ext/nokogiri/nokogiri.h +207 -90
  20. data/ext/nokogiri/test_global_handlers.c +40 -0
  21. data/ext/nokogiri/xml_attr.c +18 -18
  22. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  23. data/ext/nokogiri/xml_cdata.c +33 -33
  24. data/ext/nokogiri/xml_comment.c +19 -31
  25. data/ext/nokogiri/xml_document.c +499 -323
  26. data/ext/nokogiri/xml_document_fragment.c +17 -36
  27. data/ext/nokogiri/xml_dtd.c +65 -59
  28. data/ext/nokogiri/xml_element_content.c +63 -55
  29. data/ext/nokogiri/xml_element_decl.c +31 -31
  30. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  31. data/ext/nokogiri/xml_entity_decl.c +37 -35
  32. data/ext/nokogiri/xml_entity_reference.c +17 -19
  33. data/ext/nokogiri/xml_namespace.c +131 -61
  34. data/ext/nokogiri/xml_node.c +1429 -723
  35. data/ext/nokogiri/xml_node_set.c +257 -225
  36. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  37. data/ext/nokogiri/xml_reader.c +340 -231
  38. data/ext/nokogiri/xml_relax_ng.c +87 -99
  39. data/ext/nokogiri/xml_sax_parser.c +269 -176
  40. data/ext/nokogiri/xml_sax_parser_context.c +286 -152
  41. data/ext/nokogiri/xml_sax_push_parser.c +111 -64
  42. data/ext/nokogiri/xml_schema.c +132 -140
  43. data/ext/nokogiri/xml_syntax_error.c +52 -23
  44. data/ext/nokogiri/xml_text.c +37 -30
  45. data/ext/nokogiri/xml_xpath_context.c +373 -185
  46. data/ext/nokogiri/xslt_stylesheet.c +342 -191
  47. data/gumbo-parser/CHANGES.md +63 -0
  48. data/gumbo-parser/Makefile +129 -0
  49. data/gumbo-parser/THANKS +27 -0
  50. data/gumbo-parser/src/Makefile +34 -0
  51. data/gumbo-parser/src/README.md +41 -0
  52. data/gumbo-parser/src/ascii.c +75 -0
  53. data/gumbo-parser/src/ascii.h +115 -0
  54. data/gumbo-parser/src/attribute.c +42 -0
  55. data/gumbo-parser/src/attribute.h +17 -0
  56. data/gumbo-parser/src/char_ref.c +22225 -0
  57. data/gumbo-parser/src/char_ref.h +29 -0
  58. data/gumbo-parser/src/char_ref.rl +2154 -0
  59. data/gumbo-parser/src/error.c +658 -0
  60. data/gumbo-parser/src/error.h +152 -0
  61. data/gumbo-parser/src/foreign_attrs.c +103 -0
  62. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
  66. data/gumbo-parser/src/parser.c +4932 -0
  67. data/gumbo-parser/src/parser.h +41 -0
  68. data/gumbo-parser/src/replacement.h +33 -0
  69. data/gumbo-parser/src/string_buffer.c +103 -0
  70. data/gumbo-parser/src/string_buffer.h +68 -0
  71. data/gumbo-parser/src/string_piece.c +48 -0
  72. data/gumbo-parser/src/svg_attrs.c +174 -0
  73. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  74. data/gumbo-parser/src/svg_tags.c +137 -0
  75. data/gumbo-parser/src/svg_tags.gperf +55 -0
  76. data/gumbo-parser/src/tag.c +223 -0
  77. data/gumbo-parser/src/tag_lookup.c +382 -0
  78. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  79. data/gumbo-parser/src/tag_lookup.h +13 -0
  80. data/gumbo-parser/src/token_buffer.c +79 -0
  81. data/gumbo-parser/src/token_buffer.h +71 -0
  82. data/gumbo-parser/src/token_type.h +17 -0
  83. data/gumbo-parser/src/tokenizer.c +3464 -0
  84. data/gumbo-parser/src/tokenizer.h +112 -0
  85. data/gumbo-parser/src/tokenizer_states.h +339 -0
  86. data/gumbo-parser/src/utf8.c +245 -0
  87. data/gumbo-parser/src/utf8.h +164 -0
  88. data/gumbo-parser/src/util.c +66 -0
  89. data/gumbo-parser/src/util.h +34 -0
  90. data/gumbo-parser/src/vector.c +111 -0
  91. data/gumbo-parser/src/vector.h +45 -0
  92. data/lib/nokogiri/class_resolver.rb +67 -0
  93. data/lib/nokogiri/css/node.rb +14 -8
  94. data/lib/nokogiri/css/parser.rb +399 -377
  95. data/lib/nokogiri/css/parser.y +250 -245
  96. data/lib/nokogiri/css/parser_extras.rb +16 -71
  97. data/lib/nokogiri/css/selector_cache.rb +38 -0
  98. data/lib/nokogiri/css/syntax_error.rb +3 -1
  99. data/lib/nokogiri/css/tokenizer.rb +7 -5
  100. data/lib/nokogiri/css/tokenizer.rex +11 -9
  101. data/lib/nokogiri/css/xpath_visitor.rb +242 -96
  102. data/lib/nokogiri/css.rb +122 -17
  103. data/lib/nokogiri/decorators/slop.rb +11 -11
  104. data/lib/nokogiri/encoding_handler.rb +57 -0
  105. data/lib/nokogiri/extension.rb +32 -0
  106. data/lib/nokogiri/gumbo.rb +15 -0
  107. data/lib/nokogiri/html.rb +38 -27
  108. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  109. data/lib/nokogiri/html4/document.rb +235 -0
  110. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  111. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  112. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  113. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  114. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  115. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  116. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  117. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  118. data/lib/nokogiri/html4.rb +42 -0
  119. data/lib/nokogiri/html5/builder.rb +40 -0
  120. data/lib/nokogiri/html5/document.rb +199 -0
  121. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  122. data/lib/nokogiri/html5/node.rb +103 -0
  123. data/lib/nokogiri/html5.rb +368 -0
  124. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  125. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  126. data/lib/nokogiri/syntax_error.rb +2 -0
  127. data/lib/nokogiri/version/constant.rb +6 -0
  128. data/lib/nokogiri/version/info.rb +224 -0
  129. data/lib/nokogiri/version.rb +3 -108
  130. data/lib/nokogiri/xml/attr.rb +55 -3
  131. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  132. data/lib/nokogiri/xml/builder.rb +83 -35
  133. data/lib/nokogiri/xml/cdata.rb +3 -1
  134. data/lib/nokogiri/xml/character_data.rb +2 -0
  135. data/lib/nokogiri/xml/document.rb +359 -130
  136. data/lib/nokogiri/xml/document_fragment.rb +170 -54
  137. data/lib/nokogiri/xml/dtd.rb +4 -2
  138. data/lib/nokogiri/xml/element_content.rb +12 -2
  139. data/lib/nokogiri/xml/element_decl.rb +6 -2
  140. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  141. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  142. data/lib/nokogiri/xml/namespace.rb +44 -0
  143. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  144. data/lib/nokogiri/xml/node.rb +1168 -420
  145. data/lib/nokogiri/xml/node_set.rb +145 -67
  146. data/lib/nokogiri/xml/notation.rb +13 -0
  147. data/lib/nokogiri/xml/parse_options.rb +145 -52
  148. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  149. data/lib/nokogiri/xml/pp/node.rb +47 -30
  150. data/lib/nokogiri/xml/pp.rb +4 -2
  151. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  152. data/lib/nokogiri/xml/reader.rb +68 -41
  153. data/lib/nokogiri/xml/relax_ng.rb +60 -17
  154. data/lib/nokogiri/xml/sax/document.rb +198 -111
  155. data/lib/nokogiri/xml/sax/parser.rb +144 -67
  156. data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
  157. data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
  158. data/lib/nokogiri/xml/sax.rb +54 -4
  159. data/lib/nokogiri/xml/schema.rb +116 -39
  160. data/lib/nokogiri/xml/searchable.rb +139 -95
  161. data/lib/nokogiri/xml/syntax_error.rb +29 -5
  162. data/lib/nokogiri/xml/text.rb +2 -0
  163. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  164. data/lib/nokogiri/xml/xpath.rb +15 -4
  165. data/lib/nokogiri/xml/xpath_context.rb +15 -4
  166. data/lib/nokogiri/xml.rb +45 -55
  167. data/lib/nokogiri/xslt/stylesheet.rb +32 -8
  168. data/lib/nokogiri/xslt.rb +103 -30
  169. data/lib/nokogiri.rb +59 -75
  170. data/lib/xsd/xmlparser/nokogiri.rb +32 -29
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  175. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  176. data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
  177. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  178. metadata +123 -295
  179. data/ext/nokogiri/html_document.c +0 -170
  180. data/ext/nokogiri/html_document.h +0 -10
  181. data/ext/nokogiri/html_element_description.c +0 -279
  182. data/ext/nokogiri/html_element_description.h +0 -10
  183. data/ext/nokogiri/html_entity_lookup.c +0 -32
  184. data/ext/nokogiri/html_entity_lookup.h +0 -8
  185. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  186. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  187. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  188. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  189. data/ext/nokogiri/xml_attr.h +0 -9
  190. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  191. data/ext/nokogiri/xml_cdata.h +0 -9
  192. data/ext/nokogiri/xml_comment.h +0 -9
  193. data/ext/nokogiri/xml_document.h +0 -23
  194. data/ext/nokogiri/xml_document_fragment.h +0 -10
  195. data/ext/nokogiri/xml_dtd.h +0 -10
  196. data/ext/nokogiri/xml_element_content.h +0 -10
  197. data/ext/nokogiri/xml_element_decl.h +0 -9
  198. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  199. data/ext/nokogiri/xml_entity_decl.h +0 -10
  200. data/ext/nokogiri/xml_entity_reference.h +0 -9
  201. data/ext/nokogiri/xml_io.c +0 -61
  202. data/ext/nokogiri/xml_io.h +0 -11
  203. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  204. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  205. data/ext/nokogiri/xml_namespace.h +0 -14
  206. data/ext/nokogiri/xml_node.h +0 -13
  207. data/ext/nokogiri/xml_node_set.h +0 -12
  208. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  209. data/ext/nokogiri/xml_reader.h +0 -10
  210. data/ext/nokogiri/xml_relax_ng.h +0 -9
  211. data/ext/nokogiri/xml_sax_parser.h +0 -39
  212. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  213. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  214. data/ext/nokogiri/xml_schema.h +0 -9
  215. data/ext/nokogiri/xml_syntax_error.h +0 -13
  216. data/ext/nokogiri/xml_text.h +0 -9
  217. data/ext/nokogiri/xml_xpath_context.h +0 -10
  218. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  219. data/lib/nokogiri/html/document.rb +0 -335
  220. data/lib/nokogiri/html/document_fragment.rb +0 -49
  221. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  222. data/lib/nokogiri/html/sax/parser.rb +0 -62
  223. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  224. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  225. data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
  226. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  227. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  228. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  229. /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  230. /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XSLT
3
5
  ###
@@ -8,16 +10,38 @@ module Nokogiri
8
10
  # doc = Nokogiri::XML(File.read('some_file.xml'))
9
11
  # xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
10
12
  #
11
- # puts xslt.transform(doc)
13
+ # xslt.transform(doc) # => Nokogiri::XML::Document
12
14
  #
13
- # See Nokogiri::XSLT::Stylesheet#transform for more transformation
14
- # information.
15
+ # Many XSLT transformations include serialization behavior to emit a non-XML document. For these
16
+ # cases, please take care to invoke the #serialize method on the result of the transformation:
17
+ #
18
+ # doc = Nokogiri::XML(File.read('some_file.xml'))
19
+ # xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
20
+ # xslt.serialize(xslt.transform(doc)) # => String
21
+ #
22
+ # or use the #apply_to method, which is a shortcut for `serialize(transform(document))`:
23
+ #
24
+ # doc = Nokogiri::XML(File.read('some_file.xml'))
25
+ # xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
26
+ # xslt.apply_to(doc) # => String
27
+ #
28
+ # See Nokogiri::XSLT::Stylesheet#transform for more information and examples.
15
29
  class Stylesheet
16
- ###
17
- # Apply an XSLT stylesheet to an XML::Document.
18
- # +params+ is an array of strings used as XSLT parameters.
19
- # returns serialized document
20
- def apply_to document, params = []
30
+ # :call-seq:
31
+ # apply_to(document, params = []) -> String
32
+ #
33
+ # Apply an XSLT stylesheet to an XML::Document and serialize it properly. This method is
34
+ # equivalent to calling #serialize on the result of #transform.
35
+ #
36
+ # [Parameters]
37
+ # - +document+ is an instance of XML::Document to transform
38
+ # - +params+ is an array of strings used as XSLT parameters, passed into #transform
39
+ #
40
+ # [Returns]
41
+ # A string containing the serialized result of the transformation.
42
+ #
43
+ # See Nokogiri::XSLT::Stylesheet#transform for more information and examples.
44
+ def apply_to(document, params = [])
21
45
  serialize(transform(document, params))
22
46
  end
23
47
  end
data/lib/nokogiri/xslt.rb CHANGED
@@ -1,16 +1,11 @@
1
- require 'nokogiri/xslt/stylesheet'
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  module Nokogiri
4
5
  class << self
5
- ###
6
- # Create a Nokogiri::XSLT::Stylesheet with +stylesheet+.
7
- #
8
- # Example:
9
- #
10
- # xslt = Nokogiri::XSLT(File.read(ARGV[0]))
11
- #
12
- def XSLT stylesheet, modules = {}
13
- XSLT.parse(stylesheet, modules)
6
+ # Convenience method for Nokogiri::XSLT.parse
7
+ def XSLT(...)
8
+ XSLT.parse(...)
14
9
  end
15
10
  end
16
11
 
@@ -19,38 +14,116 @@ module Nokogiri
19
14
  # Stylesheet object.
20
15
  module XSLT
21
16
  class << self
22
- ###
23
- # Parse the stylesheet in +string+, register any +modules+
24
- def parse string, modules = {}
17
+ # :call-seq:
18
+ # parse(xsl) Nokogiri::XSLT::Stylesheet
19
+ # parse(xsl, modules) Nokogiri::XSLT::Stylesheet
20
+ #
21
+ # Parse the stylesheet in +xsl+, registering optional +modules+ as custom class handlers.
22
+ #
23
+ # [Parameters]
24
+ # - +xsl+ (String) XSL content to be parsed into a stylesheet
25
+ # - +modules+ (Hash<String ⇒ Class>) A hash of URI-to-handler relations for linking a
26
+ # namespace to a custom function handler.
27
+ #
28
+ # ⚠ The XSLT handler classes are registered *globally*.
29
+ #
30
+ # Also see Nokogiri::XSLT.register
31
+ #
32
+ # *Example*
33
+ #
34
+ # xml = Nokogiri.XML(<<~XML)
35
+ # <nodes>
36
+ # <node>Foo</node>
37
+ # <node>Bar</node>
38
+ # </nodes>
39
+ # XML
40
+ #
41
+ # handler = Class.new do
42
+ # def reverse(node)
43
+ # node.text.reverse
44
+ # end
45
+ # end
46
+ #
47
+ # xsl = <<~XSL
48
+ # <xsl:stylesheet version="1.0"
49
+ # xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
50
+ # xmlns:myfuncs="http://nokogiri.org/xslt/myfuncs"
51
+ # extension-element-prefixes="myfuncs">
52
+ # <xsl:template match="/">
53
+ # <reversed>
54
+ # <xsl:for-each select="nodes/node">
55
+ # <reverse><xsl:copy-of select="myfuncs:reverse(.)"/></reverse>
56
+ # </xsl:for-each>
57
+ # </reversed>
58
+ # </xsl:template>
59
+ # </xsl:stylesheet>
60
+ # XSL
61
+ #
62
+ # xsl = Nokogiri.XSLT(xsl, "http://nokogiri.org/xslt/myfuncs" => handler)
63
+ # xsl.transform(xml).to_xml
64
+ # # => "<?xml version=\"1.0\"?>\n" +
65
+ # # "<reversed>\n" +
66
+ # # " <reverse>ooF</reverse>\n" +
67
+ # # " <reverse>raB</reverse>\n" +
68
+ # # "</reversed>\n"
69
+ #
70
+ def parse(string, modules = {})
25
71
  modules.each do |url, klass|
26
- XSLT.register url, klass
72
+ XSLT.register(url, klass)
27
73
  end
28
74
 
75
+ doc = XML::Document.parse(string, nil, nil, XML::ParseOptions::DEFAULT_XSLT)
29
76
  if Nokogiri.jruby?
30
- Stylesheet.parse_stylesheet_doc(XML.parse(string), string)
77
+ Stylesheet.parse_stylesheet_doc(doc, string)
31
78
  else
32
- Stylesheet.parse_stylesheet_doc(XML.parse(string))
79
+ Stylesheet.parse_stylesheet_doc(doc)
33
80
  end
34
81
  end
35
82
 
36
- ###
37
- # Quote parameters in +params+ for stylesheet safety
38
- def quote_params params
39
- parray = (params.instance_of?(Hash) ? params.to_a.flatten : params).dup
40
- parray.each_with_index do |v,i|
41
- if i % 2 > 0
42
- parray[i]=
43
- if v =~ /'/
44
- "concat('#{ v.gsub(/'/, %q{', "'", '}) }')"
45
- else
46
- "'#{v}'";
47
- end
83
+ # :call-seq:
84
+ # quote_params(params) -> Array
85
+ #
86
+ # Quote parameters in +params+ for stylesheet safety.
87
+ # See Nokogiri::XSLT::Stylesheet.transform for example usage.
88
+ #
89
+ # [Parameters]
90
+ # - +params+ (Hash, Array) XSLT parameters (key->value, or tuples of [key, value])
91
+ #
92
+ # [Returns] Array of string parameters, with quotes correctly escaped for use with XSLT::Stylesheet.transform
93
+ #
94
+ def quote_params(params)
95
+ params.flatten.each_slice(2).with_object([]) do |kv, quoted_params|
96
+ key, value = kv.map(&:to_s)
97
+ value = if value.include?("'")
98
+ "concat('#{value.gsub("'", %q{', "'", '})}')"
48
99
  else
49
- parray[i] = v.to_s
100
+ "'#{value}'"
50
101
  end
102
+ quoted_params << key
103
+ quoted_params << value
51
104
  end
52
- parray.flatten
53
105
  end
106
+
107
+ # call-seq:
108
+ # register(uri, custom_handler_class)
109
+ #
110
+ # Register a class that implements custom XSLT transformation functions.
111
+ #
112
+ # ⚠ The XSLT handler classes are registered *globally*.
113
+ #
114
+ # [Parameters]
115
+ # - +uri+ (String) The namespace for the custom handlers
116
+ # - +custom_handler_class+ (Class) A class with ruby methods that can be called during
117
+ # transformation
118
+ #
119
+ # See Nokogiri::XSLT.parse for usage.
120
+ #
121
+ def register(uri, custom_handler_class)
122
+ # NOTE: this is implemented in the C extension, see ext/nokogiri/xslt_stylesheet.c
123
+ raise NotImplementedError, "Nokogiri::XSLT.register is not implemented on JRuby"
124
+ end if Nokogiri.jruby?
54
125
  end
55
126
  end
56
127
  end
128
+
129
+ require_relative "xslt/stylesheet"
data/lib/nokogiri.rb CHANGED
@@ -1,96 +1,73 @@
1
- # -*- coding: utf-8 -*-
2
- # Modify the PATH on windows so that the external DLLs will get loaded.
3
-
4
- require 'rbconfig'
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
5
3
 
6
4
  if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
7
- # The line below caused a problem on non-GAE rack environment.
8
- # unless defined?(JRuby::Rack::VERSION) || defined?(AppEngine::ApiProxy)
9
- #
10
- # However, simply cutting defined?(JRuby::Rack::VERSION) off resulted in
11
- # an unable-to-load-nokogiri problem. Thus, now, Nokogiri checks the presense
12
- # of appengine-rack.jar in $LOAD_PATH. If Nokogiri is on GAE, Nokogiri
13
- # should skip loading xml jars. This is because those are in WEB-INF/lib and
14
- # already set in the classpath.
15
- unless $LOAD_PATH.to_s.include?("appengine-rack")
16
- require 'stringio'
17
- require 'isorelax.jar'
18
- require 'jing.jar'
19
- require 'nekohtml.jar'
20
- require 'nekodtd.jar'
21
- require 'xercesImpl.jar'
22
- require 'serializer.jar'
23
- require 'xalan.jar'
24
- require 'xml-apis.jar'
25
- end
5
+ require_relative "nokogiri/jruby/dependencies"
26
6
  end
27
7
 
28
- begin
29
- RUBY_VERSION =~ /(\d+\.\d+)/
30
- require "nokogiri/#{$1}/nokogiri"
31
- rescue LoadError
32
- require 'nokogiri/nokogiri'
33
- end
34
- require 'nokogiri/version'
35
- require 'nokogiri/syntax_error'
36
- require 'nokogiri/xml'
37
- require 'nokogiri/xslt'
38
- require 'nokogiri/html'
39
- require 'nokogiri/decorators/slop'
40
- require 'nokogiri/css'
41
- require 'nokogiri/html/builder'
8
+ require_relative "nokogiri/extension"
42
9
 
43
10
  # Nokogiri parses and searches XML/HTML very quickly, and also has
44
11
  # correctly implemented CSS3 selector support as well as XPath 1.0
45
12
  # support.
46
13
  #
47
14
  # Parsing a document returns either a Nokogiri::XML::Document, or a
48
- # Nokogiri::HTML::Document depending on the kind of document you parse.
15
+ # Nokogiri::HTML4::Document depending on the kind of document you parse.
49
16
  #
50
17
  # Here is an example:
51
18
  #
52
- # require 'nokogiri'
53
- # require 'open-uri'
19
+ # require 'nokogiri'
20
+ # require 'open-uri'
21
+ #
22
+ # # Get a Nokogiri::HTML4::Document for the page we’re interested in...
54
23
  #
55
- # # Get a Nokogiri::HTML:Document for the page we’re interested in...
24
+ # doc = Nokogiri::HTML4(URI.open('http://www.google.com/search?q=tenderlove'))
56
25
  #
57
- # doc = Nokogiri::HTML(open('http://www.google.com/search?q=tenderlove'))
26
+ # # Do funky things with it using Nokogiri::XML::Node methods...
58
27
  #
59
- # # Do funky things with it using Nokogiri::XML::Node methods...
28
+ # ####
29
+ # # Search for nodes by css
30
+ # doc.css('h3.r a.l').each do |link|
31
+ # puts link.content
32
+ # end
60
33
  #
61
- # ####
62
- # # Search for nodes by css
63
- # doc.css('h3.r a.l').each do |link|
64
- # puts link.content
65
- # end
34
+ # See also:
66
35
  #
67
- # See Nokogiri::XML::Searchable#css for more information about CSS searching.
68
- # See Nokogiri::XML::Searchable#xpath for more information about XPath searching.
36
+ # - Nokogiri::XML::Searchable#css for more information about CSS searching
37
+ # - Nokogiri::XML::Searchable#xpath for more information about XPath searching
69
38
  module Nokogiri
70
39
  class << self
71
40
  ###
72
41
  # Parse an HTML or XML document. +string+ contains the document.
73
- def parse string, url = nil, encoding = nil, options = nil
42
+ def parse(string, url = nil, encoding = nil, options = nil)
74
43
  if string.respond_to?(:read) ||
75
- /^\s*<(?:!DOCTYPE\s+)?html[\s>]/i === string[0, 512]
44
+ /^\s*<(?:!DOCTYPE\s+)?html[\s>]/i.match?(string[0, 512])
76
45
  # Expect an HTML indicator to appear within the first 512
77
46
  # characters of a document. (<?xml ?> + <?xml-stylesheet ?>
78
47
  # shouldn't be that long)
79
- Nokogiri.HTML(string, url, encoding,
80
- options || XML::ParseOptions::DEFAULT_HTML)
48
+ Nokogiri.HTML4(
49
+ string,
50
+ url,
51
+ encoding,
52
+ options || XML::ParseOptions::DEFAULT_HTML,
53
+ )
81
54
  else
82
- Nokogiri.XML(string, url, encoding,
83
- options || XML::ParseOptions::DEFAULT_XML)
84
- end.tap { |doc|
55
+ Nokogiri.XML(
56
+ string,
57
+ url,
58
+ encoding,
59
+ options || XML::ParseOptions::DEFAULT_XML,
60
+ )
61
+ end.tap do |doc|
85
62
  yield doc if block_given?
86
- }
63
+ end
87
64
  end
88
65
 
89
66
  ###
90
67
  # Create a new Nokogiri::XML::DocumentFragment
91
- def make input = nil, opts = {}, &blk
68
+ def make(input = nil, opts = {}, &blk)
92
69
  if input
93
- Nokogiri::HTML.fragment(input).children.first
70
+ Nokogiri::HTML4.fragment(input).children.first
94
71
  else
95
72
  Nokogiri(&blk)
96
73
  end
@@ -115,30 +92,37 @@ module Nokogiri
115
92
  Nokogiri(*args, &block).slop!
116
93
  end
117
94
 
95
+ # :nodoc:
118
96
  def install_default_aliases
119
- # Make sure to support some popular encoding aliases not known by
120
- # all iconv implementations.
121
- {
122
- 'Windows-31J' => 'CP932', # Windows-31J is the IANA registered name of CP932.
123
- }.each { |alias_name, name|
124
- EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
125
- }
97
+ warn("Nokogiri.install_default_aliases is deprecated. Please call Nokogiri::EncodingHandler.install_default_aliases instead. This will become an error in Nokogiri v1.17.0.", uplevel: 1, category: :deprecated) # deprecated in v1.14.0, remove in v1.17.0
98
+ Nokogiri::EncodingHandler.install_default_aliases
126
99
  end
127
100
  end
128
-
129
- Nokogiri.install_default_aliases
130
101
  end
131
102
 
132
103
  ###
133
- # Parser a document contained in +args+. Nokogiri will try to guess what
134
- # type of document you are attempting to parse. For more information, see
135
- # Nokogiri.parse
104
+ # Parse a document contained in +args+. Nokogiri will try to guess what type of document you are
105
+ # attempting to parse. For more information, see Nokogiri.parse
136
106
  #
137
- # To specify the type of document, use Nokogiri.XML or Nokogiri.HTML.
107
+ # To specify the type of document, use {Nokogiri.XML}, {Nokogiri.HTML4}, or {Nokogiri.HTML5}.
138
108
  def Nokogiri(*args, &block)
139
- if block_given?
140
- Nokogiri::HTML::Builder.new(&block).doc.root
109
+ if block
110
+ Nokogiri::HTML4::Builder.new(&block).doc.root
141
111
  else
142
112
  Nokogiri.parse(*args)
143
113
  end
144
114
  end
115
+
116
+ require_relative "nokogiri/version"
117
+ require_relative "nokogiri/class_resolver"
118
+ require_relative "nokogiri/syntax_error"
119
+ require_relative "nokogiri/xml"
120
+ require_relative "nokogiri/xslt"
121
+ require_relative "nokogiri/html4"
122
+ require_relative "nokogiri/html"
123
+ require_relative "nokogiri/decorators/slop"
124
+ require_relative "nokogiri/css"
125
+ require_relative "nokogiri/html4/builder"
126
+ require_relative "nokogiri/encoding_handler"
127
+
128
+ require_relative "nokogiri/html5" if Nokogiri.uses_gumbo?
@@ -1,14 +1,15 @@
1
- require 'nokogiri'
1
+ # frozen_string_literal: true
2
2
 
3
- module XSD # :nodoc:
4
- module XMLParser # :nodoc:
3
+ require "nokogiri"
4
+
5
+ module XSD
6
+ module XMLParser
5
7
  ###
6
8
  # Nokogiri XML parser for soap4r.
7
9
  #
8
- # Nokogiri may be used as the XML parser in soap4r. Simply require
9
- # 'xsd/xmlparser/nokogiri' in your soap4r applications, and soap4r
10
- # will use Nokogiri as it's XML parser. No other changes should be
11
- # required to use Nokogiri as the XML parser.
10
+ # Nokogiri may be used as the XML parser in soap4r. Require 'xsd/xmlparser/nokogiri' in your
11
+ # soap4r applications, and soap4r will use Nokogiri as its XML parser. No other changes should
12
+ # be required to use Nokogiri as the XML parser.
12
13
  #
13
14
  # Example (using UW ITS Web Services):
14
15
  #
@@ -26,40 +27,40 @@ module XSD # :nodoc:
26
27
  class Nokogiri < XSD::XMLParser::Parser
27
28
  ###
28
29
  # Create a new XSD parser with +host+ and +opt+
29
- def initialize host, opt = {}
30
+ def initialize(host, opt = {})
30
31
  super
31
- @parser = ::Nokogiri::XML::SAX::Parser.new(self, @charset || 'UTF-8')
32
+ @parser = ::Nokogiri::XML::SAX::Parser.new(self, @charset || "UTF-8")
32
33
  end
33
34
 
34
35
  ###
35
36
  # Start parsing +string_or_readable+
36
- def do_parse string_or_readable
37
+ def do_parse(string_or_readable)
37
38
  @parser.parse(string_or_readable)
38
39
  end
39
40
 
40
41
  ###
41
42
  # Handle the start_element event with +name+ and +attrs+
42
- def start_element name, attrs = []
43
+ def start_element(name, attrs = [])
43
44
  super(name, Hash[*attrs.flatten])
44
45
  end
45
46
 
46
47
  ###
47
48
  # Handle the end_element event with +name+
48
- def end_element name
49
+ def end_element(name)
49
50
  super
50
51
  end
51
52
 
52
53
  ###
53
54
  # Handle errors with message +msg+
54
- def error msg
55
- raise ParseError.new(msg)
55
+ def error(msg)
56
+ raise ParseError, msg
56
57
  end
57
- alias :warning :error
58
+ alias_method :warning, :error
58
59
 
59
60
  ###
60
61
  # Handle cdata_blocks containing +string+
61
- def cdata_block string
62
- characters string
62
+ def cdata_block(string)
63
+ characters(string)
63
64
  end
64
65
 
65
66
  ###
@@ -69,16 +70,16 @@ module XSD # :nodoc:
69
70
  # +prefix+ is the namespace prefix for the element
70
71
  # +uri+ is the associated namespace URI
71
72
  # +ns+ is a hash of namespace prefix:urls associated with the element
72
- def start_element_namespace name, attrs = [], prefix = nil, uri = nil, ns = []
73
+ def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) # rubocop:disable Metrics/ParameterLists
73
74
  ###
74
75
  # Deal with SAX v1 interface
75
- name = [prefix, name].compact.join(':')
76
- attributes = ns.map { |ns_prefix,ns_uri|
77
- [['xmlns', ns_prefix].compact.join(':'), ns_uri]
78
- } + attrs.map { |attr|
79
- [[attr.prefix, attr.localname].compact.join(':'), attr.value]
80
- }.flatten
81
- start_element name, attributes
76
+ name = [prefix, name].compact.join(":")
77
+ attributes = ns.map do |ns_prefix, ns_uri|
78
+ [["xmlns", ns_prefix].compact.join(":"), ns_uri]
79
+ end + attrs.map do |attr|
80
+ [[attr.prefix, attr.localname].compact.join(":"), attr.value]
81
+ end.flatten
82
+ start_element(name, attributes)
82
83
  end
83
84
 
84
85
  ###
@@ -86,14 +87,16 @@ module XSD # :nodoc:
86
87
  # +name+ is the element's name
87
88
  # +prefix+ is the namespace prefix associated with the element
88
89
  # +uri+ is the associated namespace URI
89
- def end_element_namespace name, prefix = nil, uri = nil
90
+ def end_element_namespace(name, prefix = nil, uri = nil)
90
91
  ###
91
92
  # Deal with SAX v1 interface
92
- end_element [prefix, name].compact.join(':')
93
+ end_element([prefix, name].compact.join(":"))
93
94
  end
94
95
 
95
- %w{ xmldecl start_document end_document comment }.each do |name|
96
- class_eval %{ def #{name}(*args); end }
96
+ ["xmldecl", "start_document", "end_document", "comment"].each do |name|
97
+ class_eval <<~RUBY, __FILE__, __LINE__ + 1
98
+ def #{name}(*args); end
99
+ RUBY
97
100
  end
98
101
 
99
102
  add_factory(self)
@@ -0,0 +1,77 @@
1
+ From 74c95ec5932c737d4fcb06b8646b0017364ada14 Mon Sep 17 00:00:00 2001
2
+ From: Mike Dalessio <mike.dalessio@gmail.com>
3
+ Date: Fri, 24 Dec 2021 19:08:01 -0500
4
+ Subject: [PATCH] attempt to hack in wildcard namespaces to xpath
5
+
6
+ I'm not confident this is a bulletproof patch.
7
+ ---
8
+ xpath.c | 24 ++++++++++++++++++------
9
+ 1 file changed, 18 insertions(+), 6 deletions(-)
10
+
11
+ diff --git a/xpath.c b/xpath.c
12
+ index 1aa2f1a..c7f0885 100644
13
+ --- a/xpath.c
14
+ +++ b/xpath.c
15
+ @@ -146,6 +146,9 @@
16
+ #define XPATH_MAX_RECURSION_DEPTH 5000
17
+ #endif
18
+
19
+ +#define WILDCARD_PREFIX "*"
20
+ +#define IS_WILDCARD_PREFIX(p) xmlStrEqual((xmlChar*)WILDCARD_PREFIX, p)
21
+ +
22
+ /*
23
+ * TODO:
24
+ * There are a few spots where some tests are done which depend upon ascii
25
+ @@ -11073,12 +11076,15 @@ xmlXPathCompNodeTest(xmlXPathParserContextPtr ctxt, xmlXPathTestVal *test,
26
+ SKIP_BLANKS;
27
+
28
+ if ((name == NULL) && (CUR == '*')) {
29
+ - /*
30
+ - * All elements
31
+ - */
32
+ NEXT;
33
+ - *test = NODE_TEST_ALL;
34
+ - return(NULL);
35
+ + if (CUR != ':') {
36
+ + /*
37
+ + * All elements
38
+ + */
39
+ + *test = NODE_TEST_ALL;
40
+ + return(NULL);
41
+ + }
42
+ + name = xmlCharStrdup(WILDCARD_PREFIX);
43
+ }
44
+
45
+ if (name == NULL)
46
+ @@ -11327,6 +11333,10 @@ xmlXPathCompStep(xmlXPathParserContextPtr ctxt) {
47
+ }
48
+ #endif
49
+ if (CUR == '*') {
50
+ + if (NXT(1) == ':') {
51
+ + NEXT;
52
+ + name = xmlCharStrdup(WILDCARD_PREFIX);
53
+ + }
54
+ axis = AXIS_CHILD;
55
+ } else {
56
+ if (name == NULL)
57
+ @@ -12030,7 +12040,7 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt,
58
+ /*
59
+ * Setup namespaces.
60
+ */
61
+ - if (prefix != NULL) {
62
+ + if (prefix != NULL && !IS_WILDCARD_PREFIX(prefix)) {
63
+ URI = xmlXPathNsLookup(xpctxt, prefix);
64
+ if (URI == NULL) {
65
+ xmlXPathReleaseObject(xpctxt, obj);
66
+ @@ -12369,6 +12379,8 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt,
67
+ {
68
+ XP_TEST_HIT
69
+ }
70
+ + } else if (IS_WILDCARD_PREFIX(prefix)) {
71
+ + XP_TEST_HIT
72
+ } else {
73
+ if ((cur->ns != NULL) &&
74
+ (xmlStrEqual(URI, cur->ns->href)))
75
+ --
76
+ 2.31.0
77
+