nokogiri 1.10.9 → 1.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (230) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +1632 -1022
  4. data/LICENSE.md +1 -1
  5. data/README.md +190 -95
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +34 -66
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +909 -422
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  18. data/ext/nokogiri/nokogiri.c +258 -105
  19. data/ext/nokogiri/nokogiri.h +207 -90
  20. data/ext/nokogiri/test_global_handlers.c +40 -0
  21. data/ext/nokogiri/xml_attr.c +18 -18
  22. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  23. data/ext/nokogiri/xml_cdata.c +33 -33
  24. data/ext/nokogiri/xml_comment.c +19 -31
  25. data/ext/nokogiri/xml_document.c +499 -323
  26. data/ext/nokogiri/xml_document_fragment.c +17 -36
  27. data/ext/nokogiri/xml_dtd.c +65 -59
  28. data/ext/nokogiri/xml_element_content.c +63 -55
  29. data/ext/nokogiri/xml_element_decl.c +31 -31
  30. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  31. data/ext/nokogiri/xml_entity_decl.c +37 -35
  32. data/ext/nokogiri/xml_entity_reference.c +17 -19
  33. data/ext/nokogiri/xml_namespace.c +131 -61
  34. data/ext/nokogiri/xml_node.c +1429 -723
  35. data/ext/nokogiri/xml_node_set.c +257 -225
  36. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  37. data/ext/nokogiri/xml_reader.c +340 -231
  38. data/ext/nokogiri/xml_relax_ng.c +87 -99
  39. data/ext/nokogiri/xml_sax_parser.c +269 -176
  40. data/ext/nokogiri/xml_sax_parser_context.c +286 -152
  41. data/ext/nokogiri/xml_sax_push_parser.c +111 -64
  42. data/ext/nokogiri/xml_schema.c +132 -140
  43. data/ext/nokogiri/xml_syntax_error.c +52 -23
  44. data/ext/nokogiri/xml_text.c +37 -30
  45. data/ext/nokogiri/xml_xpath_context.c +373 -185
  46. data/ext/nokogiri/xslt_stylesheet.c +342 -191
  47. data/gumbo-parser/CHANGES.md +63 -0
  48. data/gumbo-parser/Makefile +129 -0
  49. data/gumbo-parser/THANKS +27 -0
  50. data/gumbo-parser/src/Makefile +34 -0
  51. data/gumbo-parser/src/README.md +41 -0
  52. data/gumbo-parser/src/ascii.c +75 -0
  53. data/gumbo-parser/src/ascii.h +115 -0
  54. data/gumbo-parser/src/attribute.c +42 -0
  55. data/gumbo-parser/src/attribute.h +17 -0
  56. data/gumbo-parser/src/char_ref.c +22225 -0
  57. data/gumbo-parser/src/char_ref.h +29 -0
  58. data/gumbo-parser/src/char_ref.rl +2154 -0
  59. data/gumbo-parser/src/error.c +658 -0
  60. data/gumbo-parser/src/error.h +152 -0
  61. data/gumbo-parser/src/foreign_attrs.c +103 -0
  62. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
  66. data/gumbo-parser/src/parser.c +4932 -0
  67. data/gumbo-parser/src/parser.h +41 -0
  68. data/gumbo-parser/src/replacement.h +33 -0
  69. data/gumbo-parser/src/string_buffer.c +103 -0
  70. data/gumbo-parser/src/string_buffer.h +68 -0
  71. data/gumbo-parser/src/string_piece.c +48 -0
  72. data/gumbo-parser/src/svg_attrs.c +174 -0
  73. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  74. data/gumbo-parser/src/svg_tags.c +137 -0
  75. data/gumbo-parser/src/svg_tags.gperf +55 -0
  76. data/gumbo-parser/src/tag.c +223 -0
  77. data/gumbo-parser/src/tag_lookup.c +382 -0
  78. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  79. data/gumbo-parser/src/tag_lookup.h +13 -0
  80. data/gumbo-parser/src/token_buffer.c +79 -0
  81. data/gumbo-parser/src/token_buffer.h +71 -0
  82. data/gumbo-parser/src/token_type.h +17 -0
  83. data/gumbo-parser/src/tokenizer.c +3464 -0
  84. data/gumbo-parser/src/tokenizer.h +112 -0
  85. data/gumbo-parser/src/tokenizer_states.h +339 -0
  86. data/gumbo-parser/src/utf8.c +245 -0
  87. data/gumbo-parser/src/utf8.h +164 -0
  88. data/gumbo-parser/src/util.c +66 -0
  89. data/gumbo-parser/src/util.h +34 -0
  90. data/gumbo-parser/src/vector.c +111 -0
  91. data/gumbo-parser/src/vector.h +45 -0
  92. data/lib/nokogiri/class_resolver.rb +67 -0
  93. data/lib/nokogiri/css/node.rb +14 -8
  94. data/lib/nokogiri/css/parser.rb +399 -377
  95. data/lib/nokogiri/css/parser.y +250 -245
  96. data/lib/nokogiri/css/parser_extras.rb +16 -71
  97. data/lib/nokogiri/css/selector_cache.rb +38 -0
  98. data/lib/nokogiri/css/syntax_error.rb +3 -1
  99. data/lib/nokogiri/css/tokenizer.rb +7 -5
  100. data/lib/nokogiri/css/tokenizer.rex +11 -9
  101. data/lib/nokogiri/css/xpath_visitor.rb +242 -96
  102. data/lib/nokogiri/css.rb +122 -17
  103. data/lib/nokogiri/decorators/slop.rb +11 -11
  104. data/lib/nokogiri/encoding_handler.rb +57 -0
  105. data/lib/nokogiri/extension.rb +32 -0
  106. data/lib/nokogiri/gumbo.rb +15 -0
  107. data/lib/nokogiri/html.rb +38 -27
  108. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  109. data/lib/nokogiri/html4/document.rb +235 -0
  110. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  111. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  112. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  113. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  114. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  115. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  116. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  117. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  118. data/lib/nokogiri/html4.rb +42 -0
  119. data/lib/nokogiri/html5/builder.rb +40 -0
  120. data/lib/nokogiri/html5/document.rb +199 -0
  121. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  122. data/lib/nokogiri/html5/node.rb +103 -0
  123. data/lib/nokogiri/html5.rb +368 -0
  124. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  125. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  126. data/lib/nokogiri/syntax_error.rb +2 -0
  127. data/lib/nokogiri/version/constant.rb +6 -0
  128. data/lib/nokogiri/version/info.rb +224 -0
  129. data/lib/nokogiri/version.rb +3 -108
  130. data/lib/nokogiri/xml/attr.rb +55 -3
  131. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  132. data/lib/nokogiri/xml/builder.rb +83 -35
  133. data/lib/nokogiri/xml/cdata.rb +3 -1
  134. data/lib/nokogiri/xml/character_data.rb +2 -0
  135. data/lib/nokogiri/xml/document.rb +359 -130
  136. data/lib/nokogiri/xml/document_fragment.rb +170 -54
  137. data/lib/nokogiri/xml/dtd.rb +4 -2
  138. data/lib/nokogiri/xml/element_content.rb +12 -2
  139. data/lib/nokogiri/xml/element_decl.rb +6 -2
  140. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  141. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  142. data/lib/nokogiri/xml/namespace.rb +44 -0
  143. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  144. data/lib/nokogiri/xml/node.rb +1168 -420
  145. data/lib/nokogiri/xml/node_set.rb +145 -67
  146. data/lib/nokogiri/xml/notation.rb +13 -0
  147. data/lib/nokogiri/xml/parse_options.rb +145 -52
  148. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  149. data/lib/nokogiri/xml/pp/node.rb +47 -30
  150. data/lib/nokogiri/xml/pp.rb +4 -2
  151. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  152. data/lib/nokogiri/xml/reader.rb +68 -41
  153. data/lib/nokogiri/xml/relax_ng.rb +60 -17
  154. data/lib/nokogiri/xml/sax/document.rb +198 -111
  155. data/lib/nokogiri/xml/sax/parser.rb +144 -67
  156. data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
  157. data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
  158. data/lib/nokogiri/xml/sax.rb +54 -4
  159. data/lib/nokogiri/xml/schema.rb +116 -39
  160. data/lib/nokogiri/xml/searchable.rb +139 -95
  161. data/lib/nokogiri/xml/syntax_error.rb +29 -5
  162. data/lib/nokogiri/xml/text.rb +2 -0
  163. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  164. data/lib/nokogiri/xml/xpath.rb +15 -4
  165. data/lib/nokogiri/xml/xpath_context.rb +15 -4
  166. data/lib/nokogiri/xml.rb +45 -55
  167. data/lib/nokogiri/xslt/stylesheet.rb +32 -8
  168. data/lib/nokogiri/xslt.rb +103 -30
  169. data/lib/nokogiri.rb +59 -75
  170. data/lib/xsd/xmlparser/nokogiri.rb +32 -29
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  175. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  176. data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
  177. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  178. metadata +123 -295
  179. data/ext/nokogiri/html_document.c +0 -170
  180. data/ext/nokogiri/html_document.h +0 -10
  181. data/ext/nokogiri/html_element_description.c +0 -279
  182. data/ext/nokogiri/html_element_description.h +0 -10
  183. data/ext/nokogiri/html_entity_lookup.c +0 -32
  184. data/ext/nokogiri/html_entity_lookup.h +0 -8
  185. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  186. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  187. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  188. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  189. data/ext/nokogiri/xml_attr.h +0 -9
  190. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  191. data/ext/nokogiri/xml_cdata.h +0 -9
  192. data/ext/nokogiri/xml_comment.h +0 -9
  193. data/ext/nokogiri/xml_document.h +0 -23
  194. data/ext/nokogiri/xml_document_fragment.h +0 -10
  195. data/ext/nokogiri/xml_dtd.h +0 -10
  196. data/ext/nokogiri/xml_element_content.h +0 -10
  197. data/ext/nokogiri/xml_element_decl.h +0 -9
  198. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  199. data/ext/nokogiri/xml_entity_decl.h +0 -10
  200. data/ext/nokogiri/xml_entity_reference.h +0 -9
  201. data/ext/nokogiri/xml_io.c +0 -61
  202. data/ext/nokogiri/xml_io.h +0 -11
  203. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  204. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  205. data/ext/nokogiri/xml_namespace.h +0 -14
  206. data/ext/nokogiri/xml_node.h +0 -13
  207. data/ext/nokogiri/xml_node_set.h +0 -12
  208. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  209. data/ext/nokogiri/xml_reader.h +0 -10
  210. data/ext/nokogiri/xml_relax_ng.h +0 -9
  211. data/ext/nokogiri/xml_sax_parser.h +0 -39
  212. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  213. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  214. data/ext/nokogiri/xml_schema.h +0 -9
  215. data/ext/nokogiri/xml_syntax_error.h +0 -13
  216. data/ext/nokogiri/xml_text.h +0 -9
  217. data/ext/nokogiri/xml_xpath_context.h +0 -10
  218. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  219. data/lib/nokogiri/html/document.rb +0 -335
  220. data/lib/nokogiri/html/document_fragment.rb +0 -49
  221. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  222. data/lib/nokogiri/html/sax/parser.rb +0 -62
  223. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  224. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  225. data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
  226. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  227. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  228. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  229. /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  230. /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,19 +1,25 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
1
4
  module Nokogiri
2
5
  module XML
3
6
  #
4
7
  # The Searchable module declares the interface used for searching your DOM.
5
8
  #
6
- # It implements the public methods `search`, `css`, and `xpath`,
9
+ # It implements the public methods #search, #css, and #xpath,
7
10
  # as well as allowing specific implementations to specialize some
8
11
  # of the important behaviors.
9
12
  #
10
13
  module Searchable
11
14
  # Regular expression used by Searchable#search to determine if a query
12
15
  # string is CSS or XPath
13
- LOOKS_LIKE_XPATH = /^(\.\/|\/|\.\.|\.$)/
14
-
16
+ LOOKS_LIKE_XPATH = %r{^(\./|/|\.\.|\.$)}
17
+
18
+ # :section: Searching via XPath or CSS Queries
19
+
15
20
  ###
16
- # call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
21
+ # call-seq:
22
+ # search(*paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class])
17
23
  #
18
24
  # Search this object for +paths+. +paths+ must be one or more XPath or CSS queries:
19
25
  #
@@ -24,52 +30,56 @@ module Nokogiri
24
30
  # node.search('.//bike:tire', {'bike' => 'http://schwinn.com/'})
25
31
  # node.search('bike|tire', {'bike' => 'http://schwinn.com/'})
26
32
  #
27
- # For XPath queries, a hash of variable bindings may also be
28
- # appended to the namespace bindings. For example:
33
+ # For XPath queries, a hash of variable bindings may also be appended to the namespace
34
+ # bindings. For example:
29
35
  #
30
36
  # node.search('.//address[@domestic=$value]', nil, {:value => 'Yes'})
31
37
  #
32
- # Custom XPath functions and CSS pseudo-selectors may also be
33
- # defined. To define custom functions create a class and
34
- # implement the function you want to define. The first argument
35
- # to the method will be the current matching NodeSet. Any other
36
- # arguments are ones that you pass in. Note that this class may
37
- # appear anywhere in the argument list. For example:
38
- #
39
- # node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")'
40
- # Class.new {
41
- # def regex node_set, regex
42
- # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
43
- # end
44
- # }.new
45
- # )
38
+ # 💡 Custom XPath functions and CSS pseudo-selectors may also be defined. To define custom
39
+ # functions create a class and implement the function you want to define, which will be in the
40
+ # `nokogiri` namespace in XPath queries.
41
+ #
42
+ # The first argument to the method will be the current matching NodeSet. Any other arguments
43
+ # are ones that you pass in. Note that this class may appear anywhere in the argument
44
+ # list. For example:
45
+ #
46
+ # handler = Class.new {
47
+ # def regex node_set, regex
48
+ # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
49
+ # end
50
+ # }.new
51
+ # node.search('.//title[nokogiri:regex(., "\w+")]', 'div.employee:regex("[0-9]+")', handler)
46
52
  #
47
53
  # See Searchable#xpath and Searchable#css for further usage help.
48
- def search *args
54
+ def search(*args)
49
55
  paths, handler, ns, binds = extract_params(args)
50
56
 
51
57
  xpaths = paths.map(&:to_s).map do |path|
52
- (path =~ LOOKS_LIKE_XPATH) ? path : xpath_query_from_css_rule(path, ns)
58
+ LOOKS_LIKE_XPATH.match?(path) ? path : xpath_query_from_css_rule(path, ns)
53
59
  end.flatten.uniq
54
60
 
55
61
  xpath(*(xpaths + [ns, handler, binds].compact))
56
62
  end
57
- alias :/ :search
63
+
64
+ alias_method :/, :search
58
65
 
59
66
  ###
60
- # call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
67
+ # call-seq:
68
+ # at(*paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class])
61
69
  #
62
70
  # Search this object for +paths+, and return only the first
63
71
  # result. +paths+ must be one or more XPath or CSS queries.
64
72
  #
65
73
  # See Searchable#search for more information.
66
- def at *args
74
+ def at(*args)
67
75
  search(*args).first
68
76
  end
69
- alias :% :at
77
+
78
+ alias_method :%, :at
70
79
 
71
80
  ###
72
- # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
81
+ # call-seq:
82
+ # css(*rules, [namespace-bindings, custom-pseudo-class])
73
83
  #
74
84
  # Search this object for CSS +rules+. +rules+ must be one or more CSS
75
85
  # selectors. For example:
@@ -82,44 +92,61 @@ module Nokogiri
82
92
  #
83
93
  # node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
84
94
  #
85
- # Custom CSS pseudo classes may also be defined. To define
86
- # custom pseudo classes, create a class and implement the custom
87
- # pseudo class you want defined. The first argument to the
88
- # method will be the current matching NodeSet. Any other
89
- # arguments are ones that you pass in. For example:
95
+ # 💡 Custom CSS pseudo classes may also be defined which are mapped to a custom XPath
96
+ # function. To define custom pseudo classes, create a class and implement the custom pseudo
97
+ # class you want defined. The first argument to the method will be the matching context
98
+ # NodeSet. Any other arguments are ones that you pass in. For example:
90
99
  #
91
- # node.css('title:regex("\w+")', Class.new {
92
- # def regex node_set, regex
100
+ # handler = Class.new {
101
+ # def regex(node_set, regex)
93
102
  # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
94
103
  # end
95
- # }.new)
104
+ # }.new
105
+ # node.css('title:regex("\w+")', handler)
106
+ #
107
+ # 💡 Some XPath syntax is supported in CSS queries. For example, to query for an attribute:
108
+ #
109
+ # node.css('img > @href') # returns all +href+ attributes on an +img+ element
110
+ # node.css('img / @href') # same
111
+ #
112
+ # # ⚠ this returns +class+ attributes from all +div+ elements AND THEIR CHILDREN!
113
+ # node.css('div @class')
114
+ #
115
+ # node.css
116
+ #
117
+ # 💡 Array-like syntax is supported in CSS queries as an alternative to using +:nth-child()+.
96
118
  #
97
- # Note that the CSS query string is case-sensitive with regards
98
- # to your document type. That is, if you're looking for "H1" in
99
- # an HTML document, you'll never find anything, since HTML tags
100
- # will match only lowercase CSS queries. However, "H1" might be
101
- # found in an XML document, where tags names are case-sensitive
102
- # (e.g., "H1" is distinct from "h1").
119
+ # NOTE that indices are 1-based like +:nth-child+ and not 0-based like Ruby Arrays. For
120
+ # example:
103
121
  #
104
- def css *args
122
+ # # equivalent to 'li:nth-child(2)'
123
+ # node.css('li[2]') # retrieve the second li element in a list
124
+ #
125
+ # ⚠ NOTE that the CSS query string is case-sensitive with regard to your document type. HTML
126
+ # tags will match only lowercase CSS queries, so if you search for "H1" in an HTML document,
127
+ # you'll never find anything. However, "H1" might be found in an XML document, where tag
128
+ # names are case-sensitive (e.g., "H1" is distinct from "h1").
129
+ def css(*args)
105
130
  rules, handler, ns, _ = extract_params(args)
106
131
 
107
- css_internal self, rules, handler, ns
132
+ css_internal(self, rules, handler, ns)
108
133
  end
109
134
 
110
135
  ##
111
- # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
136
+ # call-seq:
137
+ # at_css(*rules, [namespace-bindings, custom-pseudo-class])
112
138
  #
113
139
  # Search this object for CSS +rules+, and return only the first
114
140
  # match. +rules+ must be one or more CSS selectors.
115
141
  #
116
142
  # See Searchable#css for more information.
117
- def at_css *args
143
+ def at_css(*args)
118
144
  css(*args).first
119
145
  end
120
146
 
121
147
  ###
122
- # call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
148
+ # call-seq:
149
+ # xpath(*paths, [namespace-bindings, variable-bindings, custom-handler-class])
123
150
  #
124
151
  # Search this node for XPath +paths+. +paths+ must be one or more XPath
125
152
  # queries.
@@ -135,95 +162,112 @@ module Nokogiri
135
162
  #
136
163
  # node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
137
164
  #
138
- # Custom XPath functions may also be defined. To define custom
139
- # functions create a class and implement the function you want
140
- # to define. The first argument to the method will be the
141
- # current matching NodeSet. Any other arguments are ones that
142
- # you pass in. Note that this class may appear anywhere in the
143
- # argument list. For example:
165
+ # 💡 Custom XPath functions may also be defined. To define custom functions create a class and
166
+ # implement the function you want to define, which will be in the `nokogiri` namespace.
144
167
  #
145
- # node.xpath('.//title[regex(., "\w+")]', Class.new {
146
- # def regex node_set, regex
168
+ # The first argument to the method will be the current matching NodeSet. Any other arguments
169
+ # are ones that you pass in. Note that this class may appear anywhere in the argument
170
+ # list. For example:
171
+ #
172
+ # handler = Class.new {
173
+ # def regex(node_set, regex)
147
174
  # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
148
175
  # end
149
- # }.new)
176
+ # }.new
177
+ # node.xpath('.//title[nokogiri:regex(., "\w+")]', handler)
150
178
  #
151
- def xpath *args
179
+ def xpath(*args)
152
180
  paths, handler, ns, binds = extract_params(args)
153
181
 
154
- xpath_internal self, paths, handler, ns, binds
182
+ xpath_internal(self, paths, handler, ns, binds)
155
183
  end
156
184
 
157
185
  ##
158
- # call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
186
+ # call-seq:
187
+ # at_xpath(*paths, [namespace-bindings, variable-bindings, custom-handler-class])
159
188
  #
160
189
  # Search this node for XPath +paths+, and return only the first
161
190
  # match. +paths+ must be one or more XPath queries.
162
191
  #
163
192
  # See Searchable#xpath for more information.
164
- def at_xpath *args
193
+ def at_xpath(*args)
165
194
  xpath(*args).first
166
195
  end
167
196
 
168
- private
169
-
170
- def css_internal node, rules, handler, ns
171
- xpath_internal node, css_rules_to_xpath(rules, ns), handler, ns, nil
197
+ # :call-seq:
198
+ # >(selector) → NodeSet
199
+ #
200
+ # Search this node's immediate children using CSS selector +selector+
201
+ def >(selector) # rubocop:disable Naming/BinaryOperatorParameterName
202
+ ns = document.root&.namespaces || {}
203
+ xpath(CSS.xpath_for(selector, prefix: "./", ns: ns).first)
172
204
  end
173
205
 
174
- def xpath_internal node, paths, handler, ns, binds
175
- document = node.document
176
- return NodeSet.new(document) unless document
206
+ # :section:
177
207
 
178
- if paths.length == 1
179
- return xpath_impl(node, paths.first, handler, ns, binds)
208
+ private
209
+
210
+ def extract_params(params) # :nodoc:
211
+ handler = params.find do |param|
212
+ ![Hash, String, Symbol].include?(param.class)
180
213
  end
214
+ params -= [handler] if handler
181
215
 
182
- NodeSet.new(document) do |combined|
183
- paths.each do |path|
184
- xpath_impl(node, path, handler, ns, binds).each { |set| combined << set }
185
- end
216
+ hashes = []
217
+ while Hash === params.last || params.last.nil?
218
+ hashes << params.pop
219
+ break if params.empty?
186
220
  end
187
- end
221
+ ns, binds = hashes.reverse
188
222
 
189
- def xpath_impl node, path, handler, ns, binds
190
- ctx = XPathContext.new(node)
191
- ctx.register_namespaces(ns)
192
- path = path.gsub(/xmlns:/, ' :') unless Nokogiri.uses_libxml?
223
+ ns ||= document.root&.namespaces || {}
193
224
 
194
- binds.each do |key,value|
195
- ctx.register_variable key.to_s, value
196
- end if binds
225
+ [params, handler, ns, binds]
226
+ end
197
227
 
198
- ctx.evaluate(path, handler)
228
+ def css_internal(node, rules, handler, ns)
229
+ xpath_internal(node, css_rules_to_xpath(rules, ns), handler, ns, nil)
199
230
  end
200
231
 
201
232
  def css_rules_to_xpath(rules, ns)
202
233
  rules.map { |rule| xpath_query_from_css_rule(rule, ns) }
203
234
  end
204
235
 
205
- def xpath_query_from_css_rule rule, ns
236
+ def xpath_query_from_css_rule(rule, ns)
206
237
  self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
207
- CSS.xpath_for(rule.to_s, :prefix => implied_xpath_context, :ns => ns)
208
- end.join(' | ')
238
+ visitor = Nokogiri::CSS::XPathVisitor.new(
239
+ builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::OPTIMAL,
240
+ doctype: document.xpath_doctype,
241
+ prefix: implied_xpath_context,
242
+ namespaces: ns,
243
+ )
244
+ CSS.xpath_for(rule.to_s, visitor: visitor)
245
+ end.join(" | ")
209
246
  end
210
247
 
211
- def extract_params params # :nodoc:
212
- handler = params.find do |param|
213
- ![Hash, String, Symbol].include?(param.class)
248
+ def xpath_internal(node, paths, handler, ns, binds)
249
+ document = node.document
250
+ return NodeSet.new(document) unless document
251
+
252
+ if paths.length == 1
253
+ return xpath_impl(node, paths.first, handler, ns, binds)
214
254
  end
215
- params -= [handler] if handler
216
255
 
217
- hashes = []
218
- while Hash === params.last || params.last.nil?
219
- hashes << params.pop
220
- break if params.empty?
256
+ NodeSet.new(document) do |combined|
257
+ paths.each do |path|
258
+ xpath_impl(node, path, handler, ns, binds).each { |set| combined << set }
259
+ end
221
260
  end
222
- ns, binds = hashes.reverse
261
+ end
223
262
 
224
- ns ||= document.root ? document.root.namespaces : {}
263
+ def xpath_impl(node, path, handler, ns, binds)
264
+ context = XPathContext.new(node)
265
+ context.register_namespaces(ns)
266
+ context.register_variables(binds)
225
267
 
226
- [params, handler, ns, binds]
268
+ path = path.gsub("xmlns:", " :") unless Nokogiri.uses_libxml?
269
+
270
+ context.evaluate(path, handler)
227
271
  end
228
272
  end
229
273
  end
@@ -1,14 +1,38 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  ###
4
6
  # This class provides information about XML SyntaxErrors. These
5
7
  # exceptions are typically stored on Nokogiri::XML::Document#errors.
6
8
  class SyntaxError < ::Nokogiri::SyntaxError
9
+ class << self
10
+ def aggregate(errors)
11
+ return nil if errors.empty?
12
+ return errors.first if errors.length == 1
13
+
14
+ messages = ["Multiple errors encountered:"]
15
+ errors.each do |error|
16
+ messages << error.to_s
17
+ end
18
+ new(messages.join("\n"))
19
+ end
20
+ end
21
+
7
22
  attr_reader :domain
8
23
  attr_reader :code
9
24
  attr_reader :level
10
25
  attr_reader :file
11
26
  attr_reader :line
27
+
28
+ # The XPath path of the node that caused the error when validating a `Nokogiri::XML::Document`.
29
+ #
30
+ # This attribute will only be non-nil when the error is emitted by `Schema#validate` on
31
+ # Document objects. It will return `nil` for DOM parsing errors and for errors emitted during
32
+ # Schema validation of files.
33
+ #
34
+ # ⚠ `#path` is not supported on JRuby, where it will always return `nil`.
35
+ attr_reader :path
12
36
  attr_reader :str1
13
37
  attr_reader :str2
14
38
  attr_reader :str3
@@ -41,9 +65,9 @@ module Nokogiri
41
65
 
42
66
  def to_s
43
67
  message = super.chomp
44
- [location_to_s, level_to_s, message].
45
- compact.join(": ").
46
- force_encoding(message.encoding)
68
+ [location_to_s, level_to_s, message]
69
+ .compact.join(": ")
70
+ .force_encoding(message.encoding)
47
71
  end
48
72
 
49
73
  private
@@ -53,7 +77,6 @@ module Nokogiri
53
77
  when 3 then "FATAL"
54
78
  when 2 then "ERROR"
55
79
  when 1 then "WARNING"
56
- else nil
57
80
  end
58
81
  end
59
82
 
@@ -62,7 +85,8 @@ module Nokogiri
62
85
  end
63
86
 
64
87
  def location_to_s
65
- return nil if nil_or_zero?(line) && nil_or_zero?(column)
88
+ return if nil_or_zero?(line) && nil_or_zero?(column)
89
+
66
90
  "#{line}:#{column}"
67
91
  end
68
92
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  class Text < Nokogiri::XML::CharacterData
@@ -1,9 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
- class XPath
5
+ module XPath
4
6
  class SyntaxError < XML::SyntaxError
5
7
  def to_s
6
- [super.chomp, str1].compact.join(': ')
8
+ [super.chomp, str1].compact.join(": ")
7
9
  end
8
10
  end
9
11
  end
@@ -1,10 +1,21 @@
1
- require 'nokogiri/xml/xpath/syntax_error'
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Nokogiri
4
4
  module XML
5
- class XPath
6
- # The Nokogiri::XML::Document tied to this XPath instance
7
- attr_accessor :document
5
+ module XPath
6
+ # The XPath search prefix to search globally, +//+
7
+ GLOBAL_SEARCH_PREFIX = "//"
8
+
9
+ # The XPath search prefix to search direct descendants of the root element, +/+
10
+ ROOT_SEARCH_PREFIX = "/"
11
+
12
+ # The XPath search prefix to search direct descendants of the current element, +./+
13
+ CURRENT_SEARCH_PREFIX = "./"
14
+
15
+ # The XPath search prefix to search anywhere in the current element's subtree, +.//+
16
+ SUBTREE_SEARCH_PREFIX = ".//"
8
17
  end
9
18
  end
10
19
  end
20
+
21
+ require_relative "xpath/syntax_error"
@@ -1,16 +1,27 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  class XPathContext
4
-
5
6
  ###
6
7
  # Register namespaces in +namespaces+
7
8
  def register_namespaces(namespaces)
8
- namespaces.each do |k, v|
9
- k = k.to_s.gsub(/.*:/,'') # strip off 'xmlns:' or 'xml:'
10
- register_ns(k, v)
9
+ namespaces.each do |key, value|
10
+ key = key.to_s.gsub(/.*:/, "") # strip off 'xmlns:' or 'xml:'
11
+
12
+ register_ns(key, value)
11
13
  end
12
14
  end
13
15
 
16
+ def register_variables(binds)
17
+ return if binds.nil?
18
+
19
+ binds.each do |key, value|
20
+ key = key.to_s
21
+
22
+ register_variable(key, value)
23
+ end
24
+ end
14
25
  end
15
26
  end
16
27
  end
data/lib/nokogiri/xml.rb CHANGED
@@ -1,75 +1,65 @@
1
- require 'nokogiri/xml/pp'
2
- require 'nokogiri/xml/parse_options'
3
- require 'nokogiri/xml/sax'
4
- require 'nokogiri/xml/searchable'
5
- require 'nokogiri/xml/node'
6
- require 'nokogiri/xml/attribute_decl'
7
- require 'nokogiri/xml/element_decl'
8
- require 'nokogiri/xml/element_content'
9
- require 'nokogiri/xml/character_data'
10
- require 'nokogiri/xml/namespace'
11
- require 'nokogiri/xml/attr'
12
- require 'nokogiri/xml/dtd'
13
- require 'nokogiri/xml/cdata'
14
- require 'nokogiri/xml/text'
15
- require 'nokogiri/xml/document'
16
- require 'nokogiri/xml/document_fragment'
17
- require 'nokogiri/xml/processing_instruction'
18
- require 'nokogiri/xml/node_set'
19
- require 'nokogiri/xml/syntax_error'
20
- require 'nokogiri/xml/xpath'
21
- require 'nokogiri/xml/xpath_context'
22
- require 'nokogiri/xml/builder'
23
- require 'nokogiri/xml/reader'
24
- require 'nokogiri/xml/notation'
25
- require 'nokogiri/xml/entity_decl'
26
- require 'nokogiri/xml/entity_reference'
27
- require 'nokogiri/xml/schema'
28
- require 'nokogiri/xml/relax_ng'
1
+ # frozen_string_literal: true
29
2
 
30
3
  module Nokogiri
31
4
  class << self
32
- ###
33
- # Parse XML. Convenience method for Nokogiri::XML::Document.parse
34
- def XML thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_XML, &block
35
- Nokogiri::XML::Document.parse(thing, url, encoding, options, &block)
5
+ # Convenience method for Nokogiri::XML::Document.parse
6
+ def XML(...)
7
+ Nokogiri::XML::Document.parse(...)
36
8
  end
37
9
  end
38
10
 
39
11
  module XML
40
12
  # Original C14N 1.0 spec canonicalization
41
- XML_C14N_1_0 = 0
13
+ XML_C14N_1_0 = 0
42
14
  # Exclusive C14N 1.0 spec canonicalization
43
- XML_C14N_EXCLUSIVE_1_0 = 1
15
+ XML_C14N_EXCLUSIVE_1_0 = 1
44
16
  # C14N 1.1 spec canonicalization
45
17
  XML_C14N_1_1 = 2
46
- class << self
47
- ###
48
- # Parse an XML document using the Nokogiri::XML::Reader API. See
49
- # Nokogiri::XML::Reader for mor information
50
- def Reader string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT
51
-
52
- options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
53
- # Give the options to the user
54
- yield options if block_given?
55
18
 
56
- if string_or_io.respond_to? :read
57
- return Reader.from_io(string_or_io, url, encoding, options.to_i)
58
- end
59
- Reader.from_memory(string_or_io, url, encoding, options.to_i)
19
+ class << self
20
+ # Convenience method for Nokogiri::XML::Reader.new
21
+ def Reader(...)
22
+ Reader.new(...)
60
23
  end
61
24
 
62
- ###
63
- # Parse XML. Convenience method for Nokogiri::XML::Document.parse
64
- def parse thing, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block
65
- Document.parse(thing, url, encoding, options, &block)
25
+ # Convenience method for Nokogiri::XML::Document.parse
26
+ def parse(...)
27
+ Document.parse(...)
66
28
  end
67
29
 
68
- ####
69
- # Parse a fragment from +string+ in to a NodeSet.
70
- def fragment string
71
- XML::DocumentFragment.parse(string)
30
+ # Convenience method for Nokogiri::XML::DocumentFragment.parse
31
+ def fragment(...)
32
+ XML::DocumentFragment.parse(...)
72
33
  end
73
34
  end
74
35
  end
75
36
  end
37
+
38
+ require_relative "xml/pp"
39
+ require_relative "xml/parse_options"
40
+ require_relative "xml/sax"
41
+ require_relative "xml/searchable"
42
+ require_relative "xml/node"
43
+ require_relative "xml/attribute_decl"
44
+ require_relative "xml/element_decl"
45
+ require_relative "xml/element_content"
46
+ require_relative "xml/character_data"
47
+ require_relative "xml/namespace"
48
+ require_relative "xml/attr"
49
+ require_relative "xml/dtd"
50
+ require_relative "xml/cdata"
51
+ require_relative "xml/text"
52
+ require_relative "xml/document"
53
+ require_relative "xml/document_fragment"
54
+ require_relative "xml/processing_instruction"
55
+ require_relative "xml/node_set"
56
+ require_relative "xml/syntax_error"
57
+ require_relative "xml/xpath"
58
+ require_relative "xml/xpath_context"
59
+ require_relative "xml/builder"
60
+ require_relative "xml/reader"
61
+ require_relative "xml/notation"
62
+ require_relative "xml/entity_decl"
63
+ require_relative "xml/entity_reference"
64
+ require_relative "xml/schema"
65
+ require_relative "xml/relax_ng"