nokogiri 1.12.5 → 1.14.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (156) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +41 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/LICENSE.md +1 -1
  5. data/README.md +23 -14
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -66
  8. data/ext/nokogiri/extconf.rb +159 -63
  9. data/ext/nokogiri/gumbo.c +21 -11
  10. data/ext/nokogiri/html4_document.c +2 -2
  11. data/ext/nokogiri/html4_element_description.c +1 -1
  12. data/ext/nokogiri/html4_entity_lookup.c +2 -2
  13. data/ext/nokogiri/html4_sax_parser_context.c +3 -9
  14. data/ext/nokogiri/html4_sax_push_parser.c +1 -1
  15. data/ext/nokogiri/nokogiri.c +38 -51
  16. data/ext/nokogiri/nokogiri.h +26 -14
  17. data/ext/nokogiri/test_global_handlers.c +1 -1
  18. data/ext/nokogiri/xml_attr.c +3 -3
  19. data/ext/nokogiri/xml_attribute_decl.c +5 -5
  20. data/ext/nokogiri/xml_cdata.c +3 -3
  21. data/ext/nokogiri/xml_comment.c +1 -1
  22. data/ext/nokogiri/xml_document.c +53 -44
  23. data/ext/nokogiri/xml_document_fragment.c +1 -3
  24. data/ext/nokogiri/xml_dtd.c +11 -11
  25. data/ext/nokogiri/xml_element_content.c +3 -3
  26. data/ext/nokogiri/xml_element_decl.c +5 -5
  27. data/ext/nokogiri/xml_encoding_handler.c +28 -14
  28. data/ext/nokogiri/xml_entity_decl.c +6 -6
  29. data/ext/nokogiri/xml_entity_reference.c +1 -1
  30. data/ext/nokogiri/xml_namespace.c +80 -14
  31. data/ext/nokogiri/xml_node.c +982 -396
  32. data/ext/nokogiri/xml_node_set.c +4 -6
  33. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  34. data/ext/nokogiri/xml_reader.c +133 -32
  35. data/ext/nokogiri/xml_relax_ng.c +1 -3
  36. data/ext/nokogiri/xml_sax_parser.c +23 -17
  37. data/ext/nokogiri/xml_sax_parser_context.c +11 -9
  38. data/ext/nokogiri/xml_sax_push_parser.c +1 -3
  39. data/ext/nokogiri/xml_schema.c +4 -6
  40. data/ext/nokogiri/xml_syntax_error.c +1 -1
  41. data/ext/nokogiri/xml_text.c +2 -2
  42. data/ext/nokogiri/xml_xpath_context.c +144 -114
  43. data/ext/nokogiri/xslt_stylesheet.c +122 -23
  44. data/gumbo-parser/Makefile +10 -0
  45. data/gumbo-parser/src/attribute.h +1 -1
  46. data/gumbo-parser/src/error.c +2 -2
  47. data/gumbo-parser/src/error.h +1 -1
  48. data/gumbo-parser/src/foreign_attrs.c +2 -2
  49. data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
  50. data/gumbo-parser/src/parser.c +8 -16
  51. data/gumbo-parser/src/replacement.h +1 -1
  52. data/gumbo-parser/src/string_buffer.h +1 -1
  53. data/gumbo-parser/src/string_piece.c +1 -1
  54. data/gumbo-parser/src/svg_attrs.c +2 -2
  55. data/gumbo-parser/src/svg_tags.c +2 -2
  56. data/gumbo-parser/src/tag.c +2 -1
  57. data/gumbo-parser/src/tag_lookup.c +7 -7
  58. data/gumbo-parser/src/tag_lookup.gperf +1 -0
  59. data/gumbo-parser/src/tag_lookup.h +1 -1
  60. data/gumbo-parser/src/token_buffer.h +1 -1
  61. data/gumbo-parser/src/tokenizer.c +1 -1
  62. data/gumbo-parser/src/tokenizer.h +1 -1
  63. data/gumbo-parser/src/utf8.c +1 -1
  64. data/gumbo-parser/src/utf8.h +1 -1
  65. data/gumbo-parser/src/util.c +1 -3
  66. data/gumbo-parser/src/util.h +4 -0
  67. data/gumbo-parser/src/vector.h +1 -1
  68. data/lib/nokogiri/class_resolver.rb +67 -0
  69. data/lib/nokogiri/css/node.rb +9 -8
  70. data/lib/nokogiri/css/parser.rb +360 -341
  71. data/lib/nokogiri/css/parser.y +249 -244
  72. data/lib/nokogiri/css/parser_extras.rb +22 -20
  73. data/lib/nokogiri/css/syntax_error.rb +1 -0
  74. data/lib/nokogiri/css/tokenizer.rb +4 -3
  75. data/lib/nokogiri/css/tokenizer.rex +3 -2
  76. data/lib/nokogiri/css/xpath_visitor.rb +184 -85
  77. data/lib/nokogiri/css.rb +44 -6
  78. data/lib/nokogiri/decorators/slop.rb +8 -7
  79. data/lib/nokogiri/encoding_handler.rb +57 -0
  80. data/lib/nokogiri/extension.rb +4 -3
  81. data/lib/nokogiri/gumbo.rb +1 -0
  82. data/lib/nokogiri/html.rb +16 -10
  83. data/lib/nokogiri/html4/builder.rb +1 -0
  84. data/lib/nokogiri/html4/document.rb +56 -164
  85. data/lib/nokogiri/html4/document_fragment.rb +11 -7
  86. data/lib/nokogiri/html4/element_description.rb +1 -0
  87. data/lib/nokogiri/html4/element_description_defaults.rb +432 -532
  88. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  89. data/lib/nokogiri/html4/entity_lookup.rb +2 -1
  90. data/lib/nokogiri/html4/sax/parser.rb +5 -2
  91. data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
  92. data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
  93. data/lib/nokogiri/html4.rb +12 -5
  94. data/lib/nokogiri/html5/document.rb +126 -32
  95. data/lib/nokogiri/html5/document_fragment.rb +14 -4
  96. data/lib/nokogiri/html5/node.rb +12 -7
  97. data/lib/nokogiri/html5.rb +138 -222
  98. data/lib/nokogiri/jruby/dependencies.rb +2 -19
  99. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  100. data/lib/nokogiri/syntax_error.rb +1 -0
  101. data/lib/nokogiri/version/constant.rb +2 -1
  102. data/lib/nokogiri/version/info.rb +32 -24
  103. data/lib/nokogiri/version.rb +1 -0
  104. data/lib/nokogiri/xml/attr.rb +54 -3
  105. data/lib/nokogiri/xml/attribute_decl.rb +2 -1
  106. data/lib/nokogiri/xml/builder.rb +35 -33
  107. data/lib/nokogiri/xml/cdata.rb +2 -1
  108. data/lib/nokogiri/xml/character_data.rb +1 -0
  109. data/lib/nokogiri/xml/document.rb +232 -143
  110. data/lib/nokogiri/xml/document_fragment.rb +88 -42
  111. data/lib/nokogiri/xml/dtd.rb +3 -2
  112. data/lib/nokogiri/xml/element_content.rb +1 -0
  113. data/lib/nokogiri/xml/element_decl.rb +2 -1
  114. data/lib/nokogiri/xml/entity_decl.rb +3 -2
  115. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  116. data/lib/nokogiri/xml/namespace.rb +44 -0
  117. data/lib/nokogiri/xml/node/save_options.rb +14 -8
  118. data/lib/nokogiri/xml/node.rb +708 -383
  119. data/lib/nokogiri/xml/node_set.rb +134 -59
  120. data/lib/nokogiri/xml/notation.rb +12 -0
  121. data/lib/nokogiri/xml/parse_options.rb +140 -56
  122. data/lib/nokogiri/xml/pp/character_data.rb +8 -6
  123. data/lib/nokogiri/xml/pp/node.rb +26 -26
  124. data/lib/nokogiri/xml/pp.rb +1 -0
  125. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  126. data/lib/nokogiri/xml/reader.rb +20 -24
  127. data/lib/nokogiri/xml/relax_ng.rb +1 -0
  128. data/lib/nokogiri/xml/sax/document.rb +20 -19
  129. data/lib/nokogiri/xml/sax/parser.rb +38 -36
  130. data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
  131. data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
  132. data/lib/nokogiri/xml/sax.rb +1 -0
  133. data/lib/nokogiri/xml/schema.rb +7 -6
  134. data/lib/nokogiri/xml/searchable.rb +93 -62
  135. data/lib/nokogiri/xml/syntax_error.rb +5 -4
  136. data/lib/nokogiri/xml/text.rb +1 -0
  137. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  138. data/lib/nokogiri/xml/xpath.rb +12 -0
  139. data/lib/nokogiri/xml/xpath_context.rb +2 -3
  140. data/lib/nokogiri/xml.rb +4 -3
  141. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  142. data/lib/nokogiri/xslt.rb +21 -13
  143. data/lib/nokogiri.rb +22 -27
  144. data/lib/xsd/xmlparser/nokogiri.rb +28 -25
  145. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  146. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
  147. data/ports/archives/libxml2-2.10.4.tar.xz +0 -0
  148. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
  149. metadata +20 -171
  150. data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
  151. data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
  152. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -2511
  153. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +0 -31
  154. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +0 -19
  155. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  156. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -1,22 +1,25 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
3
+
2
4
  module Nokogiri
3
5
  module XML
4
6
  #
5
7
  # The Searchable module declares the interface used for searching your DOM.
6
8
  #
7
- # It implements the public methods `search`, `css`, and `xpath`,
9
+ # It implements the public methods #search, #css, and #xpath,
8
10
  # as well as allowing specific implementations to specialize some
9
11
  # of the important behaviors.
10
12
  #
11
13
  module Searchable
12
14
  # Regular expression used by Searchable#search to determine if a query
13
15
  # string is CSS or XPath
14
- LOOKS_LIKE_XPATH = /^(\.\/|\/|\.\.|\.$)/
16
+ LOOKS_LIKE_XPATH = %r{^(\./|/|\.\.|\.$)}
15
17
 
16
- # @!group Searching via XPath or CSS Queries
18
+ # :section: Searching via XPath or CSS Queries
17
19
 
18
20
  ###
19
- # call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
21
+ # call-seq:
22
+ # search(*paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class])
20
23
  #
21
24
  # Search this object for +paths+. +paths+ must be one or more XPath or CSS queries:
22
25
  #
@@ -27,41 +30,39 @@ module Nokogiri
27
30
  # node.search('.//bike:tire', {'bike' => 'http://schwinn.com/'})
28
31
  # node.search('bike|tire', {'bike' => 'http://schwinn.com/'})
29
32
  #
30
- # For XPath queries, a hash of variable bindings may also be
31
- # appended to the namespace bindings. For example:
33
+ # For XPath queries, a hash of variable bindings may also be appended to the namespace
34
+ # bindings. For example:
32
35
  #
33
36
  # node.search('.//address[@domestic=$value]', nil, {:value => 'Yes'})
34
37
  #
35
- # Custom XPath functions and CSS pseudo-selectors may also be
36
- # defined. To define custom functions create a class and
37
- # implement the function you want to define. The first argument
38
- # to the method will be the current matching NodeSet. Any other
39
- # arguments are ones that you pass in. Note that this class may
40
- # appear anywhere in the argument list. For example:
41
- #
42
- # node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")'
43
- # Class.new {
44
- # def regex node_set, regex
45
- # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
46
- # end
47
- # }.new
48
- # )
38
+ # 💡 Custom XPath functions and CSS pseudo-selectors may also be defined. To define custom
39
+ # functions create a class and implement the function you want to define. The first argument
40
+ # to the method will be the current matching NodeSet. Any other arguments are ones that you
41
+ # pass in. Note that this class may appear anywhere in the argument list. For example:
42
+ #
43
+ # handler = Class.new {
44
+ # def regex node_set, regex
45
+ # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
46
+ # end
47
+ # }.new
48
+ # node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")', handler)
49
49
  #
50
50
  # See Searchable#xpath and Searchable#css for further usage help.
51
51
  def search(*args)
52
52
  paths, handler, ns, binds = extract_params(args)
53
53
 
54
54
  xpaths = paths.map(&:to_s).map do |path|
55
- (path =~ LOOKS_LIKE_XPATH) ? path : xpath_query_from_css_rule(path, ns)
55
+ LOOKS_LIKE_XPATH.match?(path) ? path : xpath_query_from_css_rule(path, ns)
56
56
  end.flatten.uniq
57
57
 
58
58
  xpath(*(xpaths + [ns, handler, binds].compact))
59
59
  end
60
60
 
61
- alias :/ :search
61
+ alias_method :/, :search
62
62
 
63
63
  ###
64
- # call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
64
+ # call-seq:
65
+ # at(*paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class])
65
66
  #
66
67
  # Search this object for +paths+, and return only the first
67
68
  # result. +paths+ must be one or more XPath or CSS queries.
@@ -71,10 +72,11 @@ module Nokogiri
71
72
  search(*args).first
72
73
  end
73
74
 
74
- alias :% :at
75
+ alias_method :%, :at
75
76
 
76
77
  ###
77
- # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
78
+ # call-seq:
79
+ # css(*rules, [namespace-bindings, custom-pseudo-class])
78
80
  #
79
81
  # Search this object for CSS +rules+. +rules+ must be one or more CSS
80
82
  # selectors. For example:
@@ -87,33 +89,49 @@ module Nokogiri
87
89
  #
88
90
  # node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
89
91
  #
90
- # Custom CSS pseudo classes may also be defined. To define
91
- # custom pseudo classes, create a class and implement the custom
92
- # pseudo class you want defined. The first argument to the
93
- # method will be the current matching NodeSet. Any other
94
- # arguments are ones that you pass in. For example:
92
+ # 💡 Custom CSS pseudo classes may also be defined which are mapped to a custom XPath
93
+ # function. To define custom pseudo classes, create a class and implement the custom pseudo
94
+ # class you want defined. The first argument to the method will be the matching context
95
+ # NodeSet. Any other arguments are ones that you pass in. For example:
95
96
  #
96
- # node.css('title:regex("\w+")', Class.new {
97
- # def regex node_set, regex
97
+ # handler = Class.new {
98
+ # def regex(node_set, regex)
98
99
  # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
99
100
  # end
100
- # }.new)
101
+ # }.new
102
+ # node.css('title:regex("\w+")', handler)
103
+ #
104
+ # 💡 Some XPath syntax is supported in CSS queries. For example, to query for an attribute:
105
+ #
106
+ # node.css('img > @href') # returns all +href+ attributes on an +img+ element
107
+ # node.css('img / @href') # same
108
+ #
109
+ # # ⚠ this returns +class+ attributes from all +div+ elements AND THEIR CHILDREN!
110
+ # node.css('div @class')
101
111
  #
102
- # Note that the CSS query string is case-sensitive with regards
103
- # to your document type. That is, if you're looking for "H1" in
104
- # an HTML document, you'll never find anything, since HTML tags
105
- # will match only lowercase CSS queries. However, "H1" might be
106
- # found in an XML document, where tags names are case-sensitive
107
- # (e.g., "H1" is distinct from "h1").
112
+ # node.css
108
113
  #
114
+ # 💡 Array-like syntax is supported in CSS queries as an alternative to using +:nth-child()+.
115
+ #
116
+ # ⚠ NOTE that indices are 1-based like +:nth-child+ and not 0-based like Ruby Arrays. For
117
+ # example:
118
+ #
119
+ # # equivalent to 'li:nth-child(2)'
120
+ # node.css('li[2]') # retrieve the second li element in a list
121
+ #
122
+ # ⚠ NOTE that the CSS query string is case-sensitive with regards to your document type. HTML
123
+ # tags will match only lowercase CSS queries, so if you search for "H1" in an HTML document,
124
+ # you'll never find anything. However, "H1" might be found in an XML document, where tags
125
+ # names are case-sensitive (e.g., "H1" is distinct from "h1").
109
126
  def css(*args)
110
127
  rules, handler, ns, _ = extract_params(args)
111
128
 
112
- css_internal self, rules, handler, ns
129
+ css_internal(self, rules, handler, ns)
113
130
  end
114
131
 
115
132
  ##
116
- # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
133
+ # call-seq:
134
+ # at_css(*rules, [namespace-bindings, custom-pseudo-class])
117
135
  #
118
136
  # Search this object for CSS +rules+, and return only the first
119
137
  # match. +rules+ must be one or more CSS selectors.
@@ -124,7 +142,8 @@ module Nokogiri
124
142
  end
125
143
 
126
144
  ###
127
- # call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
145
+ # call-seq:
146
+ # xpath(*paths, [namespace-bindings, variable-bindings, custom-handler-class])
128
147
  #
129
148
  # Search this node for XPath +paths+. +paths+ must be one or more XPath
130
149
  # queries.
@@ -140,27 +159,27 @@ module Nokogiri
140
159
  #
141
160
  # node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
142
161
  #
143
- # Custom XPath functions may also be defined. To define custom
144
- # functions create a class and implement the function you want
145
- # to define. The first argument to the method will be the
146
- # current matching NodeSet. Any other arguments are ones that
147
- # you pass in. Note that this class may appear anywhere in the
148
- # argument list. For example:
162
+ # 💡 Custom XPath functions may also be defined. To define custom functions create a class and
163
+ # implement the function you want to define. The first argument to the method will be the
164
+ # current matching NodeSet. Any other arguments are ones that you pass in. Note that this
165
+ # class may appear anywhere in the argument list. For example:
149
166
  #
150
- # node.xpath('.//title[regex(., "\w+")]', Class.new {
151
- # def regex node_set, regex
167
+ # handler = Class.new {
168
+ # def regex(node_set, regex)
152
169
  # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
153
170
  # end
154
- # }.new)
171
+ # }.new
172
+ # node.xpath('.//title[regex(., "\w+")]', handler)
155
173
  #
156
174
  def xpath(*args)
157
175
  paths, handler, ns, binds = extract_params(args)
158
176
 
159
- xpath_internal self, paths, handler, ns, binds
177
+ xpath_internal(self, paths, handler, ns, binds)
160
178
  end
161
179
 
162
180
  ##
163
- # call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
181
+ # call-seq:
182
+ # at_xpath(*paths, [namespace-bindings, variable-bindings, custom-handler-class])
164
183
  #
165
184
  # Search this node for XPath +paths+, and return only the first
166
185
  # match. +paths+ must be one or more XPath queries.
@@ -170,12 +189,21 @@ module Nokogiri
170
189
  xpath(*args).first
171
190
  end
172
191
 
173
- # @!endgroup
192
+ # :call-seq:
193
+ # >(selector) → NodeSet
194
+ #
195
+ # Search this node's immediate children using CSS selector +selector+
196
+ def >(selector) # rubocop:disable Naming/BinaryOperatorParameterName
197
+ ns = (document.root&.namespaces || {})
198
+ xpath(CSS.xpath_for(selector, prefix: "./", ns: ns).first)
199
+ end
200
+
201
+ # :section:
174
202
 
175
203
  private
176
204
 
177
205
  def css_internal(node, rules, handler, ns)
178
- xpath_internal node, css_rules_to_xpath(rules, ns), handler, ns, nil
206
+ xpath_internal(node, css_rules_to_xpath(rules, ns), handler, ns, nil)
179
207
  end
180
208
 
181
209
  def xpath_internal(node, paths, handler, ns, binds)
@@ -198,9 +226,9 @@ module Nokogiri
198
226
  ctx.register_namespaces(ns)
199
227
  path = path.gsub(/xmlns:/, " :") unless Nokogiri.uses_libxml?
200
228
 
201
- binds.each do |key, value|
202
- ctx.register_variable key.to_s, value
203
- end if binds
229
+ binds&.each do |key, value|
230
+ ctx.register_variable(key.to_s, value)
231
+ end
204
232
 
205
233
  ctx.evaluate(path, handler)
206
234
  end
@@ -210,10 +238,13 @@ module Nokogiri
210
238
  end
211
239
 
212
240
  def xpath_query_from_css_rule(rule, ns)
213
- visitor = Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins.new
241
+ visitor = Nokogiri::CSS::XPathVisitor.new(
242
+ builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::OPTIMAL,
243
+ doctype: document.xpath_doctype,
244
+ )
214
245
  self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
215
- CSS.xpath_for(rule.to_s, {:prefix => implied_xpath_context, :ns => ns,
216
- :visitor => visitor})
246
+ CSS.xpath_for(rule.to_s, { prefix: implied_xpath_context, ns: ns,
247
+ visitor: visitor, })
217
248
  end.join(" | ")
218
249
  end
219
250
 
@@ -230,7 +261,7 @@ module Nokogiri
230
261
  end
231
262
  ns, binds = hashes.reverse
232
263
 
233
- ns ||= document.root ? document.root.namespaces : {}
264
+ ns ||= (document.root&.namespaces || {})
234
265
 
235
266
  [params, handler, ns, binds]
236
267
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  ###
@@ -42,9 +43,9 @@ module Nokogiri
42
43
 
43
44
  def to_s
44
45
  message = super.chomp
45
- [location_to_s, level_to_s, message].
46
- compact.join(": ").
47
- force_encoding(message.encoding)
46
+ [location_to_s, level_to_s, message]
47
+ .compact.join(": ")
48
+ .force_encoding(message.encoding)
48
49
  end
49
50
 
50
51
  private
@@ -54,7 +55,6 @@ module Nokogiri
54
55
  when 3 then "FATAL"
55
56
  when 2 then "ERROR"
56
57
  when 1 then "WARNING"
57
- else nil
58
58
  end
59
59
  end
60
60
 
@@ -64,6 +64,7 @@ module Nokogiri
64
64
 
65
65
  def location_to_s
66
66
  return nil if nil_or_zero?(line) && nil_or_zero?(column)
67
+
67
68
  "#{line}:#{column}"
68
69
  end
69
70
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  class Text < Nokogiri::XML::CharacterData
@@ -1,10 +1,11 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  module XPath
5
6
  class SyntaxError < XML::SyntaxError
6
7
  def to_s
7
- [super.chomp, str1].compact.join(': ')
8
+ [super.chomp, str1].compact.join(": ")
8
9
  end
9
10
  end
10
11
  end
@@ -1,7 +1,19 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  module XPath
6
+ # The XPath search prefix to search globally, +//+
7
+ GLOBAL_SEARCH_PREFIX = "//"
8
+
9
+ # The XPath search prefix to search direct descendants of the root element, +/+
10
+ ROOT_SEARCH_PREFIX = "/"
11
+
12
+ # The XPath search prefix to search direct descendants of the current element, +./+
13
+ CURRENT_SEARCH_PREFIX = "./"
14
+
15
+ # The XPath search prefix to search anywhere in the current element's subtree, +.//+
16
+ SUBTREE_SEARCH_PREFIX = ".//"
5
17
  end
6
18
  end
7
19
  end
@@ -1,17 +1,16 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  class XPathContext
5
-
6
6
  ###
7
7
  # Register namespaces in +namespaces+
8
8
  def register_namespaces(namespaces)
9
9
  namespaces.each do |k, v|
10
- k = k.to_s.gsub(/.*:/,'') # strip off 'xmlns:' or 'xml:'
10
+ k = k.to_s.gsub(/.*:/, "") # strip off 'xmlns:' or 'xml:'
11
11
  register_ns(k, v)
12
12
  end
13
13
  end
14
-
15
14
  end
16
15
  end
17
16
  end
data/lib/nokogiri/xml.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  class << self
4
5
  ###
@@ -21,12 +22,12 @@ module Nokogiri
21
22
  # Nokogiri::XML::Reader for more information
22
23
  def Reader(string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT)
23
24
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
24
- # Give the options to the user
25
25
  yield options if block_given?
26
26
 
27
27
  if string_or_io.respond_to?(:read)
28
28
  return Reader.from_io(string_or_io, url, encoding, options.to_i)
29
29
  end
30
+
30
31
  Reader.from_memory(string_or_io, url, encoding, options.to_i)
31
32
  end
32
33
 
@@ -38,8 +39,8 @@ module Nokogiri
38
39
 
39
40
  ####
40
41
  # Parse a fragment from +string+ in to a NodeSet.
41
- def fragment(string)
42
- XML::DocumentFragment.parse(string)
42
+ def fragment(string, options = ParseOptions::DEFAULT_XML, &block)
43
+ XML::DocumentFragment.parse(string, options, &block)
43
44
  end
44
45
  end
45
46
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XSLT
4
5
  ###
data/lib/nokogiri/xslt.rb CHANGED
@@ -1,4 +1,6 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
3
+
2
4
  module Nokogiri
3
5
  class << self
4
6
  ###
@@ -8,7 +10,7 @@ module Nokogiri
8
10
  #
9
11
  # xslt = Nokogiri::XSLT(File.read(ARGV[0]))
10
12
  #
11
- def XSLT stylesheet, modules = {}
13
+ def XSLT(stylesheet, modules = {})
12
14
  XSLT.parse(stylesheet, modules)
13
15
  end
14
16
  end
@@ -33,22 +35,28 @@ module Nokogiri
33
35
  end
34
36
  end
35
37
 
36
- ###
37
- # Quote parameters in +params+ for stylesheet safety
38
+ # :call-seq:
39
+ # quote_params(params) → Array
40
+ #
41
+ # Quote parameters in +params+ for stylesheet safety.
42
+ # See Nokogiri::XSLT::Stylesheet.transform for example usage.
43
+ #
44
+ # [Parameters]
45
+ # - +params+ (Hash, Array) XSLT parameters (key->value, or tuples of [key, value])
46
+ #
47
+ # [Returns] Array of string parameters, with quotes correctly escaped for use with XSLT::Stylesheet.transform
48
+ #
38
49
  def quote_params(params)
39
- parray = (params.instance_of?(Hash) ? params.to_a.flatten : params).dup
40
- parray.each_with_index do |v, i|
41
- parray[i] = if i % 2 > 0
42
- if v =~ /'/
43
- "concat('#{v.gsub(/'/, %q{', "'", '})}')"
44
- else
45
- "'#{v}'"
46
- end
50
+ params.flatten.each_slice(2).with_object([]) do |kv, quoted_params|
51
+ key, value = kv.map(&:to_s)
52
+ value = if /'/.match?(value)
53
+ "concat('#{value.gsub(/'/, %q{', "'", '})}')"
47
54
  else
48
- v.to_s
55
+ "'#{value}'"
49
56
  end
57
+ quoted_params << key
58
+ quoted_params << value
50
59
  end
51
- parray.flatten
52
60
  end
53
61
  end
54
62
  end
data/lib/nokogiri.rb CHANGED
@@ -1,8 +1,5 @@
1
- # -*- coding: utf-8 -*-
1
+ # coding: utf-8
2
2
  # frozen_string_literal: true
3
- # Modify the PATH on windows so that the external DLLs will get loaded.
4
-
5
- require "rbconfig"
6
3
 
7
4
  if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
8
5
  require_relative "nokogiri/jruby/dependencies"
@@ -19,30 +16,32 @@ require_relative "nokogiri/extension"
19
16
  #
20
17
  # Here is an example:
21
18
  #
22
- # require 'nokogiri'
23
- # require 'open-uri'
19
+ # require 'nokogiri'
20
+ # require 'open-uri'
21
+ #
22
+ # # Get a Nokogiri::HTML4::Document for the page we’re interested in...
24
23
  #
25
- # # Get a Nokogiri::HTML4::Document for the page we’re interested in...
24
+ # doc = Nokogiri::HTML4(URI.open('http://www.google.com/search?q=tenderlove'))
26
25
  #
27
- # doc = Nokogiri::HTML4(URI.open('http://www.google.com/search?q=tenderlove'))
26
+ # # Do funky things with it using Nokogiri::XML::Node methods...
28
27
  #
29
- # # Do funky things with it using Nokogiri::XML::Node methods...
28
+ # ####
29
+ # # Search for nodes by css
30
+ # doc.css('h3.r a.l').each do |link|
31
+ # puts link.content
32
+ # end
30
33
  #
31
- # ####
32
- # # Search for nodes by css
33
- # doc.css('h3.r a.l').each do |link|
34
- # puts link.content
35
- # end
34
+ # See also:
36
35
  #
37
- # See Nokogiri::XML::Searchable#css for more information about CSS searching.
38
- # See Nokogiri::XML::Searchable#xpath for more information about XPath searching.
36
+ # - Nokogiri::XML::Searchable#css for more information about CSS searching
37
+ # - Nokogiri::XML::Searchable#xpath for more information about XPath searching
39
38
  module Nokogiri
40
39
  class << self
41
40
  ###
42
41
  # Parse an HTML or XML document. +string+ contains the document.
43
42
  def parse(string, url = nil, encoding = nil, options = nil)
44
43
  if string.respond_to?(:read) ||
45
- /^\s*<(?:!DOCTYPE\s+)?html[\s>]/i === string[0, 512]
44
+ /^\s*<(?:!DOCTYPE\s+)?html[\s>]/i.match?(string[0, 512])
46
45
  # Expect an HTML indicator to appear within the first 512
47
46
  # characters of a document. (<?xml ?> + <?xml-stylesheet ?>
48
47
  # shouldn't be that long)
@@ -85,18 +84,12 @@ module Nokogiri
85
84
  Nokogiri(*args, &block).slop!
86
85
  end
87
86
 
87
+ # :nodoc:
88
88
  def install_default_aliases
89
- # Make sure to support some popular encoding aliases not known by
90
- # all iconv implementations.
91
- {
92
- "Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
93
- }.each do |alias_name, name|
94
- EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
95
- end
89
+ warn("Nokogiri.install_default_aliases is deprecated and will be removed in a future version of Nokogiri. Please call Nokogiri::EncodingHandler.install_default_aliases instead.")
90
+ Nokogiri::EncodingHandler.install_default_aliases
96
91
  end
97
92
  end
98
-
99
- Nokogiri.install_default_aliases
100
93
  end
101
94
 
102
95
  ###
@@ -105,7 +98,7 @@ end
105
98
  #
106
99
  # To specify the type of document, use {Nokogiri.XML}, {Nokogiri.HTML4}, or {Nokogiri.HTML5}.
107
100
  def Nokogiri(*args, &block)
108
- if block_given?
101
+ if block
109
102
  Nokogiri::HTML4::Builder.new(&block).doc.root
110
103
  else
111
104
  Nokogiri.parse(*args)
@@ -113,6 +106,7 @@ def Nokogiri(*args, &block)
113
106
  end
114
107
 
115
108
  require_relative "nokogiri/version"
109
+ require_relative "nokogiri/class_resolver"
116
110
  require_relative "nokogiri/syntax_error"
117
111
  require_relative "nokogiri/xml"
118
112
  require_relative "nokogiri/xslt"
@@ -121,5 +115,6 @@ require_relative "nokogiri/html"
121
115
  require_relative "nokogiri/decorators/slop"
122
116
  require_relative "nokogiri/css"
123
117
  require_relative "nokogiri/html4/builder"
118
+ require_relative "nokogiri/encoding_handler"
124
119
 
125
120
  require_relative "nokogiri/html5" if Nokogiri.uses_gumbo?