nokogiri 1.12.5 → 1.14.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (156) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +41 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/LICENSE.md +1 -1
  5. data/README.md +23 -14
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -66
  8. data/ext/nokogiri/extconf.rb +159 -63
  9. data/ext/nokogiri/gumbo.c +21 -11
  10. data/ext/nokogiri/html4_document.c +2 -2
  11. data/ext/nokogiri/html4_element_description.c +1 -1
  12. data/ext/nokogiri/html4_entity_lookup.c +2 -2
  13. data/ext/nokogiri/html4_sax_parser_context.c +3 -9
  14. data/ext/nokogiri/html4_sax_push_parser.c +1 -1
  15. data/ext/nokogiri/nokogiri.c +38 -51
  16. data/ext/nokogiri/nokogiri.h +26 -14
  17. data/ext/nokogiri/test_global_handlers.c +1 -1
  18. data/ext/nokogiri/xml_attr.c +3 -3
  19. data/ext/nokogiri/xml_attribute_decl.c +5 -5
  20. data/ext/nokogiri/xml_cdata.c +3 -3
  21. data/ext/nokogiri/xml_comment.c +1 -1
  22. data/ext/nokogiri/xml_document.c +53 -44
  23. data/ext/nokogiri/xml_document_fragment.c +1 -3
  24. data/ext/nokogiri/xml_dtd.c +11 -11
  25. data/ext/nokogiri/xml_element_content.c +3 -3
  26. data/ext/nokogiri/xml_element_decl.c +5 -5
  27. data/ext/nokogiri/xml_encoding_handler.c +28 -14
  28. data/ext/nokogiri/xml_entity_decl.c +6 -6
  29. data/ext/nokogiri/xml_entity_reference.c +1 -1
  30. data/ext/nokogiri/xml_namespace.c +80 -14
  31. data/ext/nokogiri/xml_node.c +982 -396
  32. data/ext/nokogiri/xml_node_set.c +4 -6
  33. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  34. data/ext/nokogiri/xml_reader.c +133 -32
  35. data/ext/nokogiri/xml_relax_ng.c +1 -3
  36. data/ext/nokogiri/xml_sax_parser.c +23 -17
  37. data/ext/nokogiri/xml_sax_parser_context.c +11 -9
  38. data/ext/nokogiri/xml_sax_push_parser.c +1 -3
  39. data/ext/nokogiri/xml_schema.c +4 -6
  40. data/ext/nokogiri/xml_syntax_error.c +1 -1
  41. data/ext/nokogiri/xml_text.c +2 -2
  42. data/ext/nokogiri/xml_xpath_context.c +144 -114
  43. data/ext/nokogiri/xslt_stylesheet.c +122 -23
  44. data/gumbo-parser/Makefile +10 -0
  45. data/gumbo-parser/src/attribute.h +1 -1
  46. data/gumbo-parser/src/error.c +2 -2
  47. data/gumbo-parser/src/error.h +1 -1
  48. data/gumbo-parser/src/foreign_attrs.c +2 -2
  49. data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
  50. data/gumbo-parser/src/parser.c +8 -16
  51. data/gumbo-parser/src/replacement.h +1 -1
  52. data/gumbo-parser/src/string_buffer.h +1 -1
  53. data/gumbo-parser/src/string_piece.c +1 -1
  54. data/gumbo-parser/src/svg_attrs.c +2 -2
  55. data/gumbo-parser/src/svg_tags.c +2 -2
  56. data/gumbo-parser/src/tag.c +2 -1
  57. data/gumbo-parser/src/tag_lookup.c +7 -7
  58. data/gumbo-parser/src/tag_lookup.gperf +1 -0
  59. data/gumbo-parser/src/tag_lookup.h +1 -1
  60. data/gumbo-parser/src/token_buffer.h +1 -1
  61. data/gumbo-parser/src/tokenizer.c +1 -1
  62. data/gumbo-parser/src/tokenizer.h +1 -1
  63. data/gumbo-parser/src/utf8.c +1 -1
  64. data/gumbo-parser/src/utf8.h +1 -1
  65. data/gumbo-parser/src/util.c +1 -3
  66. data/gumbo-parser/src/util.h +4 -0
  67. data/gumbo-parser/src/vector.h +1 -1
  68. data/lib/nokogiri/class_resolver.rb +67 -0
  69. data/lib/nokogiri/css/node.rb +9 -8
  70. data/lib/nokogiri/css/parser.rb +360 -341
  71. data/lib/nokogiri/css/parser.y +249 -244
  72. data/lib/nokogiri/css/parser_extras.rb +22 -20
  73. data/lib/nokogiri/css/syntax_error.rb +1 -0
  74. data/lib/nokogiri/css/tokenizer.rb +4 -3
  75. data/lib/nokogiri/css/tokenizer.rex +3 -2
  76. data/lib/nokogiri/css/xpath_visitor.rb +184 -85
  77. data/lib/nokogiri/css.rb +44 -6
  78. data/lib/nokogiri/decorators/slop.rb +8 -7
  79. data/lib/nokogiri/encoding_handler.rb +57 -0
  80. data/lib/nokogiri/extension.rb +4 -3
  81. data/lib/nokogiri/gumbo.rb +1 -0
  82. data/lib/nokogiri/html.rb +16 -10
  83. data/lib/nokogiri/html4/builder.rb +1 -0
  84. data/lib/nokogiri/html4/document.rb +56 -164
  85. data/lib/nokogiri/html4/document_fragment.rb +11 -7
  86. data/lib/nokogiri/html4/element_description.rb +1 -0
  87. data/lib/nokogiri/html4/element_description_defaults.rb +432 -532
  88. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  89. data/lib/nokogiri/html4/entity_lookup.rb +2 -1
  90. data/lib/nokogiri/html4/sax/parser.rb +5 -2
  91. data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
  92. data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
  93. data/lib/nokogiri/html4.rb +12 -5
  94. data/lib/nokogiri/html5/document.rb +126 -32
  95. data/lib/nokogiri/html5/document_fragment.rb +14 -4
  96. data/lib/nokogiri/html5/node.rb +12 -7
  97. data/lib/nokogiri/html5.rb +138 -222
  98. data/lib/nokogiri/jruby/dependencies.rb +2 -19
  99. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  100. data/lib/nokogiri/syntax_error.rb +1 -0
  101. data/lib/nokogiri/version/constant.rb +2 -1
  102. data/lib/nokogiri/version/info.rb +32 -24
  103. data/lib/nokogiri/version.rb +1 -0
  104. data/lib/nokogiri/xml/attr.rb +54 -3
  105. data/lib/nokogiri/xml/attribute_decl.rb +2 -1
  106. data/lib/nokogiri/xml/builder.rb +35 -33
  107. data/lib/nokogiri/xml/cdata.rb +2 -1
  108. data/lib/nokogiri/xml/character_data.rb +1 -0
  109. data/lib/nokogiri/xml/document.rb +232 -143
  110. data/lib/nokogiri/xml/document_fragment.rb +88 -42
  111. data/lib/nokogiri/xml/dtd.rb +3 -2
  112. data/lib/nokogiri/xml/element_content.rb +1 -0
  113. data/lib/nokogiri/xml/element_decl.rb +2 -1
  114. data/lib/nokogiri/xml/entity_decl.rb +3 -2
  115. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  116. data/lib/nokogiri/xml/namespace.rb +44 -0
  117. data/lib/nokogiri/xml/node/save_options.rb +14 -8
  118. data/lib/nokogiri/xml/node.rb +708 -383
  119. data/lib/nokogiri/xml/node_set.rb +134 -59
  120. data/lib/nokogiri/xml/notation.rb +12 -0
  121. data/lib/nokogiri/xml/parse_options.rb +140 -56
  122. data/lib/nokogiri/xml/pp/character_data.rb +8 -6
  123. data/lib/nokogiri/xml/pp/node.rb +26 -26
  124. data/lib/nokogiri/xml/pp.rb +1 -0
  125. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  126. data/lib/nokogiri/xml/reader.rb +20 -24
  127. data/lib/nokogiri/xml/relax_ng.rb +1 -0
  128. data/lib/nokogiri/xml/sax/document.rb +20 -19
  129. data/lib/nokogiri/xml/sax/parser.rb +38 -36
  130. data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
  131. data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
  132. data/lib/nokogiri/xml/sax.rb +1 -0
  133. data/lib/nokogiri/xml/schema.rb +7 -6
  134. data/lib/nokogiri/xml/searchable.rb +93 -62
  135. data/lib/nokogiri/xml/syntax_error.rb +5 -4
  136. data/lib/nokogiri/xml/text.rb +1 -0
  137. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  138. data/lib/nokogiri/xml/xpath.rb +12 -0
  139. data/lib/nokogiri/xml/xpath_context.rb +2 -3
  140. data/lib/nokogiri/xml.rb +4 -3
  141. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  142. data/lib/nokogiri/xslt.rb +21 -13
  143. data/lib/nokogiri.rb +22 -27
  144. data/lib/xsd/xmlparser/nokogiri.rb +28 -25
  145. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  146. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
  147. data/ports/archives/libxml2-2.10.4.tar.xz +0 -0
  148. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
  149. metadata +20 -171
  150. data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
  151. data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
  152. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -2511
  153. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +0 -31
  154. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +0 -19
  155. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  156. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -1,42 +1,116 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
3
+
2
4
  module Nokogiri
3
5
  module CSS
4
- class XPathVisitor # :nodoc:
5
- def visit_function node
6
- msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
7
- return self.send(msg, node) if self.respond_to?(msg)
6
+ # When translating CSS selectors to XPath queries with Nokogiri::CSS.xpath_for, the XPathVisitor
7
+ # class allows for changing some of the behaviors related to builtin xpath functions and quirks
8
+ # of HTML5.
9
+ class XPathVisitor
10
+ WILDCARD_NAMESPACES = Nokogiri.libxml2_patches.include?("0009-allow-wildcard-namespaces.patch") # :nodoc:
11
+
12
+ # Enum to direct XPathVisitor when to use Nokogiri builtin XPath functions.
13
+ module BuiltinsConfig
14
+ # Never use Nokogiri builtin functions, always generate vanilla XPath 1.0 queries. This is
15
+ # the default when calling Nokogiri::CSS.xpath_for directly.
16
+ NEVER = :never
17
+
18
+ # Always use Nokogiri builtin functions whenever possible. This is probably only useful for testing.
19
+ ALWAYS = :always
20
+
21
+ # Only use Nokogiri builtin functions when they will be faster than vanilla XPath. This is
22
+ # the behavior chosen when searching for CSS selectors on a Nokogiri document, fragment, or
23
+ # node.
24
+ OPTIMAL = :optimal
25
+
26
+ # :nodoc: array of values for validation
27
+ VALUES = [NEVER, ALWAYS, OPTIMAL]
28
+ end
29
+
30
+ # Enum to direct XPathVisitor when to tweak the XPath query to suit the nature of the document
31
+ # being searched. Note that searches for CSS selectors from a Nokogiri document, fragment, or
32
+ # node will choose the correct option automatically.
33
+ module DoctypeConfig
34
+ # The document being searched is an XML document. This is the default.
35
+ XML = :xml
36
+
37
+ # The document being searched is an HTML4 document.
38
+ HTML4 = :html4
39
+
40
+ # The document being searched is an HTML5 document.
41
+ HTML5 = :html5
42
+
43
+ # :nodoc: array of values for validation
44
+ VALUES = [XML, HTML4, HTML5]
45
+ end
46
+
47
+ # :call-seq:
48
+ # new() → XPathVisitor
49
+ # new(builtins:, doctype:) → XPathVisitor
50
+ #
51
+ # [Parameters]
52
+ # - +builtins:+ (BuiltinsConfig) Determine when to use Nokogiri's built-in xpath functions for performance improvements.
53
+ # - +doctype:+ (DoctypeConfig) Make document-type-specific accommodations for CSS queries.
54
+ #
55
+ # [Returns] XPathVisitor
56
+ #
57
+ def initialize(builtins: BuiltinsConfig::NEVER, doctype: DoctypeConfig::XML)
58
+ unless BuiltinsConfig::VALUES.include?(builtins)
59
+ raise(ArgumentError, "Invalid values #{builtins.inspect} for builtins: keyword parameter")
60
+ end
61
+ unless DoctypeConfig::VALUES.include?(doctype)
62
+ raise(ArgumentError, "Invalid values #{doctype.inspect} for doctype: keyword parameter")
63
+ end
64
+
65
+ @builtins = builtins
66
+ @doctype = doctype
67
+ end
68
+
69
+ # :call-seq: config() → Hash
70
+ #
71
+ # [Returns]
72
+ # a Hash representing the configuration of the XPathVisitor, suitable for use as
73
+ # part of the CSS cache key.
74
+ def config
75
+ { builtins: @builtins, doctype: @doctype }
76
+ end
77
+
78
+ # :stopdoc:
79
+ def visit_function(node)
80
+ msg = :"visit_function_#{node.value.first.gsub(/[(]/, "")}"
81
+ return send(msg, node) if respond_to?(msg)
8
82
 
9
83
  case node.value.first
10
84
  when /^text\(/
11
- 'child::text()'
85
+ "child::text()"
12
86
  when /^self\(/
13
87
  "self::#{node.value[1]}"
14
88
  when /^eq\(/
15
89
  "position()=#{node.value[1]}"
16
90
  when /^(nth|nth-of-type)\(/
17
- if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
91
+ if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
18
92
  nth(node.value[1])
19
93
  else
20
94
  "position()=#{node.value[1]}"
21
95
  end
22
96
  when /^nth-child\(/
23
- if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
24
- nth(node.value[1], :child => true)
97
+ if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
98
+ nth(node.value[1], child: true)
25
99
  else
26
- "count(preceding-sibling::*)=#{node.value[1].to_i-1}"
100
+ "count(preceding-sibling::*)=#{node.value[1].to_i - 1}"
27
101
  end
28
102
  when /^nth-last-of-type\(/
29
- if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
30
- nth(node.value[1], :last => true)
103
+ if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
104
+ nth(node.value[1], last: true)
31
105
  else
32
106
  index = node.value[1].to_i - 1
33
107
  index == 0 ? "position()=last()" : "position()=last()-#{index}"
34
108
  end
35
109
  when /^nth-last-child\(/
36
- if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
37
- nth(node.value[1], :last => true, :child => true)
110
+ if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
111
+ nth(node.value[1], last: true, child: true)
38
112
  else
39
- "count(following-sibling::*)=#{node.value[1].to_i-1}"
113
+ "count(following-sibling::*)=#{node.value[1].to_i - 1}"
40
114
  end
41
115
  when /^(first|first-of-type)\(/
42
116
  "position()=1"
@@ -52,15 +126,18 @@ module Nokogiri
52
126
  "comment()"
53
127
  when /^has\(/
54
128
  is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
55
- ".#{"//" if !is_direct}#{node.value[1].accept(self)}"
129
+ ".#{"//" unless is_direct}#{node.value[1].accept(self)}"
56
130
  else
57
- # non-standard. this looks like a function call.
58
- args = ['.'] + node.value[1..-1]
59
- "#{node.value.first}#{args.join(',')})"
131
+ # xpath function call, let's marshal those arguments
132
+ args = ["."]
133
+ args += node.value[1..-1].map do |n|
134
+ n.is_a?(Nokogiri::CSS::Node) ? n.accept(self) : n
135
+ end
136
+ "#{node.value.first}#{args.join(",")})"
60
137
  end
61
138
  end
62
139
 
63
- def visit_not node
140
+ def visit_not(node)
64
141
  child = node.value.first
65
142
  if :ELEMENT_NAME == child.type
66
143
  "not(self::#{child.accept(self)})"
@@ -69,29 +146,20 @@ module Nokogiri
69
146
  end
70
147
  end
71
148
 
72
- def visit_id node
149
+ def visit_id(node)
73
150
  node.value.first =~ /^#(.*)$/
74
- "@id='#{$1}'"
151
+ "@id='#{Regexp.last_match(1)}'"
75
152
  end
76
153
 
77
- def visit_attribute_condition node
78
- attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
79
- ''
80
- else
81
- '@'
82
- end
83
- attribute += node.value.first.accept(self)
84
-
85
- # non-standard. attributes starting with '@'
86
- attribute.gsub!(/^@@/, '@')
87
-
88
- return attribute unless node.value.length == 3
154
+ def visit_attribute_condition(node)
155
+ attribute = node.value.first.accept(self)
156
+ return attribute if node.value.length == 1
89
157
 
90
158
  value = node.value.last
91
- value = "'#{value}'" if value !~ /^['"]/
159
+ value = "'#{value}'" unless /^['"]/.match?(value)
92
160
 
93
161
  # quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
94
- if (value[0]==value[-1]) && %q{"'}.include?(value[0])
162
+ if (value[0] == value[-1]) && %q{"'}.include?(value[0])
95
163
  str_value = value[1..-2]
96
164
  if str_value.include?(value[0])
97
165
  value = 'concat("' + str_value.split('"', -1).join(%q{",'"',"}) + '","")'
@@ -100,9 +168,9 @@ module Nokogiri
100
168
 
101
169
  case node.value[1]
102
170
  when :equal
103
- attribute + "=" + "#{value}"
171
+ attribute + "=" + value.to_s
104
172
  when :not_equal
105
- attribute + "!=" + "#{value}"
173
+ attribute + "!=" + value.to_s
106
174
  when :substring_match
107
175
  "contains(#{attribute},#{value})"
108
176
  when :prefix_match
@@ -115,16 +183,16 @@ module Nokogiri
115
183
  when :suffix_match
116
184
  "substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
117
185
  else
118
- attribute + " #{node.value[1]} " + "#{value}"
186
+ attribute + " #{node.value[1]} " + value.to_s
119
187
  end
120
188
  end
121
189
 
122
- def visit_pseudo_class node
123
- if node.value.first.is_a?(Nokogiri::CSS::Node) and node.value.first.type == :FUNCTION
190
+ def visit_pseudo_class(node)
191
+ if node.value.first.is_a?(Nokogiri::CSS::Node) && (node.value.first.type == :FUNCTION)
124
192
  node.value.first.accept(self)
125
193
  else
126
- msg = :"visit_pseudo_class_#{node.value.first.gsub(/[(]/, '')}"
127
- return self.send(msg, node) if self.respond_to?(msg)
194
+ msg = :"visit_pseudo_class_#{node.value.first.gsub(/[(]/, "")}"
195
+ return send(msg, node) if respond_to?(msg)
128
196
 
129
197
  case node.value.first
130
198
  when "first" then "position()=1"
@@ -144,50 +212,77 @@ module Nokogiri
144
212
  end
145
213
  end
146
214
 
147
- def visit_class_condition node
215
+ def visit_class_condition(node)
148
216
  css_class("@class", node.value.first)
149
217
  end
150
218
 
151
- def visit_combinator node
219
+ def visit_combinator(node)
152
220
  if is_of_type_pseudo_class?(node.value.last)
153
- "#{node.value.first.accept(self) if node.value.first}][#{node.value.last.accept(self)}"
221
+ "#{node.value.first&.accept(self)}][#{node.value.last.accept(self)}"
154
222
  else
155
- "#{node.value.first.accept(self) if node.value.first} and #{node.value.last.accept(self)}"
223
+ "#{node.value.first&.accept(self)} and #{node.value.last.accept(self)}"
156
224
  end
157
225
  end
158
226
 
159
227
  {
160
- 'direct_adjacent_selector' => "/following-sibling::*[1]/self::",
161
- 'following_selector' => "/following-sibling::",
162
- 'descendant_selector' => '//',
163
- 'child_selector' => '/',
164
- }.each do |k,v|
165
- class_eval %{
228
+ "direct_adjacent_selector" => "/following-sibling::*[1]/self::",
229
+ "following_selector" => "/following-sibling::",
230
+ "descendant_selector" => "//",
231
+ "child_selector" => "/",
232
+ }.each do |k, v|
233
+ class_eval <<~RUBY, __FILE__, __LINE__ + 1
166
234
  def visit_#{k} node
167
235
  "\#{node.value.first.accept(self) if node.value.first}#{v}\#{node.value.last.accept(self)}"
168
236
  end
169
- }
237
+ RUBY
170
238
  end
171
239
 
172
- def visit_conditional_selector node
173
- node.value.first.accept(self) + '[' +
174
- node.value.last.accept(self) + ']'
240
+ def visit_conditional_selector(node)
241
+ node.value.first.accept(self) + "[" +
242
+ node.value.last.accept(self) + "]"
243
+ end
244
+
245
+ def visit_element_name(node)
246
+ if @doctype == DoctypeConfig::HTML5 && html5_element_name_needs_namespace_handling(node)
247
+ # HTML5 has namespaces that should be ignored in CSS queries
248
+ # https://github.com/sparklemotion/nokogiri/issues/2376
249
+ if @builtins == BuiltinsConfig::ALWAYS || (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)
250
+ if WILDCARD_NAMESPACES
251
+ "*:#{node.value.first}"
252
+ else
253
+ "*[nokogiri-builtin:local-name-is('#{node.value.first}')]"
254
+ end
255
+ else
256
+ "*[local-name()='#{node.value.first}']"
257
+ end
258
+ else
259
+ node.value.first
260
+ end
175
261
  end
176
262
 
177
- def visit_element_name node
178
- node.value.first
263
+ def visit_attrib_name(node)
264
+ "@#{node.value.first}"
179
265
  end
180
266
 
181
- def accept node
267
+ def accept(node)
182
268
  node.accept(self)
183
269
  end
184
270
 
185
271
  private
186
272
 
187
- def nth node, options={}
188
- raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
273
+ def html5_element_name_needs_namespace_handling(node)
274
+ # if this is the wildcard selector "*", use it as normal
275
+ node.value.first != "*" &&
276
+ # if there is already a namespace (i.e., it is a prefixed QName), use it as normal
277
+ !node.value.first.include?(":")
278
+ end
189
279
 
190
- a, b = read_a_and_positive_b node.value
280
+ def nth(node, options = {})
281
+ unless node.value.size == 4
282
+ raise(ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}")
283
+ end
284
+
285
+ a, b = read_a_and_positive_b(node.value)
191
286
  position = if options[:child]
192
287
  options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
193
288
  else
@@ -206,7 +301,7 @@ module Nokogiri
206
301
  end
207
302
  end
208
303
 
209
- def read_a_and_positive_b values
304
+ def read_a_and_positive_b(values)
210
305
  op = values[2]
211
306
  if op == "+"
212
307
  a = values[0].to_i
@@ -220,9 +315,9 @@ module Nokogiri
220
315
  [a, b]
221
316
  end
222
317
 
223
- def is_of_type_pseudo_class? node
224
- if node.type==:PSEUDO_CLASS
225
- if node.value[0].is_a?(Nokogiri::CSS::Node) and node.value[0].type == :FUNCTION
318
+ def is_of_type_pseudo_class?(node) # rubocop:disable Naming/PredicateName
319
+ if node.type == :PSEUDO_CLASS
320
+ if node.value[0].is_a?(Nokogiri::CSS::Node) && (node.value[0].type == :FUNCTION)
226
321
  node.value[0].value[0]
227
322
  else
228
323
  node.value[0]
@@ -230,30 +325,34 @@ module Nokogiri
230
325
  end
231
326
  end
232
327
 
233
- # use only ordinary xpath functions
234
- def css_class_standard(hay, needle)
235
- "contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
236
- end
237
-
238
- # use the builtin implementation
239
- def css_class_builtin(hay, needle)
240
- "nokogiri-builtin:css-class(#{hay},'#{needle}')"
328
+ def css_class(hay, needle)
329
+ if @builtins == BuiltinsConfig::ALWAYS || (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)
330
+ # use the builtin implementation
331
+ "nokogiri-builtin:css-class(#{hay},'#{needle}')"
332
+ else
333
+ # use only ordinary xpath functions
334
+ "contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
335
+ end
241
336
  end
242
-
243
- alias_method :css_class, :css_class_standard
244
337
  end
245
338
 
246
- class XPathVisitorAlwaysUseBuiltins < XPathVisitor # :nodoc:
247
- private
248
- alias_method :css_class, :css_class_builtin
339
+ module XPathVisitorAlwaysUseBuiltins # :nodoc:
340
+ def self.new
341
+ warn(
342
+ "Nokogiri::CSS::XPathVisitorAlwaysUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
343
+ { uplevel: 1 },
344
+ )
345
+ XPathVisitor.new(builtins: :always)
346
+ end
249
347
  end
250
348
 
251
- class XPathVisitorOptimallyUseBuiltins < XPathVisitor # :nodoc:
252
- private
253
- if Nokogiri.uses_libxml?
254
- alias_method :css_class, :css_class_builtin
255
- else
256
- alias_method :css_class, :css_class_standard
349
+ module XPathVisitorOptimallyUseBuiltins # :nodoc:
350
+ def self.new
351
+ warn(
352
+ "Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
353
+ { uplevel: 1 },
354
+ )
355
+ XPathVisitor.new(builtins: :optimal)
257
356
  end
258
357
  end
259
358
  end
data/lib/nokogiri/css.rb CHANGED
@@ -1,17 +1,55 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
3
+
2
4
  module Nokogiri
5
+ # Translate a CSS selector into an XPath 1.0 query
3
6
  module CSS
4
7
  class << self
5
- ###
6
- # Parse this CSS selector in +selector+. Returns an AST.
7
- def parse(selector)
8
+ # TODO: Deprecate this method ahead of 2.0 and delete it in 2.0.
9
+ # It is not used by Nokogiri and shouldn't be part of the public API.
10
+ def parse(selector) # :nodoc:
8
11
  Parser.new.parse(selector)
9
12
  end
10
13
 
11
- ###
12
- # Get the XPath for +selector+.
14
+ # :call-seq:
15
+ # xpath_for(selector) → String
16
+ # xpath_for(selector [, prefix:] [, visitor:] [, ns:]) → String
17
+ #
18
+ # Translate a CSS selector to the equivalent XPath query.
19
+ #
20
+ # [Parameters]
21
+ # - +selector+ (String) The CSS selector to be translated into XPath
22
+ #
23
+ # - +prefix:+ (String)
24
+ #
25
+ # The XPath prefix for the query, see Nokogiri::XML::XPath for some options. Default is
26
+ # +XML::XPath::GLOBAL_SEARCH_PREFIX+.
27
+ #
28
+ # - +visitor:+ (Nokogiri::CSS::XPathVisitor)
29
+ #
30
+ # The visitor object to use to transform the AST into XPath. Default is
31
+ # +Nokogiri::CSS::XPathVisitor.new+.
32
+ #
33
+ # - +ns:+ (Hash<String ⇒ String>)
34
+ #
35
+ # The namespaces that are referenced in the query, if any. This is a hash where the keys are
36
+ # the namespace prefix and the values are the namespace URIs. Default is an empty Hash.
37
+ #
38
+ # [Returns] (String) The equivalent XPath query for +selector+
39
+ #
40
+ # 💡 Note that translated queries are cached for performance concerns.
41
+ #
13
42
  def xpath_for(selector, options = {})
14
- Parser.new(options[:ns] || {}).xpath_for(selector, options)
43
+ raise TypeError, "no implicit conversion of #{selector.inspect} to String" unless selector.respond_to?(:to_str)
44
+
45
+ selector = selector.to_str
46
+ raise Nokogiri::CSS::SyntaxError, "empty CSS selector" if selector.empty?
47
+
48
+ prefix = options.fetch(:prefix, Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX)
49
+ visitor = options.fetch(:visitor) { Nokogiri::CSS::XPathVisitor.new }
50
+ ns = options.fetch(:ns, {})
51
+
52
+ Parser.new(ns).xpath_for(selector, prefix, visitor)
15
53
  end
16
54
  end
17
55
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module Decorators
4
5
  ###
@@ -10,21 +11,21 @@ module Nokogiri
10
11
 
11
12
  ###
12
13
  # look for node with +name+. See Nokogiri.Slop
13
- def method_missing name, *args, &block
14
+ def method_missing(name, *args, &block)
14
15
  if args.empty?
15
- list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}")
16
- elsif args.first.is_a? Hash
16
+ list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, "")}")
17
+ elsif args.first.is_a?(Hash)
17
18
  hash = args.first
18
19
  if hash[:css]
19
20
  list = css("#{name}#{hash[:css]}")
20
21
  elsif hash[:xpath]
21
- conds = Array(hash[:xpath]).join(' and ')
22
+ conds = Array(hash[:xpath]).join(" and ")
22
23
  list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
23
24
  end
24
25
  else
25
26
  CSS::Parser.without_cache do
26
27
  list = xpath(
27
- *CSS.xpath_for("#{name}#{args.first}", :prefix => XPATH_PREFIX)
28
+ *CSS.xpath_for("#{name}#{args.first}", prefix: XPATH_PREFIX),
28
29
  )
29
30
  end
30
31
  end
@@ -33,8 +34,8 @@ module Nokogiri
33
34
  list.length == 1 ? list.first : list
34
35
  end
35
36
 
36
- def respond_to_missing? name, include_private = false
37
- list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}")
37
+ def respond_to_missing?(name, include_private = false)
38
+ list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, "")}")
38
39
 
39
40
  !list.empty?
40
41
  end
@@ -0,0 +1,57 @@
1
+ # encoding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ module Nokogiri
5
+ class EncodingHandler
6
+ # Popular encoding aliases not known by all iconv implementations that Nokogiri should support.
7
+ USEFUL_ALIASES = {
8
+ # alias_name => true_name
9
+ "NOKOGIRI-SENTINEL" => "UTF-8", # indicating that Nokogiri has installed aliases
10
+ "Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
11
+ "UTF-8" => "UTF-8", # for JRuby tests, this is a no-op in CRuby
12
+ }
13
+
14
+ class << self
15
+ def install_default_aliases
16
+ USEFUL_ALIASES.each do |alias_name, name|
17
+ EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
18
+ end
19
+ end
20
+ end
21
+
22
+ # :stopdoc:
23
+ if Nokogiri.jruby?
24
+ class << self
25
+ def [](name)
26
+ storage.key?(name) ? new(storage[name]) : nil
27
+ end
28
+
29
+ def alias(name, alias_name)
30
+ storage[alias_name] = name
31
+ end
32
+
33
+ def delete(name)
34
+ storage.delete(name)
35
+ end
36
+
37
+ def clear_aliases!
38
+ storage.clear
39
+ end
40
+
41
+ private
42
+
43
+ def storage
44
+ @storage ||= {}
45
+ end
46
+ end
47
+
48
+ def initialize(name)
49
+ @name = name
50
+ end
51
+
52
+ attr_reader :name
53
+ end
54
+ end
55
+ end
56
+
57
+ Nokogiri::EncodingHandler.install_default_aliases
@@ -3,13 +3,14 @@
3
3
  # load the C or Java extension
4
4
  begin
5
5
  # native precompiled gems package shared libraries in <gem_dir>/lib/nokogiri/<ruby_version>
6
- ::RUBY_VERSION =~ /(\d+\.\d+)/
6
+ RUBY_VERSION =~ /(\d+\.\d+)/
7
7
  require_relative "#{Regexp.last_match(1)}/nokogiri"
8
8
  rescue LoadError => e
9
- if e.message =~ /GLIBC/
9
+ if /GLIBC/.match?(e.message)
10
10
  warn(<<~EOM)
11
11
 
12
- ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system with glibc < 2.17:
12
+ ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system
13
+ with an unsupported version of glibc.
13
14
 
14
15
  #{e.message}
15
16
 
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module Gumbo
4
5
  # The default maximum number of attributes per element.
data/lib/nokogiri/html.rb CHANGED
@@ -1,40 +1,46 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
3
+
2
4
  require_relative "html4"
3
5
 
4
6
  module Nokogiri
7
+ # Alias for Nokogiri::HTML4
5
8
  HTML = Nokogiri::HTML4
6
9
 
7
- # @!method HTML(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
10
+ # :singleton-method: HTML
11
+ # :call-seq: HTML(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block) → Nokogiri::HTML4::Document
12
+ #
8
13
  # Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
9
- # @!scope class
14
+
15
+ # :nodoc:
10
16
  define_singleton_method(:HTML, Nokogiri.method(:HTML4))
11
17
 
12
- # @note This module/namespace is an alias for {Nokogiri::HTML4} as of v1.12.0. Before v1.12.0,
13
- # {Nokogiri::HTML4} did not exist, and this was the module/namespace for all HTML-related
18
+ # 💡 This module/namespace is an alias for Nokogiri::HTML4 as of v1.12.0. Before v1.12.0,
19
+ # Nokogiri::HTML4 did not exist, and this was the module/namespace for all HTML-related
14
20
  # classes.
15
21
  module HTML
16
- # @note This class is an alias for {Nokogiri::HTML4::Document} as of v1.12.0.
22
+ # 💡 This class is an alias for Nokogiri::HTML4::Document as of v1.12.0.
17
23
  class Document < Nokogiri::XML::Document
18
24
  end
19
25
 
20
- # @note This class is an alias for {Nokogiri::HTML4::DocumentFragment} as of v1.12.0.
26
+ # 💡 This class is an alias for Nokogiri::HTML4::DocumentFragment as of v1.12.0.
21
27
  class DocumentFragment < Nokogiri::XML::DocumentFragment
22
28
  end
23
29
 
24
- # @note This class is an alias for {Nokogiri::HTML4::Builder} as of v1.12.0.
30
+ # 💡 This class is an alias for Nokogiri::HTML4::Builder as of v1.12.0.
25
31
  class Builder < Nokogiri::XML::Builder
26
32
  end
27
33
 
28
34
  module SAX
29
- # @note This class is an alias for {Nokogiri::HTML4::SAX::Parser} as of v1.12.0.
35
+ # 💡 This class is an alias for Nokogiri::HTML4::SAX::Parser as of v1.12.0.
30
36
  class Parser < Nokogiri::XML::SAX::Parser
31
37
  end
32
38
 
33
- # @note This class is an alias for {Nokogiri::HTML4::SAX::ParserContext} as of v1.12.0.
39
+ # 💡 This class is an alias for Nokogiri::HTML4::SAX::ParserContext as of v1.12.0.
34
40
  class ParserContext < Nokogiri::XML::SAX::ParserContext
35
41
  end
36
42
 
37
- # @note This class is an alias for {Nokogiri::HTML4::SAX::PushParser} as of v1.12.0.
43
+ # 💡 This class is an alias for Nokogiri::HTML4::SAX::PushParser as of v1.12.0.
38
44
  class PushParser
39
45
  end
40
46
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module HTML4
4
5
  ###