nokogiri 1.10.3 → 1.13.9

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (220) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +5 -0
  3. data/LICENSE-DEPENDENCIES.md +1173 -884
  4. data/LICENSE.md +1 -1
  5. data/README.md +178 -96
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +13 -62
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +761 -424
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +119 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +228 -91
  18. data/ext/nokogiri/nokogiri.h +199 -88
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +17 -17
  21. data/ext/nokogiri/xml_attribute_decl.c +21 -21
  22. data/ext/nokogiri/xml_cdata.c +14 -19
  23. data/ext/nokogiri/xml_comment.c +19 -26
  24. data/ext/nokogiri/xml_document.c +296 -220
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +25 -25
  29. data/ext/nokogiri/xml_encoding_handler.c +43 -18
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +98 -53
  33. data/ext/nokogiri/xml_node.c +1065 -653
  34. data/ext/nokogiri/xml_node_set.c +178 -166
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +277 -175
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +112 -112
  39. data/ext/nokogiri/xml_sax_parser_context.c +112 -86
  40. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  41. data/ext/nokogiri/xml_schema.c +114 -35
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +226 -115
  45. data/ext/nokogiri/xslt_stylesheet.c +265 -173
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +218 -91
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/{html → html4}/document.rb +103 -105
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +91 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +100 -0
  118. data/lib/nokogiri/html5.rb +478 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +222 -0
  123. data/lib/nokogiri/version.rb +3 -108
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +97 -53
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +224 -86
  130. data/lib/nokogiri/xml/document_fragment.rb +46 -44
  131. data/lib/nokogiri/xml/dtd.rb +4 -2
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +10 -5
  138. data/lib/nokogiri/xml/node.rb +884 -378
  139. data/lib/nokogiri/xml/node_set.rb +51 -54
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +22 -8
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +21 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +38 -34
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +112 -72
  155. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -37
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +29 -20
  163. data/lib/nokogiri.rb +49 -65
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  166. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  169. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  170. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
  171. data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
  172. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
  173. metadata +205 -138
  174. data/ext/nokogiri/html_document.c +0 -170
  175. data/ext/nokogiri/html_document.h +0 -10
  176. data/ext/nokogiri/html_element_description.c +0 -279
  177. data/ext/nokogiri/html_element_description.h +0 -10
  178. data/ext/nokogiri/html_entity_lookup.c +0 -32
  179. data/ext/nokogiri/html_entity_lookup.h +0 -8
  180. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  181. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  182. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  183. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  184. data/ext/nokogiri/xml_attr.h +0 -9
  185. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  186. data/ext/nokogiri/xml_cdata.h +0 -9
  187. data/ext/nokogiri/xml_comment.h +0 -9
  188. data/ext/nokogiri/xml_document.h +0 -23
  189. data/ext/nokogiri/xml_document_fragment.h +0 -10
  190. data/ext/nokogiri/xml_dtd.h +0 -10
  191. data/ext/nokogiri/xml_element_content.h +0 -10
  192. data/ext/nokogiri/xml_element_decl.h +0 -9
  193. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  194. data/ext/nokogiri/xml_entity_decl.h +0 -10
  195. data/ext/nokogiri/xml_entity_reference.h +0 -9
  196. data/ext/nokogiri/xml_io.c +0 -61
  197. data/ext/nokogiri/xml_io.h +0 -11
  198. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  199. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  200. data/ext/nokogiri/xml_namespace.h +0 -14
  201. data/ext/nokogiri/xml_node.h +0 -13
  202. data/ext/nokogiri/xml_node_set.h +0 -12
  203. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  204. data/ext/nokogiri/xml_reader.h +0 -10
  205. data/ext/nokogiri/xml_relax_ng.h +0 -9
  206. data/ext/nokogiri/xml_sax_parser.h +0 -39
  207. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  208. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  209. data/ext/nokogiri/xml_schema.h +0 -9
  210. data/ext/nokogiri/xml_syntax_error.h +0 -13
  211. data/ext/nokogiri/xml_text.h +0 -9
  212. data/ext/nokogiri/xml_xpath_context.h +0 -10
  213. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  214. data/lib/nokogiri/html/document_fragment.rb +0 -49
  215. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  216. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  217. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  218. data/patches/libxslt/0001-Fix-security-framework-bypass.patch +0 -120
  219. data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
  220. data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
@@ -1,64 +1,143 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
1
4
  module Nokogiri
2
5
  module CSS
3
- class XPathVisitor # :nodoc:
4
- def visit_function node
6
+ # When translating CSS selectors to XPath queries with Nokogiri::CSS.xpath_for, the XPathVisitor
7
+ # class allows for changing some of the behaviors related to builtin xpath functions and quirks
8
+ # of HTML5.
9
+ class XPathVisitor
10
+ WILDCARD_NAMESPACES = Nokogiri.libxml2_patches.include?("0009-allow-wildcard-namespaces.patch") # :nodoc:
11
+
12
+ # Enum to direct XPathVisitor when to use Nokogiri builtin XPath functions.
13
+ module BuiltinsConfig
14
+ # Never use Nokogiri builtin functions, always generate vanilla XPath 1.0 queries. This is
15
+ # the default when calling Nokogiri::CSS.xpath_for directly.
16
+ NEVER = :never
17
+
18
+ # Always use Nokogiri builtin functions whenever possible. This is probably only useful for testing.
19
+ ALWAYS = :always
20
+
21
+ # Only use Nokogiri builtin functions when they will be faster than vanilla XPath. This is
22
+ # the behavior chosen when searching for CSS selectors on a Nokogiri document, fragment, or
23
+ # node.
24
+ OPTIMAL = :optimal
25
+
26
+ # :nodoc: array of values for validation
27
+ VALUES = [NEVER, ALWAYS, OPTIMAL]
28
+ end
29
+
30
+ # Enum to direct XPathVisitor when to tweak the XPath query to suit the nature of the document
31
+ # being searched. Note that searches for CSS selectors from a Nokogiri document, fragment, or
32
+ # node will choose the correct option automatically.
33
+ module DoctypeConfig
34
+ # The document being searched is an XML document. This is the default.
35
+ XML = :xml
36
+
37
+ # The document being searched is an HTML4 document.
38
+ HTML4 = :html4
39
+
40
+ # The document being searched is an HTML5 document.
41
+ HTML5 = :html5
42
+
43
+ # :nodoc: array of values for validation
44
+ VALUES = [XML, HTML4, HTML5]
45
+ end
46
+
47
+ # :call-seq:
48
+ # new() → XPathVisitor
49
+ # new(builtins:, doctype:) → XPathVisitor
50
+ #
51
+ # [Parameters]
52
+ # - +builtins:+ (BuiltinsConfig) Determine when to use Nokogiri's built-in xpath functions for performance improvements.
53
+ # - +doctype:+ (DoctypeConfig) Make document-type-specific accommodations for CSS queries.
54
+ #
55
+ # [Returns] XPathVisitor
56
+ #
57
+ def initialize(builtins: BuiltinsConfig::NEVER, doctype: DoctypeConfig::XML)
58
+ unless BuiltinsConfig::VALUES.include?(builtins)
59
+ raise(ArgumentError, "Invalid values #{builtins.inspect} for builtins: keyword parameter")
60
+ end
61
+ unless DoctypeConfig::VALUES.include?(doctype)
62
+ raise(ArgumentError, "Invalid values #{doctype.inspect} for doctype: keyword parameter")
63
+ end
64
+
65
+ @builtins = builtins
66
+ @doctype = doctype
67
+ end
68
+
69
+ # :call-seq: config() → Hash
70
+ #
71
+ # [Returns]
72
+ # a Hash representing the configuration of the XPathVisitor, suitable for use as
73
+ # part of the CSS cache key.
74
+ def config
75
+ { builtins: @builtins, doctype: @doctype }
76
+ end
5
77
 
6
- msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
7
- return self.send(msg, node) if self.respond_to?(msg)
78
+ # :stopdoc:
79
+ def visit_function(node)
80
+ msg = :"visit_function_#{node.value.first.gsub(/[(]/, "")}"
81
+ return send(msg, node) if respond_to?(msg)
8
82
 
9
83
  case node.value.first
10
84
  when /^text\(/
11
- 'child::text()'
85
+ "child::text()"
12
86
  when /^self\(/
13
87
  "self::#{node.value[1]}"
14
88
  when /^eq\(/
15
- "position() = #{node.value[1]}"
89
+ "position()=#{node.value[1]}"
16
90
  when /^(nth|nth-of-type)\(/
17
- if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
91
+ if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
18
92
  nth(node.value[1])
19
93
  else
20
- "position() = #{node.value[1]}"
94
+ "position()=#{node.value[1]}"
21
95
  end
22
96
  when /^nth-child\(/
23
- if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
24
- nth(node.value[1], :child => true)
97
+ if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
98
+ nth(node.value[1], child: true)
25
99
  else
26
- "count(preceding-sibling::*) = #{node.value[1].to_i-1}"
100
+ "count(preceding-sibling::*)=#{node.value[1].to_i - 1}"
27
101
  end
28
102
  when /^nth-last-of-type\(/
29
- if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
30
- nth(node.value[1], :last => true)
103
+ if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
104
+ nth(node.value[1], last: true)
31
105
  else
32
106
  index = node.value[1].to_i - 1
33
- index == 0 ? "position() = last()" : "position() = last() - #{index}"
107
+ index == 0 ? "position()=last()" : "position()=last()-#{index}"
34
108
  end
35
109
  when /^nth-last-child\(/
36
- if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
37
- nth(node.value[1], :last => true, :child => true)
110
+ if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
111
+ nth(node.value[1], last: true, child: true)
38
112
  else
39
- "count(following-sibling::*) = #{node.value[1].to_i-1}"
113
+ "count(following-sibling::*)=#{node.value[1].to_i - 1}"
40
114
  end
41
115
  when /^(first|first-of-type)\(/
42
- "position() = 1"
116
+ "position()=1"
43
117
  when /^(last|last-of-type)\(/
44
- "position() = last()"
118
+ "position()=last()"
45
119
  when /^contains\(/
46
- "contains(., #{node.value[1]})"
120
+ "contains(.,#{node.value[1]})"
47
121
  when /^gt\(/
48
- "position() > #{node.value[1]}"
122
+ "position()>#{node.value[1]}"
49
123
  when /^only-child\(/
50
- "last() = 1"
124
+ "last()=1"
51
125
  when /^comment\(/
52
126
  "comment()"
53
127
  when /^has\(/
54
- ".//#{node.value[1].accept(self)}"
128
+ is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
129
+ ".#{"//" unless is_direct}#{node.value[1].accept(self)}"
55
130
  else
56
- args = ['.'] + node.value[1..-1]
57
- "#{node.value.first}#{args.join(', ')})"
131
+ # xpath function call, let's marshal those arguments
132
+ args = ["."]
133
+ args += node.value[1..-1].map do |n|
134
+ n.is_a?(Nokogiri::CSS::Node) ? n.accept(self) : n
135
+ end
136
+ "#{node.value.first}#{args.join(",")})"
58
137
  end
59
138
  end
60
139
 
61
- def visit_not node
140
+ def visit_not(node)
62
141
  child = node.value.first
63
142
  if :ELEMENT_NAME == child.type
64
143
  "not(self::#{child.accept(self)})"
@@ -67,71 +146,63 @@ module Nokogiri
67
146
  end
68
147
  end
69
148
 
70
- def visit_id node
149
+ def visit_id(node)
71
150
  node.value.first =~ /^#(.*)$/
72
- "@id = '#{$1}'"
151
+ "@id='#{Regexp.last_match(1)}'"
73
152
  end
74
153
 
75
- def visit_attribute_condition node
76
- attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
77
- ''
78
- else
79
- '@'
80
- end
81
- attribute += node.value.first.accept(self)
82
-
83
- # Support non-standard css
84
- attribute.gsub!(/^@@/, '@')
85
-
86
- return attribute unless node.value.length == 3
154
+ def visit_attribute_condition(node)
155
+ attribute = node.value.first.accept(self)
156
+ return attribute if node.value.length == 1
87
157
 
88
158
  value = node.value.last
89
- value = "'#{value}'" if value !~ /^['"]/
159
+ value = "'#{value}'" unless /^['"]/.match?(value)
90
160
 
91
- if (value[0]==value[-1]) && %q{"'}.include?(value[0])
161
+ # quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
162
+ if (value[0] == value[-1]) && %q{"'}.include?(value[0])
92
163
  str_value = value[1..-2]
93
164
  if str_value.include?(value[0])
94
- value = 'concat("' + str_value.split('"', -1).join(%q{", '"', "}) + '", "")'
165
+ value = 'concat("' + str_value.split('"', -1).join(%q{",'"',"}) + '","")'
95
166
  end
96
167
  end
97
168
 
98
169
  case node.value[1]
99
170
  when :equal
100
- attribute + " = " + "#{value}"
171
+ attribute + "=" + value.to_s
101
172
  when :not_equal
102
- attribute + " != " + "#{value}"
173
+ attribute + "!=" + value.to_s
103
174
  when :substring_match
104
- "contains(#{attribute}, #{value})"
175
+ "contains(#{attribute},#{value})"
105
176
  when :prefix_match
106
- "starts-with(#{attribute}, #{value})"
177
+ "starts-with(#{attribute},#{value})"
107
178
  when :dash_match
108
- "#{attribute} = #{value} or starts-with(#{attribute}, concat(#{value}, '-'))"
179
+ "#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
109
180
  when :includes
110
- "contains(concat(\" \", #{attribute}, \" \"),concat(\" \", #{value}, \" \"))"
181
+ value = value[1..-2] # strip quotes
182
+ css_class(attribute, value)
111
183
  when :suffix_match
112
- "substring(#{attribute}, string-length(#{attribute}) - " +
113
- "string-length(#{value}) + 1, string-length(#{value})) = #{value}"
184
+ "substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
114
185
  else
115
- attribute + " #{node.value[1]} " + "#{value}"
186
+ attribute + " #{node.value[1]} " + value.to_s
116
187
  end
117
188
  end
118
189
 
119
- def visit_pseudo_class node
120
- if node.value.first.is_a?(Nokogiri::CSS::Node) and node.value.first.type == :FUNCTION
190
+ def visit_pseudo_class(node)
191
+ if node.value.first.is_a?(Nokogiri::CSS::Node) && (node.value.first.type == :FUNCTION)
121
192
  node.value.first.accept(self)
122
193
  else
123
- msg = :"visit_pseudo_class_#{node.value.first.gsub(/[(]/, '')}"
124
- return self.send(msg, node) if self.respond_to?(msg)
194
+ msg = :"visit_pseudo_class_#{node.value.first.gsub(/[(]/, "")}"
195
+ return send(msg, node) if respond_to?(msg)
125
196
 
126
197
  case node.value.first
127
- when "first" then "position() = 1"
128
- when "first-child" then "count(preceding-sibling::*) = 0"
129
- when "last" then "position() = last()"
130
- when "last-child" then "count(following-sibling::*) = 0"
131
- when "first-of-type" then "position() = 1"
132
- when "last-of-type" then "position() = last()"
133
- when "only-child" then "count(preceding-sibling::*) = 0 and count(following-sibling::*) = 0"
134
- when "only-of-type" then "last() = 1"
198
+ when "first" then "position()=1"
199
+ when "first-child" then "count(preceding-sibling::*)=0"
200
+ when "last" then "position()=last()"
201
+ when "last-child" then "count(following-sibling::*)=0"
202
+ when "first-of-type" then "position()=1"
203
+ when "last-of-type" then "position()=last()"
204
+ when "only-child" then "count(preceding-sibling::*)=0 and count(following-sibling::*)=0"
205
+ when "only-of-type" then "last()=1"
135
206
  when "empty" then "not(node())"
136
207
  when "parent" then "node()"
137
208
  when "root" then "not(parent::*)"
@@ -141,24 +212,24 @@ module Nokogiri
141
212
  end
142
213
  end
143
214
 
144
- def visit_class_condition node
145
- "contains(concat(' ', normalize-space(@class), ' '), ' #{node.value.first} ')"
215
+ def visit_class_condition(node)
216
+ css_class("@class", node.value.first)
146
217
  end
147
218
 
148
- def visit_combinator node
219
+ def visit_combinator(node)
149
220
  if is_of_type_pseudo_class?(node.value.last)
150
- "#{node.value.first.accept(self) if node.value.first}][#{node.value.last.accept(self)}"
221
+ "#{node.value.first&.accept(self)}][#{node.value.last.accept(self)}"
151
222
  else
152
- "#{node.value.first.accept(self) if node.value.first} and #{node.value.last.accept(self)}"
223
+ "#{node.value.first&.accept(self)} and #{node.value.last.accept(self)}"
153
224
  end
154
225
  end
155
226
 
156
227
  {
157
- 'direct_adjacent_selector' => "/following-sibling::*[1]/self::",
158
- 'following_selector' => "/following-sibling::",
159
- 'descendant_selector' => '//',
160
- 'child_selector' => '/',
161
- }.each do |k,v|
228
+ "direct_adjacent_selector" => "/following-sibling::*[1]/self::",
229
+ "following_selector" => "/following-sibling::",
230
+ "descendant_selector" => "//",
231
+ "child_selector" => "/",
232
+ }.each do |k, v|
162
233
  class_eval %{
163
234
  def visit_#{k} node
164
235
  "\#{node.value.first.accept(self) if node.value.first}#{v}\#{node.value.last.accept(self)}"
@@ -166,43 +237,69 @@ module Nokogiri
166
237
  }
167
238
  end
168
239
 
169
- def visit_conditional_selector node
170
- node.value.first.accept(self) + '[' +
171
- node.value.last.accept(self) + ']'
240
+ def visit_conditional_selector(node)
241
+ node.value.first.accept(self) + "[" +
242
+ node.value.last.accept(self) + "]"
243
+ end
244
+
245
+ def visit_element_name(node)
246
+ if @doctype == DoctypeConfig::HTML5 && html5_element_name_needs_namespace_handling(node)
247
+ # HTML5 has namespaces that should be ignored in CSS queries
248
+ # https://github.com/sparklemotion/nokogiri/issues/2376
249
+ if @builtins == BuiltinsConfig::ALWAYS || (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)
250
+ if WILDCARD_NAMESPACES
251
+ "*:#{node.value.first}"
252
+ else
253
+ "*[nokogiri-builtin:local-name-is('#{node.value.first}')]"
254
+ end
255
+ else
256
+ "*[local-name()='#{node.value.first}']"
257
+ end
258
+ else
259
+ node.value.first
260
+ end
172
261
  end
173
262
 
174
- def visit_element_name node
175
- node.value.first
263
+ def visit_attrib_name(node)
264
+ "@#{node.value.first}"
176
265
  end
177
266
 
178
- def accept node
267
+ def accept(node)
179
268
  node.accept(self)
180
269
  end
181
270
 
182
- private
183
- def nth node, options={}
271
+ private
272
+
273
+ def html5_element_name_needs_namespace_handling(node)
274
+ # if this is the wildcard selector "*", use it as normal
275
+ node.value.first != "*" &&
276
+ # if there is already a namespace (i.e., it is a prefixed QName), use it as normal
277
+ !node.value.first.include?(":")
278
+ end
279
+
280
+ def nth(node, options = {})
184
281
  raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
185
282
 
186
- a, b = read_a_and_positive_b node.value
283
+ a, b = read_a_and_positive_b(node.value)
187
284
  position = if options[:child]
188
- options[:last] ? "(count(following-sibling::*) + 1)" : "(count(preceding-sibling::*) + 1)"
285
+ options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
189
286
  else
190
287
  options[:last] ? "(last()-position()+1)" : "position()"
191
288
  end
192
289
 
193
290
  if b.zero?
194
- "(#{position} mod #{a}) = 0"
291
+ "(#{position} mod #{a})=0"
195
292
  else
196
293
  compare = a < 0 ? "<=" : ">="
197
294
  if a.abs == 1
198
- "#{position} #{compare} #{b}"
295
+ "#{position}#{compare}#{b}"
199
296
  else
200
- "(#{position} #{compare} #{b}) and (((#{position}-#{b}) mod #{a.abs}) = 0)"
297
+ "(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
201
298
  end
202
299
  end
203
300
  end
204
301
 
205
- def read_a_and_positive_b values
302
+ def read_a_and_positive_b(values)
206
303
  op = values[2]
207
304
  if op == "+"
208
305
  a = values[0].to_i
@@ -216,15 +313,45 @@ module Nokogiri
216
313
  [a, b]
217
314
  end
218
315
 
219
- def is_of_type_pseudo_class? node
220
- if node.type==:PSEUDO_CLASS
221
- if node.value[0].is_a?(Nokogiri::CSS::Node) and node.value[0].type == :FUNCTION
316
+ def is_of_type_pseudo_class?(node) # rubocop:disable Naming/PredicateName
317
+ if node.type == :PSEUDO_CLASS
318
+ if node.value[0].is_a?(Nokogiri::CSS::Node) && (node.value[0].type == :FUNCTION)
222
319
  node.value[0].value[0]
223
320
  else
224
321
  node.value[0]
225
322
  end =~ /(nth|first|last|only)-of-type(\()?/
226
323
  end
227
324
  end
325
+
326
+ def css_class(hay, needle)
327
+ if @builtins == BuiltinsConfig::ALWAYS || (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)
328
+ # use the builtin implementation
329
+ "nokogiri-builtin:css-class(#{hay},'#{needle}')"
330
+ else
331
+ # use only ordinary xpath functions
332
+ "contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
333
+ end
334
+ end
335
+ end
336
+
337
+ module XPathVisitorAlwaysUseBuiltins # :nodoc:
338
+ def self.new
339
+ warn(
340
+ "Nokogiri::CSS::XPathVisitorAlwaysUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
341
+ { uplevel: 1 },
342
+ )
343
+ XPathVisitor.new(builtins: :always)
344
+ end
345
+ end
346
+
347
+ module XPathVisitorOptimallyUseBuiltins # :nodoc:
348
+ def self.new
349
+ warn(
350
+ "Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
351
+ { uplevel: 1 },
352
+ )
353
+ XPathVisitor.new(builtins: :optimal)
354
+ end
228
355
  end
229
356
  end
230
357
  end
data/lib/nokogiri/css.rb CHANGED
@@ -1,27 +1,60 @@
1
- require 'nokogiri/css/node'
2
- require 'nokogiri/css/xpath_visitor'
3
- x = $-w
4
- $-w = false
5
- require 'nokogiri/css/parser'
6
- $-w = x
7
-
8
- require 'nokogiri/css/tokenizer'
9
- require 'nokogiri/css/syntax_error'
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
10
3
 
11
4
  module Nokogiri
5
+ # Translate a CSS selector into an XPath 1.0 query
12
6
  module CSS
13
7
  class << self
14
- ###
15
- # Parse this CSS selector in +selector+. Returns an AST.
16
- def parse selector
17
- Parser.new.parse selector
8
+ # TODO: Deprecate this method ahead of 2.0 and delete it in 2.0.
9
+ # It is not used by Nokogiri and shouldn't be part of the public API.
10
+ def parse(selector) # :nodoc:
11
+ Parser.new.parse(selector)
18
12
  end
19
13
 
20
- ###
21
- # Get the XPath for +selector+.
22
- def xpath_for selector, options={}
23
- Parser.new(options[:ns] || {}).xpath_for selector, options
14
+ # :call-seq:
15
+ # xpath_for(selector) → String
16
+ # xpath_for(selector [, prefix:] [, visitor:] [, ns:]) → String
17
+ #
18
+ # Translate a CSS selector to the equivalent XPath query.
19
+ #
20
+ # [Parameters]
21
+ # - +selector+ (String) The CSS selector to be translated into XPath
22
+ #
23
+ # - +prefix:+ (String)
24
+ #
25
+ # The XPath prefix for the query, see Nokogiri::XML::XPath for some options. Default is
26
+ # +XML::XPath::GLOBAL_SEARCH_PREFIX+.
27
+ #
28
+ # - +visitor:+ (Nokogiri::CSS::XPathVisitor)
29
+ #
30
+ # The visitor class to use to transform the AST into XPath. Default is
31
+ # +Nokogiri::CSS::XPathVisitor.new+.
32
+ #
33
+ # - +ns:+ (Hash<String ⇒ String>)
34
+ #
35
+ # The namespaces that are referenced in the query, if any. This is a hash where the keys are
36
+ # the namespace prefix and the values are the namespace URIs. Default is an empty Hash.
37
+ #
38
+ # [Returns] (String) The equivalent XPath query for +selector+
39
+ #
40
+ # 💡 Note that translated queries are cached for performance concerns.
41
+ #
42
+ def xpath_for(selector, options = {})
43
+ prefix = options.fetch(:prefix, Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX)
44
+ visitor = options.fetch(:visitor) { Nokogiri::CSS::XPathVisitor.new }
45
+ ns = options.fetch(:ns, {})
46
+ Parser.new(ns).xpath_for(selector, prefix, visitor)
24
47
  end
25
48
  end
26
49
  end
27
50
  end
51
+
52
+ require_relative "css/node"
53
+ require_relative "css/xpath_visitor"
54
+ x = $-w
55
+ $-w = false
56
+ require_relative "css/parser"
57
+ $-w = x
58
+
59
+ require_relative "css/tokenizer"
60
+ require_relative "css/syntax_error"
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module Decorators
3
5
  ###
@@ -9,21 +11,21 @@ module Nokogiri
9
11
 
10
12
  ###
11
13
  # look for node with +name+. See Nokogiri.Slop
12
- def method_missing name, *args, &block
14
+ def method_missing(name, *args, &block)
13
15
  if args.empty?
14
- list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}")
15
- elsif args.first.is_a? Hash
16
+ list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, "")}")
17
+ elsif args.first.is_a?(Hash)
16
18
  hash = args.first
17
19
  if hash[:css]
18
20
  list = css("#{name}#{hash[:css]}")
19
21
  elsif hash[:xpath]
20
- conds = Array(hash[:xpath]).join(' and ')
22
+ conds = Array(hash[:xpath]).join(" and ")
21
23
  list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
22
24
  end
23
25
  else
24
26
  CSS::Parser.without_cache do
25
27
  list = xpath(
26
- *CSS.xpath_for("#{name}#{args.first}", :prefix => XPATH_PREFIX)
28
+ *CSS.xpath_for("#{name}#{args.first}", prefix: XPATH_PREFIX)
27
29
  )
28
30
  end
29
31
  end
@@ -32,8 +34,8 @@ module Nokogiri
32
34
  list.length == 1 ? list.first : list
33
35
  end
34
36
 
35
- def respond_to_missing? name, include_private = false
36
- list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}")
37
+ def respond_to_missing?(name, include_private = false)
38
+ list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, "")}")
37
39
 
38
40
  !list.empty?
39
41
  end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ # load the C or Java extension
4
+ begin
5
+ # native precompiled gems package shared libraries in <gem_dir>/lib/nokogiri/<ruby_version>
6
+ ::RUBY_VERSION =~ /(\d+\.\d+)/
7
+ require_relative "#{Regexp.last_match(1)}/nokogiri"
8
+ rescue LoadError => e
9
+ if /GLIBC/.match?(e.message)
10
+ warn(<<~EOM)
11
+
12
+ ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system with glibc < 2.17:
13
+
14
+ #{e.message}
15
+
16
+ If that's the case, then please install Nokogiri via the `ruby` platform gem:
17
+ gem install nokogiri --platform=ruby
18
+ or:
19
+ bundle config set force_ruby_platform true
20
+
21
+ Please visit https://nokogiri.org/tutorials/installing_nokogiri.html for more help.
22
+
23
+ EOM
24
+ raise e
25
+ end
26
+
27
+ # use "require" instead of "require_relative" because non-native gems will place C extension files
28
+ # in Gem::BasicSpecification#extension_dir after compilation (during normal installation), which
29
+ # is in $LOAD_PATH but not necessarily relative to this file (see #2300)
30
+ require "nokogiri/nokogiri"
31
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module Gumbo
5
+ # The default maximum number of attributes per element.
6
+ DEFAULT_MAX_ATTRIBUTES = 400
7
+
8
+ # The default maximum number of errors for parsing a document or a fragment.
9
+ DEFAULT_MAX_ERRORS = 0
10
+
11
+ # The default maximum depth of the DOM tree produced by parsing a document
12
+ # or fragment.
13
+ DEFAULT_MAX_TREE_DEPTH = 400
14
+ end
15
+ end