nokogiri 1.18.0.rc1-aarch64-linux-gnu

Sign up to get free protection for your applications and to get access to all the features.
Files changed (203) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +293 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +42 -0
  8. data/ext/nokogiri/depend +38 -0
  9. data/ext/nokogiri/extconf.rb +1173 -0
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/include/libexslt/exslt.h +108 -0
  18. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  19. data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
  20. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
  21. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  22. data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
  23. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
  24. data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
  25. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  26. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  27. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  28. data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
  29. data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
  30. data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
  31. data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
  32. data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
  33. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  34. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
  35. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
  36. data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
  37. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
  38. data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
  39. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
  40. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
  41. data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
  42. data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
  43. data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
  44. data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
  45. data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
  46. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
  47. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  48. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
  65. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
  66. data/ext/nokogiri/include/libxslt/attributes.h +39 -0
  67. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  68. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  69. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  70. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  71. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  72. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  73. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  74. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  75. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  76. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  77. data/ext/nokogiri/include/libxslt/security.h +104 -0
  78. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  79. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  80. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  81. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  82. data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
  83. data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
  84. data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
  85. data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
  86. data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
  87. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  88. data/ext/nokogiri/nokogiri.c +294 -0
  89. data/ext/nokogiri/nokogiri.h +238 -0
  90. data/ext/nokogiri/test_global_handlers.c +40 -0
  91. data/ext/nokogiri/xml_attr.c +103 -0
  92. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  93. data/ext/nokogiri/xml_cdata.c +62 -0
  94. data/ext/nokogiri/xml_comment.c +57 -0
  95. data/ext/nokogiri/xml_document.c +784 -0
  96. data/ext/nokogiri/xml_document_fragment.c +29 -0
  97. data/ext/nokogiri/xml_dtd.c +208 -0
  98. data/ext/nokogiri/xml_element_content.c +131 -0
  99. data/ext/nokogiri/xml_element_decl.c +69 -0
  100. data/ext/nokogiri/xml_encoding_handler.c +112 -0
  101. data/ext/nokogiri/xml_entity_decl.c +112 -0
  102. data/ext/nokogiri/xml_entity_reference.c +50 -0
  103. data/ext/nokogiri/xml_namespace.c +181 -0
  104. data/ext/nokogiri/xml_node.c +2459 -0
  105. data/ext/nokogiri/xml_node_set.c +518 -0
  106. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  107. data/ext/nokogiri/xml_reader.c +777 -0
  108. data/ext/nokogiri/xml_relax_ng.c +149 -0
  109. data/ext/nokogiri/xml_sax_parser.c +403 -0
  110. data/ext/nokogiri/xml_sax_parser_context.c +390 -0
  111. data/ext/nokogiri/xml_sax_push_parser.c +206 -0
  112. data/ext/nokogiri/xml_schema.c +226 -0
  113. data/ext/nokogiri/xml_syntax_error.c +93 -0
  114. data/ext/nokogiri/xml_text.c +59 -0
  115. data/ext/nokogiri/xml_xpath_context.c +502 -0
  116. data/ext/nokogiri/xslt_stylesheet.c +421 -0
  117. data/gumbo-parser/CHANGES.md +63 -0
  118. data/gumbo-parser/Makefile +129 -0
  119. data/gumbo-parser/THANKS +27 -0
  120. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  121. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  122. data/lib/nokogiri/3.3/nokogiri.so +0 -0
  123. data/lib/nokogiri/3.4/nokogiri.so +0 -0
  124. data/lib/nokogiri/class_resolver.rb +67 -0
  125. data/lib/nokogiri/css/node.rb +58 -0
  126. data/lib/nokogiri/css/parser.rb +772 -0
  127. data/lib/nokogiri/css/parser.y +277 -0
  128. data/lib/nokogiri/css/parser_extras.rb +36 -0
  129. data/lib/nokogiri/css/selector_cache.rb +38 -0
  130. data/lib/nokogiri/css/syntax_error.rb +9 -0
  131. data/lib/nokogiri/css/tokenizer.rb +155 -0
  132. data/lib/nokogiri/css/tokenizer.rex +57 -0
  133. data/lib/nokogiri/css/xpath_visitor.rb +375 -0
  134. data/lib/nokogiri/css.rb +132 -0
  135. data/lib/nokogiri/decorators/slop.rb +42 -0
  136. data/lib/nokogiri/encoding_handler.rb +57 -0
  137. data/lib/nokogiri/extension.rb +32 -0
  138. data/lib/nokogiri/gumbo.rb +15 -0
  139. data/lib/nokogiri/html.rb +48 -0
  140. data/lib/nokogiri/html4/builder.rb +37 -0
  141. data/lib/nokogiri/html4/document.rb +235 -0
  142. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  143. data/lib/nokogiri/html4/element_description.rb +25 -0
  144. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  145. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  146. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  147. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  148. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  149. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  150. data/lib/nokogiri/html4.rb +42 -0
  151. data/lib/nokogiri/html5/builder.rb +40 -0
  152. data/lib/nokogiri/html5/document.rb +199 -0
  153. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  154. data/lib/nokogiri/html5/node.rb +103 -0
  155. data/lib/nokogiri/html5.rb +368 -0
  156. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  157. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  158. data/lib/nokogiri/syntax_error.rb +6 -0
  159. data/lib/nokogiri/version/constant.rb +6 -0
  160. data/lib/nokogiri/version/info.rb +224 -0
  161. data/lib/nokogiri/version.rb +4 -0
  162. data/lib/nokogiri/xml/attr.rb +66 -0
  163. data/lib/nokogiri/xml/attribute_decl.rb +22 -0
  164. data/lib/nokogiri/xml/builder.rb +494 -0
  165. data/lib/nokogiri/xml/cdata.rb +13 -0
  166. data/lib/nokogiri/xml/character_data.rb +9 -0
  167. data/lib/nokogiri/xml/document.rb +514 -0
  168. data/lib/nokogiri/xml/document_fragment.rb +276 -0
  169. data/lib/nokogiri/xml/dtd.rb +34 -0
  170. data/lib/nokogiri/xml/element_content.rb +46 -0
  171. data/lib/nokogiri/xml/element_decl.rb +17 -0
  172. data/lib/nokogiri/xml/entity_decl.rb +23 -0
  173. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  174. data/lib/nokogiri/xml/namespace.rb +57 -0
  175. data/lib/nokogiri/xml/node/save_options.rb +76 -0
  176. data/lib/nokogiri/xml/node.rb +1650 -0
  177. data/lib/nokogiri/xml/node_set.rb +449 -0
  178. data/lib/nokogiri/xml/notation.rb +19 -0
  179. data/lib/nokogiri/xml/parse_options.rb +213 -0
  180. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  181. data/lib/nokogiri/xml/pp/node.rb +73 -0
  182. data/lib/nokogiri/xml/pp.rb +4 -0
  183. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  184. data/lib/nokogiri/xml/reader.rb +139 -0
  185. data/lib/nokogiri/xml/relax_ng.rb +75 -0
  186. data/lib/nokogiri/xml/sax/document.rb +258 -0
  187. data/lib/nokogiri/xml/sax/parser.rb +199 -0
  188. data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
  189. data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
  190. data/lib/nokogiri/xml/sax.rb +54 -0
  191. data/lib/nokogiri/xml/schema.rb +140 -0
  192. data/lib/nokogiri/xml/searchable.rb +297 -0
  193. data/lib/nokogiri/xml/syntax_error.rb +94 -0
  194. data/lib/nokogiri/xml/text.rb +11 -0
  195. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  196. data/lib/nokogiri/xml/xpath.rb +21 -0
  197. data/lib/nokogiri/xml/xpath_context.rb +49 -0
  198. data/lib/nokogiri/xml.rb +65 -0
  199. data/lib/nokogiri/xslt/stylesheet.rb +49 -0
  200. data/lib/nokogiri/xslt.rb +129 -0
  201. data/lib/nokogiri.rb +128 -0
  202. data/lib/xsd/xmlparser/nokogiri.rb +105 -0
  203. metadata +324 -0
@@ -0,0 +1,375 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ module Nokogiri
5
+ module CSS
6
+ # When translating CSS selectors to XPath queries with Nokogiri::CSS.xpath_for, the XPathVisitor
7
+ # class allows for changing some of the behaviors related to builtin xpath functions and quirks
8
+ # of HTML5.
9
+ class XPathVisitor
10
+ WILDCARD_NAMESPACES = Nokogiri.libxml2_patches.include?("0009-allow-wildcard-namespaces.patch") # :nodoc:
11
+
12
+ # Enum to direct XPathVisitor when to use Nokogiri builtin XPath functions.
13
module BuiltinsConfig
  # Never use Nokogiri builtin functions, always generate vanilla XPath 1.0 queries. This is
  # the default when calling Nokogiri::CSS.xpath_for directly.
  NEVER = :never

  # Always use Nokogiri builtin functions whenever possible. This is probably only useful for testing.
  ALWAYS = :always

  # Only use Nokogiri builtin functions when they will be faster than vanilla XPath. This is
  # the behavior chosen when searching for CSS selectors on a Nokogiri document, fragment, or
  # node.
  OPTIMAL = :optimal

  # :nodoc: array of values for validation. Frozen so that no caller can widen or narrow the
  # set of accepted values at runtime.
  VALUES = [NEVER, ALWAYS, OPTIMAL].freeze
end
29
+
30
+ # Enum to direct XPathVisitor when to tweak the XPath query to suit the nature of the document
31
+ # being searched. Note that searches for CSS selectors from a Nokogiri document, fragment, or
32
+ # node will choose the correct option automatically.
33
module DoctypeConfig
  # The document being searched is an XML document. This is the default.
  XML = :xml

  # The document being searched is an HTML4 document.
  HTML4 = :html4

  # The document being searched is an HTML5 document.
  HTML5 = :html5

  # :nodoc: array of values for validation. Frozen so that no caller can widen or narrow the
  # set of accepted values at runtime.
  VALUES = [XML, HTML4, HTML5].freeze
end
46
+
47
+ # The visitor configuration set via the +builtins:+ keyword argument to XPathVisitor.new.
48
+ attr_reader :builtins
49
+
50
+ # The visitor configuration set via the +doctype:+ keyword argument to XPathVisitor.new.
51
+ attr_reader :doctype
52
+
53
+ # The visitor configuration set via the +prefix:+ keyword argument to XPathVisitor.new.
54
+ attr_reader :prefix
55
+
56
+ # The visitor configuration set via the +namespaces:+ keyword argument to XPathVisitor.new.
57
+ attr_reader :namespaces
58
+
59
+ # :call-seq:
60
+ # new() → XPathVisitor
61
+ # new(builtins:, doctype:) → XPathVisitor
62
+ #
63
+ # [Parameters]
64
+ # - +builtins:+ (BuiltinsConfig) Determine when to use Nokogiri's built-in xpath functions for performance improvements.
65
+ # - +doctype:+ (DoctypeConfig) Make document-type-specific accommodations for CSS queries.
66
+ #
67
+ # [Returns] XPathVisitor
68
+ #
69
def initialize(
  builtins: BuiltinsConfig::NEVER,
  doctype: DoctypeConfig::XML,
  prefix: Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX,
  namespaces: nil
)
  # Validate both enum-style arguments before any state is stored.
  builtins_ok = BuiltinsConfig::VALUES.include?(builtins)
  raise(ArgumentError, "Invalid values #{builtins.inspect} for builtins: keyword parameter") unless builtins_ok

  doctype_ok = DoctypeConfig::VALUES.include?(doctype)
  raise(ArgumentError, "Invalid values #{doctype.inspect} for doctype: keyword parameter") unless doctype_ok

  # prefix and namespaces are stored as given, without validation.
  @builtins, @doctype = builtins, doctype
  @prefix, @namespaces = prefix, namespaces
end
87
+
88
+ # :call-seq: config() → Hash
89
+ #
90
+ # [Returns]
91
+ # a Hash representing the configuration of the XPathVisitor, suitable for use as
92
+ # part of the CSS cache key.
93
def config
  # One entry per constructor keyword, in constructor order.
  {
    builtins: @builtins,
    doctype: @doctype,
    prefix: @prefix,
    namespaces: @namespaces,
  }
end
96
+
97
+ # :stopdoc:
98
# Translate a FUNCTION node into an XPath fragment. node.value.first is the function
# name including its opening parenthesis (e.g. "eq("); node.value[1] is its first argument.
def visit_function(node)
  function = node.value.first

  # If this visitor responds to visit_function_<name>, that method takes over entirely.
  handler = :"visit_function_#{function.delete("(")}"
  return send(handler, node) if respond_to?(handler)

  argument = node.value[1]
  # True when the argument is a parsed an+b expression rather than a plain value.
  nth_expression = -> { argument.is_a?(Nokogiri::CSS::Node) && (argument.type == :NTH) }

  case function
  when /^text\(/ then "child::text()"
  when /^self\(/ then "self::#{argument}"
  when /^eq\(/ then "position()=#{argument}"
  when /^(nth|nth-of-type)\(/
    nth_expression.call ? nth(argument) : "position()=#{argument}"
  when /^nth-child\(/
    if nth_expression.call
      nth(argument, child: true)
    else
      "count(preceding-sibling::*)=#{argument.to_i - 1}"
    end
  when /^nth-last-of-type\(/
    if nth_expression.call
      nth(argument, last: true)
    else
      offset = argument.to_i - 1
      offset == 0 ? "position()=last()" : "position()=last()-#{offset}"
    end
  when /^nth-last-child\(/
    if nth_expression.call
      nth(argument, last: true, child: true)
    else
      "count(following-sibling::*)=#{argument.to_i - 1}"
    end
  when /^(first|first-of-type)\(/ then "position()=1"
  when /^(last|last-of-type)\(/ then "position()=last()"
  when /^contains\(/ then "contains(.,#{argument})"
  when /^gt\(/ then "position()>#{argument}"
  when /^only-child\(/ then "last()=1"
  when /^comment\(/ then "comment()"
  when /^has\(/
    # A nil first value marks a selector that starts with a combinator,
    # e.g. "has(> a)", "has(~ a)", "has(+ a)"; those must not get the "//" step.
    direct = argument.value[0].nil?
    "#{direct ? "." : ".//"}#{argument.accept(self)}"
  else
    validate_xpath_function_name(function)

    # xpath function call: the context node goes first, then the marshalled arguments
    arguments = node.value[1..-1].map do |arg|
      arg.is_a?(Nokogiri::CSS::Node) ? arg.accept(self) : arg
    end
    "nokogiri:#{function}#{[".", *arguments].join(",")})"
  end
end
160
+
161
# Negate the first child of +node+. A bare element name has to be tested on the
# self axis; any other child is already a predicate expression.
def visit_not(node)
  child = node.value.first
  wrapper = child.type == :ELEMENT_NAME ? "self::" : ""
  "not(#{wrapper}#{child.accept(self)})"
end
169
+
170
# Turn an ID token such as "#main" into an @id comparison. A token that does not
# start with "#" produces an empty id.
def visit_id(node)
  id = node.value.first[/^#(.*)$/, 1]
  "@id='#{id}'"
end
174
+
175
# Translate an attribute condition. node.value is either [attribute] (presence test)
# or [attribute, operator, value].
def visit_attribute_condition(node)
  attribute = node.value.first.accept(self)
  return attribute if node.value.length == 1

  operator = node.value[1]
  value = node.value.last
  # Bare (unquoted) values get single quotes so they become XPath string literals.
  value = "'#{value}'" unless /^['"]/.match?(value)

  # quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
  quote = value[0]
  if (quote == value[-1]) && %q{"'}.include?(quote)
    inner = value[1..-2]
    # XPath 1.0 has no escape syntax, so a literal containing its own quote
    # character is rebuilt with concat() out of double-quoted pieces.
    if inner.include?(quote)
      value = 'concat("' + inner.split('"', -1).join(%q{",'"',"}) + '","")'
    end
  end

  case operator
  when :equal then "#{attribute}=#{value}"
  when :not_equal then "#{attribute}!=#{value}"
  when :substring_match then "contains(#{attribute},#{value})"
  when :prefix_match then "starts-with(#{attribute},#{value})"
  when :dash_match
    "#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
  when :includes
    css_class(attribute, value[1..-2]) # strip quotes
  when :suffix_match
    "substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
  else
    "#{attribute} #{operator} #{value}"
  end
end
210
+
211
# Translate a pseudo-class. When the pseudo-class wraps a FUNCTION node, that node
# produces the XPath; otherwise the name is looked up among the known pseudo-classes
# and, failing that, emitted as a call to a custom "nokogiri:" XPath function.
def visit_pseudo_class(node)
  pseudo = node.value.first
  return pseudo.accept(self) if pseudo.is_a?(Nokogiri::CSS::Node) && (pseudo.type == :FUNCTION)

  # If this visitor responds to visit_pseudo_class_<name>, that method takes over entirely.
  handler = :"visit_pseudo_class_#{pseudo.delete("(")}"
  return send(handler, node) if respond_to?(handler)

  known = {
    "first" => "position()=1",
    "first-child" => "count(preceding-sibling::*)=0",
    "last" => "position()=last()",
    "last-child" => "count(following-sibling::*)=0",
    "first-of-type" => "position()=1",
    "last-of-type" => "position()=last()",
    "only-child" => "count(preceding-sibling::*)=0 and count(following-sibling::*)=0",
    "only-of-type" => "last()=1",
    "empty" => "not(node())",
    "parent" => "node()",
    "root" => "not(parent::*)",
  }[pseudo]
  return known if known

  validate_xpath_function_name(pseudo)
  "nokogiri:#{pseudo}(.)"
end
236
+
237
# A ".name" condition is a membership test against the @class attribute.
def visit_class_condition(node)
  class_name = node.value.first
  css_class("@class", class_name)
end
240
+
241
# Join two conditions that apply to the same element. An *-of-type pseudo-class
# is placed in its own predicate ("][") rather than and-ed into the current one.
def visit_combinator(node)
  joiner = is_of_type_pseudo_class?(node.value.last) ? "][" : " and "
  left = node.value.first&.accept(self)
  right = node.value.last.accept(self)
  "#{left}#{joiner}#{right}"
end
248
+
249
# Generate one visit_<kind> method per selector kind. Each joins the XPath of the
# left-hand node (omitted when absent) and the right-hand node with the axis
# step for that kind.
{
  "direct_adjacent_selector" => "/following-sibling::*[1]/self::",
  "following_selector" => "/following-sibling::",
  "descendant_selector" => "//",
  "child_selector" => "/",
}.each do |selector_kind, axis_step|
  define_method(:"visit_#{selector_kind}") do |node|
    left = node.value.first
    "#{left.accept(self) if left}#{axis_step}#{node.value.last.accept(self)}"
  end
end
261
+
262
# Render "<selector>[<condition>]": the first value is the element being
# selected, the last value is the predicate applied to it.
def visit_conditional_selector(node)
  target = node.value.first.accept(self)
  predicate = node.value.last.accept(self)
  "#{target}[#{predicate}]"
end
266
+
267
# Render an element name test, taking the doctype, builtins and namespace
# configuration of this visitor into account.
def visit_element_name(node)
  if @doctype == DoctypeConfig::HTML5 && html5_element_name_needs_namespace_handling(node)
    # HTML5 has namespaces that should be ignored in CSS queries
    # https://github.com/sparklemotion/nokogiri/issues/2376
    local_name = node.value.first
    use_builtins = @builtins == BuiltinsConfig::ALWAYS ||
      (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)

    return "*[local-name()='#{local_name}']" unless use_builtins
    return "*:#{local_name}" if WILDCARD_NAMESPACES

    return "*[nokogiri-builtin:local-name-is('#{local_name}')]"
  end

  if node.value.length == 2 # has a namespace prefix
    # an empty (nil) prefix means the bare local name is used
    return node.value.first.nil? ? node.value.last : node.value.join(":")
  end

  # apply the default namespace if it's declared
  @namespaces&.key?("xmlns") ? "xmlns:#{node.value.first}" : node.value.first
end
292
+
293
# An attribute name becomes an XPath attribute-axis step.
def visit_attrib_name(node)
  name = node.value.first
  "@#{name}"
end
296
+
297
# Entry point: hand this visitor to +node+ and return whatever the node's
# own accept method returns.
def accept(node)
  visitor = self
  node.accept(visitor)
end
300
+
301
+ private
302
+
303
# Reject function names that start with a hyphen; any other name is accepted.
# Raises Nokogiri::CSS::SyntaxError for a rejected name, returns nil otherwise.
def validate_xpath_function_name(name)
  return unless name.start_with?("-")

  raise Nokogiri::CSS::SyntaxError, "Invalid XPath function name '#{name}'"
end
308
+
309
# True only for an unprefixed, non-wildcard element name.
def html5_element_name_needs_namespace_handling(node)
  # if there is already a namespace (i.e., it is a prefixed QName), use it as normal
  return false unless node.value.length == 1

  # if this is the wildcard selector "*", use it as normal
  node.value.first != "*"
end
315
+
316
# Build the XPath predicate for an an+b expression.
#
# options[:child] counts siblings instead of using position(); options[:last]
# counts from the end instead of from the start.
def nth(node, options = {})
  tokens = node.value
  unless tokens.size == 4
    raise(ArgumentError, "expected an+b node to contain 4 tokens, but is #{tokens.inspect}")
  end

  a, b = read_a_and_positive_b(tokens)

  # The 1-based index of the context node, expressed in XPath.
  position =
    if options[:child]
      sibling_axis = options[:last] ? "following-sibling" : "preceding-sibling"
      "(count(#{sibling_axis}::*)+1)"
    elsif options[:last]
      "(last()-position()+1)"
    else
      "position()"
    end

  return "(#{position} mod #{a})=0" if b.zero?

  comparison = a < 0 ? "<=" : ">="
  return "#{position}#{comparison}#{b}" if a.abs == 1

  "(#{position}#{comparison}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
end
339
+
340
# Read the coefficients out of the four an+b tokens [a, "n", operator, b].
# With "+" the pair is returned as-is; with "-" the offset is re-expressed
# as a - (b mod a). Any other operator raises ArgumentError.
def read_a_and_positive_b(values)
  operator = values[2].strip

  case operator
  when "+"
    [values[0].to_i, values[3].to_i]
  when "-"
    a = values[0].to_i
    [a, a - (values[3].to_i % a)]
  else
    raise ArgumentError, "expected an+b node to have either + or - as the operator, but is #{operator.inspect}"
  end
end
353
+
354
# Truthy (the match offset) when +node+ is a PSEUDO_CLASS whose name contains
# nth-, first-, last- or only-of-type; nil otherwise.
def is_of_type_pseudo_class?(node) # rubocop:disable Naming/PredicateName
  return unless node.type == :PSEUDO_CLASS

  head = node.value[0]
  # A functional pseudo-class keeps its name inside the nested FUNCTION node.
  name =
    if head.is_a?(Nokogiri::CSS::Node) && (head.type == :FUNCTION)
      head.value[0]
    else
      head
    end
  name =~ /(nth|first|last|only)-of-type(\()?/
end
363
+
364
# Build the test "does the space-separated list +hay+ contain the token +needle+".
def css_class(hay, needle)
  use_builtin = @builtins == BuiltinsConfig::ALWAYS ||
    (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)

  # use the builtin implementation
  return "nokogiri-builtin:css-class(#{hay},'#{needle}')" if use_builtin

  # use only ordinary xpath functions
  "contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
end
373
+ end
374
+ end
375
+ end
@@ -0,0 +1,132 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
module Nokogiri
  # Translate a CSS selector into an XPath 1.0 query
  module CSS
    class << self
      # TODO: Deprecate this method ahead of 2.0 and delete it in 2.0.
      # It is not used by Nokogiri and shouldn't be part of the public API.
      def parse(selector) # :nodoc:
        warn("Nokogiri::CSS.parse is deprecated and will be removed in a future version of Nokogiri. Use Nokogiri::CSS::Parser#parse instead.", uplevel: 1, category: :deprecated)
        Parser.new.parse(selector)
      end

      # :call-seq:
      #   xpath_for(selector_list) → Array<String>
      #   xpath_for(selector_list [, prefix:] [, ns:] [, visitor:] [, cache:]) → Array<String>
      #
      # Translate a CSS selector list to the equivalent XPath expressions.
      #
      # 💡 Note that translated queries are cached by default for performance concerns.
      #
      # ⚠ Users should prefer Nokogiri::XML::Searchable#css, which is mixed into all document and
      # node classes, for querying documents with CSS selectors. This method is the underlying
      # mechanism used by XML::Searchable and is provided solely for advanced users to translate
      # \CSS selectors to XPath directly.
      #
      # Also see Nokogiri::XML::Searchable#css for documentation on supported CSS selector features,
      # some extended syntax that Nokogiri supports, and advanced CSS features like pseudo-class
      # functions.
      #
      # [Parameters]
      # - +selector_list+ (String)
      #
      #   The CSS selector to be translated into XPath. This is always a String, but that string
      #   value may be a {selector list}[https://www.w3.org/TR/selectors-4/#grouping] (see
      #   examples).
      #
      # [Keyword arguments]
      # - +prefix:+ (String)
      #
      #   The XPath expression prefix which determines the search context. See Nokogiri::XML::XPath
      #   for standard options. Default is +XPath::GLOBAL_SEARCH_PREFIX+.
      #
      # - +ns:+ (Hash<String ⇒ String>, nil)
      #
      #   Namespaces that are referenced in the query, if any. This is a hash where the keys are the
      #   namespace prefix and the values are the namespace URIs. Default is +nil+ indicating an
      #   empty set of namespaces.
      #
      # - +visitor:+ (Nokogiri::CSS::XPathVisitor)
      #
      #   Use this XPathVisitor object to transform the CSS AST into XPath expressions. See
      #   Nokogiri::CSS::XPathVisitor for more information on some of the complex behavior that can
      #   be customized for your document type. Default is +Nokogiri::CSS::XPathVisitor.new+.
      #
      #   ⚠ Note that this option is mutually exclusive with +prefix+ and +ns+. If +visitor+ is
      #   provided, +prefix+ and +ns+ must not be present.
      #
      # - +cache:+ (Boolean)
      #
      #   Whether to use the SelectorCache for the translated query to ensure that repeated queries
      #   don't incur the overhead of re-parsing the selector. Default is +true+.
      #
      # [Returns] (Array<String>) The equivalent set of XPath expressions for +selector_list+
      #
      # [Raises]
      # - TypeError if the selector does not respond to +to_str+
      # - Nokogiri::CSS::SyntaxError if the selector is empty
      # - ArgumentError if +visitor+ is combined with +prefix+ or +ns+
      #
      # *Example* with a simple selector:
      #
      #   Nokogiri::CSS.xpath_for("div") # => ["//div"]
      #
      # *Example* with a compound selector:
      #
      #   Nokogiri::CSS.xpath_for("div.xl") # => ["//div[contains(concat(' ',normalize-space(@class),' '),' xl ')]"]
      #
      # *Example* with a complex selector:
      #
      #   Nokogiri::CSS.xpath_for("h1 + div") # => ["//h1/following-sibling::*[1]/self::div"]
      #
      # *Example* with a selector list:
      #
      #   Nokogiri::CSS.xpath_for("h1, h2, h3") # => ["//h1", "//h2", "//h3"]
      #
      def xpath_for(
        selector, options = nil,
        prefix: options&.delete(:prefix),
        visitor: options&.delete(:visitor),
        ns: options&.delete(:ns),
        cache: true
      )
        # The positional options hash is the legacy calling convention; its :prefix, :visitor
        # and :ns entries have already been moved into the keyword defaults above.
        warn("Nokogiri::CSS.xpath_for: Passing options as an explicit hash is deprecated. Use keyword arguments instead. This will become an error in a future release.", uplevel: 1, category: :deprecated) unless options.nil?

        unless selector.respond_to?(:to_str)
          raise(TypeError, "no implicit conversion of #{selector.inspect} to String")
        end

        selector = selector.to_str
        raise(Nokogiri::CSS::SyntaxError, "empty CSS selector") if selector.empty?

        if visitor
          raise ArgumentError, "cannot provide both :prefix and :visitor" if prefix
          raise ArgumentError, "cannot provide both :ns and :visitor" if ns
        else
          # Only forward the settings that were actually given so the visitor's own
          # defaults apply to the rest.
          settings = {}
          settings[:prefix] = prefix if prefix
          settings[:namespaces] = ns if ns
          visitor = Nokogiri::CSS::XPathVisitor.new(**settings)
        end

        return Parser.new.xpath_for(selector, visitor) unless cache

        cache_key = SelectorCache.key(selector: selector, visitor: visitor)
        SelectorCache[cache_key] ||= Parser.new.xpath_for(selector, visitor)
      end
    end
  end
end
122
+
123
+ require_relative "css/selector_cache"
124
+ require_relative "css/node"
125
+ require_relative "css/xpath_visitor"
126
+ x = $-w
127
+ $-w = false
128
+ require_relative "css/parser"
129
+ $-w = x
130
+
131
+ require_relative "css/tokenizer"
132
+ require_relative "css/syntax_error"
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
module Nokogiri
  module Decorators
    ###
    # The Slop decorator implements method_missing such that methods may be
    # used instead of XPath or CSS. See Nokogiri.Slop
    module Slop
      # The default XPath search context for Slop
      XPATH_PREFIX = "./"

      ###
      # look for node with +name+. See Nokogiri.Slop
      #
      # With no arguments, +name+ (minus one leading underscore) is searched as an XPath
      # child step. With a Hash, its +:css+ value is appended to +name+ and searched as
      # CSS, or else its +:xpath+ value(s) are and-ed together as a predicate on +name+.
      # With any other argument, "#{name}#{argument}" is translated from CSS.
      #
      # Returns the single match, or the whole list when there are several. Falls
      # through to +super+ (normally raising NoMethodError for +name+) when nothing matched.
      def method_missing(name, *args, &block)
        if args.empty?
          list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, "")}")
        elsif args.first.is_a?(Hash)
          hash = args.first
          if hash[:css]
            list = css("#{name}#{hash[:css]}")
          elsif hash[:xpath]
            conds = Array(hash[:xpath]).join(" and ")
            list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
          end
        else
          list = xpath(
            *CSS.xpath_for("#{name}#{args.first}", prefix: XPATH_PREFIX, cache: false),
          )
        end

        # +list+ is still nil when a Hash was given without a :css or :xpath entry; treat
        # that like "nothing found" so the caller gets a NoMethodError for +name+ rather
        # than one about calling empty? on nil.
        super if list.nil? || list.empty?
        list.length == 1 ? list.first : list
      end

      # Report a method as available when a child node with that name (minus one
      # leading underscore) exists; otherwise defer to the default answer.
      def respond_to_missing?(name, include_private = false)
        list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, "")}")

        !list.empty? || super
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
+ # encoding: utf-8
2
+ # frozen_string_literal: true
3
+
4
module Nokogiri
  class EncodingHandler
    # Popular encoding aliases not known by all iconv implementations that Nokogiri should support.
    USEFUL_ALIASES = {
      # alias_name => true_name
      "ISO-2022-JP" => "ISO-2022-JP", # only for JRuby tests, this is a no-op in CRuby
      "NOKOGIRI-SENTINEL" => "ISO-2022-JP", # indicates that Nokogiri has installed aliases
      "Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
    }

    class << self
      # Register every entry of USEFUL_ALIASES whose alias name does not already
      # resolve to a handler.
      def install_default_aliases
        USEFUL_ALIASES.each_pair do |alias_name, true_name|
          next unless EncodingHandler[alias_name].nil?

          EncodingHandler.alias(true_name, alias_name)
        end
      end
    end

    # :stopdoc:
    if Nokogiri.jruby?
      # When Nokogiri.jruby? is true the alias table is kept in a plain Ruby Hash on the class.
      class << self
        # Handler for +name+, or nil when no alias of that name is stored.
        def [](name)
          new(storage[name]) if storage.key?(name)
        end

        def alias(name, alias_name)
          storage[alias_name] = name
        end

        def delete(name)
          storage.delete(name)
        end

        def clear_aliases!
          storage.clear
        end

        private

        # Lazily created alias_name => true_name table.
        def storage
          @storage ||= {}
        end
      end

      def initialize(name)
        @name = name
      end

      attr_reader :name
    end
  end
end
56
+
57
+ Nokogiri::EncodingHandler.install_default_aliases
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ # load the C or Java extension: try the copy in the "<major.minor>/" directory next to this file first, then fall back to a plain require
4
+ begin
5
+ # native precompiled gems package shared libraries in <gem_dir>/lib/nokogiri/<ruby_version>
6
+ RUBY_VERSION =~ /(\d+\.\d+)/ # capture the "major.minor" part of the running Ruby's version
7
+ require_relative "#{Regexp.last_match(1)}/nokogiri" # e.g. "3.3/nokogiri", resolved relative to this file
8
+ rescue LoadError => e
9
+ if e.message.include?("GLIBC") # a load error that mentions GLIBC gets install guidance instead of the fallback
10
+ warn(<<~EOM)
11
+
12
+ ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system
13
+ with an unsupported version of glibc.
14
+
15
+ #{e.message}
16
+
17
+ If that's the case, then please install Nokogiri via the `ruby` platform gem:
18
+ gem install nokogiri --platform=ruby
19
+ or:
20
+ bundle config set force_ruby_platform true
21
+
22
+ Please visit https://nokogiri.org/tutorials/installing_nokogiri.html for more help.
23
+
24
+ EOM
25
+ raise e # re-raise the original error, so the fallback require below is not attempted
26
+ end
27
+
28
+ # use "require" instead of "require_relative" because non-native gems will place C extension files
29
+ # in Gem::BasicSpecification#extension_dir after compilation (during normal installation), which
30
+ # is in $LOAD_PATH but not necessarily relative to this file (see #2300)
31
+ require "nokogiri/nokogiri"
32
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module Gumbo # namespace for default parsing limits; the code that reads these constants is not in this file
5
+ # The default maximum number of attributes per element.
6
+ DEFAULT_MAX_ATTRIBUTES = 400
7
+
8
+ # The default maximum number of errors for parsing a document or a fragment.
9
+ DEFAULT_MAX_ERRORS = 0 # NOTE(review): 0 presumably means parse errors are not collected by default - confirm against the parser
10
+
11
+ # The default maximum depth of the DOM tree produced by parsing a document
12
+ # or fragment.
13
+ DEFAULT_MAX_TREE_DEPTH = 400
14
+ end
15
+ end