nokogiri 1.18.0-aarch64-linux-gnu

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (203) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +39 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +293 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +42 -0
  8. data/ext/nokogiri/depend +38 -0
  9. data/ext/nokogiri/extconf.rb +1173 -0
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/include/libexslt/exslt.h +108 -0
  18. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  19. data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
  20. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
  21. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  22. data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
  23. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
  24. data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
  25. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  26. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  27. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  28. data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
  29. data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
  30. data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
  31. data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
  32. data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
  33. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  34. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
  35. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
  36. data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
  37. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
  38. data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
  39. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
  40. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
  41. data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
  42. data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
  43. data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
  44. data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
  45. data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
  46. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
  47. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  48. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
  65. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
  66. data/ext/nokogiri/include/libxslt/attributes.h +39 -0
  67. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  68. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  69. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  70. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  71. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  72. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  73. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  74. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  75. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  76. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  77. data/ext/nokogiri/include/libxslt/security.h +104 -0
  78. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  79. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  80. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  81. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  82. data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
  83. data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
  84. data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
  85. data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
  86. data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
  87. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  88. data/ext/nokogiri/nokogiri.c +294 -0
  89. data/ext/nokogiri/nokogiri.h +238 -0
  90. data/ext/nokogiri/test_global_handlers.c +40 -0
  91. data/ext/nokogiri/xml_attr.c +103 -0
  92. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  93. data/ext/nokogiri/xml_cdata.c +62 -0
  94. data/ext/nokogiri/xml_comment.c +57 -0
  95. data/ext/nokogiri/xml_document.c +784 -0
  96. data/ext/nokogiri/xml_document_fragment.c +29 -0
  97. data/ext/nokogiri/xml_dtd.c +208 -0
  98. data/ext/nokogiri/xml_element_content.c +131 -0
  99. data/ext/nokogiri/xml_element_decl.c +69 -0
  100. data/ext/nokogiri/xml_encoding_handler.c +112 -0
  101. data/ext/nokogiri/xml_entity_decl.c +112 -0
  102. data/ext/nokogiri/xml_entity_reference.c +50 -0
  103. data/ext/nokogiri/xml_namespace.c +181 -0
  104. data/ext/nokogiri/xml_node.c +2459 -0
  105. data/ext/nokogiri/xml_node_set.c +518 -0
  106. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  107. data/ext/nokogiri/xml_reader.c +777 -0
  108. data/ext/nokogiri/xml_relax_ng.c +149 -0
  109. data/ext/nokogiri/xml_sax_parser.c +403 -0
  110. data/ext/nokogiri/xml_sax_parser_context.c +390 -0
  111. data/ext/nokogiri/xml_sax_push_parser.c +206 -0
  112. data/ext/nokogiri/xml_schema.c +226 -0
  113. data/ext/nokogiri/xml_syntax_error.c +93 -0
  114. data/ext/nokogiri/xml_text.c +59 -0
  115. data/ext/nokogiri/xml_xpath_context.c +486 -0
  116. data/ext/nokogiri/xslt_stylesheet.c +421 -0
  117. data/gumbo-parser/CHANGES.md +63 -0
  118. data/gumbo-parser/Makefile +129 -0
  119. data/gumbo-parser/THANKS +27 -0
  120. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  121. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  122. data/lib/nokogiri/3.3/nokogiri.so +0 -0
  123. data/lib/nokogiri/3.4/nokogiri.so +0 -0
  124. data/lib/nokogiri/class_resolver.rb +67 -0
  125. data/lib/nokogiri/css/node.rb +58 -0
  126. data/lib/nokogiri/css/parser.rb +772 -0
  127. data/lib/nokogiri/css/parser.y +277 -0
  128. data/lib/nokogiri/css/parser_extras.rb +36 -0
  129. data/lib/nokogiri/css/selector_cache.rb +38 -0
  130. data/lib/nokogiri/css/syntax_error.rb +9 -0
  131. data/lib/nokogiri/css/tokenizer.rb +155 -0
  132. data/lib/nokogiri/css/tokenizer.rex +57 -0
  133. data/lib/nokogiri/css/xpath_visitor.rb +375 -0
  134. data/lib/nokogiri/css.rb +132 -0
  135. data/lib/nokogiri/decorators/slop.rb +42 -0
  136. data/lib/nokogiri/encoding_handler.rb +57 -0
  137. data/lib/nokogiri/extension.rb +32 -0
  138. data/lib/nokogiri/gumbo.rb +15 -0
  139. data/lib/nokogiri/html.rb +48 -0
  140. data/lib/nokogiri/html4/builder.rb +37 -0
  141. data/lib/nokogiri/html4/document.rb +235 -0
  142. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  143. data/lib/nokogiri/html4/element_description.rb +25 -0
  144. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  145. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  146. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  147. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  148. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  149. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  150. data/lib/nokogiri/html4.rb +42 -0
  151. data/lib/nokogiri/html5/builder.rb +40 -0
  152. data/lib/nokogiri/html5/document.rb +199 -0
  153. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  154. data/lib/nokogiri/html5/node.rb +103 -0
  155. data/lib/nokogiri/html5.rb +368 -0
  156. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  157. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  158. data/lib/nokogiri/syntax_error.rb +6 -0
  159. data/lib/nokogiri/version/constant.rb +6 -0
  160. data/lib/nokogiri/version/info.rb +224 -0
  161. data/lib/nokogiri/version.rb +4 -0
  162. data/lib/nokogiri/xml/attr.rb +66 -0
  163. data/lib/nokogiri/xml/attribute_decl.rb +22 -0
  164. data/lib/nokogiri/xml/builder.rb +494 -0
  165. data/lib/nokogiri/xml/cdata.rb +13 -0
  166. data/lib/nokogiri/xml/character_data.rb +9 -0
  167. data/lib/nokogiri/xml/document.rb +514 -0
  168. data/lib/nokogiri/xml/document_fragment.rb +276 -0
  169. data/lib/nokogiri/xml/dtd.rb +34 -0
  170. data/lib/nokogiri/xml/element_content.rb +46 -0
  171. data/lib/nokogiri/xml/element_decl.rb +17 -0
  172. data/lib/nokogiri/xml/entity_decl.rb +23 -0
  173. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  174. data/lib/nokogiri/xml/namespace.rb +57 -0
  175. data/lib/nokogiri/xml/node/save_options.rb +76 -0
  176. data/lib/nokogiri/xml/node.rb +1650 -0
  177. data/lib/nokogiri/xml/node_set.rb +449 -0
  178. data/lib/nokogiri/xml/notation.rb +19 -0
  179. data/lib/nokogiri/xml/parse_options.rb +213 -0
  180. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  181. data/lib/nokogiri/xml/pp/node.rb +73 -0
  182. data/lib/nokogiri/xml/pp.rb +4 -0
  183. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  184. data/lib/nokogiri/xml/reader.rb +139 -0
  185. data/lib/nokogiri/xml/relax_ng.rb +75 -0
  186. data/lib/nokogiri/xml/sax/document.rb +258 -0
  187. data/lib/nokogiri/xml/sax/parser.rb +199 -0
  188. data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
  189. data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
  190. data/lib/nokogiri/xml/sax.rb +54 -0
  191. data/lib/nokogiri/xml/schema.rb +140 -0
  192. data/lib/nokogiri/xml/searchable.rb +274 -0
  193. data/lib/nokogiri/xml/syntax_error.rb +94 -0
  194. data/lib/nokogiri/xml/text.rb +11 -0
  195. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  196. data/lib/nokogiri/xml/xpath.rb +21 -0
  197. data/lib/nokogiri/xml/xpath_context.rb +27 -0
  198. data/lib/nokogiri/xml.rb +65 -0
  199. data/lib/nokogiri/xslt/stylesheet.rb +49 -0
  200. data/lib/nokogiri/xslt.rb +129 -0
  201. data/lib/nokogiri.rb +128 -0
  202. data/lib/xsd/xmlparser/nokogiri.rb +105 -0
  203. metadata +321 -0
@@ -0,0 +1,375 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ module Nokogiri
5
+ module CSS
6
+ # When translating CSS selectors to XPath queries with Nokogiri::CSS.xpath_for, the XPathVisitor
7
+ # class allows for changing some of the behaviors related to builtin xpath functions and quirks
8
+ # of HTML5.
9
+ class XPathVisitor
10
+ WILDCARD_NAMESPACES = Nokogiri.libxml2_patches.include?("0009-allow-wildcard-namespaces.patch") # :nodoc:
11
+
12
+ # Enum to direct XPathVisitor when to use Nokogiri builtin XPath functions.
13
+ module BuiltinsConfig
14
+ # Never use Nokogiri builtin functions, always generate vanilla XPath 1.0 queries. This is
15
+ # the default when calling Nokogiri::CSS.xpath_for directly.
16
+ NEVER = :never
17
+
18
+ # Always use Nokogiri builtin functions whenever possible. This is probably only useful for testing.
19
+ ALWAYS = :always
20
+
21
+ # Only use Nokogiri builtin functions when they will be faster than vanilla XPath. This is
22
+ # the behavior chosen when searching for CSS selectors on a Nokogiri document, fragment, or
23
+ # node.
24
+ OPTIMAL = :optimal
25
+
26
+ # :nodoc: array of values for validation
27
+ VALUES = [NEVER, ALWAYS, OPTIMAL]
28
+ end
29
+
30
+ # Enum to direct XPathVisitor when to tweak the XPath query to suit the nature of the document
31
+ # being searched. Note that searches for CSS selectors from a Nokogiri document, fragment, or
32
+ # node will choose the correct option automatically.
33
+ module DoctypeConfig
34
+ # The document being searched is an XML document. This is the default.
35
+ XML = :xml
36
+
37
+ # The document being searched is an HTML4 document.
38
+ HTML4 = :html4
39
+
40
+ # The document being searched is an HTML5 document.
41
+ HTML5 = :html5
42
+
43
+ # :nodoc: array of values for validation
44
+ VALUES = [XML, HTML4, HTML5]
45
+ end
46
+
47
+ # The visitor configuration set via the +builtins:+ keyword argument to XPathVisitor.new.
48
+ attr_reader :builtins
49
+
50
+ # The visitor configuration set via the +doctype:+ keyword argument to XPathVisitor.new.
51
+ attr_reader :doctype
52
+
53
+ # The visitor configuration set via the +prefix:+ keyword argument to XPathVisitor.new.
54
+ attr_reader :prefix
55
+
56
+ # The visitor configuration set via the +namespaces:+ keyword argument to XPathVisitor.new.
57
+ attr_reader :namespaces
58
+
59
+ # :call-seq:
60
+ # new() → XPathVisitor
61
+ # new(builtins:, doctype:) → XPathVisitor
62
+ #
63
+ # [Parameters]
64
+ # - +builtins:+ (BuiltinsConfig) Determine when to use Nokogiri's built-in xpath functions for performance improvements.
65
+ # - +doctype:+ (DoctypeConfig) Make document-type-specific accommodations for CSS queries.
66
+ #
67
+ # [Returns] XPathVisitor
68
+ #
69
+ def initialize(
70
+ builtins: BuiltinsConfig::NEVER,
71
+ doctype: DoctypeConfig::XML,
72
+ prefix: Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX,
73
+ namespaces: nil
74
+ )
75
+ unless BuiltinsConfig::VALUES.include?(builtins)
76
+ raise(ArgumentError, "Invalid values #{builtins.inspect} for builtins: keyword parameter")
77
+ end
78
+ unless DoctypeConfig::VALUES.include?(doctype)
79
+ raise(ArgumentError, "Invalid values #{doctype.inspect} for doctype: keyword parameter")
80
+ end
81
+
82
+ @builtins = builtins
83
+ @doctype = doctype
84
+ @prefix = prefix
85
+ @namespaces = namespaces
86
+ end
87
+
88
+ # :call-seq: config() → Hash
89
+ #
90
+ # [Returns]
91
+ # a Hash representing the configuration of the XPathVisitor, suitable for use as
92
+ # part of the CSS cache key.
93
+ def config
94
+ { builtins: @builtins, doctype: @doctype, prefix: @prefix, namespaces: @namespaces }
95
+ end
96
+
97
+ # :stopdoc:
98
+ def visit_function(node)
99
+ msg = :"visit_function_#{node.value.first.gsub(/[(]/, "")}"
100
+ return send(msg, node) if respond_to?(msg)
101
+
102
+ case node.value.first
103
+ when /^text\(/
104
+ "child::text()"
105
+ when /^self\(/
106
+ "self::#{node.value[1]}"
107
+ when /^eq\(/
108
+ "position()=#{node.value[1]}"
109
+ when /^(nth|nth-of-type)\(/
110
+ if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
111
+ nth(node.value[1])
112
+ else
113
+ "position()=#{node.value[1]}"
114
+ end
115
+ when /^nth-child\(/
116
+ if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
117
+ nth(node.value[1], child: true)
118
+ else
119
+ "count(preceding-sibling::*)=#{node.value[1].to_i - 1}"
120
+ end
121
+ when /^nth-last-of-type\(/
122
+ if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
123
+ nth(node.value[1], last: true)
124
+ else
125
+ index = node.value[1].to_i - 1
126
+ index == 0 ? "position()=last()" : "position()=last()-#{index}"
127
+ end
128
+ when /^nth-last-child\(/
129
+ if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
130
+ nth(node.value[1], last: true, child: true)
131
+ else
132
+ "count(following-sibling::*)=#{node.value[1].to_i - 1}"
133
+ end
134
+ when /^(first|first-of-type)\(/
135
+ "position()=1"
136
+ when /^(last|last-of-type)\(/
137
+ "position()=last()"
138
+ when /^contains\(/
139
+ "contains(.,#{node.value[1]})"
140
+ when /^gt\(/
141
+ "position()>#{node.value[1]}"
142
+ when /^only-child\(/
143
+ "last()=1"
144
+ when /^comment\(/
145
+ "comment()"
146
+ when /^has\(/
147
+ is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
148
+ ".#{"//" unless is_direct}#{node.value[1].accept(self)}"
149
+ else
150
+ validate_xpath_function_name(node.value.first)
151
+
152
+ # xpath function call, let's marshal those arguments
153
+ args = ["."]
154
+ args += node.value[1..-1].map do |n|
155
+ n.is_a?(Nokogiri::CSS::Node) ? n.accept(self) : n
156
+ end
157
+ "nokogiri:#{node.value.first}#{args.join(",")})"
158
+ end
159
+ end
160
+
161
# Translate a :not() node into an XPath +not(...)+ expression.
#
# A bare element name has to be tested against the context node itself, so it is
# prefixed with the +self::+ axis; every other kind of child is negated as-is.
def visit_not(node)
  operand = node.value.first
  axis = case operand.type
  when :ELEMENT_NAME then "self::"
  else ""
  end
  "not(#{axis}#{operand.accept(self)})"
end
169
+
170
# Translate an ID selector token (e.g. "#main") into an XPath predicate on the +id+ attribute.
def visit_id(node)
  # Capture everything after the leading "#"; nil (no match) interpolates as an empty string.
  identifier = node.value.first[/^#(.*)$/, 1]
  "@id='#{identifier}'"
end
174
+
175
+ def visit_attribute_condition(node)
176
+ attribute = node.value.first.accept(self)
177
+ return attribute if node.value.length == 1
178
+
179
+ value = node.value.last
180
+ value = "'#{value}'" unless /^['"]/.match?(value)
181
+
182
+ # quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
183
+ if (value[0] == value[-1]) && %q{"'}.include?(value[0])
184
+ str_value = value[1..-2]
185
+ if str_value.include?(value[0])
186
+ value = 'concat("' + str_value.split('"', -1).join(%q{",'"',"}) + '","")'
187
+ end
188
+ end
189
+
190
+ case node.value[1]
191
+ when :equal
192
+ attribute + "=" + value.to_s
193
+ when :not_equal
194
+ attribute + "!=" + value.to_s
195
+ when :substring_match
196
+ "contains(#{attribute},#{value})"
197
+ when :prefix_match
198
+ "starts-with(#{attribute},#{value})"
199
+ when :dash_match
200
+ "#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
201
+ when :includes
202
+ value = value[1..-2] # strip quotes
203
+ css_class(attribute, value)
204
+ when :suffix_match
205
+ "substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
206
+ else
207
+ attribute + " #{node.value[1]} " + value.to_s
208
+ end
209
+ end
210
+
211
# Translate a pseudo-class node into an XPath predicate expression.
#
# Resolution order:
# 1. a FUNCTION child node is handed to its own visitor method;
# 2. a +visit_pseudo_class_<name>+ method on this visitor, when one is defined;
# 3. the table of pseudo-classes with a fixed XPath translation;
# 4. anything else becomes a call to a custom "nokogiri:" XPath function.
def visit_pseudo_class(node)
  head = node.value.first
  return head.accept(self) if head.is_a?(Nokogiri::CSS::Node) && (head.type == :FUNCTION)

  custom_handler = :"visit_pseudo_class_#{head.gsub(/[(]/, "")}"
  return send(custom_handler, node) if respond_to?(custom_handler)

  fixed_translations = {
    "first" => "position()=1",
    "first-child" => "count(preceding-sibling::*)=0",
    "last" => "position()=last()",
    "last-child" => "count(following-sibling::*)=0",
    "first-of-type" => "position()=1",
    "last-of-type" => "position()=last()",
    "only-child" => "count(preceding-sibling::*)=0 and count(following-sibling::*)=0",
    "only-of-type" => "last()=1",
    "empty" => "not(node())",
    "parent" => "node()",
    "root" => "not(parent::*)",
  }

  fixed_translations.fetch(head) do
    # Unknown name: validate it, then treat it as a user-supplied XPath function.
    validate_xpath_function_name(head)
    "nokogiri:#{head}(.)"
  end
end
236
+
237
+ def visit_class_condition(node)
238
+ css_class("@class", node.value.first)
239
+ end
240
+
241
+ def visit_combinator(node)
242
+ if is_of_type_pseudo_class?(node.value.last)
243
+ "#{node.value.first&.accept(self)}][#{node.value.last.accept(self)}"
244
+ else
245
+ "#{node.value.first&.accept(self)} and #{node.value.last.accept(self)}"
246
+ end
247
+ end
248
+
249
+ {
250
+ "direct_adjacent_selector" => "/following-sibling::*[1]/self::",
251
+ "following_selector" => "/following-sibling::",
252
+ "descendant_selector" => "//",
253
+ "child_selector" => "/",
254
+ }.each do |k, v|
255
+ class_eval <<~RUBY, __FILE__, __LINE__ + 1
256
+ def visit_#{k} node
257
+ "\#{node.value.first.accept(self) if node.value.first}#{v}\#{node.value.last.accept(self)}"
258
+ end
259
+ RUBY
260
+ end
261
+
262
+ def visit_conditional_selector(node)
263
+ node.value.first.accept(self) + "[" +
264
+ node.value.last.accept(self) + "]"
265
+ end
266
+
267
+ def visit_element_name(node)
268
+ if @doctype == DoctypeConfig::HTML5 && html5_element_name_needs_namespace_handling(node)
269
+ # HTML5 has namespaces that should be ignored in CSS queries
270
+ # https://github.com/sparklemotion/nokogiri/issues/2376
271
+ if @builtins == BuiltinsConfig::ALWAYS || (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)
272
+ if WILDCARD_NAMESPACES
273
+ "*:#{node.value.first}"
274
+ else
275
+ "*[nokogiri-builtin:local-name-is('#{node.value.first}')]"
276
+ end
277
+ else
278
+ "*[local-name()='#{node.value.first}']"
279
+ end
280
+ elsif node.value.length == 2 # has a namespace prefix
281
+ if node.value.first.nil? # namespace prefix is empty
282
+ node.value.last
283
+ else
284
+ node.value.join(":")
285
+ end
286
+ elsif @namespaces&.key?("xmlns") # apply the default namespace if it's declared
287
+ "xmlns:#{node.value.first}"
288
+ else
289
+ node.value.first
290
+ end
291
+ end
292
+
293
+ def visit_attrib_name(node)
294
+ "@#{node.value.first}"
295
+ end
296
+
297
+ def accept(node)
298
+ node.accept(self)
299
+ end
300
+
301
+ private
302
+
303
# Reject names that cannot be used as a custom XPath function name.
#
# Raises Nokogiri::CSS::SyntaxError when +name+ begins with "-"; returns nil otherwise.
def validate_xpath_function_name(name)
  return unless name.start_with?("-")

  raise(Nokogiri::CSS::SyntaxError, "Invalid XPath function name '#{name}'")
end
308
+
309
# Decide whether an element-name node needs the namespace-agnostic treatment used for HTML5.
#
# Returns false for a prefixed QName (more than one value: the explicit namespace is used as
# normal) and for the wildcard selector "*"; returns true for any other single name.
def html5_element_name_needs_namespace_handling(node)
  name_parts = node.value
  return false unless name_parts.length == 1

  name_parts.first != "*"
end
315
+
316
# Build the XPath predicate for an an+b expression (as used by :nth-child, :nth-of-type, ...).
#
# [Parameters]
# - +node+ an NTH node whose +value+ holds exactly four tokens (a, "n", operator, b)
# - +options+ (Hash)
#   - +:child+ count sibling elements rather than using the position in the current node set
#   - +:last+ count from the end rather than from the start
#
# [Returns] (String) an XPath boolean expression
#
# Raises ArgumentError when the node does not hold four tokens.
def nth(node, options = {})
  unless node.value.size == 4
    raise(ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}")
  end

  a, b = read_a_and_positive_b(node.value)
  position = if options[:child]
    options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
  else
    options[:last] ? "(last()-position()+1)" : "position()"
  end

  # With a step of zero, an+b denotes the single index b. The general forms below would emit
  # "mod 0", which is NaN in XPath and therefore never matches, so compare directly instead.
  # (A b of zero or less still matches nothing, because positions start at 1.)
  return "#{position}=#{b}" if a.zero?

  if b.zero?
    "(#{position} mod #{a})=0"
  else
    # A negative step counts downwards from b, a positive step counts upwards from b.
    compare = a < 0 ? "<=" : ">="
    if a.abs == 1
      "#{position}#{compare}#{b}"
    else
      "(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
    end
  end
end
339
+
340
# Read the step +a+ and offset +b+ out of the four tokens of an an+b node.
#
# For the "-" operator the offset is folded into an equivalent offset for the same step
# (e.g. 3n-1 selects the same indexes as 3n+2), so callers only deal with "an+b" forms.
#
# [Parameters]
# - +values+ (Array) the tokens [a, "n", operator, b]; the operator may be padded with whitespace
#
# [Returns] (Array) the pair [a, b] as Integers
#
# Raises ArgumentError when the operator is neither "+" nor "-".
def read_a_and_positive_b(values)
  op = values[2].strip
  unless ["+", "-"].include?(op)
    raise ArgumentError, "expected an+b node to have either + or - as the operator, but is #{op.inspect}"
  end

  a = values[0].to_i
  offset = values[3].to_i
  return [a, offset] if op == "+"

  # A zero step cannot be used as a modulus (ZeroDivisionError); 0n-B is simply the index -B.
  b = a.zero? ? -offset : a - (offset % a)
  [a, b]
end
353
+
354
# Check whether +node+ is one of the "*-of-type" pseudo-classes (nth-, first-, last-, only-).
#
# Returns nil for anything that is not a PSEUDO_CLASS node; otherwise returns the result of
# the regexp match on the pseudo-class name (a match offset, or nil when it does not match).
def is_of_type_pseudo_class?(node) # rubocop:disable Naming/PredicateName
  return unless node.type == :PSEUDO_CLASS

  head = node.value[0]
  # A functional pseudo-class keeps its name as the first value of the FUNCTION child node.
  is_function = head.is_a?(Nokogiri::CSS::Node) && (head.type == :FUNCTION)
  pseudo_name = is_function ? head.value[0] : head
  pseudo_name =~ /(nth|first|last|only)-of-type(\()?/
end
363
+
364
# Build the XPath test for "the whitespace-separated list in +hay+ contains the token +needle+".
#
# [Parameters]
# - +hay+ (String) XPath expression yielding the list, e.g. "@class"
# - +needle+ (String) the token to look for
#
# Emits a call to the "nokogiri-builtin:css-class" function when the visitor is configured
# with ALWAYS, or with OPTIMAL while Nokogiri.uses_libxml? is truthy; otherwise emits an
# expression made only of standard XPath functions.
def css_class(hay, needle)
  use_builtin = case @builtins
  when BuiltinsConfig::ALWAYS then true
  when BuiltinsConfig::OPTIMAL then Nokogiri.uses_libxml?
  else false
  end

  return "nokogiri-builtin:css-class(#{hay},'#{needle}')" if use_builtin

  "contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
end
373
+ end
374
+ end
375
+ end
@@ -0,0 +1,132 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ module Nokogiri
5
+ # Translate a CSS selector into an XPath 1.0 query
6
+ module CSS
7
+ class << self
8
+ # TODO: Deprecate this method ahead of 2.0 and delete it in 2.0.
9
+ # It is not used by Nokogiri and shouldn't be part of the public API.
10
+ def parse(selector) # :nodoc:
11
+ warn("Nokogiri::CSS.parse is deprecated and will be removed in a future version of Nokogiri. Use Nokogiri::CSS::Parser#parse instead.", uplevel: 1, category: :deprecated)
12
+ Parser.new.parse(selector)
13
+ end
14
+
15
+ # :call-seq:
16
+ # xpath_for(selector_list) → Array<String>
17
+ # xpath_for(selector_list [, prefix:] [, ns:] [, visitor:] [, cache:]) → Array<String>
18
+ #
19
+ # Translate a CSS selector list to the equivalent XPath expressions.
20
+ #
21
+ # 💡 Note that translated queries are cached by default for performance concerns.
22
+ #
23
+ # ⚠ Users should prefer Nokogiri::XML::Searchable#css, which is mixed into all document and
24
+ # node classes, for querying documents with CSS selectors. This method is the underlying
25
+ # mechanism used by XML::Searchable and is provided solely for advanced users to translate
26
+ # \CSS selectors to XPath directly.
27
+ #
28
+ # Also see Nokogiri::XML::Searchable#css for documentation on supported CSS selector features,
29
+ # some extended syntax that Nokogiri supports, and advanced CSS features like pseudo-class
30
+ # functions.
31
+ #
32
+ # [Parameters]
33
+ # - +selector_list+ (String)
34
+ #
35
+ # The CSS selector to be translated into XPath. This is always a String, but that string
36
+ # value may be a {selector list}[https://www.w3.org/TR/selectors-4/#grouping] (see
37
+ # examples).
38
+ #
39
+ # [Keyword arguments]
40
+ # - +prefix:+ (String)
41
+ #
42
+ # The XPath expression prefix which determines the search context. See Nokogiri::XML::XPath
43
+ # for standard options. Default is +XPath::GLOBAL_SEARCH_PREFIX+.
44
+ #
45
+ # - +ns:+ (Hash<String ⇒ String>, nil)
46
+ #
47
+ # Namespaces that are referenced in the query, if any. This is a hash where the keys are the
48
+ # namespace prefix and the values are the namespace URIs. Default is +nil+ indicating an
49
+ # empty set of namespaces.
50
+ #
51
+ # - +visitor:+ (Nokogiri::CSS::XPathVisitor)
52
+ #
53
+ # Use this XPathVisitor object to transform the CSS AST into XPath expressions. See
54
+ # Nokogiri::CSS::XPathVisitor for more information on some of the complex behavior that can
55
+ # be customized for your document type. Default is +Nokogiri::CSS::XPathVisitor.new+.
56
+ #
57
+ # ⚠ Note that this option is mutually exclusive with +prefix+ and +ns+. If +visitor+ is
58
+ # provided, +prefix+ and +ns+ must not be present.
59
+ #
60
+ # - +cache:+ (Boolean)
61
+ #
62
+ # Whether to use the SelectorCache for the translated query to ensure that repeated queries
63
+ # don't incur the overhead of re-parsing the selector. Default is +true+.
64
+ #
65
+ # [Returns] (Array<String>) The equivalent set of XPath expressions for +selector_list+
66
+ #
67
+ # *Example* with a simple selector:
68
+ #
69
+ # Nokogiri::CSS.xpath_for("div") # => ["//div"]
70
+ #
71
+ # *Example* with a compound selector:
72
+ #
73
+ # Nokogiri::CSS.xpath_for("div.xl") # => ["//div[contains(concat(' ',normalize-space(@class),' '),' xl ')]"]
74
+ #
75
+ # *Example* with a complex selector:
76
+ #
77
+ # Nokogiri::CSS.xpath_for("h1 + div") # => ["//h1/following-sibling::*[1]/self::div"]
78
+ #
79
+ # *Example* with a selector list:
80
+ #
81
+ # Nokogiri::CSS.xpath_for("h1, h2, h3") # => ["//h1", "//h2", "//h3"]
82
+ #
83
+ def xpath_for(
84
+ selector, options = nil,
85
+ prefix: options&.delete(:prefix),
86
+ visitor: options&.delete(:visitor),
87
+ ns: options&.delete(:ns),
88
+ cache: true
89
+ )
90
+ unless options.nil?
91
+ warn("Nokogiri::CSS.xpath_for: Passing options as an explicit hash is deprecated. Use keyword arguments instead. This will become an error in a future release.", uplevel: 1, category: :deprecated)
92
+ end
93
+
94
+ raise(TypeError, "no implicit conversion of #{selector.inspect} to String") unless selector.respond_to?(:to_str)
95
+
96
+ selector = selector.to_str
97
+ raise(Nokogiri::CSS::SyntaxError, "empty CSS selector") if selector.empty?
98
+
99
+ if visitor
100
+ raise ArgumentError, "cannot provide both :prefix and :visitor" if prefix
101
+ raise ArgumentError, "cannot provide both :ns and :visitor" if ns
102
+ end
103
+
104
+ visitor ||= begin
105
+ visitor_kw = {}
106
+ visitor_kw[:prefix] = prefix if prefix
107
+ visitor_kw[:namespaces] = ns if ns
108
+
109
+ Nokogiri::CSS::XPathVisitor.new(**visitor_kw)
110
+ end
111
+
112
+ if cache
113
+ key = SelectorCache.key(selector: selector, visitor: visitor)
114
+ SelectorCache[key] ||= Parser.new.xpath_for(selector, visitor)
115
+ else
116
+ Parser.new.xpath_for(selector, visitor)
117
+ end
118
+ end
119
+ end
120
+ end
121
+ end
122
+
123
+ require_relative "css/selector_cache"
124
+ require_relative "css/node"
125
+ require_relative "css/xpath_visitor"
126
+ x = $-w
127
+ $-w = false
128
+ require_relative "css/parser"
129
+ $-w = x
130
+
131
+ require_relative "css/tokenizer"
132
+ require_relative "css/syntax_error"
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module Decorators
5
+ ###
6
+ # The Slop decorator implements method_missing such that methods may be
7
+ # used instead of XPath or CSS. See Nokogiri.Slop
8
+ module Slop
9
+ # The default XPath search context for Slop
10
+ XPATH_PREFIX = "./"
11
+
12
+ ###
13
+ # look for node with +name+. See Nokogiri.Slop
14
+ def method_missing(name, *args, &block)
15
+ if args.empty?
16
+ list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, "")}")
17
+ elsif args.first.is_a?(Hash)
18
+ hash = args.first
19
+ if hash[:css]
20
+ list = css("#{name}#{hash[:css]}")
21
+ elsif hash[:xpath]
22
+ conds = Array(hash[:xpath]).join(" and ")
23
+ list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
24
+ end
25
+ else
26
+ list = xpath(
27
+ *CSS.xpath_for("#{name}#{args.first}", prefix: XPATH_PREFIX, cache: false),
28
+ )
29
+ end
30
+
31
+ super if list.empty?
32
+ list.length == 1 ? list.first : list
33
+ end
34
+
35
# Report a dynamic method as available when an XPath search for child elements named
# +name+ (with any leading underscore removed) finds at least one node.
# +include_private+ is accepted for interface compatibility and is not consulted.
def respond_to_missing?(name, include_private = false)
  element_name = name.to_s.sub(/^_/, "")
  matches = xpath("#{XPATH_PREFIX}#{element_name}")
  !matches.empty?
end
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,57 @@
1
+ # encoding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ module Nokogiri
5
+ class EncodingHandler
6
+ # Popular encoding aliases not known by all iconv implementations that Nokogiri should support.
7
+ USEFUL_ALIASES = {
8
+ # alias_name => true_name
9
+ "ISO-2022-JP" => "ISO-2022-JP", # only for JRuby tests, this is a no-op in CRuby
10
+ "NOKOGIRI-SENTINEL" => "ISO-2022-JP", # indicating the Nokogiri has installed aliases
11
+ "Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
12
+ }
13
+
14
+ class << self
15
# Register every entry of USEFUL_ALIASES with EncodingHandler, skipping any alias name for
# which EncodingHandler[] already returns a handler.
def install_default_aliases
  USEFUL_ALIASES.each_pair do |alias_name, true_name|
    next unless EncodingHandler[alias_name].nil?

    EncodingHandler.alias(true_name, alias_name)
  end
end
20
+ end
21
+
22
+ # :stopdoc:
23
+ if Nokogiri.jruby?
24
+ class << self
25
+ def [](name)
26
+ storage.key?(name) ? new(storage[name]) : nil
27
+ end
28
+
29
+ def alias(name, alias_name)
30
+ storage[alias_name] = name
31
+ end
32
+
33
+ def delete(name)
34
+ storage.delete(name)
35
+ end
36
+
37
+ def clear_aliases!
38
+ storage.clear
39
+ end
40
+
41
+ private
42
+
43
+ def storage
44
+ @storage ||= {}
45
+ end
46
+ end
47
+
48
+ def initialize(name)
49
+ @name = name
50
+ end
51
+
52
+ attr_reader :name
53
+ end
54
+ end
55
+ end
56
+
57
+ Nokogiri::EncodingHandler.install_default_aliases
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ # load the C or Java extension
4
+ begin
5
+ # native precompiled gems package shared libraries in <gem_dir>/lib/nokogiri/<ruby_version>
6
+ RUBY_VERSION =~ /(\d+\.\d+)/
7
+ require_relative "#{Regexp.last_match(1)}/nokogiri"
8
+ rescue LoadError => e
9
+ if e.message.include?("GLIBC")
10
+ warn(<<~EOM)
11
+
12
+ ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system
13
+ with an unsupported version of glibc.
14
+
15
+ #{e.message}
16
+
17
+ If that's the case, then please install Nokogiri via the `ruby` platform gem:
18
+ gem install nokogiri --platform=ruby
19
+ or:
20
+ bundle config set force_ruby_platform true
21
+
22
+ Please visit https://nokogiri.org/tutorials/installing_nokogiri.html for more help.
23
+
24
+ EOM
25
+ raise e
26
+ end
27
+
28
+ # use "require" instead of "require_relative" because non-native gems will place C extension files
29
+ # in Gem::BasicSpecification#extension_dir after compilation (during normal installation), which
30
+ # is in $LOAD_PATH but not necessarily relative to this file (see #2300)
31
+ require "nokogiri/nokogiri"
32
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module Gumbo
5
+ # The default maximum number of attributes per element.
6
+ DEFAULT_MAX_ATTRIBUTES = 400
7
+
8
+ # The default maximum number of errors for parsing a document or a fragment.
9
+ DEFAULT_MAX_ERRORS = 0
10
+
11
+ # The default maximum depth of the DOM tree produced by parsing a document
12
+ # or fragment.
13
+ DEFAULT_MAX_TREE_DEPTH = 400
14
+ end
15
+ end