nokogiri 1.14.0.rc1-arm-linux

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (200) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +287 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +41 -0
  8. data/ext/nokogiri/depend +38 -0
  9. data/ext/nokogiri/extconf.rb +1082 -0
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +114 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/include/libexslt/exslt.h +108 -0
  17. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  18. data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
  19. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +306 -0
  20. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  21. data/ext/nokogiri/include/libxml2/libxml/SAX.h +204 -0
  22. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +172 -0
  23. data/ext/nokogiri/include/libxml2/libxml/c14n.h +128 -0
  24. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  25. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  26. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  27. data/ext/nokogiri/include/libxml2/libxml/dict.h +81 -0
  28. data/ext/nokogiri/include/libxml2/libxml/encoding.h +232 -0
  29. data/ext/nokogiri/include/libxml2/libxml/entities.h +153 -0
  30. data/ext/nokogiri/include/libxml2/libxml/globals.h +499 -0
  31. data/ext/nokogiri/include/libxml2/libxml/hash.h +236 -0
  32. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  33. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
  34. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +81 -0
  35. data/ext/nokogiri/include/libxml2/libxml/parser.h +1244 -0
  36. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +656 -0
  37. data/ext/nokogiri/include/libxml2/libxml/pattern.h +100 -0
  38. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +218 -0
  39. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +958 -0
  40. data/ext/nokogiri/include/libxml2/libxml/schematron.h +142 -0
  41. data/ext/nokogiri/include/libxml2/libxml/threads.h +91 -0
  42. data/ext/nokogiri/include/libxml2/libxml/tree.h +1312 -0
  43. data/ext/nokogiri/include/libxml2/libxml/uri.h +94 -0
  44. data/ext/nokogiri/include/libxml2/libxml/valid.h +463 -0
  45. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +129 -0
  46. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  47. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +368 -0
  48. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +947 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +77 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +226 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +428 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +222 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +88 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +246 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +202 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +503 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +488 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xpath.h +575 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +632 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +137 -0
  65. data/ext/nokogiri/include/libxslt/attributes.h +38 -0
  66. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  67. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  68. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  69. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  70. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  71. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  72. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  73. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  74. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  75. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  76. data/ext/nokogiri/include/libxslt/security.h +104 -0
  77. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  78. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  79. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  80. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  81. data/ext/nokogiri/include/libxslt/xsltInternals.h +1982 -0
  82. data/ext/nokogiri/include/libxslt/xsltconfig.h +179 -0
  83. data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
  84. data/ext/nokogiri/include/libxslt/xsltlocale.h +76 -0
  85. data/ext/nokogiri/include/libxslt/xsltutils.h +310 -0
  86. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  87. data/ext/nokogiri/nokogiri.c +259 -0
  88. data/ext/nokogiri/nokogiri.h +235 -0
  89. data/ext/nokogiri/test_global_handlers.c +40 -0
  90. data/ext/nokogiri/xml_attr.c +103 -0
  91. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  92. data/ext/nokogiri/xml_cdata.c +57 -0
  93. data/ext/nokogiri/xml_comment.c +62 -0
  94. data/ext/nokogiri/xml_document.c +689 -0
  95. data/ext/nokogiri/xml_document_fragment.c +44 -0
  96. data/ext/nokogiri/xml_dtd.c +208 -0
  97. data/ext/nokogiri/xml_element_content.c +128 -0
  98. data/ext/nokogiri/xml_element_decl.c +69 -0
  99. data/ext/nokogiri/xml_encoding_handler.c +104 -0
  100. data/ext/nokogiri/xml_entity_decl.c +112 -0
  101. data/ext/nokogiri/xml_entity_reference.c +50 -0
  102. data/ext/nokogiri/xml_namespace.c +186 -0
  103. data/ext/nokogiri/xml_node.c +2425 -0
  104. data/ext/nokogiri/xml_node_set.c +496 -0
  105. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  106. data/ext/nokogiri/xml_reader.c +794 -0
  107. data/ext/nokogiri/xml_relax_ng.c +183 -0
  108. data/ext/nokogiri/xml_sax_parser.c +316 -0
  109. data/ext/nokogiri/xml_sax_parser_context.c +283 -0
  110. data/ext/nokogiri/xml_sax_push_parser.c +166 -0
  111. data/ext/nokogiri/xml_schema.c +282 -0
  112. data/ext/nokogiri/xml_syntax_error.c +85 -0
  113. data/ext/nokogiri/xml_text.c +48 -0
  114. data/ext/nokogiri/xml_xpath_context.c +413 -0
  115. data/ext/nokogiri/xslt_stylesheet.c +363 -0
  116. data/gumbo-parser/CHANGES.md +63 -0
  117. data/gumbo-parser/Makefile +111 -0
  118. data/gumbo-parser/THANKS +27 -0
  119. data/lib/nokogiri/2.7/nokogiri.so +0 -0
  120. data/lib/nokogiri/3.0/nokogiri.so +0 -0
  121. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  122. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  123. data/lib/nokogiri/class_resolver.rb +67 -0
  124. data/lib/nokogiri/css/node.rb +54 -0
  125. data/lib/nokogiri/css/parser.rb +770 -0
  126. data/lib/nokogiri/css/parser.y +277 -0
  127. data/lib/nokogiri/css/parser_extras.rb +96 -0
  128. data/lib/nokogiri/css/syntax_error.rb +9 -0
  129. data/lib/nokogiri/css/tokenizer.rb +155 -0
  130. data/lib/nokogiri/css/tokenizer.rex +56 -0
  131. data/lib/nokogiri/css/xpath_visitor.rb +359 -0
  132. data/lib/nokogiri/css.rb +66 -0
  133. data/lib/nokogiri/decorators/slop.rb +44 -0
  134. data/lib/nokogiri/encoding_handler.rb +57 -0
  135. data/lib/nokogiri/extension.rb +32 -0
  136. data/lib/nokogiri/gumbo.rb +15 -0
  137. data/lib/nokogiri/html.rb +48 -0
  138. data/lib/nokogiri/html4/builder.rb +37 -0
  139. data/lib/nokogiri/html4/document.rb +214 -0
  140. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  141. data/lib/nokogiri/html4/element_description.rb +25 -0
  142. data/lib/nokogiri/html4/element_description_defaults.rb +572 -0
  143. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  144. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  145. data/lib/nokogiri/html4/sax/parser.rb +63 -0
  146. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  147. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  148. data/lib/nokogiri/html4.rb +47 -0
  149. data/lib/nokogiri/html5/document.rb +168 -0
  150. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  151. data/lib/nokogiri/html5/node.rb +98 -0
  152. data/lib/nokogiri/html5.rb +389 -0
  153. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  154. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  155. data/lib/nokogiri/syntax_error.rb +6 -0
  156. data/lib/nokogiri/version/constant.rb +6 -0
  157. data/lib/nokogiri/version/info.rb +223 -0
  158. data/lib/nokogiri/version.rb +4 -0
  159. data/lib/nokogiri/xml/attr.rb +66 -0
  160. data/lib/nokogiri/xml/attribute_decl.rb +20 -0
  161. data/lib/nokogiri/xml/builder.rb +487 -0
  162. data/lib/nokogiri/xml/cdata.rb +13 -0
  163. data/lib/nokogiri/xml/character_data.rb +9 -0
  164. data/lib/nokogiri/xml/document.rb +471 -0
  165. data/lib/nokogiri/xml/document_fragment.rb +205 -0
  166. data/lib/nokogiri/xml/dtd.rb +34 -0
  167. data/lib/nokogiri/xml/element_content.rb +38 -0
  168. data/lib/nokogiri/xml/element_decl.rb +15 -0
  169. data/lib/nokogiri/xml/entity_decl.rb +21 -0
  170. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  171. data/lib/nokogiri/xml/namespace.rb +58 -0
  172. data/lib/nokogiri/xml/node/save_options.rb +68 -0
  173. data/lib/nokogiri/xml/node.rb +1563 -0
  174. data/lib/nokogiri/xml/node_set.rb +446 -0
  175. data/lib/nokogiri/xml/notation.rb +19 -0
  176. data/lib/nokogiri/xml/parse_options.rb +213 -0
  177. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  178. data/lib/nokogiri/xml/pp/node.rb +57 -0
  179. data/lib/nokogiri/xml/pp.rb +4 -0
  180. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  181. data/lib/nokogiri/xml/reader.rb +105 -0
  182. data/lib/nokogiri/xml/relax_ng.rb +38 -0
  183. data/lib/nokogiri/xml/sax/document.rb +167 -0
  184. data/lib/nokogiri/xml/sax/parser.rb +125 -0
  185. data/lib/nokogiri/xml/sax/parser_context.rb +21 -0
  186. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  187. data/lib/nokogiri/xml/sax.rb +6 -0
  188. data/lib/nokogiri/xml/schema.rb +73 -0
  189. data/lib/nokogiri/xml/searchable.rb +270 -0
  190. data/lib/nokogiri/xml/syntax_error.rb +72 -0
  191. data/lib/nokogiri/xml/text.rb +11 -0
  192. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  193. data/lib/nokogiri/xml/xpath.rb +21 -0
  194. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  195. data/lib/nokogiri/xml.rb +76 -0
  196. data/lib/nokogiri/xslt/stylesheet.rb +27 -0
  197. data/lib/nokogiri/xslt.rb +65 -0
  198. data/lib/nokogiri.rb +120 -0
  199. data/lib/xsd/xmlparser/nokogiri.rb +104 -0
  200. metadata +317 -0
@@ -0,0 +1,359 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ module Nokogiri
5
+ module CSS
6
+ # When translating CSS selectors to XPath queries with Nokogiri::CSS.xpath_for, the XPathVisitor
7
+ # class allows for changing some of the behaviors related to builtin xpath functions and quirks
8
+ # of HTML5.
9
+ class XPathVisitor
10
+ WILDCARD_NAMESPACES = Nokogiri.libxml2_patches.include?("0009-allow-wildcard-namespaces.patch") # :nodoc:
11
+
12
+ # Enum to direct XPathVisitor when to use Nokogiri builtin XPath functions.
13
module BuiltinsConfig
  # Emit only vanilla XPath 1.0 and never a Nokogiri builtin function. This is the value
  # XPathVisitor#initialize falls back to when no +builtins:+ keyword is given.
  NEVER = :never

  # Emit a Nokogiri builtin function wherever one is available. Mostly of interest to tests.
  ALWAYS = :always

  # Emit a Nokogiri builtin function only where it is expected to outperform vanilla XPath.
  # This is the behavior chosen when searching for CSS selectors on a Nokogiri document,
  # fragment, or node.
  OPTIMAL = :optimal

  # :nodoc: every legal setting, consulted when validating the +builtins:+ keyword
  VALUES = [NEVER, ALWAYS, OPTIMAL]
end
29
+
30
+ # Enum to direct XPathVisitor when to tweak the XPath query to suit the nature of the document
31
+ # being searched. Note that searches for CSS selectors from a Nokogiri document, fragment, or
32
+ # node will choose the correct option automatically.
33
module DoctypeConfig
  # The searched document is XML. This is the value XPathVisitor#initialize falls back to
  # when no +doctype:+ keyword is given.
  XML = :xml

  # The searched document is an HTML4 document.
  HTML4 = :html4

  # The searched document is an HTML5 document.
  HTML5 = :html5

  # :nodoc: every legal setting, consulted when validating the +doctype:+ keyword
  VALUES = [XML, HTML4, HTML5]
end
46
+
47
+ # :call-seq:
48
+ # new() → XPathVisitor
49
+ # new(builtins:, doctype:) → XPathVisitor
50
+ #
51
+ # [Parameters]
52
+ # - +builtins:+ (BuiltinsConfig) Determine when to use Nokogiri's built-in xpath functions for performance improvements.
53
+ # - +doctype:+ (DoctypeConfig) Make document-type-specific accommodations for CSS queries.
54
+ #
55
+ # [Returns] XPathVisitor
56
+ #
57
def initialize(builtins: BuiltinsConfig::NEVER, doctype: DoctypeConfig::XML)
  # Reject unknown settings at construction time; builtins: is checked before doctype:.
  if !BuiltinsConfig::VALUES.include?(builtins)
    raise(ArgumentError, "Invalid values #{builtins.inspect} for builtins: keyword parameter")
  end
  if !DoctypeConfig::VALUES.include?(doctype)
    raise(ArgumentError, "Invalid values #{doctype.inspect} for doctype: keyword parameter")
  end

  @builtins = builtins
  @doctype = doctype
end
68
+
69
+ # :call-seq: config() → Hash
70
+ #
71
+ # [Returns]
72
+ # a Hash representing the configuration of the XPathVisitor, suitable for use as
73
+ # part of the CSS cache key.
74
def config
  # Build the Hash key by key; it exposes exactly the two settings stored by the constructor.
  settings = {}
  settings[:builtins] = @builtins
  settings[:doctype] = @doctype
  settings
end
77
+
78
+ # :stopdoc:
79
# Translate a FUNCTION node (e.g. "nth-child(", "contains(") into an XPath predicate body.
#
# node.value.first is the function token including its opening parenthesis; node.value[1]
# is the first argument. A visitor method named visit_function_<name> takes precedence when
# this object responds to it; unrecognized functions are passed through as XPath function
# calls with "." as the first argument.
def visit_function(node)
  fn_token = node.value.first
  arg = node.value[1]

  override = :"visit_function_#{fn_token.gsub(/[(]/, "")}"
  return send(override, node) if respond_to?(override)

  # Evaluated lazily: only the nth-* branches care whether the argument is an an+b node.
  an_plus_b = -> { arg.is_a?(Nokogiri::CSS::Node) && (arg.type == :NTH) }

  case fn_token
  when /^text\(/ then "child::text()"
  when /^self\(/ then "self::#{arg}"
  when /^eq\(/ then "position()=#{arg}"
  when /^(nth|nth-of-type)\(/
    an_plus_b.call ? nth(arg) : "position()=#{arg}"
  when /^nth-child\(/
    an_plus_b.call ? nth(arg, child: true) : "count(preceding-sibling::*)=#{arg.to_i - 1}"
  when /^nth-last-of-type\(/
    if an_plus_b.call
      nth(arg, last: true)
    else
      offset = arg.to_i - 1
      offset == 0 ? "position()=last()" : "position()=last()-#{offset}"
    end
  when /^nth-last-child\(/
    an_plus_b.call ? nth(arg, last: true, child: true) : "count(following-sibling::*)=#{arg.to_i - 1}"
  when /^(first|first-of-type)\(/ then "position()=1"
  when /^(last|last-of-type)\(/ then "position()=last()"
  when /^contains\(/ then "contains(.,#{arg})"
  when /^gt\(/ then "position()>#{arg}"
  when /^only-child\(/ then "last()=1"
  when /^comment\(/ then "comment()"
  when /^has\(/
    # A nil left-hand side means the argument starts with a combinator,
    # e.g. "has(> a)", "has(~ a)", "has(+ a)"; otherwise search all descendants.
    leading_combinator = arg.value[0].nil?
    axis = leading_combinator ? "." : ".//"
    "#{axis}#{arg.accept(self)}"
  else
    # Plain XPath function call: render each argument, with the context node first.
    rendered = node.value[1..-1].map do |argument|
      argument.is_a?(Nokogiri::CSS::Node) ? argument.accept(self) : argument
    end
    "#{fn_token}#{(["."] + rendered).join(",")})"
  end
end
139
+
140
# Translate a :not(...) node. A bare element name must be tested on the self axis; any other
# child is already a predicate expression and is negated as-is.
def visit_not(node)
  negated = node.value.first
  axis = :ELEMENT_NAME == negated.type ? "self::" : ""
  "not(#{axis}#{negated.accept(self)})"
end
148
+
149
# Translate an ID node ("#foo") into an @id equality test. A token that does not start with
# "#" yields an empty id value.
def visit_id(node)
  matched = /^#(.*)$/.match(node.value.first)
  "@id='#{matched && matched[1]}'"
end
153
+
154
# Translate an attribute condition ([attr], [attr=value], [attr^=value], ...) to XPath.
#
# node.value is [attribute_node] for a bare existence test, otherwise
# [attribute_node, operator_symbol, value].
def visit_attribute_condition(node)
  lhs = node.value.first.accept(self)
  return lhs if node.value.length == 1

  rhs = node.value.last
  rhs = "'#{rhs}'" unless /^['"]/.match?(rhs)

  # quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
  # When the quoted text contains its own quote character, rebuild it with concat().
  quote = rhs[0]
  if (quote == rhs[-1]) && %q{"'}.include?(quote)
    unquoted = rhs[1..-2]
    if unquoted.include?(quote)
      rhs = 'concat("' + unquoted.split('"', -1).join(%q{",'"',"}) + '","")'
    end
  end

  operator = node.value[1]
  case operator
  when :equal then lhs + "=" + rhs.to_s
  when :not_equal then lhs + "!=" + rhs.to_s
  when :substring_match then "contains(#{lhs},#{rhs})"
  when :prefix_match then "starts-with(#{lhs},#{rhs})"
  when :dash_match then "#{lhs}=#{rhs} or starts-with(#{lhs},concat(#{rhs},'-'))"
  when :includes then css_class(lhs, rhs[1..-2]) # strip quotes
  when :suffix_match
    "substring(#{lhs},string-length(#{lhs})-string-length(#{rhs})+1,string-length(#{rhs}))=#{rhs}"
  else
    lhs + " #{operator} " + rhs.to_s
  end
end
189
+
190
# Translate a pseudo-class node (":first-child", ":nth-child(2)", ...) to an XPath predicate.
#
# Functional pseudo-classes are delegated to the FUNCTION node. Otherwise a visitor method
# named visit_pseudo_class_<name> wins when this object responds to it, then the fixed table
# below, and finally the name is emitted as an XPath function applied to the context node.
def visit_pseudo_class(node)
  head = node.value.first
  return head.accept(self) if head.is_a?(Nokogiri::CSS::Node) && (head.type == :FUNCTION)

  override = :"visit_pseudo_class_#{head.gsub(/[(]/, "")}"
  return send(override, node) if respond_to?(override)

  known = {
    "first" => "position()=1",
    "first-child" => "count(preceding-sibling::*)=0",
    "last" => "position()=last()",
    "last-child" => "count(following-sibling::*)=0",
    "first-of-type" => "position()=1",
    "last-of-type" => "position()=last()",
    "only-child" => "count(preceding-sibling::*)=0 and count(following-sibling::*)=0",
    "only-of-type" => "last()=1",
    "empty" => "not(node())",
    "parent" => "node()",
    "root" => "not(parent::*)",
  }
  known.fetch(head) { head + "(.)" }
end
214
+
215
# Translate a class condition (".foo") into a test against the @class attribute.
def visit_class_condition(node)
  class_name = node.value.first
  css_class("@class", class_name)
end
218
+
219
# Join two conditions on the same element. An *-of-type pseudo-class on the right is emitted
# as a separate predicate ("][") instead of being and-ed into the current one.
def visit_combinator(node)
  glue = is_of_type_pseudo_class?(node.value.last) ? "][" : " and "
  "#{node.value.first&.accept(self)}#{glue}#{node.value.last.accept(self)}"
end
226
+
227
# Generate one visit_<kind> method per CSS combinator. Each renders the optional left-hand
# selector, the XPath axis for the combinator, then the right-hand selector.
{
  "direct_adjacent_selector" => "/following-sibling::*[1]/self::",
  "following_selector" => "/following-sibling::",
  "descendant_selector" => "//",
  "child_selector" => "/",
}.each do |selector_kind, axis|
  define_method(:"visit_#{selector_kind}") do |node|
    lhs = node.value.first
    "#{lhs.accept(self) if lhs}#{axis}#{node.value.last.accept(self)}"
  end
end
239
+
240
# Render "selector[condition]": the first child is the selector, the last is its predicate.
def visit_conditional_selector(node)
  subject = node.value.first
  condition = node.value.last
  subject.accept(self) + "[" + condition.accept(self) + "]"
end
244
+
245
# Render an element name. Outside HTML5 mode, and for "*" or prefixed names, the name is
# emitted unchanged.
def visit_element_name(node)
  tag = node.value.first
  return tag unless @doctype == DoctypeConfig::HTML5 && html5_element_name_needs_namespace_handling(node)

  # HTML5 has namespaces that should be ignored in CSS queries
  # https://github.com/sparklemotion/nokogiri/issues/2376
  builtins_allowed = @builtins == BuiltinsConfig::ALWAYS ||
    (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)

  if !builtins_allowed
    "*[local-name()='#{tag}']"
  elsif WILDCARD_NAMESPACES
    "*:#{tag}"
  else
    "*[nokogiri-builtin:local-name-is('#{tag}')]"
  end
end
262
+
263
# Render an attribute name as an XPath attribute reference.
def visit_attrib_name(node)
  format("@%s", node.value.first)
end
266
+
267
# Entry point: hand this visitor to +node+ and return whatever the node's accept returns.
def accept(node)
  visitor = self
  node.accept(visitor)
end
270
+
271
+ private
272
+
273
# True when the element name needs namespace-agnostic matching.
def html5_element_name_needs_namespace_handling(node)
  tag = node.value.first
  # the wildcard selector "*" is used as normal
  return false if tag == "*"

  # a name that already carries a namespace (i.e., a prefixed QName) is used as normal
  !tag.include?(":")
end
279
+
280
# Translate an an+b node into an XPath position test.
#
# [Parameters]
# - +node+ an NTH node whose +value+ holds exactly four tokens: a, "n", operator, b
# - +options+ (Hash) +:child+ counts siblings instead of using position(); +:last+ counts
#   from the end
#
# [Returns] (String) the XPath predicate body
#
# [Raises] ArgumentError when the node does not hold four tokens or the operator is not + or -
def nth(node, options = {})
  unless node.value.size == 4
    raise(ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}")
  end

  a, b = read_a_and_positive_b(node.value)
  position = if options[:child]
    options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
  else
    options[:last] ? "(last()-position()+1)" : "position()"
  end

  # With a == 0 the expression is the constant b, so only that one position can match.
  # Falling through would emit "mod 0", which XPath evaluates to NaN and never matches.
  return "#{position}=#{b}" if a.zero?

  if b.zero?
    "(#{position} mod #{a})=0"
  else
    compare = a < 0 ? "<=" : ">="
    if a.abs == 1
      "#{position}#{compare}#{b}"
    else
      "(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
    end
  end
end

# Read a and b out of the four an+b tokens. A subtracted offset is folded into an equivalent
# offset within one step of a, so callers only ever deal with "a*n + b".
#
# [Returns] (Array) +[a, b]+ as Integers
#
# [Raises] ArgumentError when the operator token is neither + nor -
def read_a_and_positive_b(values)
  # Tolerate whitespace around the operator token in case the tokenizer keeps it.
  op = values[2].to_s.strip
  a = values[0].to_i

  case op
  when "+"
    b = values[3].to_i
  when "-"
    offset = values[3].to_i
    # "0n-k" is the constant -k; the modulo form would divide by zero.
    b = a.zero? ? -offset : a - (offset % a)
  else
    raise ArgumentError, "expected an+b node to have either + or - as the operator, but is #{values[2].inspect}"
  end
  [a, b]
end
317
+
318
# Truthy when +node+ is an *-of-type pseudo-class (plain or functional). Returns nil for any
# other node type, otherwise the result of the regex match (an Integer index or nil).
def is_of_type_pseudo_class?(node) # rubocop:disable Naming/PredicateName
  return unless node.type == :PSEUDO_CLASS

  head = node.value[0]
  functional = head.is_a?(Nokogiri::CSS::Node) && (head.type == :FUNCTION)
  label = functional ? head.value[0] : head
  label =~ /(nth|first|last|only)-of-type(\()?/
end
327
+
328
# Build the XPath test for "+hay+ contains the whitespace-separated token +needle+".
def css_class(hay, needle)
  builtins_allowed = @builtins == BuiltinsConfig::ALWAYS ||
    (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)

  # use the builtin implementation when permitted
  return "nokogiri-builtin:css-class(#{hay},'#{needle}')" if builtins_allowed

  # otherwise use only ordinary xpath functions
  "contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
end
337
+ end
338
+
339
module XPathVisitorAlwaysUseBuiltins # :nodoc:
  # Deprecated factory: emits a deprecation warning attributed to the caller, then returns an
  # XPathVisitor configured with builtins: :always.
  def self.new
    # uplevel: must be a real keyword argument. A braced Hash literal is a positional argument
    # on Ruby 3, where Kernel#warn would print it as a second message and ignore the uplevel.
    warn(
      "Nokogiri::CSS::XPathVisitorAlwaysUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
      uplevel: 1,
    )
    XPathVisitor.new(builtins: :always)
  end
end
348
+
349
module XPathVisitorOptimallyUseBuiltins # :nodoc:
  # Deprecated factory: emits a deprecation warning attributed to the caller, then returns an
  # XPathVisitor configured with builtins: :optimal.
  def self.new
    # uplevel: must be a real keyword argument. A braced Hash literal is a positional argument
    # on Ruby 3, where Kernel#warn would print it as a second message and ignore the uplevel.
    warn(
      "Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
      uplevel: 1,
    )
    XPathVisitor.new(builtins: :optimal)
  end
end
358
+ end
359
+ end
@@ -0,0 +1,66 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ module Nokogiri
5
+ # Translate a CSS selector into an XPath 1.0 query
6
+ module CSS
7
+ class << self
8
+ # TODO: Deprecate this method ahead of 2.0 and delete it in 2.0.
9
+ # It is not used by Nokogiri and shouldn't be part of the public API.
10
# Parse +selector+ with a fresh Parser and return whatever Parser#parse returns.
def parse(selector) # :nodoc:
  parser = Parser.new
  parser.parse(selector)
end
13
+
14
+ # :call-seq:
15
+ # xpath_for(selector) → String
16
+ # xpath_for(selector [, prefix:] [, visitor:] [, ns:]) → String
17
+ #
18
+ # Translate a CSS selector to the equivalent XPath query.
19
+ #
20
+ # [Parameters]
21
+ # - +selector+ (String) The CSS selector to be translated into XPath
22
+ #
23
+ # - +prefix:+ (String)
24
+ #
25
+ # The XPath prefix for the query, see Nokogiri::XML::XPath for some options. Default is
26
+ # +XML::XPath::GLOBAL_SEARCH_PREFIX+.
27
+ #
28
+ # - +visitor:+ (Nokogiri::CSS::XPathVisitor)
29
+ #
30
+ # The visitor class to use to transform the AST into XPath. Default is
31
+ # +Nokogiri::CSS::XPathVisitor.new+.
32
+ #
33
+ # - +ns:+ (Hash<String ⇒ String>)
34
+ #
35
+ # The namespaces that are referenced in the query, if any. This is a hash where the keys are
36
+ # the namespace prefix and the values are the namespace URIs. Default is an empty Hash.
37
+ #
38
+ # [Returns] (String) The equivalent XPath query for +selector+
39
+ #
40
+ # 💡 Note that translated queries are cached for performance concerns.
41
+ #
42
def xpath_for(selector, options = {})
  # Accept anything string-like, but fail with the usual conversion error otherwise.
  unless selector.respond_to?(:to_str)
    raise TypeError, "no implicit conversion of #{selector.inspect} to String"
  end

  css = selector.to_str
  raise Nokogiri::CSS::SyntaxError, "empty CSS selector" if css.empty?

  # Options missing from the Hash fall back to their documented defaults.
  prefix = options.fetch(:prefix) { Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX }
  visitor = options.fetch(:visitor) { Nokogiri::CSS::XPathVisitor.new }
  ns = options.fetch(:ns) { {} }

  Parser.new(ns).xpath_for(css, prefix, visitor)
end
54
+ end
55
+ end
56
+ end
57
+
58
+ require_relative "css/node"
59
+ require_relative "css/xpath_visitor"
60
+ x = $-w
61
+ $-w = false
62
+ require_relative "css/parser"
63
+ $-w = x
64
+
65
+ require_relative "css/tokenizer"
66
+ require_relative "css/syntax_error"
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module Decorators
5
+ ###
6
+ # The Slop decorator implements method missing such that a methods may be
7
+ # used instead of XPath or CSS. See Nokogiri.Slop
8
+ module Slop
9
+ # The default XPath search context for Slop
10
+ XPATH_PREFIX = "./"
11
+
12
+ ###
13
+ # look for node with +name+. See Nokogiri.Slop
14
+ def method_missing(name, *args, &block)
15
+ if args.empty?
16
+ list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, "")}")
17
+ elsif args.first.is_a?(Hash)
18
+ hash = args.first
19
+ if hash[:css]
20
+ list = css("#{name}#{hash[:css]}")
21
+ elsif hash[:xpath]
22
+ conds = Array(hash[:xpath]).join(" and ")
23
+ list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
24
+ end
25
+ else
26
+ CSS::Parser.without_cache do
27
+ list = xpath(
28
+ *CSS.xpath_for("#{name}#{args.first}", prefix: XPATH_PREFIX)
29
+ )
30
+ end
31
+ end
32
+
33
+ super if list.empty?
34
+ list.length == 1 ? list.first : list
35
+ end
36
+
37
+ def respond_to_missing?(name, include_private = false)
38
+ list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, "")}")
39
+
40
+ !list.empty?
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,57 @@
1
+ # encoding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ module Nokogiri
5
class EncodingHandler
  # Popular encoding aliases not known by all iconv implementations that Nokogiri should support.
  USEFUL_ALIASES = {
    # alias_name => true_name
    "NOKOGIRI-SENTINEL" => "UTF-8", # indicates that Nokogiri has installed its aliases
    "Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
    "UTF-8" => "UTF-8", # for JRuby tests, this is a no-op in CRuby
  }

  # Register every entry of USEFUL_ALIASES whose alias name is not already known.
  def self.install_default_aliases
    USEFUL_ALIASES.each_pair do |alias_name, name|
      next unless EncodingHandler[alias_name].nil?

      EncodingHandler.alias(name, alias_name)
    end
  end

  # :stopdoc:
  if Nokogiri.jruby?
    # On JRuby the alias table is a plain Hash owned by this class.
    class << self
      # Look up +name+; returns a handler for the aliased encoding, or nil when unknown.
      def [](name)
        return unless storage.key?(name)

        new(storage[name])
      end

      # Record +alias_name+ as another name for +name+.
      def alias(name, alias_name)
        storage[alias_name] = name
      end

      # Forget the alias +name+.
      def delete(name)
        storage.delete(name)
      end

      # Forget every alias.
      def clear_aliases!
        storage.clear
      end

      private

      # Lazily created alias table (alias name => encoding name).
      def storage
        @storage ||= {}
      end
    end

    def initialize(name)
      @name = name
    end

    attr_reader :name
  end
end
55
+ end
56
+
57
+ Nokogiri::EncodingHandler.install_default_aliases
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ # load the C or Java extension
4
+ begin
5
+ # native precompiled gems package shared libraries in <gem_dir>/lib/nokogiri/<ruby_version>
6
+ RUBY_VERSION =~ /(\d+\.\d+)/
7
+ require_relative "#{Regexp.last_match(1)}/nokogiri"
8
+ rescue LoadError => e
9
+ if /GLIBC/.match?(e.message)
10
+ warn(<<~EOM)
11
+
12
+ ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system
13
+ with an unsupported version of glibc.
14
+
15
+ #{e.message}
16
+
17
+ If that's the case, then please install Nokogiri via the `ruby` platform gem:
18
+ gem install nokogiri --platform=ruby
19
+ or:
20
+ bundle config set force_ruby_platform true
21
+
22
+ Please visit https://nokogiri.org/tutorials/installing_nokogiri.html for more help.
23
+
24
+ EOM
25
+ raise e
26
+ end
27
+
28
+ # use "require" instead of "require_relative" because non-native gems will place C extension files
29
+ # in Gem::BasicSpecification#extension_dir after compilation (during normal installation), which
30
+ # is in $LOAD_PATH but not necessarily relative to this file (see #2300)
31
+ require "nokogiri/nokogiri"
32
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
module Gumbo
  # Default cap on the number of attributes a single element may carry.
  DEFAULT_MAX_ATTRIBUTES = 400

  # Default cap on the number of errors for parsing a document or a fragment.
  DEFAULT_MAX_ERRORS = 0

  # Default cap on the depth of the DOM tree produced by parsing a document or fragment.
  DEFAULT_MAX_TREE_DEPTH = 400
end
15
+ end
@@ -0,0 +1,48 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require_relative "html4"
5
+
6
module Nokogiri
  # Alias for Nokogiri::HTML4
  HTML = Nokogiri::HTML4

  # :singleton-method: HTML
  # :call-seq: HTML(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block) → Nokogiri::HTML4::Document
  #
  # Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse

  # :nodoc:
  define_singleton_method(:HTML, Nokogiri.method(:HTML4))

  # 💡 This module/namespace is an alias for Nokogiri::HTML4 as of v1.12.0. Before v1.12.0,
  # Nokogiri::HTML4 did not exist, and this was the module/namespace for all HTML-related
  # classes.
  #
  # The bodies below are intentionally empty: because HTML is bound to Nokogiri::HTML4 above,
  # each declaration reopens the class of the same name under that module.
  module HTML
    # 💡 This class is an alias for Nokogiri::HTML4::Document as of v1.12.0.
    class Document < Nokogiri::XML::Document; end

    # 💡 This class is an alias for Nokogiri::HTML4::DocumentFragment as of v1.12.0.
    class DocumentFragment < Nokogiri::XML::DocumentFragment; end

    # 💡 This class is an alias for Nokogiri::HTML4::Builder as of v1.12.0.
    class Builder < Nokogiri::XML::Builder; end

    module SAX
      # 💡 This class is an alias for Nokogiri::HTML4::SAX::Parser as of v1.12.0.
      class Parser < Nokogiri::XML::SAX::Parser; end

      # 💡 This class is an alias for Nokogiri::HTML4::SAX::ParserContext as of v1.12.0.
      class ParserContext < Nokogiri::XML::SAX::ParserContext; end

      # 💡 This class is an alias for Nokogiri::HTML4::SAX::PushParser as of v1.12.0.
      class PushParser; end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module HTML4
5
+ ###
6
+ # Nokogiri HTML builder is used for building HTML documents. It is very
7
+ # similar to the Nokogiri::XML::Builder. In fact, you should go read the
8
+ # documentation for Nokogiri::XML::Builder before reading this
9
+ # documentation.
10
+ #
11
+ # == Synopsis:
12
+ #
13
+ # Create an HTML document with a body that has an onload attribute, and a
14
+ # span tag with a class of "bold" that has content of "Hello world".
15
+ #
16
+ # builder = Nokogiri::HTML4::Builder.new do |doc|
17
+ # doc.html {
18
+ # doc.body(:onload => 'some_func();') {
19
+ # doc.span.bold {
20
+ # doc.text "Hello world"
21
+ # }
22
+ # }
23
+ # }
24
+ # end
25
+ # puts builder.to_html
26
+ #
27
+ # The HTML builder inherits from the XML builder, so make sure to read the
28
+ # Nokogiri::XML::Builder documentation.
29
class Builder < Nokogiri::XML::Builder
  ###
  # Serialize the document held in @doc as HTML by delegating to its +to_html+.
  def to_html
    @doc.to_html
  end
end
36
+ end
37
+ end