nokogiri 1.18.0-aarch64-linux-gnu
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +39 -0
- data/LICENSE-DEPENDENCIES.md +2224 -0
- data/LICENSE.md +9 -0
- data/README.md +293 -0
- data/bin/nokogiri +131 -0
- data/dependencies.yml +42 -0
- data/ext/nokogiri/depend +38 -0
- data/ext/nokogiri/extconf.rb +1173 -0
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/include/libexslt/exslt.h +108 -0
- data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
- data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
- data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
- data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
- data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
- data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
- data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
- data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
- data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
- data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
- data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
- data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
- data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
- data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
- data/ext/nokogiri/include/libxslt/attributes.h +39 -0
- data/ext/nokogiri/include/libxslt/documents.h +93 -0
- data/ext/nokogiri/include/libxslt/extensions.h +262 -0
- data/ext/nokogiri/include/libxslt/extra.h +72 -0
- data/ext/nokogiri/include/libxslt/functions.h +78 -0
- data/ext/nokogiri/include/libxslt/imports.h +75 -0
- data/ext/nokogiri/include/libxslt/keys.h +53 -0
- data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
- data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
- data/ext/nokogiri/include/libxslt/pattern.h +84 -0
- data/ext/nokogiri/include/libxslt/preproc.h +43 -0
- data/ext/nokogiri/include/libxslt/security.h +104 -0
- data/ext/nokogiri/include/libxslt/templates.h +77 -0
- data/ext/nokogiri/include/libxslt/transform.h +207 -0
- data/ext/nokogiri/include/libxslt/variables.h +118 -0
- data/ext/nokogiri/include/libxslt/xslt.h +110 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
- data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
- data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
- data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +294 -0
- data/ext/nokogiri/nokogiri.h +238 -0
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_cdata.c +62 -0
- data/ext/nokogiri/xml_comment.c +57 -0
- data/ext/nokogiri/xml_document.c +784 -0
- data/ext/nokogiri/xml_document_fragment.c +29 -0
- data/ext/nokogiri/xml_dtd.c +208 -0
- data/ext/nokogiri/xml_element_content.c +131 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_encoding_handler.c +112 -0
- data/ext/nokogiri/xml_entity_decl.c +112 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_namespace.c +181 -0
- data/ext/nokogiri/xml_node.c +2459 -0
- data/ext/nokogiri/xml_node_set.c +518 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_reader.c +777 -0
- data/ext/nokogiri/xml_relax_ng.c +149 -0
- data/ext/nokogiri/xml_sax_parser.c +403 -0
- data/ext/nokogiri/xml_sax_parser_context.c +390 -0
- data/ext/nokogiri/xml_sax_push_parser.c +206 -0
- data/ext/nokogiri/xml_schema.c +226 -0
- data/ext/nokogiri/xml_syntax_error.c +93 -0
- data/ext/nokogiri/xml_text.c +59 -0
- data/ext/nokogiri/xml_xpath_context.c +486 -0
- data/ext/nokogiri/xslt_stylesheet.c +421 -0
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/3.3/nokogiri.so +0 -0
- data/lib/nokogiri/3.4/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +58 -0
- data/lib/nokogiri/css/parser.rb +772 -0
- data/lib/nokogiri/css/parser.y +277 -0
- data/lib/nokogiri/css/parser_extras.rb +36 -0
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rb +155 -0
- data/lib/nokogiri/css/tokenizer.rex +57 -0
- data/lib/nokogiri/css/xpath_visitor.rb +375 -0
- data/lib/nokogiri/css.rb +132 -0
- data/lib/nokogiri/decorators/slop.rb +42 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +48 -0
- data/lib/nokogiri/html4/builder.rb +37 -0
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/html4/element_description.rb +25 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +15 -0
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +6 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +4 -0
- data/lib/nokogiri/xml/attr.rb +66 -0
- data/lib/nokogiri/xml/attribute_decl.rb +22 -0
- data/lib/nokogiri/xml/builder.rb +494 -0
- data/lib/nokogiri/xml/cdata.rb +13 -0
- data/lib/nokogiri/xml/character_data.rb +9 -0
- data/lib/nokogiri/xml/document.rb +514 -0
- data/lib/nokogiri/xml/document_fragment.rb +276 -0
- data/lib/nokogiri/xml/dtd.rb +34 -0
- data/lib/nokogiri/xml/element_content.rb +46 -0
- data/lib/nokogiri/xml/element_decl.rb +17 -0
- data/lib/nokogiri/xml/entity_decl.rb +23 -0
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +57 -0
- data/lib/nokogiri/xml/node/save_options.rb +76 -0
- data/lib/nokogiri/xml/node.rb +1650 -0
- data/lib/nokogiri/xml/node_set.rb +449 -0
- data/lib/nokogiri/xml/notation.rb +19 -0
- data/lib/nokogiri/xml/parse_options.rb +213 -0
- data/lib/nokogiri/xml/pp/character_data.rb +21 -0
- data/lib/nokogiri/xml/pp/node.rb +73 -0
- data/lib/nokogiri/xml/pp.rb +4 -0
- data/lib/nokogiri/xml/processing_instruction.rb +11 -0
- data/lib/nokogiri/xml/reader.rb +139 -0
- data/lib/nokogiri/xml/relax_ng.rb +75 -0
- data/lib/nokogiri/xml/sax/document.rb +258 -0
- data/lib/nokogiri/xml/sax/parser.rb +199 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
- data/lib/nokogiri/xml/sax.rb +54 -0
- data/lib/nokogiri/xml/schema.rb +140 -0
- data/lib/nokogiri/xml/searchable.rb +274 -0
- data/lib/nokogiri/xml/syntax_error.rb +94 -0
- data/lib/nokogiri/xml/text.rb +11 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
- data/lib/nokogiri/xml/xpath.rb +21 -0
- data/lib/nokogiri/xml/xpath_context.rb +27 -0
- data/lib/nokogiri/xml.rb +65 -0
- data/lib/nokogiri/xslt/stylesheet.rb +49 -0
- data/lib/nokogiri/xslt.rb +129 -0
- data/lib/nokogiri.rb +128 -0
- data/lib/xsd/xmlparser/nokogiri.rb +105 -0
- metadata +321 -0
@@ -0,0 +1,375 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
module CSS
|
6
|
+
# When translating CSS selectors to XPath queries with Nokogiri::CSS.xpath_for, the XPathVisitor
|
7
|
+
# class allows for changing some of the behaviors related to builtin xpath functions and quirks
|
8
|
+
# of HTML5.
|
9
|
+
class XPathVisitor
|
10
|
+
WILDCARD_NAMESPACES = Nokogiri.libxml2_patches.include?("0009-allow-wildcard-namespaces.patch") # :nodoc:
|
11
|
+
|
12
|
+
# Enum to direct XPathVisitor when to use Nokogiri builtin XPath functions.
|
13
|
+
module BuiltinsConfig
  # Never use Nokogiri builtin functions, always generate vanilla XPath 1.0 queries. This is
  # the default when calling Nokogiri::CSS.xpath_for directly.
  NEVER = :never

  # Always use Nokogiri builtin functions whenever possible. This is probably only useful for testing.
  ALWAYS = :always

  # Only use Nokogiri builtin functions when they will be faster than vanilla XPath. This is
  # the behavior chosen when searching for CSS selectors on a Nokogiri document, fragment, or
  # node.
  OPTIMAL = :optimal

  # :nodoc: array of values for validation. Frozen so the whitelist checked by
  # XPathVisitor#initialize cannot be altered at runtime.
  VALUES = [NEVER, ALWAYS, OPTIMAL].freeze
end
|
29
|
+
|
30
|
+
# Enum to direct XPathVisitor when to tweak the XPath query to suit the nature of the document
|
31
|
+
# being searched. Note that searches for CSS selectors from a Nokogiri document, fragment, or
|
32
|
+
# node will choose the correct option automatically.
|
33
|
+
module DoctypeConfig
  # The document being searched is an XML document. This is the default.
  XML = :xml

  # The document being searched is an HTML4 document.
  HTML4 = :html4

  # The document being searched is an HTML5 document.
  HTML5 = :html5

  # :nodoc: array of values for validation. Frozen so the whitelist checked by
  # XPathVisitor#initialize cannot be altered at runtime.
  VALUES = [XML, HTML4, HTML5].freeze
end
|
46
|
+
|
47
|
+
# The visitor configuration set via the +builtins:+ keyword argument to XPathVisitor.new.
|
48
|
+
attr_reader :builtins
|
49
|
+
|
50
|
+
# The visitor configuration set via the +doctype:+ keyword argument to XPathVisitor.new.
|
51
|
+
attr_reader :doctype
|
52
|
+
|
53
|
+
# The visitor configuration set via the +prefix:+ keyword argument to XPathVisitor.new.
|
54
|
+
attr_reader :prefix
|
55
|
+
|
56
|
+
# The visitor configuration set via the +namespaces:+ keyword argument to XPathVisitor.new.
|
57
|
+
attr_reader :namespaces
|
58
|
+
|
59
|
+
# :call-seq:
|
60
|
+
# new() → XPathVisitor
|
61
|
+
# new(builtins:, doctype:) → XPathVisitor
|
62
|
+
#
|
63
|
+
# [Parameters]
|
64
|
+
# - +builtins:+ (BuiltinsConfig) Determine when to use Nokogiri's built-in xpath functions for performance improvements.
|
65
|
+
# - +doctype:+ (DoctypeConfig) Make document-type-specific accommodations for CSS queries.
|
66
|
+
#
|
67
|
+
# [Returns] XPathVisitor
|
68
|
+
#
|
69
|
+
def initialize(
  builtins: BuiltinsConfig::NEVER,
  doctype: DoctypeConfig::XML,
  prefix: Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX,
  namespaces: nil
)
  # Validate both enum-style arguments before any state is stored; builtins is checked first.
  builtins_known = BuiltinsConfig::VALUES.include?(builtins)
  raise(ArgumentError, "Invalid values #{builtins.inspect} for builtins: keyword parameter") unless builtins_known

  doctype_known = DoctypeConfig::VALUES.include?(doctype)
  raise(ArgumentError, "Invalid values #{doctype.inspect} for doctype: keyword parameter") unless doctype_known

  @builtins, @doctype, @prefix, @namespaces = builtins, doctype, prefix, namespaces
end
|
87
|
+
|
88
|
+
# :call-seq: config() → Hash
|
89
|
+
#
|
90
|
+
# [Returns]
|
91
|
+
# a Hash representing the configuration of the XPathVisitor, suitable for use as
|
92
|
+
# part of the CSS cache key.
|
93
|
+
def config
|
94
|
+
# A new Hash is built on every call, straight from the four configuration
# instance variables (the attr_readers are not used here).
{ builtins: @builtins, doctype: @doctype, prefix: @prefix, namespaces: @namespaces }
|
95
|
+
end
|
96
|
+
|
97
|
+
# :stopdoc:
|
98
|
+
# Translate a FUNCTION node (e.g. "nth-child(", "contains(") into an XPath predicate.
#
# node.value.first is the function name including its opening parenthesis;
# node.value[1] is the first argument. A public method named
# visit_function_<name> on the visitor, when present, takes precedence over
# the builtin translations below.
def visit_function(node)
  function = node.value.first
  handler = :"visit_function_#{function.gsub(/[(]/, "")}"
  return send(handler, node) if respond_to?(handler)

  argument = node.value[1]
  # true when the argument was parsed as an "an+b" expression rather than a plain value
  an_plus_b = ->(candidate) { candidate.is_a?(Nokogiri::CSS::Node) && (candidate.type == :NTH) }

  case function
  when /^text\(/ then "child::text()"
  when /^self\(/ then "self::#{argument}"
  when /^eq\(/ then "position()=#{argument}"
  when /^(nth|nth-of-type)\(/
    an_plus_b.call(argument) ? nth(argument) : "position()=#{argument}"
  when /^nth-child\(/
    if an_plus_b.call(argument)
      nth(argument, child: true)
    else
      "count(preceding-sibling::*)=#{argument.to_i - 1}"
    end
  when /^nth-last-of-type\(/
    if an_plus_b.call(argument)
      nth(argument, last: true)
    else
      offset = argument.to_i - 1
      offset == 0 ? "position()=last()" : "position()=last()-#{offset}"
    end
  when /^nth-last-child\(/
    if an_plus_b.call(argument)
      nth(argument, last: true, child: true)
    else
      "count(following-sibling::*)=#{argument.to_i - 1}"
    end
  when /^(first|first-of-type)\(/ then "position()=1"
  when /^(last|last-of-type)\(/ then "position()=last()"
  when /^contains\(/ then "contains(.,#{argument})"
  when /^gt\(/ then "position()>#{argument}"
  when /^only-child\(/ then "last()=1"
  when /^comment\(/ then "comment()"
  when /^has\(/
    is_direct = argument.value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
    ".#{"//" unless is_direct}#{argument.accept(self)}"
  else
    validate_xpath_function_name(function)

    # custom xpath function call: the context node is always passed first,
    # followed by the translated arguments
    translated = node.value[1..-1].map do |arg|
      arg.is_a?(Nokogiri::CSS::Node) ? arg.accept(self) : arg
    end
    "nokogiri:#{function}#{[".", *translated].join(",")})"
  end
end
|
160
|
+
|
161
|
+
# Translate a :not(...) node. A bare element name has to be tested through the
# self:: axis; any other operand is already a boolean XPath expression.
def visit_not(node)
  operand = node.value.first
  axis = operand.type == :ELEMENT_NAME ? "self::" : ""
  "not(#{axis}#{operand.accept(self)})"
end
|
169
|
+
|
170
|
+
# Translate an ID node ("#foo") into an @id comparison. When the value does not
# look like "#...", the comparison is made against the empty string.
def visit_id(node)
  captured = /^#(.*)$/.match(node.value.first)
  "@id='#{captured && captured[1]}'"
end
|
174
|
+
|
175
|
+
# Translate an attribute condition such as [href], [href=x], [href^=x] or [class~=x].
#
# node.value is either [attribute_node] or [attribute_node, operator, value].
def visit_attribute_condition(node)
  attribute = node.value.first.accept(self)
  return attribute if node.value.length == 1

  operator = node.value[1]
  operand = node.value.last
  operand = "'#{operand}'" unless /^['"]/.match?(operand)

  # quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
  quote = operand[0]
  if (quote == operand[-1]) && %q{"'}.include?(quote)
    inner = operand[1..-2]
    if inner.include?(quote)
      # the value contains its own delimiter, so build it with concat(),
      # splicing every double quote in as a separate '"' argument
      pieces = inner.split('"', -1).join(%q{",'"',"})
      operand = %(concat("#{pieces}",""))
    end
  end

  case operator
  when :equal then "#{attribute}=#{operand}"
  when :not_equal then "#{attribute}!=#{operand}"
  when :substring_match then "contains(#{attribute},#{operand})"
  when :prefix_match then "starts-with(#{attribute},#{operand})"
  when :dash_match
    "#{attribute}=#{operand} or starts-with(#{attribute},concat(#{operand},'-'))"
  when :includes
    css_class(attribute, operand[1..-2]) # strip quotes
  when :suffix_match
    "substring(#{attribute},string-length(#{attribute})-string-length(#{operand})+1,string-length(#{operand}))=#{operand}"
  else
    "#{attribute} #{operator} #{operand}"
  end
end
|
210
|
+
|
211
|
+
# Translate a pseudo-class (":first-child", ":empty", ...) into an XPath predicate.
#
# Functional pseudo-classes are handed to the FUNCTION node itself. Otherwise a
# public visit_pseudo_class_<name> method on the visitor wins, then the builtin
# table below, and finally the name is emitted as a custom "nokogiri:" function.
def visit_pseudo_class(node)
  pseudo = node.value.first
  return pseudo.accept(self) if pseudo.is_a?(Nokogiri::CSS::Node) && (pseudo.type == :FUNCTION)

  handler = :"visit_pseudo_class_#{pseudo.gsub(/[(]/, "")}"
  return send(handler, node) if respond_to?(handler)

  case pseudo
  when "first", "first-of-type" then "position()=1"
  when "last", "last-of-type" then "position()=last()"
  when "first-child" then "count(preceding-sibling::*)=0"
  when "last-child" then "count(following-sibling::*)=0"
  when "only-child" then "count(preceding-sibling::*)=0 and count(following-sibling::*)=0"
  when "only-of-type" then "last()=1"
  when "empty" then "not(node())"
  when "parent" then "node()"
  when "root" then "not(parent::*)"
  else
    validate_xpath_function_name(pseudo)
    "nokogiri:#{pseudo}(.)"
  end
end
|
236
|
+
|
237
|
+
# Translate a class condition by delegating to css_class, matching the node's
# first value against the "@class" attribute.
def visit_class_condition(node)
|
238
|
+
css_class("@class", node.value.first)
|
239
|
+
end
|
240
|
+
|
241
|
+
# Join two conditions that apply to the same element. An *-of-type pseudo-class
# on the right-hand side is emitted with "][" so it lands in its own predicate;
# everything else is joined with " and " inside the current predicate.
def visit_combinator(node)
  joiner = is_of_type_pseudo_class?(node.value.last) ? "][" : " and "
  "#{node.value.first&.accept(self)}#{joiner}#{node.value.last.accept(self)}"
end
|
248
|
+
|
249
|
+
# Generate one visit_<selector> method per CSS combinator. Each method renders
# the (optional) left-hand side, the XPath axis for the combinator, and then the
# right-hand side.
{
  "direct_adjacent_selector" => "/following-sibling::*[1]/self::",
  "following_selector" => "/following-sibling::",
  "descendant_selector" => "//",
  "child_selector" => "/",
}.each do |selector, axis|
  define_method(:"visit_#{selector}") do |node|
    "#{node.value.first.accept(self) if node.value.first}#{axis}#{node.value.last.accept(self)}"
  end
end
|
261
|
+
|
262
|
+
# Render "<subject>[<condition>]": the first child is the selected element and
# the last child becomes its XPath predicate.
def visit_conditional_selector(node)
  subject = node.value.first.accept(self)
  predicate = node.value.last.accept(self)
  "#{subject}[#{predicate}]"
end
|
266
|
+
|
267
|
+
# Translate an element name, taking the configured doctype, builtins and
# namespaces into account.
def visit_element_name(node)
  if @doctype == DoctypeConfig::HTML5 && html5_element_name_needs_namespace_handling(node)
    # HTML5 has namespaces that should be ignored in CSS queries
    # https://github.com/sparklemotion/nokogiri/issues/2376
    local_name = node.value.first
    builtins_enabled = @builtins == BuiltinsConfig::ALWAYS ||
      (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)

    return "*[local-name()='#{local_name}']" unless builtins_enabled
    return "*:#{local_name}" if WILDCARD_NAMESPACES

    return "*[nokogiri-builtin:local-name-is('#{local_name}')]"
  end

  if node.value.length == 2 # has a namespace prefix
    # an empty (nil) namespace prefix yields the bare local name
    node.value.first.nil? ? node.value.last : node.value.join(":")
  elsif @namespaces&.key?("xmlns") # apply the default namespace if it's declared
    "xmlns:#{node.value.first}"
  else
    node.value.first
  end
end
|
292
|
+
|
293
|
+
# Render an attribute name on the XPath attribute axis ("@name").
def visit_attrib_name(node)
  "@" + node.value.first.to_s
end
|
296
|
+
|
297
|
+
# Start a translation from this visitor's side: delegates to the node's own
# #accept, passing this visitor, and returns whatever that call returns.
def accept(node)
|
298
|
+
node.accept(self)
|
299
|
+
end
|
300
|
+
|
301
|
+
private
|
302
|
+
|
303
|
+
# Reject custom function names that begin with "-".
#
# Raises Nokogiri::CSS::SyntaxError for such names; returns nil otherwise.
def validate_xpath_function_name(name)
  raise Nokogiri::CSS::SyntaxError, "Invalid XPath function name '#{name}'" if name.start_with?("-")
end
|
308
|
+
|
309
|
+
# True only for an unprefixed element name other than the wildcard "*".
def html5_element_name_needs_namespace_handling(node)
  # if there is already a namespace (i.e., it is a prefixed QName), use it as normal
  return false unless node.value.length == 1

  # if this is the wildcard selector "*", use it as normal
  node.value.first != "*"
end
|
315
|
+
|
316
|
+
# Translate an "an+b" node into an XPath position test.
#
# options[:child] counts siblings instead of using position(); options[:last]
# counts from the end. Raises ArgumentError unless the node holds 4 tokens.
def nth(node, options = {})
  tokens = node.value
  unless tokens.size == 4
    raise(ArgumentError, "expected an+b node to contain 4 tokens, but is #{tokens.inspect}")
  end

  a, b = read_a_and_positive_b(tokens)

  from_end = options[:last]
  position =
    if options[:child]
      from_end ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
    elsif from_end
      "(last()-position()+1)"
    else
      "position()"
    end

  # "an": every a-th position
  return "(#{position} mod #{a})=0" if b.zero?

  # a negative step selects positions up to b, a positive one positions from b on
  compare = a < 0 ? "<=" : ">="
  return "#{position}#{compare}#{b}" if a.abs == 1

  "(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
end
|
339
|
+
|
340
|
+
# Extract [a, b] from the four tokens of an "an+b" expression.
#
# For "an-b" the offset is rewritten as a - (b mod a), so callers always receive
# it in additive form. Raises ArgumentError for any operator other than + or -.
def read_a_and_positive_b(values)
  operator = values[2].strip

  case operator
  when "+"
    [values[0].to_i, values[3].to_i]
  when "-"
    step = values[0].to_i
    [step, step - (values[3].to_i % step)]
  else
    raise ArgumentError, "expected an+b node to have either + or - as the operator, but is #{operator.inspect}"
  end
end
|
353
|
+
|
354
|
+
# Truthy (the match offset) when the node is an nth/first/last/only "-of-type"
# pseudo-class, whether written plainly or as a function; nil otherwise.
def is_of_type_pseudo_class?(node) # rubocop:disable Naming/PredicateName
  return unless node.type == :PSEUDO_CLASS

  head = node.value[0]
  functional = head.is_a?(Nokogiri::CSS::Node) && (head.type == :FUNCTION)
  pseudo_name = functional ? head.value[0] : head
  pseudo_name =~ /(nth|first|last|only)-of-type(\()?/
end
|
363
|
+
|
364
|
+
# Build the XPath test for "attribute +hay+ contains the whitespace-separated
# token +needle+", via the Nokogiri builtin when the configuration allows it.
def css_class(hay, needle)
  builtin_enabled = @builtins == BuiltinsConfig::ALWAYS ||
    (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)

  # use the builtin implementation
  return "nokogiri-builtin:css-class(#{hay},'#{needle}')" if builtin_enabled

  # use only ordinary xpath functions
  "contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
end
|
373
|
+
end
|
374
|
+
end
|
375
|
+
end
|
data/lib/nokogiri/css.rb
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
# Translate a CSS selector into an XPath 1.0 query
|
6
|
+
module CSS
|
7
|
+
class << self
|
8
|
+
# TODO: Deprecate this method ahead of 2.0 and delete it in 2.0.
|
9
|
+
# It is not used by Nokogiri and shouldn't be part of the public API.
|
10
|
+
def parse(selector) # :nodoc:
  # Warn first (attributed to the caller via uplevel), then parse as before.
  deprecation = "Nokogiri::CSS.parse is deprecated and will be removed in a future version of Nokogiri. Use Nokogiri::CSS::Parser#parse instead."
  warn(deprecation, uplevel: 1, category: :deprecated)

  Parser.new.parse(selector)
end
|
14
|
+
|
15
|
+
# :call-seq:
|
16
|
+
# xpath_for(selector_list) → Array<String>
|
17
|
+
# xpath_for(selector_list [, prefix:] [, ns:] [, visitor:] [, cache:]) → Array<String>
|
18
|
+
#
|
19
|
+
# Translate a CSS selector list to the equivalent XPath expressions.
|
20
|
+
#
|
21
|
+
# 💡 Note that translated queries are cached by default for performance concerns.
|
22
|
+
#
|
23
|
+
# ⚠ Users should prefer Nokogiri::XML::Searchable#css, which is mixed into all document and
|
24
|
+
# node classes, for querying documents with CSS selectors. This method is the underlying
|
25
|
+
# mechanism used by XML::Searchable and is provided solely for advanced users to translate
|
26
|
+
# \CSS selectors to XPath directly.
|
27
|
+
#
|
28
|
+
# Also see Nokogiri::XML::Searchable#css for documentation on supported CSS selector features,
|
29
|
+
# some extended syntax that Nokogiri supports, and advanced CSS features like pseudo-class
|
30
|
+
# functions.
|
31
|
+
#
|
32
|
+
# [Parameters]
|
33
|
+
# - +selector_list+ (String)
|
34
|
+
#
|
35
|
+
# The CSS selector to be translated into XPath. This is always a String, but that string
|
36
|
+
# value may be a {selector list}[https://www.w3.org/TR/selectors-4/#grouping] (see
|
37
|
+
# examples).
|
38
|
+
#
|
39
|
+
# [Keyword arguments]
|
40
|
+
# - +prefix:+ (String)
|
41
|
+
#
|
42
|
+
# The XPath expression prefix which determines the search context. See Nokogiri::XML::XPath
|
43
|
+
# for standard options. Default is +XPath::GLOBAL_SEARCH_PREFIX+.
|
44
|
+
#
|
45
|
+
# - +ns:+ (Hash<String ⇒ String>, nil)
|
46
|
+
#
|
47
|
+
# Namespaces that are referenced in the query, if any. This is a hash where the keys are the
|
48
|
+
# namespace prefix and the values are the namespace URIs. Default is +nil+ indicating an
|
49
|
+
# empty set of namespaces.
|
50
|
+
#
|
51
|
+
# - +visitor:+ (Nokogiri::CSS::XPathVisitor)
|
52
|
+
#
|
53
|
+
# Use this XPathVisitor object to transform the CSS AST into XPath expressions. See
|
54
|
+
# Nokogiri::CSS::XPathVisitor for more information on some of the complex behavior that can
|
55
|
+
# be customized for your document type. Default is +Nokogiri::CSS::XPathVisitor.new+.
|
56
|
+
#
|
57
|
+
# ⚠ Note that this option is mutually exclusive with +prefix+ and +ns+. If +visitor+ is
|
58
|
+
# provided, +prefix+ and +ns+ must not be present.
|
59
|
+
#
|
60
|
+
# - +cache:+ (Boolean)
|
61
|
+
#
|
62
|
+
# Whether to use the SelectorCache for the translated query to ensure that repeated queries
|
63
|
+
# don't incur the overhead of re-parsing the selector. Default is +true+.
|
64
|
+
#
|
65
|
+
# [Returns] (Array<String>) The equivalent set of XPath expressions for +selector_list+
|
66
|
+
#
|
67
|
+
# *Example* with a simple selector:
|
68
|
+
#
|
69
|
+
# Nokogiri::CSS.xpath_for("div") # => ["//div"]
|
70
|
+
#
|
71
|
+
# *Example* with a compound selector:
|
72
|
+
#
|
73
|
+
# Nokogiri::CSS.xpath_for("div.xl") # => ["//div[contains(concat(' ',normalize-space(@class),' '),' xl ')]"]
|
74
|
+
#
|
75
|
+
# *Example* with a complex selector:
|
76
|
+
#
|
77
|
+
# Nokogiri::CSS.xpath_for("h1 + div") # => ["//h1/following-sibling::*[1]/self::div"]
|
78
|
+
#
|
79
|
+
# *Example* with a selector list:
|
80
|
+
#
|
81
|
+
# Nokogiri::CSS.xpath_for("h1, h2, h3") # => ["//h1", "//h2", "//h3"]
|
82
|
+
#
|
83
|
+
def xpath_for(
  selector, options = nil,
  prefix: options&.delete(:prefix),
  visitor: options&.delete(:visitor),
  ns: options&.delete(:ns),
  cache: true
)
  # Legacy call style: a positional options hash. Its entries have already been
  # consumed by the keyword defaults above; only the deprecation notice remains.
  warn("Nokogiri::CSS.xpath_for: Passing options as an explicit hash is deprecated. Use keyword arguments instead. This will become an error in a future release.", uplevel: 1, category: :deprecated) unless options.nil?

  unless selector.respond_to?(:to_str)
    raise(TypeError, "no implicit conversion of #{selector.inspect} to String")
  end

  selector = selector.to_str
  raise(Nokogiri::CSS::SyntaxError, "empty CSS selector") if selector.empty?

  if visitor
    # an explicit visitor carries its own prefix and namespaces
    raise ArgumentError, "cannot provide both :prefix and :visitor" if prefix
    raise ArgumentError, "cannot provide both :ns and :visitor" if ns
  else
    # only forward the settings that were actually given, so the visitor's
    # own defaults apply to the rest
    visitor_kw = { prefix: prefix, namespaces: ns }.select { |_key, setting| setting }
    visitor = Nokogiri::CSS::XPathVisitor.new(**visitor_kw)
  end

  return Parser.new.xpath_for(selector, visitor) unless cache

  key = SelectorCache.key(selector: selector, visitor: visitor)
  SelectorCache[key] ||= Parser.new.xpath_for(selector, visitor)
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
require_relative "css/selector_cache"
|
124
|
+
require_relative "css/node"
|
125
|
+
require_relative "css/xpath_visitor"
|
126
|
+
x = $-w
|
127
|
+
$-w = false
|
128
|
+
require_relative "css/parser"
|
129
|
+
$-w = x
|
130
|
+
|
131
|
+
require_relative "css/tokenizer"
|
132
|
+
require_relative "css/syntax_error"
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module Decorators
|
5
|
+
###
|
6
|
+
# The Slop decorator implements method_missing such that methods may be
|
7
|
+
# used instead of XPath or CSS. See Nokogiri.Slop
|
8
|
+
module Slop
  # The default XPath search context for Slop
  XPATH_PREFIX = "./"

  ###
  # look for node with +name+. See Nokogiri.Slop
  #
  # Call forms:
  # - no arguments: child elements named +name+ (a single leading underscore is dropped)
  # - a Hash with +:css+: the value is appended to +name+ and run as a CSS query
  # - a Hash with +:xpath+: the condition(s) are joined with " and " into a predicate on +name+
  # - anything else: the first argument is appended to +name+ and translated from CSS
  #
  # Returns the single matching node when exactly one matches, otherwise the list
  # of matches. Falls through to the default +method_missing+ (NoMethodError for
  # +name+) when nothing matches, or when a Hash argument has neither +:css+ nor
  # +:xpath+.
  def method_missing(name, *args, &block)
    if args.empty?
      list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, "")}")
    elsif args.first.is_a?(Hash)
      hash = args.first
      if hash[:css]
        list = css("#{name}#{hash[:css]}")
      elsif hash[:xpath]
        conds = Array(hash[:xpath]).join(" and ")
        list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
      end
    else
      list = xpath(
        *CSS.xpath_for("#{name}#{args.first}", prefix: XPATH_PREFIX, cache: false),
      )
    end

    # list is nil when the Hash had no usable key; treat that like "no match"
    # so the caller gets a NoMethodError about +name+, not about nil.
    return super if list.nil? || list.empty?

    list.length == 1 ? list.first : list
  end

  # A name is considered answered when a child element of that name exists
  # (a single leading underscore is dropped, as in method_missing).
  def respond_to_missing?(name, include_private = false)
    list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, "")}")

    !list.empty?
  end
end
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
class EncodingHandler
  # Popular encoding aliases not known by all iconv implementations that Nokogiri should support.
  USEFUL_ALIASES = {
    # alias_name => true_name
    "ISO-2022-JP" => "ISO-2022-JP", # only for JRuby tests, this is a no-op in CRuby
    "NOKOGIRI-SENTINEL" => "ISO-2022-JP", # indicates that Nokogiri has installed its aliases
    "Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
  }

  class << self
    # Register every entry of USEFUL_ALIASES whose alias name does not resolve
    # to a handler yet.
    def install_default_aliases
      USEFUL_ALIASES.each_pair do |alias_name, true_name|
        next unless EncodingHandler[alias_name].nil?

        EncodingHandler.alias(true_name, alias_name)
      end
    end
  end

  # :stopdoc:
  if Nokogiri.jruby?
    # On JRuby the alias registry is a plain Hash kept on the class itself.
    class << self
      # Handler wrapping the stored name for +name+, or nil when nothing is registered.
      def [](name)
        new(storage[name]) if storage.key?(name)
      end

      # Register +alias_name+ as another name for +name+.
      def alias(name, alias_name)
        storage[alias_name] = name
      end

      # Remove the entry registered under +name+.
      def delete(name)
        storage.delete(name)
      end

      # Forget every registered alias.
      def clear_aliases!
        storage.clear
      end

      private

      # Lazily created alias_name => name table.
      def storage
        @storage ||= {}
      end
    end

    attr_reader :name

    def initialize(name)
      @name = name
    end
  end
end
|
55
|
+
end
|
56
|
+
|
57
|
+
Nokogiri::EncodingHandler.install_default_aliases
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# load the C or Java extension
|
4
|
+
begin
  # native precompiled gems package shared libraries in <gem_dir>/lib/nokogiri/<ruby_version>
  ruby_minor = RUBY_VERSION[/(\d+\.\d+)/, 1]
  require_relative "#{ruby_minor}/nokogiri"
rescue LoadError => e
  # A glibc mismatch cannot be worked around here: explain the situation and
  # let the original LoadError propagate.
  if e.message.include?("GLIBC")
    warn(<<~EOM)

      ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system
      with an unsupported version of glibc.

      #{e.message}

      If that's the case, then please install Nokogiri via the `ruby` platform gem:
      gem install nokogiri --platform=ruby
      or:
      bundle config set force_ruby_platform true

      Please visit https://nokogiri.org/tutorials/installing_nokogiri.html for more help.

    EOM
    raise
  end

  # use "require" instead of "require_relative" because non-native gems will place C extension files
  # in Gem::BasicSpecification#extension_dir after compilation (during normal installation), which
  # is in $LOAD_PATH but not necessarily relative to this file (see #2300)
  require "nokogiri/nokogiri"
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
# Default limit constants (attributes per element, parse errors, tree depth).
module Gumbo
|
5
|
+
# The default maximum number of attributes per element.
|
6
|
+
DEFAULT_MAX_ATTRIBUTES = 400
|
7
|
+
|
8
|
+
# The default maximum number of errors for parsing a document or a fragment.
# NOTE(review): a default of 0 presumably means no parse errors are kept unless
# the caller raises the limit; confirm against the code that consumes this value.
|
9
|
+
DEFAULT_MAX_ERRORS = 0
|
10
|
+
|
11
|
+
# The default maximum depth of the DOM tree produced by parsing a document
|
12
|
+
# or fragment.
|
13
|
+
DEFAULT_MAX_TREE_DEPTH = 400
|
14
|
+
end
|
15
|
+
end
|