nokogiri 1.18.0.rc1-x86_64-linux-gnu
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +2224 -0
- data/LICENSE.md +9 -0
- data/README.md +293 -0
- data/bin/nokogiri +131 -0
- data/dependencies.yml +42 -0
- data/ext/nokogiri/depend +38 -0
- data/ext/nokogiri/extconf.rb +1173 -0
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/include/libexslt/exslt.h +108 -0
- data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
- data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
- data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
- data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
- data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
- data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
- data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
- data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
- data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
- data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
- data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
- data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
- data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
- data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
- data/ext/nokogiri/include/libxslt/attributes.h +39 -0
- data/ext/nokogiri/include/libxslt/documents.h +93 -0
- data/ext/nokogiri/include/libxslt/extensions.h +262 -0
- data/ext/nokogiri/include/libxslt/extra.h +72 -0
- data/ext/nokogiri/include/libxslt/functions.h +78 -0
- data/ext/nokogiri/include/libxslt/imports.h +75 -0
- data/ext/nokogiri/include/libxslt/keys.h +53 -0
- data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
- data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
- data/ext/nokogiri/include/libxslt/pattern.h +84 -0
- data/ext/nokogiri/include/libxslt/preproc.h +43 -0
- data/ext/nokogiri/include/libxslt/security.h +104 -0
- data/ext/nokogiri/include/libxslt/templates.h +77 -0
- data/ext/nokogiri/include/libxslt/transform.h +207 -0
- data/ext/nokogiri/include/libxslt/variables.h +118 -0
- data/ext/nokogiri/include/libxslt/xslt.h +110 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
- data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
- data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
- data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +294 -0
- data/ext/nokogiri/nokogiri.h +238 -0
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_cdata.c +62 -0
- data/ext/nokogiri/xml_comment.c +57 -0
- data/ext/nokogiri/xml_document.c +784 -0
- data/ext/nokogiri/xml_document_fragment.c +29 -0
- data/ext/nokogiri/xml_dtd.c +208 -0
- data/ext/nokogiri/xml_element_content.c +131 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_encoding_handler.c +112 -0
- data/ext/nokogiri/xml_entity_decl.c +112 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_namespace.c +181 -0
- data/ext/nokogiri/xml_node.c +2459 -0
- data/ext/nokogiri/xml_node_set.c +518 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_reader.c +777 -0
- data/ext/nokogiri/xml_relax_ng.c +149 -0
- data/ext/nokogiri/xml_sax_parser.c +403 -0
- data/ext/nokogiri/xml_sax_parser_context.c +390 -0
- data/ext/nokogiri/xml_sax_push_parser.c +206 -0
- data/ext/nokogiri/xml_schema.c +226 -0
- data/ext/nokogiri/xml_syntax_error.c +93 -0
- data/ext/nokogiri/xml_text.c +59 -0
- data/ext/nokogiri/xml_xpath_context.c +502 -0
- data/ext/nokogiri/xslt_stylesheet.c +421 -0
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/3.3/nokogiri.so +0 -0
- data/lib/nokogiri/3.4/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +58 -0
- data/lib/nokogiri/css/parser.rb +772 -0
- data/lib/nokogiri/css/parser.y +277 -0
- data/lib/nokogiri/css/parser_extras.rb +36 -0
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rb +155 -0
- data/lib/nokogiri/css/tokenizer.rex +57 -0
- data/lib/nokogiri/css/xpath_visitor.rb +375 -0
- data/lib/nokogiri/css.rb +132 -0
- data/lib/nokogiri/decorators/slop.rb +42 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +48 -0
- data/lib/nokogiri/html4/builder.rb +37 -0
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/html4/element_description.rb +25 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +15 -0
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +6 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +4 -0
- data/lib/nokogiri/xml/attr.rb +66 -0
- data/lib/nokogiri/xml/attribute_decl.rb +22 -0
- data/lib/nokogiri/xml/builder.rb +494 -0
- data/lib/nokogiri/xml/cdata.rb +13 -0
- data/lib/nokogiri/xml/character_data.rb +9 -0
- data/lib/nokogiri/xml/document.rb +514 -0
- data/lib/nokogiri/xml/document_fragment.rb +276 -0
- data/lib/nokogiri/xml/dtd.rb +34 -0
- data/lib/nokogiri/xml/element_content.rb +46 -0
- data/lib/nokogiri/xml/element_decl.rb +17 -0
- data/lib/nokogiri/xml/entity_decl.rb +23 -0
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +57 -0
- data/lib/nokogiri/xml/node/save_options.rb +76 -0
- data/lib/nokogiri/xml/node.rb +1650 -0
- data/lib/nokogiri/xml/node_set.rb +449 -0
- data/lib/nokogiri/xml/notation.rb +19 -0
- data/lib/nokogiri/xml/parse_options.rb +213 -0
- data/lib/nokogiri/xml/pp/character_data.rb +21 -0
- data/lib/nokogiri/xml/pp/node.rb +73 -0
- data/lib/nokogiri/xml/pp.rb +4 -0
- data/lib/nokogiri/xml/processing_instruction.rb +11 -0
- data/lib/nokogiri/xml/reader.rb +139 -0
- data/lib/nokogiri/xml/relax_ng.rb +75 -0
- data/lib/nokogiri/xml/sax/document.rb +258 -0
- data/lib/nokogiri/xml/sax/parser.rb +199 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
- data/lib/nokogiri/xml/sax.rb +54 -0
- data/lib/nokogiri/xml/schema.rb +140 -0
- data/lib/nokogiri/xml/searchable.rb +297 -0
- data/lib/nokogiri/xml/syntax_error.rb +94 -0
- data/lib/nokogiri/xml/text.rb +11 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
- data/lib/nokogiri/xml/xpath.rb +21 -0
- data/lib/nokogiri/xml/xpath_context.rb +49 -0
- data/lib/nokogiri/xml.rb +65 -0
- data/lib/nokogiri/xslt/stylesheet.rb +49 -0
- data/lib/nokogiri/xslt.rb +129 -0
- data/lib/nokogiri.rb +128 -0
- data/lib/xsd/xmlparser/nokogiri.rb +105 -0
- metadata +324 -0
@@ -0,0 +1,375 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
module CSS
|
6
|
+
# When translating CSS selectors to XPath queries with Nokogiri::CSS.xpath_for, the XPathVisitor
|
7
|
+
# class allows for changing some of the behaviors related to builtin xpath functions and quirks
|
8
|
+
# of HTML5.
|
9
|
+
class XPathVisitor
|
10
|
+
# True when the patch list returned by Nokogiri.libxml2_patches includes the
# wildcard-namespaces patch. visit_element_name consults this flag before emitting the
# "*:name" form of an element test.
WILDCARD_NAMESPACES = Nokogiri.libxml2_patches.include?("0009-allow-wildcard-namespaces.patch") # :nodoc:
|
11
|
+
|
12
|
+
# Enum to direct XPathVisitor when to use Nokogiri builtin XPath functions.
module BuiltinsConfig
  # Never use Nokogiri builtin functions, always generate vanilla XPath 1.0 queries. This is
  # the default when calling Nokogiri::CSS.xpath_for directly.
  NEVER = :never

  # Always use Nokogiri builtin functions whenever possible. This is probably only useful for testing.
  ALWAYS = :always

  # Only use Nokogiri builtin functions when they will be faster than vanilla XPath. This is
  # the behavior chosen when searching for CSS selectors on a Nokogiri document, fragment, or
  # node.
  OPTIMAL = :optimal

  # :nodoc: array of values for validation
  # Frozen so the set of accepted values cannot be altered at runtime.
  VALUES = [NEVER, ALWAYS, OPTIMAL].freeze
end
|
29
|
+
|
30
|
+
# Enum to direct XPathVisitor when to tweak the XPath query to suit the nature of the document
# being searched. Note that searches for CSS selectors from a Nokogiri document, fragment, or
# node will choose the correct option automatically.
module DoctypeConfig
  # The document being searched is an XML document. This is the default.
  XML = :xml

  # The document being searched is an HTML4 document.
  HTML4 = :html4

  # The document being searched is an HTML5 document.
  HTML5 = :html5

  # :nodoc: array of values for validation
  # Frozen so the set of accepted values cannot be altered at runtime.
  VALUES = [XML, HTML4, HTML5].freeze
end
|
46
|
+
|
47
|
+
# The value given for the +builtins:+ keyword argument of XPathVisitor.new.
attr_reader :builtins

# The value given for the +doctype:+ keyword argument of XPathVisitor.new.
attr_reader :doctype

# The value given for the +prefix:+ keyword argument of XPathVisitor.new.
attr_reader :prefix

# The value given for the +namespaces:+ keyword argument of XPathVisitor.new.
attr_reader :namespaces
|
58
|
+
|
59
|
+
# :call-seq:
#   new() → XPathVisitor
#   new(builtins:, doctype:) → XPathVisitor
#
# [Parameters]
# - +builtins:+ (BuiltinsConfig) Determine when to use Nokogiri's built-in xpath functions for performance improvements.
# - +doctype:+ (DoctypeConfig) Make document-type-specific accommodations for CSS queries.
#
# The +prefix:+ and +namespaces:+ keyword arguments are stored unchanged and exposed through
# the readers of the same name.
#
# [Returns] XPathVisitor
#
# Raises ArgumentError when +builtins:+ is not listed in BuiltinsConfig::VALUES or +doctype:+
# is not listed in DoctypeConfig::VALUES.
#
def initialize(
  builtins: BuiltinsConfig::NEVER,
  doctype: DoctypeConfig::XML,
  prefix: Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX,
  namespaces: nil
)
  # Validate the two enum-like arguments before storing anything.
  raise(ArgumentError, "Invalid values #{builtins.inspect} for builtins: keyword parameter") unless BuiltinsConfig::VALUES.include?(builtins)
  raise(ArgumentError, "Invalid values #{doctype.inspect} for doctype: keyword parameter") unless DoctypeConfig::VALUES.include?(doctype)

  @builtins, @doctype = builtins, doctype
  @prefix, @namespaces = prefix, namespaces
end
|
87
|
+
|
88
|
+
# :call-seq: config() → Hash
#
# [Returns]
#   a Hash holding the four configuration values of this XPathVisitor (builtins, doctype,
#   prefix, namespaces), suitable for use as part of the CSS cache key.
def config
  {
    builtins: @builtins,
    doctype: @doctype,
    prefix: @prefix,
    namespaces: @namespaces,
  }
end
|
96
|
+
|
97
|
+
# :stopdoc:
|
98
|
+
# Translates a function node into an XPath fragment (a String).
#
# +node.value.first+ is the function name including its opening parenthesis (e.g. "nth-child(")
# and +node.value[1]+ is its first argument. If this visitor responds to
# +visit_function_<name>+, that method is called instead of the built-in table below.
# Names not listed below are validated and emitted as a call in the +nokogiri:+ namespace
# with "." prepended to the argument list.
def visit_function(node)
  name = node.value.first
  handler = :"visit_function_#{name.gsub(/[(]/, "")}"
  return send(handler, node) if respond_to?(handler)

  arg = node.value[1]
  # Evaluated lazily so only the nth-style branches inspect the argument's type.
  an_plus_b = -> { arg.is_a?(Nokogiri::CSS::Node) && (arg.type == :NTH) }

  case name
  when /^text\(/ then "child::text()"
  when /^self\(/ then "self::#{arg}"
  when /^eq\(/ then "position()=#{arg}"
  when /^(nth|nth-of-type)\(/
    an_plus_b.call ? nth(arg) : "position()=#{arg}"
  when /^nth-child\(/
    an_plus_b.call ? nth(arg, child: true) : "count(preceding-sibling::*)=#{arg.to_i - 1}"
  when /^nth-last-of-type\(/
    if an_plus_b.call
      nth(arg, last: true)
    else
      offset = arg.to_i - 1
      offset.zero? ? "position()=last()" : "position()=last()-#{offset}"
    end
  when /^nth-last-child\(/
    an_plus_b.call ? nth(arg, last: true, child: true) : "count(following-sibling::*)=#{arg.to_i - 1}"
  when /^(first|first-of-type)\(/ then "position()=1"
  when /^(last|last-of-type)\(/ then "position()=last()"
  when /^contains\(/ then "contains(.,#{arg})"
  when /^gt\(/ then "position()>#{arg}"
  when /^only-child\(/ then "last()=1"
  when /^comment\(/ then "comment()"
  when /^has\(/
    # A nil first value marks a relative selector, e.g. "has(> a)", "has(~ a)", "has(+ a)";
    # only the other case gets the descendant axis.
    axis = arg.value[0].nil? ? "" : "//"
    ".#{axis}#{arg.accept(self)}"
  else
    validate_xpath_function_name(name)

    # xpath function call, let's marshal those arguments
    rest = node.value.drop(1).map do |item|
      item.is_a?(Nokogiri::CSS::Node) ? item.accept(self) : item
    end
    "nokogiri:#{name}#{[".", *rest].join(",")})"
  end
end
|
160
|
+
|
161
|
+
# Wraps the translation of the negated child in an XPath not(...). A child of type
# :ELEMENT_NAME is additionally prefixed with the self:: axis.
def visit_not(node)
  negated = node.value.first
  axis = negated.type == :ELEMENT_NAME ? "self::" : ""
  "not(#{axis}#{negated.accept(self)})"
end
|
169
|
+
|
170
|
+
# Builds an @id='...' comparison from an id token such as "#main"; the leading "#" is
# dropped. A token that does not match yields an empty id value.
def visit_id(node)
  matched = /^#(.*)$/.match(node.value.first)
  "@id='#{matched && matched[1]}'"
end
|
174
|
+
|
175
|
+
# Translates an attribute condition into an XPath predicate body.
#
# +node.value+ is either [attribute] (presence test) or [attribute, operator, value].
# Unquoted values are wrapped in single quotes. A quoted value that contains its own quote
# character is rewritten as an XPath concat() expression. The operator symbol selects the
# comparison form; unknown operators are emitted verbatim between attribute and value.
def visit_attribute_condition(node)
  lhs = node.value.first.accept(self)
  return lhs if node.value.length == 1

  rhs = node.value.last
  rhs = "'#{rhs}'" unless /^['"]/.match?(rhs)

  # quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
  quote = rhs[0]
  if (quote == rhs[-1]) && %q{"'}.include?(quote)
    inner = rhs[1..-2]
    rhs = 'concat("' + inner.split('"', -1).join(%q{",'"',"}) + '","")' if inner.include?(quote)
  end

  operator = node.value[1]
  case operator
  when :equal then "#{lhs}=#{rhs}"
  when :not_equal then "#{lhs}!=#{rhs}"
  when :substring_match then "contains(#{lhs},#{rhs})"
  when :prefix_match then "starts-with(#{lhs},#{rhs})"
  when :dash_match then "#{lhs}=#{rhs} or starts-with(#{lhs},concat(#{rhs},'-'))"
  when :includes
    # strip quotes before handing the bare token to css_class
    css_class(lhs, rhs[1..-2])
  when :suffix_match
    "substring(#{lhs},string-length(#{lhs})-string-length(#{rhs})+1,string-length(#{rhs}))=#{rhs}"
  else
    "#{lhs} #{operator} #{rhs}"
  end
end
|
210
|
+
|
211
|
+
# Translates a pseudo-class node into an XPath predicate body.
#
# A pseudo-class wrapping a :FUNCTION node is delegated to that node. Otherwise, if this
# visitor responds to +visit_pseudo_class_<name>+, that method is used. Failing that, the
# built-in table below applies, and any other name is validated and emitted as a
# +nokogiri:<name>(.)+ function call.
def visit_pseudo_class(node)
  head = node.value.first
  return head.accept(self) if head.is_a?(Nokogiri::CSS::Node) && (head.type == :FUNCTION)

  handler = :"visit_pseudo_class_#{head.gsub(/[(]/, "")}"
  return send(handler, node) if respond_to?(handler)

  case head
  when "first", "first-of-type" then "position()=1"
  when "last", "last-of-type" then "position()=last()"
  when "first-child" then "count(preceding-sibling::*)=0"
  when "last-child" then "count(following-sibling::*)=0"
  when "only-child" then "count(preceding-sibling::*)=0 and count(following-sibling::*)=0"
  when "only-of-type" then "last()=1"
  when "empty" then "not(node())"
  when "parent" then "node()"
  when "root" then "not(parent::*)"
  else
    validate_xpath_function_name(head)
    "nokogiri:#{head}(.)"
  end
end
|
236
|
+
|
237
|
+
# Translates a class condition into a class-membership test against @class by delegating
# to css_class with the class name held in +node.value.first+.
def visit_class_condition(node)
  class_name = node.value.first
  css_class("@class", class_name)
end
|
240
|
+
|
241
|
+
# Joins the translations of the two operands of a combinator node. When the right operand
# is an *-of-type pseudo-class the operands are joined with "][" (two separate predicates);
# otherwise they are joined with " and " inside one predicate. A nil left operand
# contributes an empty string.
def visit_combinator(node)
  left = node.value.first
  right = node.value.last
  joiner = is_of_type_pseudo_class?(right) ? "][" : " and "
  "#{left&.accept(self)}#{joiner}#{right.accept(self)}"
end
|
248
|
+
|
249
|
+
# Defines one visit_<selector> method per entry. Each generated method joins the
# translation of the left operand (omitted when it is nil/false) and of the right operand
# with the XPath axis string associated with that selector kind.
{
  "direct_adjacent_selector" => "/following-sibling::*[1]/self::",
  "following_selector" => "/following-sibling::",
  "descendant_selector" => "//",
  "child_selector" => "/",
}.each do |selector_kind, axis|
  define_method(:"visit_#{selector_kind}") do |node|
    "#{node.value.first.accept(self) if node.value.first}#{axis}#{node.value.last.accept(self)}"
  end
end
|
261
|
+
|
262
|
+
# Emits "<subject>[<condition>]": the translation of the first value followed by the
# translation of the last value wrapped in a predicate.
def visit_conditional_selector(node)
  subject = node.value.first.accept(self)
  condition = node.value.last.accept(self)
  "#{subject}[#{condition}]"
end
|
266
|
+
|
267
|
+
# Translates an element name into an XPath node test.
#
# For HTML5 documents, unprefixed non-wildcard names are matched on local name only
# (HTML5 has namespaces that should be ignored in CSS queries, see
# https://github.com/sparklemotion/nokogiri/issues/2376). Otherwise a two-part value is
# treated as prefix + name, and an unprefixed name receives the "xmlns" prefix when the
# configured namespaces declare one.
def visit_element_name(node)
  if @doctype == DoctypeConfig::HTML5 && html5_element_name_needs_namespace_handling(node)
    local_name = node.value.first
    use_builtins = @builtins == BuiltinsConfig::ALWAYS || (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)
    return "*[local-name()='#{local_name}']" unless use_builtins

    return WILDCARD_NAMESPACES ? "*:#{local_name}" : "*[nokogiri-builtin:local-name-is('#{local_name}')]"
  end

  if node.value.length == 2 # has a namespace prefix
    # a nil prefix means the namespace prefix is empty
    node.value.first.nil? ? node.value.last : node.value.join(":")
  elsif @namespaces&.key?("xmlns") # apply the default namespace if it's declared
    "xmlns:#{node.value.first}"
  else
    node.value.first
  end
end
|
292
|
+
|
293
|
+
# Emits the XPath attribute reference "@<name>" for the name held in +node.value.first+.
def visit_attrib_name(node)
  attribute_name = node.value.first
  "@#{attribute_name}"
end
|
296
|
+
|
297
|
+
# Entry point for translation: asks +node+ to accept this visitor and returns whatever
# the node's own accept method returns.
def accept(node)
  node.accept(self)
end
|
300
|
+
|
301
|
+
private
|
302
|
+
|
303
|
+
# Raises Nokogiri::CSS::SyntaxError when +name+ begins with "-"; returns nil otherwise.
def validate_xpath_function_name(name)
  return unless name.start_with?("-")

  raise Nokogiri::CSS::SyntaxError, "Invalid XPath function name '#{name}'"
end
|
308
|
+
|
309
|
+
# True only for an unprefixed, non-wildcard element name.
def html5_element_name_needs_namespace_handling(node)
  parts = node.value
  # if there is already a namespace (i.e., it is a prefixed QName), use it as normal
  return false unless parts.length == 1

  # if this is the wildcard selector "*", use it as normal
  parts.first != "*"
end
|
315
|
+
|
316
|
+
# Builds the XPath predicate body for an an+b expression.
#
# +node.value+ must hold exactly 4 tokens (ArgumentError otherwise); the coefficients are
# obtained from read_a_and_positive_b. +options[:child]+ counts siblings instead of using
# position(), and +options[:last]+ counts from the end.
def nth(node, options = {})
  tokens = node.value
  unless tokens.size == 4
    raise(ArgumentError, "expected an+b node to contain 4 tokens, but is #{tokens.inspect}")
  end

  a, b = read_a_and_positive_b(tokens)
  from_end = options[:last]
  position = if options[:child]
    from_end ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
  elsif from_end
    "(last()-position()+1)"
  else
    "position()"
  end

  return "(#{position} mod #{a})=0" if b.zero?

  comparison = "#{position}#{a < 0 ? "<=" : ">="}#{b}"
  return comparison if a.abs == 1

  "(#{comparison}) and (((#{position}-#{b}) mod #{a.abs})=0)"
end
|
339
|
+
|
340
|
+
# Reads the coefficients from the 4-token an+b list and returns them as [a, b].
#
# +values[0]+ is a, +values[2]+ is the operator and +values[3]+ is b. For the "-" operator
# b is rewritten as a - (b mod a). Any other operator raises ArgumentError.
def read_a_and_positive_b(values)
  op = values[2].strip
  case op
  when "+"
    [values[0].to_i, values[3].to_i]
  when "-"
    a = values[0].to_i
    [a, a - (values[3].to_i % a)]
  else
    raise ArgumentError, "expected an+b node to have either + or - as the operator, but is #{op.inspect}"
  end
end
|
353
|
+
|
354
|
+
# Truthy (the match index) when +node+ is a :PSEUDO_CLASS whose name - taken from the
# wrapped :FUNCTION node when there is one - matches an *-of-type pseudo-class; nil otherwise.
def is_of_type_pseudo_class?(node) # rubocop:disable Naming/PredicateName
  return unless node.type == :PSEUDO_CLASS

  head = node.value[0]
  wraps_function = head.is_a?(Nokogiri::CSS::Node) && (head.type == :FUNCTION)
  pseudo_name = wraps_function ? head.value[0] : head
  pseudo_name =~ /(nth|first|last|only)-of-type(\()?/
end
|
363
|
+
|
364
|
+
# Builds the test for "+needle+ is one of the whitespace-separated tokens in +hay+".
# Uses the nokogiri-builtin:css-class function when builtins are configured ALWAYS, or
# OPTIMAL while Nokogiri.uses_libxml? is true; otherwise falls back to ordinary XPath functions.
def css_class(hay, needle)
  use_builtin = @builtins == BuiltinsConfig::ALWAYS || (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)
  # use the builtin implementation
  return "nokogiri-builtin:css-class(#{hay},'#{needle}')" if use_builtin

  # use only ordinary xpath functions
  "contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
end
|
373
|
+
end
|
374
|
+
end
|
375
|
+
end
|
data/lib/nokogiri/css.rb
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
# Translate a CSS selector into an XPath 1.0 query
|
6
|
+
module CSS
|
7
|
+
class << self
|
8
|
+
# TODO: Deprecate this method ahead of 2.0 and delete it in 2.0.
# It is not used by Nokogiri and shouldn't be part of the public API.
#
# Emits a deprecation warning attributed to the caller, then parses +selector+ with a
# fresh Parser and returns the parser's result.
def parse(selector) # :nodoc:
  deprecation = "Nokogiri::CSS.parse is deprecated and will be removed in a future version of Nokogiri. Use Nokogiri::CSS::Parser#parse instead."
  warn(deprecation, uplevel: 1, category: :deprecated)
  Parser.new.parse(selector)
end
|
14
|
+
|
15
|
+
# :call-seq:
|
16
|
+
# xpath_for(selector_list) → Array<String>
|
17
|
+
# xpath_for(selector_list [, prefix:] [, ns:] [, visitor:] [, cache:]) → Array<String>
|
18
|
+
#
|
19
|
+
# Translate a CSS selector list to the equivalent XPath expressions.
|
20
|
+
#
|
21
|
+
# 💡 Note that translated queries are cached by default for performance concerns.
|
22
|
+
#
|
23
|
+
# ⚠ Users should prefer Nokogiri::XML::Searchable#css, which is mixed into all document and
|
24
|
+
# node classes, for querying documents with CSS selectors. This method is the underlying
|
25
|
+
# mechanism used by XML::Searchable and is provided solely for advanced users to translate
|
26
|
+
# \CSS selectors to XPath directly.
|
27
|
+
#
|
28
|
+
# Also see Nokogiri::XML::Searchable#css for documentation on supported CSS selector features,
|
29
|
+
# some extended syntax that Nokogiri supports, and advanced CSS features like pseudo-class
|
30
|
+
# functions.
|
31
|
+
#
|
32
|
+
# [Parameters]
|
33
|
+
# - +selector_list+ (String)
|
34
|
+
#
|
35
|
+
# The CSS selector to be translated into XPath. This is always a String, but that string
|
36
|
+
# value may be a {selector list}[https://www.w3.org/TR/selectors-4/#grouping] (see
|
37
|
+
# examples).
|
38
|
+
#
|
39
|
+
# [Keyword arguments]
|
40
|
+
# - +prefix:+ (String)
|
41
|
+
#
|
42
|
+
# The XPath expression prefix which determines the search context. See Nokogiri::XML::XPath
|
43
|
+
# for standard options. Default is +XPath::GLOBAL_SEARCH_PREFIX+.
|
44
|
+
#
|
45
|
+
# - +ns:+ (Hash<String ⇒ String>, nil)
|
46
|
+
#
|
47
|
+
# Namespaces that are referenced in the query, if any. This is a hash where the keys are the
|
48
|
+
# namespace prefix and the values are the namespace URIs. Default is +nil+ indicating an
|
49
|
+
# empty set of namespaces.
|
50
|
+
#
|
51
|
+
# - +visitor:+ (Nokogiri::CSS::XPathVisitor)
|
52
|
+
#
|
53
|
+
# Use this XPathVisitor object to transform the CSS AST into XPath expressions. See
|
54
|
+
# Nokogiri::CSS::XPathVisitor for more information on some of the complex behavior that can
|
55
|
+
# be customized for your document type. Default is +Nokogiri::CSS::XPathVisitor.new+.
|
56
|
+
#
|
57
|
+
# ⚠ Note that this option is mutually exclusive with +prefix+ and +ns+. If +visitor+ is
|
58
|
+
# provided, +prefix+ and +ns+ must not be present.
|
59
|
+
#
|
60
|
+
# - +cache:+ (Boolean)
|
61
|
+
#
|
62
|
+
# Whether to use the SelectorCache for the translated query to ensure that repeated queries
|
63
|
+
# don't incur the overhead of re-parsing the selector. Default is +true+.
|
64
|
+
#
|
65
|
+
# [Returns] (Array<String>) The equivalent set of XPath expressions for +selector_list+
|
66
|
+
#
|
67
|
+
# *Example* with a simple selector:
|
68
|
+
#
|
69
|
+
# Nokogiri::CSS.xpath_for("div") # => ["//div"]
|
70
|
+
#
|
71
|
+
# *Example* with a compound selector:
|
72
|
+
#
|
73
|
+
# Nokogiri::CSS.xpath_for("div.xl") # => ["//div[contains(concat(' ',normalize-space(@class),' '),' xl ')]"]
|
74
|
+
#
|
75
|
+
# *Example* with a complex selector:
|
76
|
+
#
|
77
|
+
# Nokogiri::CSS.xpath_for("h1 + div") # => ["//h1/following-sibling::*[1]/self::div"]
|
78
|
+
#
|
79
|
+
# *Example* with a selector list:
|
80
|
+
#
|
81
|
+
# Nokogiri::CSS.xpath_for("h1, h2, h3") # => ["//h1", "//h2", "//h3"]
|
82
|
+
#
|
83
|
+
def xpath_for(
  selector, options = nil,
  prefix: options&.delete(:prefix),
  visitor: options&.delete(:visitor),
  ns: options&.delete(:ns),
  cache: true
)
  # A positional options hash is still honored (the keyword defaults above read from it),
  # but its use is reported as deprecated.
  unless options.nil?
    warn("Nokogiri::CSS.xpath_for: Passing options as an explicit hash is deprecated. Use keyword arguments instead. This will become an error in a future release.", uplevel: 1, category: :deprecated)
  end

  raise(TypeError, "no implicit conversion of #{selector.inspect} to String") unless selector.respond_to?(:to_str)

  selector = selector.to_str
  raise(Nokogiri::CSS::SyntaxError, "empty CSS selector") if selector.empty?

  if visitor
    # An explicit visitor already carries its own prefix and namespaces.
    raise ArgumentError, "cannot provide both :prefix and :visitor" if prefix
    raise ArgumentError, "cannot provide both :ns and :visitor" if ns
  else
    # Only forward the settings that were actually given so the visitor's defaults apply.
    visitor_kw = { prefix: prefix, namespaces: ns }.select { |_keyword, setting| setting }
    visitor = Nokogiri::CSS::XPathVisitor.new(**visitor_kw)
  end

  return Parser.new.xpath_for(selector, visitor) unless cache

  key = SelectorCache.key(selector: selector, visitor: visitor)
  SelectorCache[key] ||= Parser.new.xpath_for(selector, visitor)
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
require_relative "css/selector_cache"
|
124
|
+
require_relative "css/node"
|
125
|
+
require_relative "css/xpath_visitor"
|
126
|
+
x = $-w
|
127
|
+
$-w = false
|
128
|
+
require_relative "css/parser"
|
129
|
+
$-w = x
|
130
|
+
|
131
|
+
require_relative "css/tokenizer"
|
132
|
+
require_relative "css/syntax_error"
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
  module Decorators
    ###
    # The Slop decorator implements method_missing such that methods may be
    # used instead of XPath or CSS.  See Nokogiri.Slop
    module Slop
      # The default XPath search context for Slop
      XPATH_PREFIX = "./"

      ###
      # look for node with +name+.  See Nokogiri.Slop
      #
      # - With no arguments, searches for child elements called +name+ (one leading
      #   underscore is stripped from the name).
      # - With a Hash argument, +:css+ is appended to the name as a CSS selector, or the
      #   +:xpath+ conditions are joined with " and " into a predicate.
      # - With any other argument, the argument is appended to the name and translated
      #   from CSS to XPath.
      #
      # Returns the single matching node, or the whole list when several match. When
      # nothing matches - including a Hash argument that carries neither +:css+ nor
      # +:xpath+ - the call is handed to +super+, which raises NoMethodError for +name+.
      def method_missing(name, *args, &block)
        if args.empty?
          list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, "")}")
        elsif args.first.is_a?(Hash)
          hash = args.first
          if hash[:css]
            list = css("#{name}#{hash[:css]}")
          elsif hash[:xpath]
            conds = Array(hash[:xpath]).join(" and ")
            list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
          end
        else
          list = xpath(
            *CSS.xpath_for("#{name}#{args.first}", prefix: XPATH_PREFIX, cache: false),
          )
        end

        # list is nil when a Hash without :css/:xpath was given; treat that as "no match"
        # instead of failing on nil.empty?
        super if list.nil? || list.empty?
        list.length == 1 ? list.first : list
      end

      # Reports whether a no-argument Slop lookup for +name+ would find at least one node.
      def respond_to_missing?(name, include_private = false)
        list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, "")}")

        !list.empty?
      end
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module Nokogiri
  class EncodingHandler
    # Popular encoding aliases not known by all iconv implementations that Nokogiri should support.
    # Frozen because it is a lookup table that is only ever iterated.
    USEFUL_ALIASES = {
      # alias_name => true_name
      "ISO-2022-JP" => "ISO-2022-JP", # only for JRuby tests, this is a no-op in CRuby
      "NOKOGIRI-SENTINEL" => "ISO-2022-JP", # indicating that Nokogiri has installed aliases
      "Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
    }.freeze

    class << self
      # Registers each USEFUL_ALIASES entry whose alias name does not already resolve to a
      # handler (i.e. EncodingHandler[alias_name] is nil).
      def install_default_aliases
        USEFUL_ALIASES.each do |alias_name, name|
          EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
        end
      end
    end

    # :stopdoc:
    if Nokogiri.jruby?
      # Pure-Ruby alias registry, defined only when Nokogiri.jruby? is true.
      class << self
        # Returns a handler for the stored name of +name+, or nil when +name+ is unknown.
        def [](name)
          storage.key?(name) ? new(storage[name]) : nil
        end

        # Records +alias_name+ as another name for +name+.
        def alias(name, alias_name)
          storage[alias_name] = name
        end

        # Removes the entry stored under +name+.
        def delete(name)
          storage.delete(name)
        end

        # Removes every stored alias.
        def clear_aliases!
          storage.clear
        end

        private

        # Lazily created Hash of alias name => encoding name.
        def storage
          @storage ||= {}
        end
      end

      def initialize(name)
        @name = name
      end

      attr_reader :name
    end
  end
end
|
56
|
+
|
57
|
+
Nokogiri::EncodingHandler.install_default_aliases
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# load the C or Java extension
# First attempt: a library under a directory named after the running Ruby's major.minor
# version, resolved relative to this file. Any LoadError falls through to the rescue below.
|
4
|
+
begin
|
5
|
+
# native precompiled gems package shared libraries in <gem_dir>/lib/nokogiri/<ruby_version>
|
6
|
+
# Captures "<major>.<minor>" from RUBY_VERSION for use in the path on the next line.
RUBY_VERSION =~ /(\d+\.\d+)/
|
7
|
+
require_relative "#{Regexp.last_match(1)}/nokogiri"
|
8
|
+
rescue LoadError => e
|
9
|
+
# A load error whose message mentions GLIBC gets an explanatory warning printed, and the
# original error is then re-raised instead of trying the fallback require.
if e.message.include?("GLIBC")
|
10
|
+
warn(<<~EOM)
|
11
|
+
|
12
|
+
ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system
|
13
|
+
with an unsupported version of glibc.
|
14
|
+
|
15
|
+
#{e.message}
|
16
|
+
|
17
|
+
If that's the case, then please install Nokogiri via the `ruby` platform gem:
|
18
|
+
gem install nokogiri --platform=ruby
|
19
|
+
or:
|
20
|
+
bundle config set force_ruby_platform true
|
21
|
+
|
22
|
+
Please visit https://nokogiri.org/tutorials/installing_nokogiri.html for more help.
|
23
|
+
|
24
|
+
EOM
|
25
|
+
raise e
|
26
|
+
end
|
27
|
+
|
28
|
+
# use "require" instead of "require_relative" because non-native gems will place C extension files
|
29
|
+
# in Gem::BasicSpecification#extension_dir after compilation (during normal installation), which
|
30
|
+
# is in $LOAD_PATH but not necessarily relative to this file (see #2300)
|
31
|
+
require "nokogiri/nokogiri"
|
32
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
  # Default limits used when parsing with Gumbo.
  module Gumbo
    # Default cap on the number of attributes a single element may carry.
    DEFAULT_MAX_ATTRIBUTES = 400

    # Default cap on the number of errors recorded while parsing a document or a fragment.
    DEFAULT_MAX_ERRORS = 0

    # Default cap on the depth of the DOM tree built when parsing a document or a fragment.
    DEFAULT_MAX_TREE_DEPTH = 400
  end
end
|