nokogiri 1.10.9 → 1.18.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +1632 -1022
- data/LICENSE.md +1 -1
- data/README.md +190 -95
- data/bin/nokogiri +63 -50
- data/dependencies.yml +34 -66
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +909 -422
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +258 -105
- data/ext/nokogiri/nokogiri.h +207 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +18 -18
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +33 -33
- data/ext/nokogiri/xml_comment.c +19 -31
- data/ext/nokogiri/xml_document.c +499 -323
- data/ext/nokogiri/xml_document_fragment.c +17 -36
- data/ext/nokogiri/xml_dtd.c +65 -59
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +131 -61
- data/ext/nokogiri/xml_node.c +1429 -723
- data/ext/nokogiri/xml_node_set.c +257 -225
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +340 -231
- data/ext/nokogiri/xml_relax_ng.c +87 -99
- data/ext/nokogiri/xml_sax_parser.c +269 -176
- data/ext/nokogiri/xml_sax_parser_context.c +286 -152
- data/ext/nokogiri/xml_sax_push_parser.c +111 -64
- data/ext/nokogiri/xml_schema.c +132 -140
- data/ext/nokogiri/xml_syntax_error.c +52 -23
- data/ext/nokogiri/xml_text.c +37 -30
- data/ext/nokogiri/xml_xpath_context.c +373 -185
- data/ext/nokogiri/xslt_stylesheet.c +342 -191
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +658 -0
- data/gumbo-parser/src/error.h +152 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
- data/gumbo-parser/src/parser.c +4932 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3464 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +14 -8
- data/lib/nokogiri/css/parser.rb +399 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +16 -71
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +7 -5
- data/lib/nokogiri/css/tokenizer.rex +11 -9
- data/lib/nokogiri/css/xpath_visitor.rb +242 -96
- data/lib/nokogiri/css.rb +122 -17
- data/lib/nokogiri/decorators/slop.rb +11 -11
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +83 -35
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +359 -130
- data/lib/nokogiri/xml/document_fragment.rb +170 -54
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1168 -420
- data/lib/nokogiri/xml/node_set.rb +145 -67
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +47 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +68 -41
- data/lib/nokogiri/xml/relax_ng.rb +60 -17
- data/lib/nokogiri/xml/sax/document.rb +198 -111
- data/lib/nokogiri/xml/sax/parser.rb +144 -67
- data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
- data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
- data/lib/nokogiri/xml/sax.rb +54 -4
- data/lib/nokogiri/xml/schema.rb +116 -39
- data/lib/nokogiri/xml/searchable.rb +139 -95
- data/lib/nokogiri/xml/syntax_error.rb +29 -5
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +15 -4
- data/lib/nokogiri/xml.rb +45 -55
- data/lib/nokogiri/xslt/stylesheet.rb +32 -8
- data/lib/nokogiri/xslt.rb +103 -30
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +32 -29
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +123 -295
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser.rb +0 -62
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
#--
|
2
3
|
# DO NOT MODIFY!!!!
|
3
4
|
# This file is automatically generated by rex 1.0.7
|
@@ -6,7 +7,8 @@
|
|
6
7
|
|
7
8
|
module Nokogiri
|
8
9
|
module CSS
|
9
|
-
|
10
|
+
# :nodoc: all
|
11
|
+
class Tokenizer
|
10
12
|
require 'strscan'
|
11
13
|
|
12
14
|
class ScanError < StandardError ; end
|
@@ -61,13 +63,13 @@ class Tokenizer # :nodoc:
|
|
61
63
|
when (text = @ss.scan(/has\([\s]*/))
|
62
64
|
action { [:HAS, text] }
|
63
65
|
|
64
|
-
when (text = @ss.scan(
|
66
|
+
when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*\([\s]*/))
|
65
67
|
action { [:FUNCTION, text] }
|
66
68
|
|
67
|
-
when (text = @ss.scan(
|
69
|
+
when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*/))
|
68
70
|
action { [:IDENT, text] }
|
69
71
|
|
70
|
-
when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]
|
72
|
+
when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))+/))
|
71
73
|
action { [:HASH, text] }
|
72
74
|
|
73
75
|
when (text = @ss.scan(/[\s]*~=[\s]*/))
|
@@ -130,7 +132,7 @@ class Tokenizer # :nodoc:
|
|
130
132
|
when (text = @ss.scan(/[\s]+/))
|
131
133
|
action { [:S, text] }
|
132
134
|
|
133
|
-
when (text = @ss.scan(/"([^\n\r\f"]
|
135
|
+
when (text = @ss.scan(/("([^\n\r\f"]|(\n|\r\n|\r|\f)|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|(\n|\r\n|\r|\f)|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*(?<!\\)(?:\\{2})*')/))
|
134
136
|
action { [:STRING, text] }
|
135
137
|
|
136
138
|
when (text = @ss.scan(/./))
|
@@ -1,22 +1,24 @@
|
|
1
1
|
module Nokogiri
|
2
2
|
module CSS
|
3
|
-
|
3
|
+
# :nodoc: all
|
4
|
+
class Tokenizer
|
4
5
|
|
5
6
|
macro
|
6
|
-
nl \n|\r\n|\r|\f
|
7
|
+
nl (\n|\r\n|\r|\f)
|
7
8
|
w [\s]*
|
8
9
|
nonascii [^\0-\177]
|
9
10
|
num -?([0-9]+|[0-9]*\.[0-9]+)
|
10
11
|
unicode \\[0-9A-Fa-f]{1,6}(\r\n|[\s])?
|
11
12
|
|
12
|
-
escape {unicode}|\\[^\n\r\f0-9A-Fa-f]
|
13
|
-
nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
|
14
|
-
nmstart [_A-Za-z]|{nonascii}|{escape}
|
15
|
-
|
16
|
-
name
|
13
|
+
escape ({unicode}|\\[^\n\r\f0-9A-Fa-f])
|
14
|
+
nmchar ([_A-Za-z0-9-]|{nonascii}|{escape})
|
15
|
+
nmstart ([_A-Za-z]|{nonascii}|{escape})
|
16
|
+
name {nmstart}{nmchar}*
|
17
|
+
ident -?{name}
|
18
|
+
charref {nmchar}+
|
17
19
|
string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*"
|
18
20
|
string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*'
|
19
|
-
string {string1}|{string2}
|
21
|
+
string ({string1}|{string2})
|
20
22
|
|
21
23
|
rule
|
22
24
|
|
@@ -25,7 +27,7 @@ rule
|
|
25
27
|
has\({w} { [:HAS, text] }
|
26
28
|
{ident}\({w} { [:FUNCTION, text] }
|
27
29
|
{ident} { [:IDENT, text] }
|
28
|
-
\#{
|
30
|
+
\#{charref} { [:HASH, text] }
|
29
31
|
{w}~={w} { [:INCLUDES, text] }
|
30
32
|
{w}\|={w} { [:DASHMATCH, text] }
|
31
33
|
{w}\^={w} { [:PREFIXMATCH, text] }
|
@@ -1,64 +1,164 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
1
4
|
module Nokogiri
|
2
5
|
module CSS
|
3
|
-
|
4
|
-
|
6
|
+
# When translating CSS selectors to XPath queries with Nokogiri::CSS.xpath_for, the XPathVisitor
|
7
|
+
# class allows for changing some of the behaviors related to builtin xpath functions and quirks
|
8
|
+
# of HTML5.
|
9
|
+
class XPathVisitor
|
10
|
+
WILDCARD_NAMESPACES = Nokogiri.libxml2_patches.include?("0009-allow-wildcard-namespaces.patch") # :nodoc:
|
11
|
+
|
12
|
+
# Enum to direct XPathVisitor when to use Nokogiri builtin XPath functions.
|
13
|
+
module BuiltinsConfig
|
14
|
+
# Never use Nokogiri builtin functions, always generate vanilla XPath 1.0 queries. This is
|
15
|
+
# the default when calling Nokogiri::CSS.xpath_for directly.
|
16
|
+
NEVER = :never
|
17
|
+
|
18
|
+
# Always use Nokogiri builtin functions whenever possible. This is probably only useful for testing.
|
19
|
+
ALWAYS = :always
|
20
|
+
|
21
|
+
# Only use Nokogiri builtin functions when they will be faster than vanilla XPath. This is
|
22
|
+
# the behavior chosen when searching for CSS selectors on a Nokogiri document, fragment, or
|
23
|
+
# node.
|
24
|
+
OPTIMAL = :optimal
|
25
|
+
|
26
|
+
# :nodoc: array of values for validation
|
27
|
+
VALUES = [NEVER, ALWAYS, OPTIMAL]
|
28
|
+
end
|
29
|
+
|
30
|
+
# Enum to direct XPathVisitor when to tweak the XPath query to suit the nature of the document
|
31
|
+
# being searched. Note that searches for CSS selectors from a Nokogiri document, fragment, or
|
32
|
+
# node will choose the correct option automatically.
|
33
|
+
module DoctypeConfig
|
34
|
+
# The document being searched is an XML document. This is the default.
|
35
|
+
XML = :xml
|
36
|
+
|
37
|
+
# The document being searched is an HTML4 document.
|
38
|
+
HTML4 = :html4
|
39
|
+
|
40
|
+
# The document being searched is an HTML5 document.
|
41
|
+
HTML5 = :html5
|
42
|
+
|
43
|
+
# :nodoc: array of values for validation
|
44
|
+
VALUES = [XML, HTML4, HTML5]
|
45
|
+
end
|
46
|
+
|
47
|
+
# The visitor configuration set via the +builtins:+ keyword argument to XPathVisitor.new.
|
48
|
+
attr_reader :builtins
|
5
49
|
|
6
|
-
|
7
|
-
|
50
|
+
# The visitor configuration set via the +doctype:+ keyword argument to XPathVisitor.new.
|
51
|
+
attr_reader :doctype
|
52
|
+
|
53
|
+
# The visitor configuration set via the +prefix:+ keyword argument to XPathVisitor.new.
|
54
|
+
attr_reader :prefix
|
55
|
+
|
56
|
+
# The visitor configuration set via the +namespaces:+ keyword argument to XPathVisitor.new.
|
57
|
+
attr_reader :namespaces
|
58
|
+
|
59
|
+
# :call-seq:
|
60
|
+
# new() → XPathVisitor
|
61
|
+
# new(builtins:, doctype:) → XPathVisitor
|
62
|
+
#
|
63
|
+
# [Parameters]
|
64
|
+
# - +builtins:+ (BuiltinsConfig) Determine when to use Nokogiri's built-in xpath functions for performance improvements.
|
65
|
+
# - +doctype:+ (DoctypeConfig) Make document-type-specific accommodations for CSS queries.
|
66
|
+
#
|
67
|
+
# [Returns] XPathVisitor
|
68
|
+
#
|
69
|
+
def initialize(
|
70
|
+
builtins: BuiltinsConfig::NEVER,
|
71
|
+
doctype: DoctypeConfig::XML,
|
72
|
+
prefix: Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX,
|
73
|
+
namespaces: nil
|
74
|
+
)
|
75
|
+
unless BuiltinsConfig::VALUES.include?(builtins)
|
76
|
+
raise(ArgumentError, "Invalid values #{builtins.inspect} for builtins: keyword parameter")
|
77
|
+
end
|
78
|
+
unless DoctypeConfig::VALUES.include?(doctype)
|
79
|
+
raise(ArgumentError, "Invalid values #{doctype.inspect} for doctype: keyword parameter")
|
80
|
+
end
|
81
|
+
|
82
|
+
@builtins = builtins
|
83
|
+
@doctype = doctype
|
84
|
+
@prefix = prefix
|
85
|
+
@namespaces = namespaces
|
86
|
+
end
|
87
|
+
|
88
|
+
# :call-seq: config() → Hash
|
89
|
+
#
|
90
|
+
# [Returns]
|
91
|
+
# a Hash representing the configuration of the XPathVisitor, suitable for use as
|
92
|
+
# part of the CSS cache key.
|
93
|
+
def config
|
94
|
+
{ builtins: @builtins, doctype: @doctype, prefix: @prefix, namespaces: @namespaces }
|
95
|
+
end
|
96
|
+
|
97
|
+
# :stopdoc:
|
98
|
+
def visit_function(node)
|
99
|
+
msg = :"visit_function_#{node.value.first.gsub(/[(]/, "")}"
|
100
|
+
return send(msg, node) if respond_to?(msg)
|
8
101
|
|
9
102
|
case node.value.first
|
10
103
|
when /^text\(/
|
11
|
-
|
104
|
+
"child::text()"
|
12
105
|
when /^self\(/
|
13
106
|
"self::#{node.value[1]}"
|
14
107
|
when /^eq\(/
|
15
|
-
"position()
|
108
|
+
"position()=#{node.value[1]}"
|
16
109
|
when /^(nth|nth-of-type)\(/
|
17
|
-
if node.value[1].is_a?(Nokogiri::CSS::Node)
|
110
|
+
if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
|
18
111
|
nth(node.value[1])
|
19
112
|
else
|
20
|
-
"position()
|
113
|
+
"position()=#{node.value[1]}"
|
21
114
|
end
|
22
115
|
when /^nth-child\(/
|
23
|
-
if node.value[1].is_a?(Nokogiri::CSS::Node)
|
24
|
-
nth(node.value[1], :
|
116
|
+
if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
|
117
|
+
nth(node.value[1], child: true)
|
25
118
|
else
|
26
|
-
"count(preceding-sibling::*)
|
119
|
+
"count(preceding-sibling::*)=#{node.value[1].to_i - 1}"
|
27
120
|
end
|
28
121
|
when /^nth-last-of-type\(/
|
29
|
-
if node.value[1].is_a?(Nokogiri::CSS::Node)
|
30
|
-
nth(node.value[1], :
|
122
|
+
if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
|
123
|
+
nth(node.value[1], last: true)
|
31
124
|
else
|
32
125
|
index = node.value[1].to_i - 1
|
33
|
-
index == 0 ? "position()
|
126
|
+
index == 0 ? "position()=last()" : "position()=last()-#{index}"
|
34
127
|
end
|
35
128
|
when /^nth-last-child\(/
|
36
|
-
if node.value[1].is_a?(Nokogiri::CSS::Node)
|
37
|
-
nth(node.value[1], :
|
129
|
+
if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
|
130
|
+
nth(node.value[1], last: true, child: true)
|
38
131
|
else
|
39
|
-
"count(following-sibling::*)
|
132
|
+
"count(following-sibling::*)=#{node.value[1].to_i - 1}"
|
40
133
|
end
|
41
134
|
when /^(first|first-of-type)\(/
|
42
|
-
"position()
|
135
|
+
"position()=1"
|
43
136
|
when /^(last|last-of-type)\(/
|
44
|
-
"position()
|
137
|
+
"position()=last()"
|
45
138
|
when /^contains\(/
|
46
|
-
"contains(
|
139
|
+
"contains(.,#{node.value[1]})"
|
47
140
|
when /^gt\(/
|
48
|
-
"position()
|
141
|
+
"position()>#{node.value[1]}"
|
49
142
|
when /^only-child\(/
|
50
|
-
"last()
|
143
|
+
"last()=1"
|
51
144
|
when /^comment\(/
|
52
145
|
"comment()"
|
53
146
|
when /^has\(/
|
54
|
-
|
147
|
+
is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
|
148
|
+
".#{"//" unless is_direct}#{node.value[1].accept(self)}"
|
55
149
|
else
|
56
|
-
|
57
|
-
|
150
|
+
validate_xpath_function_name(node.value.first)
|
151
|
+
|
152
|
+
# xpath function call, let's marshal those arguments
|
153
|
+
args = ["."]
|
154
|
+
args += node.value[1..-1].map do |n|
|
155
|
+
n.is_a?(Nokogiri::CSS::Node) ? n.accept(self) : n
|
156
|
+
end
|
157
|
+
"nokogiri:#{node.value.first}#{args.join(",")})"
|
58
158
|
end
|
59
159
|
end
|
60
160
|
|
61
|
-
def visit_not
|
161
|
+
def visit_not(node)
|
62
162
|
child = node.value.first
|
63
163
|
if :ELEMENT_NAME == child.type
|
64
164
|
"not(self::#{child.accept(self)})"
|
@@ -67,143 +167,179 @@ module Nokogiri
|
|
67
167
|
end
|
68
168
|
end
|
69
169
|
|
70
|
-
def visit_id
|
170
|
+
def visit_id(node)
|
71
171
|
node.value.first =~ /^#(.*)$/
|
72
|
-
"@id
|
172
|
+
"@id='#{Regexp.last_match(1)}'"
|
73
173
|
end
|
74
174
|
|
75
|
-
def visit_attribute_condition
|
76
|
-
|
77
|
-
|
78
|
-
else
|
79
|
-
'@'
|
80
|
-
end
|
81
|
-
attribute += node.value.first.accept(self)
|
82
|
-
|
83
|
-
# Support non-standard css
|
84
|
-
attribute.gsub!(/^@@/, '@')
|
85
|
-
|
86
|
-
return attribute unless node.value.length == 3
|
175
|
+
def visit_attribute_condition(node)
|
176
|
+
attribute = node.value.first.accept(self)
|
177
|
+
return attribute if node.value.length == 1
|
87
178
|
|
88
179
|
value = node.value.last
|
89
|
-
value = "'#{value}'"
|
180
|
+
value = "'#{value}'" unless /^['"]/.match?(value)
|
90
181
|
|
91
|
-
|
182
|
+
# quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
|
183
|
+
if (value[0] == value[-1]) && %q{"'}.include?(value[0])
|
92
184
|
str_value = value[1..-2]
|
93
185
|
if str_value.include?(value[0])
|
94
|
-
value = 'concat("' + str_value.split('"', -1).join(%q{",
|
186
|
+
value = 'concat("' + str_value.split('"', -1).join(%q{",'"',"}) + '","")'
|
95
187
|
end
|
96
188
|
end
|
97
189
|
|
98
190
|
case node.value[1]
|
99
191
|
when :equal
|
100
|
-
attribute + "
|
192
|
+
attribute + "=" + value.to_s
|
101
193
|
when :not_equal
|
102
|
-
attribute + "
|
194
|
+
attribute + "!=" + value.to_s
|
103
195
|
when :substring_match
|
104
|
-
"contains(#{attribute}
|
196
|
+
"contains(#{attribute},#{value})"
|
105
197
|
when :prefix_match
|
106
|
-
"starts-with(#{attribute}
|
198
|
+
"starts-with(#{attribute},#{value})"
|
107
199
|
when :dash_match
|
108
|
-
"#{attribute}
|
200
|
+
"#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
|
109
201
|
when :includes
|
110
|
-
|
202
|
+
value = value[1..-2] # strip quotes
|
203
|
+
css_class(attribute, value)
|
111
204
|
when :suffix_match
|
112
|
-
"substring(#{attribute},
|
113
|
-
"string-length(#{value}) + 1, string-length(#{value})) = #{value}"
|
205
|
+
"substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
|
114
206
|
else
|
115
|
-
attribute + " #{node.value[1]} " +
|
207
|
+
attribute + " #{node.value[1]} " + value.to_s
|
116
208
|
end
|
117
209
|
end
|
118
210
|
|
119
|
-
def visit_pseudo_class
|
120
|
-
if node.value.first.is_a?(Nokogiri::CSS::Node)
|
211
|
+
def visit_pseudo_class(node)
|
212
|
+
if node.value.first.is_a?(Nokogiri::CSS::Node) && (node.value.first.type == :FUNCTION)
|
121
213
|
node.value.first.accept(self)
|
122
214
|
else
|
123
|
-
msg = :"visit_pseudo_class_#{node.value.first.gsub(/[(]/,
|
124
|
-
return
|
215
|
+
msg = :"visit_pseudo_class_#{node.value.first.gsub(/[(]/, "")}"
|
216
|
+
return send(msg, node) if respond_to?(msg)
|
125
217
|
|
126
218
|
case node.value.first
|
127
|
-
when "first" then "position()
|
128
|
-
when "first-child" then "count(preceding-sibling::*)
|
129
|
-
when "last" then "position()
|
130
|
-
when "last-child" then "count(following-sibling::*)
|
131
|
-
when "first-of-type" then "position()
|
132
|
-
when "last-of-type" then "position()
|
133
|
-
when "only-child" then "count(preceding-sibling::*)
|
134
|
-
when "only-of-type" then "last()
|
219
|
+
when "first" then "position()=1"
|
220
|
+
when "first-child" then "count(preceding-sibling::*)=0"
|
221
|
+
when "last" then "position()=last()"
|
222
|
+
when "last-child" then "count(following-sibling::*)=0"
|
223
|
+
when "first-of-type" then "position()=1"
|
224
|
+
when "last-of-type" then "position()=last()"
|
225
|
+
when "only-child" then "count(preceding-sibling::*)=0 and count(following-sibling::*)=0"
|
226
|
+
when "only-of-type" then "last()=1"
|
135
227
|
when "empty" then "not(node())"
|
136
228
|
when "parent" then "node()"
|
137
229
|
when "root" then "not(parent::*)"
|
138
230
|
else
|
139
|
-
node.value.first
|
231
|
+
validate_xpath_function_name(node.value.first)
|
232
|
+
"nokogiri:#{node.value.first}(.)"
|
140
233
|
end
|
141
234
|
end
|
142
235
|
end
|
143
236
|
|
144
|
-
def visit_class_condition
|
145
|
-
"
|
237
|
+
def visit_class_condition(node)
|
238
|
+
css_class("@class", node.value.first)
|
146
239
|
end
|
147
240
|
|
148
|
-
def visit_combinator
|
241
|
+
def visit_combinator(node)
|
149
242
|
if is_of_type_pseudo_class?(node.value.last)
|
150
|
-
"#{node.value.first
|
243
|
+
"#{node.value.first&.accept(self)}][#{node.value.last.accept(self)}"
|
151
244
|
else
|
152
|
-
"#{node.value.first
|
245
|
+
"#{node.value.first&.accept(self)} and #{node.value.last.accept(self)}"
|
153
246
|
end
|
154
247
|
end
|
155
248
|
|
156
249
|
{
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
}.each do |k,v|
|
162
|
-
class_eval
|
250
|
+
"direct_adjacent_selector" => "/following-sibling::*[1]/self::",
|
251
|
+
"following_selector" => "/following-sibling::",
|
252
|
+
"descendant_selector" => "//",
|
253
|
+
"child_selector" => "/",
|
254
|
+
}.each do |k, v|
|
255
|
+
class_eval <<~RUBY, __FILE__, __LINE__ + 1
|
163
256
|
def visit_#{k} node
|
164
257
|
"\#{node.value.first.accept(self) if node.value.first}#{v}\#{node.value.last.accept(self)}"
|
165
258
|
end
|
166
|
-
|
259
|
+
RUBY
|
167
260
|
end
|
168
261
|
|
169
|
-
def visit_conditional_selector
|
170
|
-
node.value.first.accept(self) +
|
171
|
-
|
262
|
+
def visit_conditional_selector(node)
|
263
|
+
node.value.first.accept(self) + "[" +
|
264
|
+
node.value.last.accept(self) + "]"
|
265
|
+
end
|
266
|
+
|
267
|
+
def visit_element_name(node)
|
268
|
+
if @doctype == DoctypeConfig::HTML5 && html5_element_name_needs_namespace_handling(node)
|
269
|
+
# HTML5 has namespaces that should be ignored in CSS queries
|
270
|
+
# https://github.com/sparklemotion/nokogiri/issues/2376
|
271
|
+
if @builtins == BuiltinsConfig::ALWAYS || (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)
|
272
|
+
if WILDCARD_NAMESPACES
|
273
|
+
"*:#{node.value.first}"
|
274
|
+
else
|
275
|
+
"*[nokogiri-builtin:local-name-is('#{node.value.first}')]"
|
276
|
+
end
|
277
|
+
else
|
278
|
+
"*[local-name()='#{node.value.first}']"
|
279
|
+
end
|
280
|
+
elsif node.value.length == 2 # has a namespace prefix
|
281
|
+
if node.value.first.nil? # namespace prefix is empty
|
282
|
+
node.value.last
|
283
|
+
else
|
284
|
+
node.value.join(":")
|
285
|
+
end
|
286
|
+
elsif node.value.first != "*" && @namespaces&.key?("xmlns")
|
287
|
+
# apply the default namespace (if one is present) to a non-wildcard selector
|
288
|
+
"xmlns:#{node.value.first}"
|
289
|
+
else
|
290
|
+
node.value.first
|
291
|
+
end
|
172
292
|
end
|
173
293
|
|
174
|
-
def
|
175
|
-
node.value.first
|
294
|
+
def visit_attrib_name(node)
|
295
|
+
"@#{node.value.first}"
|
176
296
|
end
|
177
297
|
|
178
|
-
def accept
|
298
|
+
def accept(node)
|
179
299
|
node.accept(self)
|
180
300
|
end
|
181
301
|
|
182
|
-
|
183
|
-
def nth node, options={}
|
184
|
-
raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
|
302
|
+
private
|
185
303
|
|
186
|
-
|
304
|
+
def validate_xpath_function_name(name)
|
305
|
+
if name.start_with?("-")
|
306
|
+
raise Nokogiri::CSS::SyntaxError, "Invalid XPath function name '#{name}'"
|
307
|
+
end
|
308
|
+
end
|
309
|
+
|
310
|
+
def html5_element_name_needs_namespace_handling(node)
|
311
|
+
# if there is already a namespace (i.e., it is a prefixed QName), use it as normal
|
312
|
+
node.value.length == 1 &&
|
313
|
+
# if this is the wildcard selector "*", use it as normal
|
314
|
+
node.value.first != "*"
|
315
|
+
end
|
316
|
+
|
317
|
+
def nth(node, options = {})
|
318
|
+
unless node.value.size == 4
|
319
|
+
raise(ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}")
|
320
|
+
end
|
321
|
+
|
322
|
+
a, b = read_a_and_positive_b(node.value)
|
187
323
|
position = if options[:child]
|
188
|
-
options[:last] ? "(count(following-sibling::*)
|
324
|
+
options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
|
189
325
|
else
|
190
326
|
options[:last] ? "(last()-position()+1)" : "position()"
|
191
327
|
end
|
192
328
|
|
193
329
|
if b.zero?
|
194
|
-
"(#{position} mod #{a})
|
330
|
+
"(#{position} mod #{a})=0"
|
195
331
|
else
|
196
332
|
compare = a < 0 ? "<=" : ">="
|
197
333
|
if a.abs == 1
|
198
|
-
"#{position}
|
334
|
+
"#{position}#{compare}#{b}"
|
199
335
|
else
|
200
|
-
"(#{position}
|
336
|
+
"(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
|
201
337
|
end
|
202
338
|
end
|
203
339
|
end
|
204
340
|
|
205
|
-
def read_a_and_positive_b
|
206
|
-
op = values[2]
|
341
|
+
def read_a_and_positive_b(values)
|
342
|
+
op = values[2].strip
|
207
343
|
if op == "+"
|
208
344
|
a = values[0].to_i
|
209
345
|
b = values[3].to_i
|
@@ -216,15 +352,25 @@ module Nokogiri
|
|
216
352
|
[a, b]
|
217
353
|
end
|
218
354
|
|
219
|
-
def is_of_type_pseudo_class?
|
220
|
-
if node.type
|
221
|
-
if node.value[0].is_a?(Nokogiri::CSS::Node)
|
355
|
+
def is_of_type_pseudo_class?(node) # rubocop:disable Naming/PredicateName
|
356
|
+
if node.type == :PSEUDO_CLASS
|
357
|
+
if node.value[0].is_a?(Nokogiri::CSS::Node) && (node.value[0].type == :FUNCTION)
|
222
358
|
node.value[0].value[0]
|
223
359
|
else
|
224
360
|
node.value[0]
|
225
361
|
end =~ /(nth|first|last|only)-of-type(\()?/
|
226
362
|
end
|
227
363
|
end
|
364
|
+
|
365
|
+
def css_class(hay, needle)
|
366
|
+
if @builtins == BuiltinsConfig::ALWAYS || (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)
|
367
|
+
# use the builtin implementation
|
368
|
+
"nokogiri-builtin:css-class(#{hay},'#{needle}')"
|
369
|
+
else
|
370
|
+
# use only ordinary xpath functions
|
371
|
+
"contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
|
372
|
+
end
|
373
|
+
end
|
228
374
|
end
|
229
375
|
end
|
230
376
|
end
|