nokogiri 1.10.7 → 1.16.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +42 -0
- data/LICENSE-DEPENDENCIES.md +1632 -1022
- data/LICENSE.md +1 -1
- data/README.md +188 -96
- data/bin/nokogiri +63 -50
- data/dependencies.yml +34 -66
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +862 -421
- data/ext/nokogiri/gumbo.c +594 -0
- data/ext/nokogiri/html4_document.c +165 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +108 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +251 -105
- data/ext/nokogiri/nokogiri.h +222 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +17 -17
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +39 -31
- data/ext/nokogiri/xml_comment.c +20 -27
- data/ext/nokogiri/xml_document.c +408 -243
- data/ext/nokogiri/xml_document_fragment.c +13 -17
- data/ext/nokogiri/xml_dtd.c +64 -58
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +131 -61
- data/ext/nokogiri/xml_node.c +1343 -674
- data/ext/nokogiri/xml_node_set.c +246 -216
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +305 -213
- data/ext/nokogiri/xml_relax_ng.c +87 -78
- data/ext/nokogiri/xml_sax_parser.c +149 -124
- data/ext/nokogiri/xml_sax_parser_context.c +149 -103
- data/ext/nokogiri/xml_sax_push_parser.c +65 -37
- data/ext/nokogiri/xml_schema.c +138 -82
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +35 -26
- data/ext/nokogiri/xml_xpath_context.c +363 -178
- data/ext/nokogiri/xslt_stylesheet.c +335 -189
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +126 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +630 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
- data/gumbo-parser/src/parser.c +4891 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3464 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -8
- data/lib/nokogiri/css/parser.rb +397 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +54 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +5 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +205 -96
- data/lib/nokogiri/css.rb +56 -17
- data/lib/nokogiri/decorators/slop.rb +9 -7
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +214 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +47 -0
- data/lib/nokogiri/html5/document.rb +168 -0
- data/lib/nokogiri/html5/document_fragment.rb +90 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +326 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +75 -34
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +312 -127
- data/lib/nokogiri/xml/document_fragment.rb +93 -48
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1096 -419
- data/lib/nokogiri/xml/node_set.rb +137 -61
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +42 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +21 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +39 -36
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +120 -72
- data/lib/nokogiri/xml/syntax_error.rb +7 -5
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +39 -38
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +101 -22
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +29 -25
- data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.12.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +121 -291
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
#--
|
2
3
|
# DO NOT MODIFY!!!!
|
3
4
|
# This file is automatically generated by rex 1.0.7
|
@@ -6,7 +7,8 @@
|
|
6
7
|
|
7
8
|
module Nokogiri
|
8
9
|
module CSS
|
9
|
-
|
10
|
+
# :nodoc: all
|
11
|
+
class Tokenizer
|
10
12
|
require 'strscan'
|
11
13
|
|
12
14
|
class ScanError < StandardError ; end
|
@@ -61,10 +63,10 @@ class Tokenizer # :nodoc:
|
|
61
63
|
when (text = @ss.scan(/has\([\s]*/))
|
62
64
|
action { [:HAS, text] }
|
63
65
|
|
64
|
-
when (text = @ss.scan(
|
66
|
+
when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
|
65
67
|
action { [:FUNCTION, text] }
|
66
68
|
|
67
|
-
when (text = @ss.scan(
|
69
|
+
when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
|
68
70
|
action { [:IDENT, text] }
|
69
71
|
|
70
72
|
when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
|
@@ -1,6 +1,7 @@
|
|
1
1
|
module Nokogiri
|
2
2
|
module CSS
|
3
|
-
|
3
|
+
# :nodoc: all
|
4
|
+
class Tokenizer
|
4
5
|
|
5
6
|
macro
|
6
7
|
nl \n|\r\n|\r|\f
|
@@ -12,7 +13,7 @@ macro
|
|
12
13
|
escape {unicode}|\\[^\n\r\f0-9A-Fa-f]
|
13
14
|
nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
|
14
15
|
nmstart [_A-Za-z]|{nonascii}|{escape}
|
15
|
-
ident
|
16
|
+
ident -?({nmstart})({nmchar})*
|
16
17
|
name ({nmchar})+
|
17
18
|
string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*"
|
18
19
|
string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*'
|
@@ -1,64 +1,143 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
1
4
|
module Nokogiri
|
2
5
|
module CSS
|
3
|
-
|
4
|
-
|
6
|
+
# When translating CSS selectors to XPath queries with Nokogiri::CSS.xpath_for, the XPathVisitor
|
7
|
+
# class allows for changing some of the behaviors related to builtin xpath functions and quirks
|
8
|
+
# of HTML5.
|
9
|
+
class XPathVisitor
|
10
|
+
WILDCARD_NAMESPACES = Nokogiri.libxml2_patches.include?("0009-allow-wildcard-namespaces.patch") # :nodoc:
|
11
|
+
|
12
|
+
# Enum to direct XPathVisitor when to use Nokogiri builtin XPath functions.
|
13
|
+
module BuiltinsConfig
|
14
|
+
# Never use Nokogiri builtin functions, always generate vanilla XPath 1.0 queries. This is
|
15
|
+
# the default when calling Nokogiri::CSS.xpath_for directly.
|
16
|
+
NEVER = :never
|
17
|
+
|
18
|
+
# Always use Nokogiri builtin functions whenever possible. This is probably only useful for testing.
|
19
|
+
ALWAYS = :always
|
20
|
+
|
21
|
+
# Only use Nokogiri builtin functions when they will be faster than vanilla XPath. This is
|
22
|
+
# the behavior chosen when searching for CSS selectors on a Nokogiri document, fragment, or
|
23
|
+
# node.
|
24
|
+
OPTIMAL = :optimal
|
25
|
+
|
26
|
+
# :nodoc: array of values for validation
|
27
|
+
VALUES = [NEVER, ALWAYS, OPTIMAL]
|
28
|
+
end
|
29
|
+
|
30
|
+
# Enum to direct XPathVisitor when to tweak the XPath query to suit the nature of the document
|
31
|
+
# being searched. Note that searches for CSS selectors from a Nokogiri document, fragment, or
|
32
|
+
# node will choose the correct option automatically.
|
33
|
+
module DoctypeConfig
|
34
|
+
# The document being searched is an XML document. This is the default.
|
35
|
+
XML = :xml
|
36
|
+
|
37
|
+
# The document being searched is an HTML4 document.
|
38
|
+
HTML4 = :html4
|
39
|
+
|
40
|
+
# The document being searched is an HTML5 document.
|
41
|
+
HTML5 = :html5
|
42
|
+
|
43
|
+
# :nodoc: array of values for validation
|
44
|
+
VALUES = [XML, HTML4, HTML5]
|
45
|
+
end
|
46
|
+
|
47
|
+
# :call-seq:
|
48
|
+
# new() → XPathVisitor
|
49
|
+
# new(builtins:, doctype:) → XPathVisitor
|
50
|
+
#
|
51
|
+
# [Parameters]
|
52
|
+
# - +builtins:+ (BuiltinsConfig) Determine when to use Nokogiri's built-in xpath functions for performance improvements.
|
53
|
+
# - +doctype:+ (DoctypeConfig) Make document-type-specific accommodations for CSS queries.
|
54
|
+
#
|
55
|
+
# [Returns] XPathVisitor
|
56
|
+
#
|
57
|
+
def initialize(builtins: BuiltinsConfig::NEVER, doctype: DoctypeConfig::XML)
|
58
|
+
unless BuiltinsConfig::VALUES.include?(builtins)
|
59
|
+
raise(ArgumentError, "Invalid values #{builtins.inspect} for builtins: keyword parameter")
|
60
|
+
end
|
61
|
+
unless DoctypeConfig::VALUES.include?(doctype)
|
62
|
+
raise(ArgumentError, "Invalid values #{doctype.inspect} for doctype: keyword parameter")
|
63
|
+
end
|
64
|
+
|
65
|
+
@builtins = builtins
|
66
|
+
@doctype = doctype
|
67
|
+
end
|
68
|
+
|
69
|
+
# :call-seq: config() → Hash
|
70
|
+
#
|
71
|
+
# [Returns]
|
72
|
+
# a Hash representing the configuration of the XPathVisitor, suitable for use as
|
73
|
+
# part of the CSS cache key.
|
74
|
+
def config
|
75
|
+
{ builtins: @builtins, doctype: @doctype }
|
76
|
+
end
|
5
77
|
|
6
|
-
|
7
|
-
|
78
|
+
# :stopdoc:
|
79
|
+
def visit_function(node)
|
80
|
+
msg = :"visit_function_#{node.value.first.gsub(/[(]/, "")}"
|
81
|
+
return send(msg, node) if respond_to?(msg)
|
8
82
|
|
9
83
|
case node.value.first
|
10
84
|
when /^text\(/
|
11
|
-
|
85
|
+
"child::text()"
|
12
86
|
when /^self\(/
|
13
87
|
"self::#{node.value[1]}"
|
14
88
|
when /^eq\(/
|
15
|
-
"position()
|
89
|
+
"position()=#{node.value[1]}"
|
16
90
|
when /^(nth|nth-of-type)\(/
|
17
|
-
if node.value[1].is_a?(Nokogiri::CSS::Node)
|
91
|
+
if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
|
18
92
|
nth(node.value[1])
|
19
93
|
else
|
20
|
-
"position()
|
94
|
+
"position()=#{node.value[1]}"
|
21
95
|
end
|
22
96
|
when /^nth-child\(/
|
23
|
-
if node.value[1].is_a?(Nokogiri::CSS::Node)
|
24
|
-
nth(node.value[1], :
|
97
|
+
if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
|
98
|
+
nth(node.value[1], child: true)
|
25
99
|
else
|
26
|
-
"count(preceding-sibling::*)
|
100
|
+
"count(preceding-sibling::*)=#{node.value[1].to_i - 1}"
|
27
101
|
end
|
28
102
|
when /^nth-last-of-type\(/
|
29
|
-
if node.value[1].is_a?(Nokogiri::CSS::Node)
|
30
|
-
nth(node.value[1], :
|
103
|
+
if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
|
104
|
+
nth(node.value[1], last: true)
|
31
105
|
else
|
32
106
|
index = node.value[1].to_i - 1
|
33
|
-
index == 0 ? "position()
|
107
|
+
index == 0 ? "position()=last()" : "position()=last()-#{index}"
|
34
108
|
end
|
35
109
|
when /^nth-last-child\(/
|
36
|
-
if node.value[1].is_a?(Nokogiri::CSS::Node)
|
37
|
-
nth(node.value[1], :
|
110
|
+
if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
|
111
|
+
nth(node.value[1], last: true, child: true)
|
38
112
|
else
|
39
|
-
"count(following-sibling::*)
|
113
|
+
"count(following-sibling::*)=#{node.value[1].to_i - 1}"
|
40
114
|
end
|
41
115
|
when /^(first|first-of-type)\(/
|
42
|
-
"position()
|
116
|
+
"position()=1"
|
43
117
|
when /^(last|last-of-type)\(/
|
44
|
-
"position()
|
118
|
+
"position()=last()"
|
45
119
|
when /^contains\(/
|
46
|
-
"contains(
|
120
|
+
"contains(.,#{node.value[1]})"
|
47
121
|
when /^gt\(/
|
48
|
-
"position()
|
122
|
+
"position()>#{node.value[1]}"
|
49
123
|
when /^only-child\(/
|
50
|
-
"last()
|
124
|
+
"last()=1"
|
51
125
|
when /^comment\(/
|
52
126
|
"comment()"
|
53
127
|
when /^has\(/
|
54
|
-
|
128
|
+
is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
|
129
|
+
".#{"//" unless is_direct}#{node.value[1].accept(self)}"
|
55
130
|
else
|
56
|
-
|
57
|
-
|
131
|
+
# xpath function call, let's marshal those arguments
|
132
|
+
args = ["."]
|
133
|
+
args += node.value[1..-1].map do |n|
|
134
|
+
n.is_a?(Nokogiri::CSS::Node) ? n.accept(self) : n
|
135
|
+
end
|
136
|
+
"nokogiri:#{node.value.first}#{args.join(",")})"
|
58
137
|
end
|
59
138
|
end
|
60
139
|
|
61
|
-
def visit_not
|
140
|
+
def visit_not(node)
|
62
141
|
child = node.value.first
|
63
142
|
if :ELEMENT_NAME == child.type
|
64
143
|
"not(self::#{child.accept(self)})"
|
@@ -67,143 +146,163 @@ module Nokogiri
|
|
67
146
|
end
|
68
147
|
end
|
69
148
|
|
70
|
-
def visit_id
|
149
|
+
def visit_id(node)
|
71
150
|
node.value.first =~ /^#(.*)$/
|
72
|
-
"@id
|
151
|
+
"@id='#{Regexp.last_match(1)}'"
|
73
152
|
end
|
74
153
|
|
75
|
-
def visit_attribute_condition
|
76
|
-
|
77
|
-
|
78
|
-
else
|
79
|
-
'@'
|
80
|
-
end
|
81
|
-
attribute += node.value.first.accept(self)
|
82
|
-
|
83
|
-
# Support non-standard css
|
84
|
-
attribute.gsub!(/^@@/, '@')
|
85
|
-
|
86
|
-
return attribute unless node.value.length == 3
|
154
|
+
def visit_attribute_condition(node)
|
155
|
+
attribute = node.value.first.accept(self)
|
156
|
+
return attribute if node.value.length == 1
|
87
157
|
|
88
158
|
value = node.value.last
|
89
|
-
value = "'#{value}'"
|
159
|
+
value = "'#{value}'" unless /^['"]/.match?(value)
|
90
160
|
|
91
|
-
|
161
|
+
# quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
|
162
|
+
if (value[0] == value[-1]) && %q{"'}.include?(value[0])
|
92
163
|
str_value = value[1..-2]
|
93
164
|
if str_value.include?(value[0])
|
94
|
-
value = 'concat("' + str_value.split('"', -1).join(%q{",
|
165
|
+
value = 'concat("' + str_value.split('"', -1).join(%q{",'"',"}) + '","")'
|
95
166
|
end
|
96
167
|
end
|
97
168
|
|
98
169
|
case node.value[1]
|
99
170
|
when :equal
|
100
|
-
attribute + "
|
171
|
+
attribute + "=" + value.to_s
|
101
172
|
when :not_equal
|
102
|
-
attribute + "
|
173
|
+
attribute + "!=" + value.to_s
|
103
174
|
when :substring_match
|
104
|
-
"contains(#{attribute}
|
175
|
+
"contains(#{attribute},#{value})"
|
105
176
|
when :prefix_match
|
106
|
-
"starts-with(#{attribute}
|
177
|
+
"starts-with(#{attribute},#{value})"
|
107
178
|
when :dash_match
|
108
|
-
"#{attribute}
|
179
|
+
"#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
|
109
180
|
when :includes
|
110
|
-
|
181
|
+
value = value[1..-2] # strip quotes
|
182
|
+
css_class(attribute, value)
|
111
183
|
when :suffix_match
|
112
|
-
"substring(#{attribute},
|
113
|
-
"string-length(#{value}) + 1, string-length(#{value})) = #{value}"
|
184
|
+
"substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
|
114
185
|
else
|
115
|
-
attribute + " #{node.value[1]} " +
|
186
|
+
attribute + " #{node.value[1]} " + value.to_s
|
116
187
|
end
|
117
188
|
end
|
118
189
|
|
119
|
-
def visit_pseudo_class
|
120
|
-
if node.value.first.is_a?(Nokogiri::CSS::Node)
|
190
|
+
def visit_pseudo_class(node)
|
191
|
+
if node.value.first.is_a?(Nokogiri::CSS::Node) && (node.value.first.type == :FUNCTION)
|
121
192
|
node.value.first.accept(self)
|
122
193
|
else
|
123
|
-
msg = :"visit_pseudo_class_#{node.value.first.gsub(/[(]/,
|
124
|
-
return
|
194
|
+
msg = :"visit_pseudo_class_#{node.value.first.gsub(/[(]/, "")}"
|
195
|
+
return send(msg, node) if respond_to?(msg)
|
125
196
|
|
126
197
|
case node.value.first
|
127
|
-
when "first" then "position()
|
128
|
-
when "first-child" then "count(preceding-sibling::*)
|
129
|
-
when "last" then "position()
|
130
|
-
when "last-child" then "count(following-sibling::*)
|
131
|
-
when "first-of-type" then "position()
|
132
|
-
when "last-of-type" then "position()
|
133
|
-
when "only-child" then "count(preceding-sibling::*)
|
134
|
-
when "only-of-type" then "last()
|
198
|
+
when "first" then "position()=1"
|
199
|
+
when "first-child" then "count(preceding-sibling::*)=0"
|
200
|
+
when "last" then "position()=last()"
|
201
|
+
when "last-child" then "count(following-sibling::*)=0"
|
202
|
+
when "first-of-type" then "position()=1"
|
203
|
+
when "last-of-type" then "position()=last()"
|
204
|
+
when "only-child" then "count(preceding-sibling::*)=0 and count(following-sibling::*)=0"
|
205
|
+
when "only-of-type" then "last()=1"
|
135
206
|
when "empty" then "not(node())"
|
136
207
|
when "parent" then "node()"
|
137
208
|
when "root" then "not(parent::*)"
|
138
209
|
else
|
139
|
-
node.value.first
|
210
|
+
"nokogiri:#{node.value.first}(.)"
|
140
211
|
end
|
141
212
|
end
|
142
213
|
end
|
143
214
|
|
144
|
-
def visit_class_condition
|
145
|
-
"
|
215
|
+
def visit_class_condition(node)
|
216
|
+
css_class("@class", node.value.first)
|
146
217
|
end
|
147
218
|
|
148
|
-
def visit_combinator
|
219
|
+
def visit_combinator(node)
|
149
220
|
if is_of_type_pseudo_class?(node.value.last)
|
150
|
-
"#{node.value.first
|
221
|
+
"#{node.value.first&.accept(self)}][#{node.value.last.accept(self)}"
|
151
222
|
else
|
152
|
-
"#{node.value.first
|
223
|
+
"#{node.value.first&.accept(self)} and #{node.value.last.accept(self)}"
|
153
224
|
end
|
154
225
|
end
|
155
226
|
|
156
227
|
{
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
}.each do |k,v|
|
162
|
-
class_eval
|
228
|
+
"direct_adjacent_selector" => "/following-sibling::*[1]/self::",
|
229
|
+
"following_selector" => "/following-sibling::",
|
230
|
+
"descendant_selector" => "//",
|
231
|
+
"child_selector" => "/",
|
232
|
+
}.each do |k, v|
|
233
|
+
class_eval <<~RUBY, __FILE__, __LINE__ + 1
|
163
234
|
def visit_#{k} node
|
164
235
|
"\#{node.value.first.accept(self) if node.value.first}#{v}\#{node.value.last.accept(self)}"
|
165
236
|
end
|
166
|
-
|
237
|
+
RUBY
|
238
|
+
end
|
239
|
+
|
240
|
+
def visit_conditional_selector(node)
|
241
|
+
node.value.first.accept(self) + "[" +
|
242
|
+
node.value.last.accept(self) + "]"
|
167
243
|
end
|
168
244
|
|
169
|
-
def
|
170
|
-
|
171
|
-
|
245
|
+
def visit_element_name(node)
|
246
|
+
if @doctype == DoctypeConfig::HTML5 && html5_element_name_needs_namespace_handling(node)
|
247
|
+
# HTML5 has namespaces that should be ignored in CSS queries
|
248
|
+
# https://github.com/sparklemotion/nokogiri/issues/2376
|
249
|
+
if @builtins == BuiltinsConfig::ALWAYS || (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)
|
250
|
+
if WILDCARD_NAMESPACES
|
251
|
+
"*:#{node.value.first}"
|
252
|
+
else
|
253
|
+
"*[nokogiri-builtin:local-name-is('#{node.value.first}')]"
|
254
|
+
end
|
255
|
+
else
|
256
|
+
"*[local-name()='#{node.value.first}']"
|
257
|
+
end
|
258
|
+
else
|
259
|
+
node.value.first
|
260
|
+
end
|
172
261
|
end
|
173
262
|
|
174
|
-
def
|
175
|
-
node.value.first
|
263
|
+
def visit_attrib_name(node)
|
264
|
+
"@#{node.value.first}"
|
176
265
|
end
|
177
266
|
|
178
|
-
def accept
|
267
|
+
def accept(node)
|
179
268
|
node.accept(self)
|
180
269
|
end
|
181
270
|
|
182
|
-
|
183
|
-
def nth node, options={}
|
184
|
-
raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
|
271
|
+
private
|
185
272
|
|
186
|
-
|
273
|
+
def html5_element_name_needs_namespace_handling(node)
|
274
|
+
# if this is the wildcard selector "*", use it as normal
|
275
|
+
node.value.first != "*" &&
|
276
|
+
# if there is already a namespace (i.e., it is a prefixed QName), use it as normal
|
277
|
+
!node.value.first.include?(":")
|
278
|
+
end
|
279
|
+
|
280
|
+
def nth(node, options = {})
|
281
|
+
unless node.value.size == 4
|
282
|
+
raise(ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}")
|
283
|
+
end
|
284
|
+
|
285
|
+
a, b = read_a_and_positive_b(node.value)
|
187
286
|
position = if options[:child]
|
188
|
-
options[:last] ? "(count(following-sibling::*)
|
287
|
+
options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
|
189
288
|
else
|
190
289
|
options[:last] ? "(last()-position()+1)" : "position()"
|
191
290
|
end
|
192
291
|
|
193
292
|
if b.zero?
|
194
|
-
"(#{position} mod #{a})
|
293
|
+
"(#{position} mod #{a})=0"
|
195
294
|
else
|
196
295
|
compare = a < 0 ? "<=" : ">="
|
197
296
|
if a.abs == 1
|
198
|
-
"#{position}
|
297
|
+
"#{position}#{compare}#{b}"
|
199
298
|
else
|
200
|
-
"(#{position}
|
299
|
+
"(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
|
201
300
|
end
|
202
301
|
end
|
203
302
|
end
|
204
303
|
|
205
|
-
def read_a_and_positive_b
|
206
|
-
op = values[2]
|
304
|
+
def read_a_and_positive_b(values)
|
305
|
+
op = values[2].strip
|
207
306
|
if op == "+"
|
208
307
|
a = values[0].to_i
|
209
308
|
b = values[3].to_i
|
@@ -216,15 +315,25 @@ module Nokogiri
|
|
216
315
|
[a, b]
|
217
316
|
end
|
218
317
|
|
219
|
-
def is_of_type_pseudo_class?
|
220
|
-
if node.type
|
221
|
-
if node.value[0].is_a?(Nokogiri::CSS::Node)
|
318
|
+
def is_of_type_pseudo_class?(node) # rubocop:disable Naming/PredicateName
|
319
|
+
if node.type == :PSEUDO_CLASS
|
320
|
+
if node.value[0].is_a?(Nokogiri::CSS::Node) && (node.value[0].type == :FUNCTION)
|
222
321
|
node.value[0].value[0]
|
223
322
|
else
|
224
323
|
node.value[0]
|
225
324
|
end =~ /(nth|first|last|only)-of-type(\()?/
|
226
325
|
end
|
227
326
|
end
|
327
|
+
|
328
|
+
def css_class(hay, needle)
|
329
|
+
if @builtins == BuiltinsConfig::ALWAYS || (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)
|
330
|
+
# use the builtin implementation
|
331
|
+
"nokogiri-builtin:css-class(#{hay},'#{needle}')"
|
332
|
+
else
|
333
|
+
# use only ordinary xpath functions
|
334
|
+
"contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
|
335
|
+
end
|
336
|
+
end
|
228
337
|
end
|
229
338
|
end
|
230
339
|
end
|
data/lib/nokogiri/css.rb
CHANGED
@@ -1,27 +1,66 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
x = $-w
|
4
|
-
$-w = false
|
5
|
-
require 'nokogiri/css/parser'
|
6
|
-
$-w = x
|
7
|
-
|
8
|
-
require 'nokogiri/css/tokenizer'
|
9
|
-
require 'nokogiri/css/syntax_error'
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
10
3
|
|
11
4
|
module Nokogiri
|
5
|
+
# Translate a CSS selector into an XPath 1.0 query
|
12
6
|
module CSS
|
13
7
|
class << self
|
14
|
-
|
15
|
-
#
|
16
|
-
def parse
|
17
|
-
Parser.new.parse
|
8
|
+
# TODO: Deprecate this method ahead of 2.0 and delete it in 2.0.
|
9
|
+
# It is not used by Nokogiri and shouldn't be part of the public API.
|
10
|
+
def parse(selector) # :nodoc:
|
11
|
+
Parser.new.parse(selector)
|
18
12
|
end
|
19
13
|
|
20
|
-
|
21
|
-
#
|
22
|
-
|
23
|
-
|
14
|
+
# :call-seq:
|
15
|
+
# xpath_for(selector) → String
|
16
|
+
# xpath_for(selector [, prefix:] [, visitor:] [, ns:]) → String
|
17
|
+
#
|
18
|
+
# Translate a CSS selector to the equivalent XPath query.
|
19
|
+
#
|
20
|
+
# [Parameters]
|
21
|
+
# - +selector+ (String) The CSS selector to be translated into XPath
|
22
|
+
#
|
23
|
+
# - +prefix:+ (String)
|
24
|
+
#
|
25
|
+
# The XPath prefix for the query, see Nokogiri::XML::XPath for some options. Default is
|
26
|
+
# +XML::XPath::GLOBAL_SEARCH_PREFIX+.
|
27
|
+
#
|
28
|
+
# - +visitor:+ (Nokogiri::CSS::XPathVisitor)
|
29
|
+
#
|
30
|
+
# The visitor class to use to transform the AST into XPath. Default is
|
31
|
+
# +Nokogiri::CSS::XPathVisitor.new+.
|
32
|
+
#
|
33
|
+
# - +ns:+ (Hash<String ⇒ String>)
|
34
|
+
#
|
35
|
+
# The namespaces that are referenced in the query, if any. This is a hash where the keys are
|
36
|
+
# the namespace prefix and the values are the namespace URIs. Default is an empty Hash.
|
37
|
+
#
|
38
|
+
# [Returns] (String) The equivalent XPath query for +selector+
|
39
|
+
#
|
40
|
+
# 💡 Note that translated queries are cached for performance concerns.
|
41
|
+
#
|
42
|
+
def xpath_for(selector, options = {})
|
43
|
+
raise TypeError, "no implicit conversion of #{selector.inspect} to String" unless selector.respond_to?(:to_str)
|
44
|
+
|
45
|
+
selector = selector.to_str
|
46
|
+
raise Nokogiri::CSS::SyntaxError, "empty CSS selector" if selector.empty?
|
47
|
+
|
48
|
+
prefix = options.fetch(:prefix, Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX)
|
49
|
+
visitor = options.fetch(:visitor) { Nokogiri::CSS::XPathVisitor.new }
|
50
|
+
ns = options.fetch(:ns, {})
|
51
|
+
|
52
|
+
Parser.new(ns).xpath_for(selector, prefix, visitor)
|
24
53
|
end
|
25
54
|
end
|
26
55
|
end
|
27
56
|
end
|
57
|
+
|
58
|
+
require_relative "css/node"
|
59
|
+
require_relative "css/xpath_visitor"
|
60
|
+
x = $-w
|
61
|
+
$-w = false
|
62
|
+
require_relative "css/parser"
|
63
|
+
$-w = x
|
64
|
+
|
65
|
+
require_relative "css/tokenizer"
|
66
|
+
require_relative "css/syntax_error"
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Nokogiri
|
2
4
|
module Decorators
|
3
5
|
###
|
@@ -9,21 +11,21 @@ module Nokogiri
|
|
9
11
|
|
10
12
|
###
|
11
13
|
# look for node with +name+. See Nokogiri.Slop
|
12
|
-
def method_missing
|
14
|
+
def method_missing(name, *args, &block)
|
13
15
|
if args.empty?
|
14
|
-
list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/,
|
15
|
-
elsif args.first.is_a?
|
16
|
+
list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, "")}")
|
17
|
+
elsif args.first.is_a?(Hash)
|
16
18
|
hash = args.first
|
17
19
|
if hash[:css]
|
18
20
|
list = css("#{name}#{hash[:css]}")
|
19
21
|
elsif hash[:xpath]
|
20
|
-
conds = Array(hash[:xpath]).join(
|
22
|
+
conds = Array(hash[:xpath]).join(" and ")
|
21
23
|
list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
|
22
24
|
end
|
23
25
|
else
|
24
26
|
CSS::Parser.without_cache do
|
25
27
|
list = xpath(
|
26
|
-
*CSS.xpath_for("#{name}#{args.first}", :
|
28
|
+
*CSS.xpath_for("#{name}#{args.first}", prefix: XPATH_PREFIX),
|
27
29
|
)
|
28
30
|
end
|
29
31
|
end
|
@@ -32,8 +34,8 @@ module Nokogiri
|
|
32
34
|
list.length == 1 ? list.first : list
|
33
35
|
end
|
34
36
|
|
35
|
-
def respond_to_missing?
|
36
|
-
list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/,
|
37
|
+
def respond_to_missing?(name, include_private = false)
|
38
|
+
list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, "")}")
|
37
39
|
|
38
40
|
!list.empty?
|
39
41
|
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
class EncodingHandler
|
6
|
+
# Popular encoding aliases not known by all iconv implementations that Nokogiri should support.
|
7
|
+
USEFUL_ALIASES = {
|
8
|
+
# alias_name => true_name
|
9
|
+
"NOKOGIRI-SENTINEL" => "UTF-8", # indicating the Nokogiri has installed aliases
|
10
|
+
"Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
|
11
|
+
"UTF-8" => "UTF-8", # for JRuby tests, this is a no-op in CRuby
|
12
|
+
}
|
13
|
+
|
14
|
+
class << self
|
15
|
+
def install_default_aliases
|
16
|
+
USEFUL_ALIASES.each do |alias_name, name|
|
17
|
+
EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
# :stopdoc:
|
23
|
+
if Nokogiri.jruby?
|
24
|
+
class << self
|
25
|
+
def [](name)
|
26
|
+
storage.key?(name) ? new(storage[name]) : nil
|
27
|
+
end
|
28
|
+
|
29
|
+
def alias(name, alias_name)
|
30
|
+
storage[alias_name] = name
|
31
|
+
end
|
32
|
+
|
33
|
+
def delete(name)
|
34
|
+
storage.delete(name)
|
35
|
+
end
|
36
|
+
|
37
|
+
def clear_aliases!
|
38
|
+
storage.clear
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
def storage
|
44
|
+
@storage ||= {}
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def initialize(name)
|
49
|
+
@name = name
|
50
|
+
end
|
51
|
+
|
52
|
+
attr_reader :name
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
Nokogiri::EncodingHandler.install_default_aliases
|