nokogiri 1.10.3 → 1.12.5
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/LICENSE-DEPENDENCIES.md +1173 -884
- data/LICENSE.md +1 -1
- data/README.md +176 -96
- data/dependencies.yml +28 -26
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +716 -414
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +120 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +228 -91
- data/ext/nokogiri/nokogiri.h +191 -89
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +267 -195
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +28 -17
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +60 -51
- data/ext/nokogiri/xml_node.c +493 -407
- data/ext/nokogiri/xml_node_set.c +174 -162
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +197 -172
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +112 -112
- data/ext/nokogiri/xml_sax_parser_context.c +105 -86
- data/ext/nokogiri/xml_sax_push_parser.c +36 -27
- data/ext/nokogiri/xml_schema.c +112 -33
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +158 -73
- data/ext/nokogiri/xslt_stylesheet.c +158 -164
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4886 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/css/node.rb +1 -0
- data/lib/nokogiri/css/parser.rb +64 -63
- data/lib/nokogiri/css/parser.y +3 -3
- data/lib/nokogiri/css/parser_extras.rb +39 -36
- data/lib/nokogiri/css/syntax_error.rb +2 -1
- data/lib/nokogiri/css/tokenizer.rb +105 -103
- data/lib/nokogiri/css/xpath_visitor.rb +73 -43
- data/lib/nokogiri/css.rb +15 -14
- data/lib/nokogiri/decorators/slop.rb +1 -0
- data/lib/nokogiri/extension.rb +31 -0
- data/lib/nokogiri/gumbo.rb +14 -0
- data/lib/nokogiri/html.rb +32 -27
- data/lib/nokogiri/{html → html4}/builder.rb +3 -2
- data/lib/nokogiri/{html → html4}/document.rb +17 -30
- data/lib/nokogiri/{html → html4}/document_fragment.rb +18 -17
- data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
- data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
- data/lib/nokogiri/{html → html4}/sax/parser.rb +12 -14
- data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +6 -5
- data/lib/nokogiri/html4.rb +40 -0
- data/lib/nokogiri/html5/document.rb +74 -0
- data/lib/nokogiri/html5/document_fragment.rb +80 -0
- data/lib/nokogiri/html5/node.rb +93 -0
- data/lib/nokogiri/html5.rb +473 -0
- data/lib/nokogiri/jruby/dependencies.rb +20 -0
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +215 -0
- data/lib/nokogiri/version.rb +3 -109
- data/lib/nokogiri/xml/attr.rb +1 -0
- data/lib/nokogiri/xml/attribute_decl.rb +1 -0
- data/lib/nokogiri/xml/builder.rb +74 -32
- data/lib/nokogiri/xml/cdata.rb +1 -0
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +138 -41
- data/lib/nokogiri/xml/document_fragment.rb +5 -6
- data/lib/nokogiri/xml/dtd.rb +1 -0
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +1 -0
- data/lib/nokogiri/xml/node/save_options.rb +2 -1
- data/lib/nokogiri/xml/node.rb +629 -293
- data/lib/nokogiri/xml/node_set.rb +1 -0
- data/lib/nokogiri/xml/notation.rb +1 -0
- data/lib/nokogiri/xml/parse_options.rb +12 -3
- data/lib/nokogiri/xml/pp/character_data.rb +1 -0
- data/lib/nokogiri/xml/pp/node.rb +1 -0
- data/lib/nokogiri/xml/pp.rb +3 -2
- data/lib/nokogiri/xml/processing_instruction.rb +1 -0
- data/lib/nokogiri/xml/reader.rb +9 -12
- data/lib/nokogiri/xml/relax_ng.rb +7 -2
- data/lib/nokogiri/xml/sax/document.rb +25 -30
- data/lib/nokogiri/xml/sax/parser.rb +1 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
- data/lib/nokogiri/xml/sax.rb +5 -4
- data/lib/nokogiri/xml/schema.rb +13 -4
- data/lib/nokogiri/xml/searchable.rb +25 -16
- data/lib/nokogiri/xml/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +4 -5
- data/lib/nokogiri/xml/xpath_context.rb +1 -0
- data/lib/nokogiri/xml.rb +36 -36
- data/lib/nokogiri/xslt/stylesheet.rb +2 -1
- data/lib/nokogiri/xslt.rb +17 -16
- data/lib/nokogiri.rb +32 -51
- data/lib/xsd/xmlparser/nokogiri.rb +1 -0
- data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- metadata +151 -153
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxslt/0001-Fix-security-framework-bypass.patch +0 -120
- data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
@@ -1,151 +1,153 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
#--
|
2
3
|
# DO NOT MODIFY!!!!
|
3
|
-
# This file is automatically generated by rex 1.0.
|
4
|
+
# This file is automatically generated by rex 1.0.7
|
4
5
|
# from lexical definition file "lib/nokogiri/css/tokenizer.rex".
|
5
6
|
#++
|
6
7
|
|
7
8
|
module Nokogiri
|
8
9
|
module CSS
|
9
10
|
class Tokenizer # :nodoc:
|
10
|
-
|
11
|
+
require 'strscan'
|
11
12
|
|
12
|
-
|
13
|
+
class ScanError < StandardError ; end
|
13
14
|
|
14
|
-
|
15
|
-
|
16
|
-
|
15
|
+
attr_reader :lineno
|
16
|
+
attr_reader :filename
|
17
|
+
attr_accessor :state
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
19
|
+
def scan_setup(str)
|
20
|
+
@ss = StringScanner.new(str)
|
21
|
+
@lineno = 1
|
22
|
+
@state = nil
|
23
|
+
end
|
23
24
|
|
24
|
-
|
25
|
-
|
26
|
-
|
25
|
+
def action
|
26
|
+
yield
|
27
|
+
end
|
27
28
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
29
|
+
def scan_str(str)
|
30
|
+
scan_setup(str)
|
31
|
+
do_parse
|
32
|
+
end
|
33
|
+
alias :scan :scan_str
|
33
34
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
35
|
+
def load_file( filename )
|
36
|
+
@filename = filename
|
37
|
+
File.open(filename, "r") do |f|
|
38
|
+
scan_setup(f.read)
|
39
|
+
end
|
40
|
+
end
|
40
41
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
42
|
+
def scan_file( filename )
|
43
|
+
load_file(filename)
|
44
|
+
do_parse
|
45
|
+
end
|
45
46
|
|
46
47
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
# skips empty actions
|
51
|
-
until token = _next_token or @ss.eos?; end
|
52
|
-
token
|
53
|
-
end
|
48
|
+
def next_token
|
49
|
+
return if @ss.eos?
|
54
50
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
when nil
|
60
|
-
case
|
61
|
-
when (text = @ss.scan(/has\([\s]*/))
|
62
|
-
action { [:HAS, text] }
|
51
|
+
# skips empty actions
|
52
|
+
until token = _next_token or @ss.eos?; end
|
53
|
+
token
|
54
|
+
end
|
63
55
|
|
64
|
-
|
65
|
-
|
56
|
+
def _next_token
|
57
|
+
text = @ss.peek(1)
|
58
|
+
@lineno += 1 if text == "\n"
|
59
|
+
token = case @state
|
60
|
+
when nil
|
61
|
+
case
|
62
|
+
when (text = @ss.scan(/has\([\s]*/))
|
63
|
+
action { [:HAS, text] }
|
66
64
|
|
67
|
-
|
68
|
-
|
65
|
+
when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
|
66
|
+
action { [:FUNCTION, text] }
|
69
67
|
|
70
|
-
|
71
|
-
|
68
|
+
when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
|
69
|
+
action { [:IDENT, text] }
|
72
70
|
|
73
|
-
|
74
|
-
|
71
|
+
when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
|
72
|
+
action { [:HASH, text] }
|
75
73
|
|
76
|
-
|
77
|
-
|
74
|
+
when (text = @ss.scan(/[\s]*~=[\s]*/))
|
75
|
+
action { [:INCLUDES, text] }
|
78
76
|
|
79
|
-
|
80
|
-
|
77
|
+
when (text = @ss.scan(/[\s]*\|=[\s]*/))
|
78
|
+
action { [:DASHMATCH, text] }
|
81
79
|
|
82
|
-
|
83
|
-
|
80
|
+
when (text = @ss.scan(/[\s]*\^=[\s]*/))
|
81
|
+
action { [:PREFIXMATCH, text] }
|
84
82
|
|
85
|
-
|
86
|
-
|
83
|
+
when (text = @ss.scan(/[\s]*\$=[\s]*/))
|
84
|
+
action { [:SUFFIXMATCH, text] }
|
87
85
|
|
88
|
-
|
89
|
-
|
86
|
+
when (text = @ss.scan(/[\s]*\*=[\s]*/))
|
87
|
+
action { [:SUBSTRINGMATCH, text] }
|
90
88
|
|
91
|
-
|
92
|
-
|
89
|
+
when (text = @ss.scan(/[\s]*!=[\s]*/))
|
90
|
+
action { [:NOT_EQUAL, text] }
|
93
91
|
|
94
|
-
|
95
|
-
|
92
|
+
when (text = @ss.scan(/[\s]*=[\s]*/))
|
93
|
+
action { [:EQUAL, text] }
|
96
94
|
|
97
|
-
|
98
|
-
|
95
|
+
when (text = @ss.scan(/[\s]*\)/))
|
96
|
+
action { [:RPAREN, text] }
|
99
97
|
|
100
|
-
|
101
|
-
|
98
|
+
when (text = @ss.scan(/\[[\s]*/))
|
99
|
+
action { [:LSQUARE, text] }
|
102
100
|
|
103
|
-
|
104
|
-
|
101
|
+
when (text = @ss.scan(/[\s]*\]/))
|
102
|
+
action { [:RSQUARE, text] }
|
105
103
|
|
106
|
-
|
107
|
-
|
104
|
+
when (text = @ss.scan(/[\s]*\+[\s]*/))
|
105
|
+
action { [:PLUS, text] }
|
108
106
|
|
109
|
-
|
110
|
-
|
107
|
+
when (text = @ss.scan(/[\s]*>[\s]*/))
|
108
|
+
action { [:GREATER, text] }
|
111
109
|
|
112
|
-
|
113
|
-
|
110
|
+
when (text = @ss.scan(/[\s]*,[\s]*/))
|
111
|
+
action { [:COMMA, text] }
|
114
112
|
|
115
|
-
|
116
|
-
|
113
|
+
when (text = @ss.scan(/[\s]*~[\s]*/))
|
114
|
+
action { [:TILDE, text] }
|
117
115
|
|
118
|
-
|
119
|
-
|
116
|
+
when (text = @ss.scan(/\:not\([\s]*/))
|
117
|
+
action { [:NOT, text] }
|
120
118
|
|
121
|
-
|
122
|
-
|
119
|
+
when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
|
120
|
+
action { [:NUMBER, text] }
|
123
121
|
|
124
|
-
|
125
|
-
|
122
|
+
when (text = @ss.scan(/[\s]*\/\/[\s]*/))
|
123
|
+
action { [:DOUBLESLASH, text] }
|
126
124
|
|
127
|
-
|
128
|
-
|
125
|
+
when (text = @ss.scan(/[\s]*\/[\s]*/))
|
126
|
+
action { [:SLASH, text] }
|
129
127
|
|
130
|
-
|
131
|
-
|
128
|
+
when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
|
129
|
+
action {[:UNICODE_RANGE, text] }
|
132
130
|
|
133
|
-
|
134
|
-
|
131
|
+
when (text = @ss.scan(/[\s]+/))
|
132
|
+
action { [:S, text] }
|
135
133
|
|
136
|
-
|
137
|
-
|
134
|
+
when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*'/))
|
135
|
+
action { [:STRING, text] }
|
138
136
|
|
139
|
-
|
140
|
-
|
141
|
-
raise ScanError, "can not match: '" + text + "'"
|
142
|
-
end # if
|
137
|
+
when (text = @ss.scan(/./))
|
138
|
+
action { [text, text] }
|
143
139
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
140
|
+
|
141
|
+
else
|
142
|
+
text = @ss.string[@ss.pos .. -1]
|
143
|
+
raise ScanError, "can not match: '" + text + "'"
|
144
|
+
end # if
|
145
|
+
|
146
|
+
else
|
147
|
+
raise ScanError, "undefined state: '" + state.to_s + "'"
|
148
|
+
end # case state
|
149
|
+
token
|
150
|
+
end # def _next_token
|
149
151
|
|
150
152
|
end # class
|
151
153
|
end
|
@@ -1,8 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
module CSS
|
3
4
|
class XPathVisitor # :nodoc:
|
4
5
|
def visit_function node
|
5
|
-
|
6
6
|
msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
|
7
7
|
return self.send(msg, node) if self.respond_to?(msg)
|
8
8
|
|
@@ -12,49 +12,51 @@ module Nokogiri
|
|
12
12
|
when /^self\(/
|
13
13
|
"self::#{node.value[1]}"
|
14
14
|
when /^eq\(/
|
15
|
-
"position()
|
15
|
+
"position()=#{node.value[1]}"
|
16
16
|
when /^(nth|nth-of-type)\(/
|
17
17
|
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
|
18
18
|
nth(node.value[1])
|
19
19
|
else
|
20
|
-
"position()
|
20
|
+
"position()=#{node.value[1]}"
|
21
21
|
end
|
22
22
|
when /^nth-child\(/
|
23
23
|
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
|
24
24
|
nth(node.value[1], :child => true)
|
25
25
|
else
|
26
|
-
"count(preceding-sibling::*)
|
26
|
+
"count(preceding-sibling::*)=#{node.value[1].to_i-1}"
|
27
27
|
end
|
28
28
|
when /^nth-last-of-type\(/
|
29
29
|
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
|
30
30
|
nth(node.value[1], :last => true)
|
31
31
|
else
|
32
32
|
index = node.value[1].to_i - 1
|
33
|
-
index == 0 ? "position()
|
33
|
+
index == 0 ? "position()=last()" : "position()=last()-#{index}"
|
34
34
|
end
|
35
35
|
when /^nth-last-child\(/
|
36
36
|
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
|
37
37
|
nth(node.value[1], :last => true, :child => true)
|
38
38
|
else
|
39
|
-
"count(following-sibling::*)
|
39
|
+
"count(following-sibling::*)=#{node.value[1].to_i-1}"
|
40
40
|
end
|
41
41
|
when /^(first|first-of-type)\(/
|
42
|
-
"position()
|
42
|
+
"position()=1"
|
43
43
|
when /^(last|last-of-type)\(/
|
44
|
-
"position()
|
44
|
+
"position()=last()"
|
45
45
|
when /^contains\(/
|
46
|
-
"contains(
|
46
|
+
"contains(.,#{node.value[1]})"
|
47
47
|
when /^gt\(/
|
48
|
-
"position()
|
48
|
+
"position()>#{node.value[1]}"
|
49
49
|
when /^only-child\(/
|
50
|
-
"last()
|
50
|
+
"last()=1"
|
51
51
|
when /^comment\(/
|
52
52
|
"comment()"
|
53
53
|
when /^has\(/
|
54
|
-
|
54
|
+
is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
|
55
|
+
".#{"//" if !is_direct}#{node.value[1].accept(self)}"
|
55
56
|
else
|
57
|
+
# non-standard. this looks like a function call.
|
56
58
|
args = ['.'] + node.value[1..-1]
|
57
|
-
"#{node.value.first}#{args.join(',
|
59
|
+
"#{node.value.first}#{args.join(',')})"
|
58
60
|
end
|
59
61
|
end
|
60
62
|
|
@@ -69,18 +71,18 @@ module Nokogiri
|
|
69
71
|
|
70
72
|
def visit_id node
|
71
73
|
node.value.first =~ /^#(.*)$/
|
72
|
-
"@id
|
74
|
+
"@id='#{$1}'"
|
73
75
|
end
|
74
76
|
|
75
77
|
def visit_attribute_condition node
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
78
|
+
attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
|
79
|
+
''
|
80
|
+
else
|
81
|
+
'@'
|
82
|
+
end
|
81
83
|
attribute += node.value.first.accept(self)
|
82
84
|
|
83
|
-
#
|
85
|
+
# non-standard. attributes starting with '@'
|
84
86
|
attribute.gsub!(/^@@/, '@')
|
85
87
|
|
86
88
|
return attribute unless node.value.length == 3
|
@@ -88,29 +90,30 @@ module Nokogiri
|
|
88
90
|
value = node.value.last
|
89
91
|
value = "'#{value}'" if value !~ /^['"]/
|
90
92
|
|
93
|
+
# quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
|
91
94
|
if (value[0]==value[-1]) && %q{"'}.include?(value[0])
|
92
95
|
str_value = value[1..-2]
|
93
96
|
if str_value.include?(value[0])
|
94
|
-
value = 'concat("' + str_value.split('"', -1).join(%q{",
|
97
|
+
value = 'concat("' + str_value.split('"', -1).join(%q{",'"',"}) + '","")'
|
95
98
|
end
|
96
99
|
end
|
97
100
|
|
98
101
|
case node.value[1]
|
99
102
|
when :equal
|
100
|
-
attribute + "
|
103
|
+
attribute + "=" + "#{value}"
|
101
104
|
when :not_equal
|
102
|
-
attribute + "
|
105
|
+
attribute + "!=" + "#{value}"
|
103
106
|
when :substring_match
|
104
|
-
"contains(#{attribute}
|
107
|
+
"contains(#{attribute},#{value})"
|
105
108
|
when :prefix_match
|
106
|
-
"starts-with(#{attribute}
|
109
|
+
"starts-with(#{attribute},#{value})"
|
107
110
|
when :dash_match
|
108
|
-
"#{attribute}
|
111
|
+
"#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
|
109
112
|
when :includes
|
110
|
-
|
113
|
+
value = value[1..-2] # strip quotes
|
114
|
+
css_class(attribute, value)
|
111
115
|
when :suffix_match
|
112
|
-
"substring(#{attribute},
|
113
|
-
"string-length(#{value}) + 1, string-length(#{value})) = #{value}"
|
116
|
+
"substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
|
114
117
|
else
|
115
118
|
attribute + " #{node.value[1]} " + "#{value}"
|
116
119
|
end
|
@@ -124,14 +127,14 @@ module Nokogiri
|
|
124
127
|
return self.send(msg, node) if self.respond_to?(msg)
|
125
128
|
|
126
129
|
case node.value.first
|
127
|
-
when "first" then "position()
|
128
|
-
when "first-child" then "count(preceding-sibling::*)
|
129
|
-
when "last" then "position()
|
130
|
-
when "last-child" then "count(following-sibling::*)
|
131
|
-
when "first-of-type" then "position()
|
132
|
-
when "last-of-type" then "position()
|
133
|
-
when "only-child" then "count(preceding-sibling::*)
|
134
|
-
when "only-of-type" then "last()
|
130
|
+
when "first" then "position()=1"
|
131
|
+
when "first-child" then "count(preceding-sibling::*)=0"
|
132
|
+
when "last" then "position()=last()"
|
133
|
+
when "last-child" then "count(following-sibling::*)=0"
|
134
|
+
when "first-of-type" then "position()=1"
|
135
|
+
when "last-of-type" then "position()=last()"
|
136
|
+
when "only-child" then "count(preceding-sibling::*)=0 and count(following-sibling::*)=0"
|
137
|
+
when "only-of-type" then "last()=1"
|
135
138
|
when "empty" then "not(node())"
|
136
139
|
when "parent" then "node()"
|
137
140
|
when "root" then "not(parent::*)"
|
@@ -142,7 +145,7 @@ module Nokogiri
|
|
142
145
|
end
|
143
146
|
|
144
147
|
def visit_class_condition node
|
145
|
-
"
|
148
|
+
css_class("@class", node.value.first)
|
146
149
|
end
|
147
150
|
|
148
151
|
def visit_combinator node
|
@@ -179,25 +182,26 @@ module Nokogiri
|
|
179
182
|
node.accept(self)
|
180
183
|
end
|
181
184
|
|
182
|
-
|
185
|
+
private
|
186
|
+
|
183
187
|
def nth node, options={}
|
184
188
|
raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
|
185
189
|
|
186
190
|
a, b = read_a_and_positive_b node.value
|
187
191
|
position = if options[:child]
|
188
|
-
options[:last] ? "(count(following-sibling::*)
|
192
|
+
options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
|
189
193
|
else
|
190
194
|
options[:last] ? "(last()-position()+1)" : "position()"
|
191
195
|
end
|
192
196
|
|
193
197
|
if b.zero?
|
194
|
-
"(#{position} mod #{a})
|
198
|
+
"(#{position} mod #{a})=0"
|
195
199
|
else
|
196
200
|
compare = a < 0 ? "<=" : ">="
|
197
201
|
if a.abs == 1
|
198
|
-
"#{position}
|
202
|
+
"#{position}#{compare}#{b}"
|
199
203
|
else
|
200
|
-
"(#{position}
|
204
|
+
"(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
|
201
205
|
end
|
202
206
|
end
|
203
207
|
end
|
@@ -225,6 +229,32 @@ module Nokogiri
|
|
225
229
|
end =~ /(nth|first|last|only)-of-type(\()?/
|
226
230
|
end
|
227
231
|
end
|
232
|
+
|
233
|
+
# use only ordinary xpath functions
|
234
|
+
def css_class_standard(hay, needle)
|
235
|
+
"contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
|
236
|
+
end
|
237
|
+
|
238
|
+
# use the builtin implementation
|
239
|
+
def css_class_builtin(hay, needle)
|
240
|
+
"nokogiri-builtin:css-class(#{hay},'#{needle}')"
|
241
|
+
end
|
242
|
+
|
243
|
+
alias_method :css_class, :css_class_standard
|
244
|
+
end
|
245
|
+
|
246
|
+
class XPathVisitorAlwaysUseBuiltins < XPathVisitor # :nodoc:
|
247
|
+
private
|
248
|
+
alias_method :css_class, :css_class_builtin
|
249
|
+
end
|
250
|
+
|
251
|
+
class XPathVisitorOptimallyUseBuiltins < XPathVisitor # :nodoc:
|
252
|
+
private
|
253
|
+
if Nokogiri.uses_libxml?
|
254
|
+
alias_method :css_class, :css_class_builtin
|
255
|
+
else
|
256
|
+
alias_method :css_class, :css_class_standard
|
257
|
+
end
|
228
258
|
end
|
229
259
|
end
|
230
260
|
end
|
data/lib/nokogiri/css.rb
CHANGED
@@ -1,27 +1,28 @@
|
|
1
|
-
|
2
|
-
require 'nokogiri/css/xpath_visitor'
|
3
|
-
x = $-w
|
4
|
-
$-w = false
|
5
|
-
require 'nokogiri/css/parser'
|
6
|
-
$-w = x
|
7
|
-
|
8
|
-
require 'nokogiri/css/tokenizer'
|
9
|
-
require 'nokogiri/css/syntax_error'
|
10
|
-
|
1
|
+
# frozen_string_literal: true
|
11
2
|
module Nokogiri
|
12
3
|
module CSS
|
13
4
|
class << self
|
14
5
|
###
|
15
6
|
# Parse this CSS selector in +selector+. Returns an AST.
|
16
|
-
def parse
|
17
|
-
Parser.new.parse
|
7
|
+
def parse(selector)
|
8
|
+
Parser.new.parse(selector)
|
18
9
|
end
|
19
10
|
|
20
11
|
###
|
21
12
|
# Get the XPath for +selector+.
|
22
|
-
def xpath_for
|
23
|
-
Parser.new(options[:ns] || {}).xpath_for
|
13
|
+
def xpath_for(selector, options = {})
|
14
|
+
Parser.new(options[:ns] || {}).xpath_for(selector, options)
|
24
15
|
end
|
25
16
|
end
|
26
17
|
end
|
27
18
|
end
|
19
|
+
|
20
|
+
require_relative "css/node"
|
21
|
+
require_relative "css/xpath_visitor"
|
22
|
+
x = $-w
|
23
|
+
$-w = false
|
24
|
+
require_relative "css/parser"
|
25
|
+
$-w = x
|
26
|
+
|
27
|
+
require_relative "css/tokenizer"
|
28
|
+
require_relative "css/syntax_error"
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# load the C or Java extension
|
4
|
+
begin
|
5
|
+
# native precompiled gems package shared libraries in <gem_dir>/lib/nokogiri/<ruby_version>
|
6
|
+
::RUBY_VERSION =~ /(\d+\.\d+)/
|
7
|
+
require_relative "#{Regexp.last_match(1)}/nokogiri"
|
8
|
+
rescue LoadError => e
|
9
|
+
if e.message =~ /GLIBC/
|
10
|
+
warn(<<~EOM)
|
11
|
+
|
12
|
+
ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system with glibc < 2.17:
|
13
|
+
|
14
|
+
#{e.message}
|
15
|
+
|
16
|
+
If that's the case, then please install Nokogiri via the `ruby` platform gem:
|
17
|
+
gem install nokogiri --platform=ruby
|
18
|
+
or:
|
19
|
+
bundle config set force_ruby_platform true
|
20
|
+
|
21
|
+
Please visit https://nokogiri.org/tutorials/installing_nokogiri.html for more help.
|
22
|
+
|
23
|
+
EOM
|
24
|
+
raise e
|
25
|
+
end
|
26
|
+
|
27
|
+
# use "require" instead of "require_relative" because non-native gems will place C extension files
|
28
|
+
# in Gem::BasicSpecification#extension_dir after compilation (during normal installation), which
|
29
|
+
# is in $LOAD_PATH but not necessarily relative to this file (see #2300)
|
30
|
+
require "nokogiri/nokogiri"
|
31
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Nokogiri
|
3
|
+
module Gumbo
|
4
|
+
# The default maximum number of attributes per element.
|
5
|
+
DEFAULT_MAX_ATTRIBUTES = 400
|
6
|
+
|
7
|
+
# The default maximum number of errors for parsing a document or a fragment.
|
8
|
+
DEFAULT_MAX_ERRORS = 0
|
9
|
+
|
10
|
+
# The default maximum depth of the DOM tree produced by parsing a document
|
11
|
+
# or fragment.
|
12
|
+
DEFAULT_MAX_TREE_DEPTH = 400
|
13
|
+
end
|
14
|
+
end
|
data/lib/nokogiri/html.rb
CHANGED
@@ -1,37 +1,42 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'nokogiri/html/document_fragment'
|
4
|
-
require 'nokogiri/html/sax/parser_context'
|
5
|
-
require 'nokogiri/html/sax/parser'
|
6
|
-
require 'nokogiri/html/sax/push_parser'
|
7
|
-
require 'nokogiri/html/element_description'
|
8
|
-
require 'nokogiri/html/element_description_defaults'
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require_relative "html4"
|
9
3
|
|
10
4
|
module Nokogiri
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
end
|
5
|
+
HTML = Nokogiri::HTML4
|
6
|
+
|
7
|
+
# @!method HTML(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
|
8
|
+
# Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
|
9
|
+
# @!scope class
|
10
|
+
define_singleton_method(:HTML, Nokogiri.method(:HTML4))
|
18
11
|
|
12
|
+
# @note This module/namespace is an alias for {Nokogiri::HTML4} as of v1.12.0. Before v1.12.0,
|
13
|
+
# {Nokogiri::HTML4} did not exist, and this was the module/namespace for all HTML-related
|
14
|
+
# classes.
|
19
15
|
module HTML
|
20
|
-
class
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
16
|
+
# @note This class is an alias for {Nokogiri::HTML4::Document} as of v1.12.0.
|
17
|
+
class Document < Nokogiri::XML::Document
|
18
|
+
end
|
19
|
+
|
20
|
+
# @note This class is an alias for {Nokogiri::HTML4::DocumentFragment} as of v1.12.0.
|
21
|
+
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
22
|
+
end
|
23
|
+
|
24
|
+
# @note This class is an alias for {Nokogiri::HTML4::Builder} as of v1.12.0.
|
25
|
+
class Builder < Nokogiri::XML::Builder
|
26
|
+
end
|
27
|
+
|
28
|
+
module SAX
|
29
|
+
# @note This class is an alias for {Nokogiri::HTML4::SAX::Parser} as of v1.12.0.
|
30
|
+
class Parser < Nokogiri::XML::SAX::Parser
|
25
31
|
end
|
26
32
|
|
27
|
-
|
28
|
-
|
29
|
-
def fragment string, encoding = nil
|
30
|
-
HTML::DocumentFragment.parse string, encoding
|
33
|
+
# @note This class is an alias for {Nokogiri::HTML4::SAX::ParserContext} as of v1.12.0.
|
34
|
+
class ParserContext < Nokogiri::XML::SAX::ParserContext
|
31
35
|
end
|
32
|
-
end
|
33
36
|
|
34
|
-
|
35
|
-
|
37
|
+
# @note This class is an alias for {Nokogiri::HTML4::SAX::PushParser} as of v1.12.0.
|
38
|
+
class PushParser
|
39
|
+
end
|
40
|
+
end
|
36
41
|
end
|
37
42
|
end
|