nokogiri 1.10.9 → 1.12.5
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/LICENSE-DEPENDENCIES.md +1173 -884
- data/LICENSE.md +1 -1
- data/README.md +176 -96
- data/dependencies.yml +12 -12
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +716 -414
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +120 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +228 -91
- data/ext/nokogiri/nokogiri.h +191 -89
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +267 -195
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +28 -17
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +60 -51
- data/ext/nokogiri/xml_node.c +493 -407
- data/ext/nokogiri/xml_node_set.c +174 -162
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +197 -172
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +112 -112
- data/ext/nokogiri/xml_sax_parser_context.c +105 -86
- data/ext/nokogiri/xml_sax_push_parser.c +36 -27
- data/ext/nokogiri/xml_schema.c +96 -46
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +158 -73
- data/ext/nokogiri/xslt_stylesheet.c +158 -164
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4886 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/css/node.rb +1 -0
- data/lib/nokogiri/css/parser.rb +64 -63
- data/lib/nokogiri/css/parser.y +3 -3
- data/lib/nokogiri/css/parser_extras.rb +39 -36
- data/lib/nokogiri/css/syntax_error.rb +2 -1
- data/lib/nokogiri/css/tokenizer.rb +1 -0
- data/lib/nokogiri/css/xpath_visitor.rb +73 -43
- data/lib/nokogiri/css.rb +15 -14
- data/lib/nokogiri/decorators/slop.rb +1 -0
- data/lib/nokogiri/extension.rb +31 -0
- data/lib/nokogiri/gumbo.rb +14 -0
- data/lib/nokogiri/html.rb +32 -27
- data/lib/nokogiri/{html → html4}/builder.rb +3 -2
- data/lib/nokogiri/{html → html4}/document.rb +17 -30
- data/lib/nokogiri/{html → html4}/document_fragment.rb +18 -17
- data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
- data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
- data/lib/nokogiri/{html → html4}/sax/parser.rb +12 -14
- data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +6 -5
- data/lib/nokogiri/html4.rb +40 -0
- data/lib/nokogiri/html5/document.rb +74 -0
- data/lib/nokogiri/html5/document_fragment.rb +80 -0
- data/lib/nokogiri/html5/node.rb +93 -0
- data/lib/nokogiri/html5.rb +473 -0
- data/lib/nokogiri/jruby/dependencies.rb +20 -0
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +215 -0
- data/lib/nokogiri/version.rb +3 -109
- data/lib/nokogiri/xml/attr.rb +1 -0
- data/lib/nokogiri/xml/attribute_decl.rb +1 -0
- data/lib/nokogiri/xml/builder.rb +41 -2
- data/lib/nokogiri/xml/cdata.rb +1 -0
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +138 -41
- data/lib/nokogiri/xml/document_fragment.rb +5 -6
- data/lib/nokogiri/xml/dtd.rb +1 -0
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +1 -0
- data/lib/nokogiri/xml/node/save_options.rb +2 -1
- data/lib/nokogiri/xml/node.rb +629 -293
- data/lib/nokogiri/xml/node_set.rb +1 -0
- data/lib/nokogiri/xml/notation.rb +1 -0
- data/lib/nokogiri/xml/parse_options.rb +12 -3
- data/lib/nokogiri/xml/pp/character_data.rb +1 -0
- data/lib/nokogiri/xml/pp/node.rb +1 -0
- data/lib/nokogiri/xml/pp.rb +3 -2
- data/lib/nokogiri/xml/processing_instruction.rb +1 -0
- data/lib/nokogiri/xml/reader.rb +9 -12
- data/lib/nokogiri/xml/relax_ng.rb +7 -2
- data/lib/nokogiri/xml/sax/document.rb +25 -30
- data/lib/nokogiri/xml/sax/parser.rb +1 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
- data/lib/nokogiri/xml/sax.rb +5 -4
- data/lib/nokogiri/xml/schema.rb +13 -4
- data/lib/nokogiri/xml/searchable.rb +25 -16
- data/lib/nokogiri/xml/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +4 -5
- data/lib/nokogiri/xml/xpath_context.rb +1 -0
- data/lib/nokogiri/xml.rb +36 -36
- data/lib/nokogiri/xslt/stylesheet.rb +2 -1
- data/lib/nokogiri/xslt.rb +17 -16
- data/lib/nokogiri.rb +32 -51
- data/lib/xsd/xmlparser/nokogiri.rb +1 -0
- data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
- data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
- data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- metadata +139 -161
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
@@ -1,8 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
module CSS
|
3
4
|
class XPathVisitor # :nodoc:
|
4
5
|
def visit_function node
|
5
|
-
|
6
6
|
msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
|
7
7
|
return self.send(msg, node) if self.respond_to?(msg)
|
8
8
|
|
@@ -12,49 +12,51 @@ module Nokogiri
|
|
12
12
|
when /^self\(/
|
13
13
|
"self::#{node.value[1]}"
|
14
14
|
when /^eq\(/
|
15
|
-
"position()
|
15
|
+
"position()=#{node.value[1]}"
|
16
16
|
when /^(nth|nth-of-type)\(/
|
17
17
|
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
|
18
18
|
nth(node.value[1])
|
19
19
|
else
|
20
|
-
"position()
|
20
|
+
"position()=#{node.value[1]}"
|
21
21
|
end
|
22
22
|
when /^nth-child\(/
|
23
23
|
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
|
24
24
|
nth(node.value[1], :child => true)
|
25
25
|
else
|
26
|
-
"count(preceding-sibling::*)
|
26
|
+
"count(preceding-sibling::*)=#{node.value[1].to_i-1}"
|
27
27
|
end
|
28
28
|
when /^nth-last-of-type\(/
|
29
29
|
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
|
30
30
|
nth(node.value[1], :last => true)
|
31
31
|
else
|
32
32
|
index = node.value[1].to_i - 1
|
33
|
-
index == 0 ? "position()
|
33
|
+
index == 0 ? "position()=last()" : "position()=last()-#{index}"
|
34
34
|
end
|
35
35
|
when /^nth-last-child\(/
|
36
36
|
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
|
37
37
|
nth(node.value[1], :last => true, :child => true)
|
38
38
|
else
|
39
|
-
"count(following-sibling::*)
|
39
|
+
"count(following-sibling::*)=#{node.value[1].to_i-1}"
|
40
40
|
end
|
41
41
|
when /^(first|first-of-type)\(/
|
42
|
-
"position()
|
42
|
+
"position()=1"
|
43
43
|
when /^(last|last-of-type)\(/
|
44
|
-
"position()
|
44
|
+
"position()=last()"
|
45
45
|
when /^contains\(/
|
46
|
-
"contains(
|
46
|
+
"contains(.,#{node.value[1]})"
|
47
47
|
when /^gt\(/
|
48
|
-
"position()
|
48
|
+
"position()>#{node.value[1]}"
|
49
49
|
when /^only-child\(/
|
50
|
-
"last()
|
50
|
+
"last()=1"
|
51
51
|
when /^comment\(/
|
52
52
|
"comment()"
|
53
53
|
when /^has\(/
|
54
|
-
|
54
|
+
is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
|
55
|
+
".#{"//" if !is_direct}#{node.value[1].accept(self)}"
|
55
56
|
else
|
57
|
+
# non-standard. this looks like a function call.
|
56
58
|
args = ['.'] + node.value[1..-1]
|
57
|
-
"#{node.value.first}#{args.join(',
|
59
|
+
"#{node.value.first}#{args.join(',')})"
|
58
60
|
end
|
59
61
|
end
|
60
62
|
|
@@ -69,18 +71,18 @@ module Nokogiri
|
|
69
71
|
|
70
72
|
def visit_id node
|
71
73
|
node.value.first =~ /^#(.*)$/
|
72
|
-
"@id
|
74
|
+
"@id='#{$1}'"
|
73
75
|
end
|
74
76
|
|
75
77
|
def visit_attribute_condition node
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
78
|
+
attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
|
79
|
+
''
|
80
|
+
else
|
81
|
+
'@'
|
82
|
+
end
|
81
83
|
attribute += node.value.first.accept(self)
|
82
84
|
|
83
|
-
#
|
85
|
+
# non-standard. attributes starting with '@'
|
84
86
|
attribute.gsub!(/^@@/, '@')
|
85
87
|
|
86
88
|
return attribute unless node.value.length == 3
|
@@ -88,29 +90,30 @@ module Nokogiri
|
|
88
90
|
value = node.value.last
|
89
91
|
value = "'#{value}'" if value !~ /^['"]/
|
90
92
|
|
93
|
+
# quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
|
91
94
|
if (value[0]==value[-1]) && %q{"'}.include?(value[0])
|
92
95
|
str_value = value[1..-2]
|
93
96
|
if str_value.include?(value[0])
|
94
|
-
value = 'concat("' + str_value.split('"', -1).join(%q{",
|
97
|
+
value = 'concat("' + str_value.split('"', -1).join(%q{",'"',"}) + '","")'
|
95
98
|
end
|
96
99
|
end
|
97
100
|
|
98
101
|
case node.value[1]
|
99
102
|
when :equal
|
100
|
-
attribute + "
|
103
|
+
attribute + "=" + "#{value}"
|
101
104
|
when :not_equal
|
102
|
-
attribute + "
|
105
|
+
attribute + "!=" + "#{value}"
|
103
106
|
when :substring_match
|
104
|
-
"contains(#{attribute}
|
107
|
+
"contains(#{attribute},#{value})"
|
105
108
|
when :prefix_match
|
106
|
-
"starts-with(#{attribute}
|
109
|
+
"starts-with(#{attribute},#{value})"
|
107
110
|
when :dash_match
|
108
|
-
"#{attribute}
|
111
|
+
"#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
|
109
112
|
when :includes
|
110
|
-
|
113
|
+
value = value[1..-2] # strip quotes
|
114
|
+
css_class(attribute, value)
|
111
115
|
when :suffix_match
|
112
|
-
"substring(#{attribute},
|
113
|
-
"string-length(#{value}) + 1, string-length(#{value})) = #{value}"
|
116
|
+
"substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
|
114
117
|
else
|
115
118
|
attribute + " #{node.value[1]} " + "#{value}"
|
116
119
|
end
|
@@ -124,14 +127,14 @@ module Nokogiri
|
|
124
127
|
return self.send(msg, node) if self.respond_to?(msg)
|
125
128
|
|
126
129
|
case node.value.first
|
127
|
-
when "first" then "position()
|
128
|
-
when "first-child" then "count(preceding-sibling::*)
|
129
|
-
when "last" then "position()
|
130
|
-
when "last-child" then "count(following-sibling::*)
|
131
|
-
when "first-of-type" then "position()
|
132
|
-
when "last-of-type" then "position()
|
133
|
-
when "only-child" then "count(preceding-sibling::*)
|
134
|
-
when "only-of-type" then "last()
|
130
|
+
when "first" then "position()=1"
|
131
|
+
when "first-child" then "count(preceding-sibling::*)=0"
|
132
|
+
when "last" then "position()=last()"
|
133
|
+
when "last-child" then "count(following-sibling::*)=0"
|
134
|
+
when "first-of-type" then "position()=1"
|
135
|
+
when "last-of-type" then "position()=last()"
|
136
|
+
when "only-child" then "count(preceding-sibling::*)=0 and count(following-sibling::*)=0"
|
137
|
+
when "only-of-type" then "last()=1"
|
135
138
|
when "empty" then "not(node())"
|
136
139
|
when "parent" then "node()"
|
137
140
|
when "root" then "not(parent::*)"
|
@@ -142,7 +145,7 @@ module Nokogiri
|
|
142
145
|
end
|
143
146
|
|
144
147
|
def visit_class_condition node
|
145
|
-
"
|
148
|
+
css_class("@class", node.value.first)
|
146
149
|
end
|
147
150
|
|
148
151
|
def visit_combinator node
|
@@ -179,25 +182,26 @@ module Nokogiri
|
|
179
182
|
node.accept(self)
|
180
183
|
end
|
181
184
|
|
182
|
-
|
185
|
+
private
|
186
|
+
|
183
187
|
def nth node, options={}
|
184
188
|
raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
|
185
189
|
|
186
190
|
a, b = read_a_and_positive_b node.value
|
187
191
|
position = if options[:child]
|
188
|
-
options[:last] ? "(count(following-sibling::*)
|
192
|
+
options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
|
189
193
|
else
|
190
194
|
options[:last] ? "(last()-position()+1)" : "position()"
|
191
195
|
end
|
192
196
|
|
193
197
|
if b.zero?
|
194
|
-
"(#{position} mod #{a})
|
198
|
+
"(#{position} mod #{a})=0"
|
195
199
|
else
|
196
200
|
compare = a < 0 ? "<=" : ">="
|
197
201
|
if a.abs == 1
|
198
|
-
"#{position}
|
202
|
+
"#{position}#{compare}#{b}"
|
199
203
|
else
|
200
|
-
"(#{position}
|
204
|
+
"(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
|
201
205
|
end
|
202
206
|
end
|
203
207
|
end
|
@@ -225,6 +229,32 @@ module Nokogiri
|
|
225
229
|
end =~ /(nth|first|last|only)-of-type(\()?/
|
226
230
|
end
|
227
231
|
end
|
232
|
+
|
233
|
+
# use only ordinary xpath functions
|
234
|
+
def css_class_standard(hay, needle)
|
235
|
+
"contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
|
236
|
+
end
|
237
|
+
|
238
|
+
# use the builtin implementation
|
239
|
+
def css_class_builtin(hay, needle)
|
240
|
+
"nokogiri-builtin:css-class(#{hay},'#{needle}')"
|
241
|
+
end
|
242
|
+
|
243
|
+
alias_method :css_class, :css_class_standard
|
244
|
+
end
|
245
|
+
|
246
|
+
class XPathVisitorAlwaysUseBuiltins < XPathVisitor # :nodoc:
|
247
|
+
private
|
248
|
+
alias_method :css_class, :css_class_builtin
|
249
|
+
end
|
250
|
+
|
251
|
+
class XPathVisitorOptimallyUseBuiltins < XPathVisitor # :nodoc:
|
252
|
+
private
|
253
|
+
if Nokogiri.uses_libxml?
|
254
|
+
alias_method :css_class, :css_class_builtin
|
255
|
+
else
|
256
|
+
alias_method :css_class, :css_class_standard
|
257
|
+
end
|
228
258
|
end
|
229
259
|
end
|
230
260
|
end
|
data/lib/nokogiri/css.rb
CHANGED
@@ -1,27 +1,28 @@
|
|
1
|
-
|
2
|
-
require 'nokogiri/css/xpath_visitor'
|
3
|
-
x = $-w
|
4
|
-
$-w = false
|
5
|
-
require 'nokogiri/css/parser'
|
6
|
-
$-w = x
|
7
|
-
|
8
|
-
require 'nokogiri/css/tokenizer'
|
9
|
-
require 'nokogiri/css/syntax_error'
|
10
|
-
|
1
|
+
# frozen_string_literal: true
|
11
2
|
module Nokogiri
|
12
3
|
module CSS
|
13
4
|
class << self
|
14
5
|
###
|
15
6
|
# Parse this CSS selector in +selector+. Returns an AST.
|
16
|
-
def parse
|
17
|
-
Parser.new.parse
|
7
|
+
def parse(selector)
|
8
|
+
Parser.new.parse(selector)
|
18
9
|
end
|
19
10
|
|
20
11
|
###
|
21
12
|
# Get the XPath for +selector+.
|
22
|
-
def xpath_for
|
23
|
-
Parser.new(options[:ns] || {}).xpath_for
|
13
|
+
def xpath_for(selector, options = {})
|
14
|
+
Parser.new(options[:ns] || {}).xpath_for(selector, options)
|
24
15
|
end
|
25
16
|
end
|
26
17
|
end
|
27
18
|
end
|
19
|
+
|
20
|
+
require_relative "css/node"
|
21
|
+
require_relative "css/xpath_visitor"
|
22
|
+
x = $-w
|
23
|
+
$-w = false
|
24
|
+
require_relative "css/parser"
|
25
|
+
$-w = x
|
26
|
+
|
27
|
+
require_relative "css/tokenizer"
|
28
|
+
require_relative "css/syntax_error"
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# load the C or Java extension
|
4
|
+
begin
|
5
|
+
# native precompiled gems package shared libraries in <gem_dir>/lib/nokogiri/<ruby_version>
|
6
|
+
::RUBY_VERSION =~ /(\d+\.\d+)/
|
7
|
+
require_relative "#{Regexp.last_match(1)}/nokogiri"
|
8
|
+
rescue LoadError => e
|
9
|
+
if e.message =~ /GLIBC/
|
10
|
+
warn(<<~EOM)
|
11
|
+
|
12
|
+
ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system with glibc < 2.17:
|
13
|
+
|
14
|
+
#{e.message}
|
15
|
+
|
16
|
+
If that's the case, then please install Nokogiri via the `ruby` platform gem:
|
17
|
+
gem install nokogiri --platform=ruby
|
18
|
+
or:
|
19
|
+
bundle config set force_ruby_platform true
|
20
|
+
|
21
|
+
Please visit https://nokogiri.org/tutorials/installing_nokogiri.html for more help.
|
22
|
+
|
23
|
+
EOM
|
24
|
+
raise e
|
25
|
+
end
|
26
|
+
|
27
|
+
# use "require" instead of "require_relative" because non-native gems will place C extension files
|
28
|
+
# in Gem::BasicSpecification#extension_dir after compilation (during normal installation), which
|
29
|
+
# is in $LOAD_PATH but not necessarily relative to this file (see #2300)
|
30
|
+
require "nokogiri/nokogiri"
|
31
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Nokogiri
|
3
|
+
module Gumbo
|
4
|
+
# The default maximum number of attributes per element.
|
5
|
+
DEFAULT_MAX_ATTRIBUTES = 400
|
6
|
+
|
7
|
+
# The default maximum number of errors for parsing a document or a fragment.
|
8
|
+
DEFAULT_MAX_ERRORS = 0
|
9
|
+
|
10
|
+
# The default maximum depth of the DOM tree produced by parsing a document
|
11
|
+
# or fragment.
|
12
|
+
DEFAULT_MAX_TREE_DEPTH = 400
|
13
|
+
end
|
14
|
+
end
|
data/lib/nokogiri/html.rb
CHANGED
@@ -1,37 +1,42 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'nokogiri/html/document_fragment'
|
4
|
-
require 'nokogiri/html/sax/parser_context'
|
5
|
-
require 'nokogiri/html/sax/parser'
|
6
|
-
require 'nokogiri/html/sax/push_parser'
|
7
|
-
require 'nokogiri/html/element_description'
|
8
|
-
require 'nokogiri/html/element_description_defaults'
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require_relative "html4"
|
9
3
|
|
10
4
|
module Nokogiri
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
end
|
5
|
+
HTML = Nokogiri::HTML4
|
6
|
+
|
7
|
+
# @!method HTML(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
|
8
|
+
# Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
|
9
|
+
# @!scope class
|
10
|
+
define_singleton_method(:HTML, Nokogiri.method(:HTML4))
|
18
11
|
|
12
|
+
# @note This module/namespace is an alias for {Nokogiri::HTML4} as of v1.12.0. Before v1.12.0,
|
13
|
+
# {Nokogiri::HTML4} did not exist, and this was the module/namespace for all HTML-related
|
14
|
+
# classes.
|
19
15
|
module HTML
|
20
|
-
class
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
16
|
+
# @note This class is an alias for {Nokogiri::HTML4::Document} as of v1.12.0.
|
17
|
+
class Document < Nokogiri::XML::Document
|
18
|
+
end
|
19
|
+
|
20
|
+
# @note This class is an alias for {Nokogiri::HTML4::DocumentFragment} as of v1.12.0.
|
21
|
+
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
22
|
+
end
|
23
|
+
|
24
|
+
# @note This class is an alias for {Nokogiri::HTML4::Builder} as of v1.12.0.
|
25
|
+
class Builder < Nokogiri::XML::Builder
|
26
|
+
end
|
27
|
+
|
28
|
+
module SAX
|
29
|
+
# @note This class is an alias for {Nokogiri::HTML4::SAX::Parser} as of v1.12.0.
|
30
|
+
class Parser < Nokogiri::XML::SAX::Parser
|
25
31
|
end
|
26
32
|
|
27
|
-
|
28
|
-
|
29
|
-
def fragment string, encoding = nil
|
30
|
-
HTML::DocumentFragment.parse string, encoding
|
33
|
+
# @note This class is an alias for {Nokogiri::HTML4::SAX::ParserContext} as of v1.12.0.
|
34
|
+
class ParserContext < Nokogiri::XML::SAX::ParserContext
|
31
35
|
end
|
32
|
-
end
|
33
36
|
|
34
|
-
|
35
|
-
|
37
|
+
# @note This class is an alias for {Nokogiri::HTML4::SAX::PushParser} as of v1.12.0.
|
38
|
+
class PushParser
|
39
|
+
end
|
40
|
+
end
|
36
41
|
end
|
37
42
|
end
|
@@ -1,5 +1,6 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
|
-
module
|
3
|
+
module HTML4
|
3
4
|
###
|
4
5
|
# Nokogiri HTML builder is used for building HTML documents. It is very
|
5
6
|
# similar to the Nokogiri::XML::Builder. In fact, you should go read the
|
@@ -11,7 +12,7 @@ module Nokogiri
|
|
11
12
|
# Create an HTML document with a body that has an onload attribute, and a
|
12
13
|
# span tag with a class of "bold" that has content of "Hello world".
|
13
14
|
#
|
14
|
-
# builder = Nokogiri::
|
15
|
+
# builder = Nokogiri::HTML4::Builder.new do |doc|
|
15
16
|
# doc.html {
|
16
17
|
# doc.body(:onload => 'some_func();') {
|
17
18
|
# doc.span.bold {
|
@@ -1,5 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'pathname'
|
4
|
+
|
1
5
|
module Nokogiri
|
2
|
-
module
|
6
|
+
module HTML4
|
3
7
|
class Document < Nokogiri::XML::Document
|
4
8
|
###
|
5
9
|
# Get the meta tag encoding for this document. If there is no meta tag,
|
@@ -160,11 +164,12 @@ module Nokogiri
|
|
160
164
|
# Nokogiri::XML::ParseOptions::RECOVER. See the constants in
|
161
165
|
# Nokogiri::XML::ParseOptions.
|
162
166
|
def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
|
163
|
-
|
164
167
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
165
|
-
|
168
|
+
|
166
169
|
yield options if block_given?
|
167
170
|
|
171
|
+
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
172
|
+
|
168
173
|
if string_or_io.respond_to?(:encoding)
|
169
174
|
unless string_or_io.encoding.name == "ASCII-8BIT"
|
170
175
|
encoding ||= string_or_io.encoding.name
|
@@ -172,7 +177,12 @@ module Nokogiri
|
|
172
177
|
end
|
173
178
|
|
174
179
|
if string_or_io.respond_to?(:read)
|
175
|
-
|
180
|
+
if string_or_io.is_a?(Pathname)
|
181
|
+
# resolve the Pathname to the file and open it as an IO object, see #2110
|
182
|
+
string_or_io = string_or_io.expand_path.open
|
183
|
+
url ||= string_or_io.path
|
184
|
+
end
|
185
|
+
|
176
186
|
unless encoding
|
177
187
|
# Libxml2's parser has poor support for encoding
|
178
188
|
# detection. First, it does not recognize the HTML5
|
@@ -251,9 +261,6 @@ module Nokogiri
|
|
251
261
|
end
|
252
262
|
|
253
263
|
def self.detect_encoding(chunk)
|
254
|
-
if Nokogiri.jruby? && EncodingReader.is_jruby_without_fix?
|
255
|
-
return EncodingReader.detect_encoding_for_jruby_without_fix(chunk)
|
256
|
-
end
|
257
264
|
m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
|
258
265
|
return Nokogiri.XML(m[1]).encoding
|
259
266
|
|
@@ -261,37 +268,17 @@ module Nokogiri
|
|
261
268
|
m = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i) and
|
262
269
|
return m[4]
|
263
270
|
catch(:encoding_found) {
|
264
|
-
Nokogiri::
|
271
|
+
Nokogiri::HTML4::SAX::Parser.new(JumpSAXHandler.new(:encoding_found)).parse(chunk)
|
265
272
|
nil
|
266
273
|
}
|
267
274
|
else
|
268
275
|
handler = SAXHandler.new
|
269
|
-
parser = Nokogiri::
|
276
|
+
parser = Nokogiri::HTML4::SAX::PushParser.new(handler)
|
270
277
|
parser << chunk rescue Nokogiri::SyntaxError
|
271
278
|
handler.encoding
|
272
279
|
end
|
273
280
|
end
|
274
281
|
|
275
|
-
def self.is_jruby_without_fix?
|
276
|
-
JRUBY_VERSION.split('.').join.to_i < 165
|
277
|
-
end
|
278
|
-
|
279
|
-
def self.detect_encoding_for_jruby_without_fix(chunk)
|
280
|
-
m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
|
281
|
-
return Nokogiri.XML(m[1]).encoding
|
282
|
-
|
283
|
-
m = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i) and
|
284
|
-
return m[4]
|
285
|
-
|
286
|
-
catch(:encoding_found) {
|
287
|
-
Nokogiri::HTML::SAX::Parser.new(JumpSAXHandler.new(:encoding_found.to_s)).parse(chunk)
|
288
|
-
nil
|
289
|
-
}
|
290
|
-
rescue Nokogiri::SyntaxError, RuntimeError
|
291
|
-
# Ignore parser errors that nokogiri may raise
|
292
|
-
nil
|
293
|
-
end
|
294
|
-
|
295
282
|
def initialize(io)
|
296
283
|
@io = io
|
297
284
|
@firstchunk = nil
|
@@ -299,7 +286,7 @@ module Nokogiri
|
|
299
286
|
end
|
300
287
|
|
301
288
|
# This method is used by the C extension so that
|
302
|
-
# Nokogiri::
|
289
|
+
# Nokogiri::HTML4::Document#read_io() does not leak memory when
|
303
290
|
# EncodingFound is raised.
|
304
291
|
attr_reader :encoding_found
|
305
292
|
|
@@ -1,28 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
|
-
module
|
3
|
+
module HTML4
|
3
4
|
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
4
5
|
####
|
5
6
|
# Create a Nokogiri::XML::DocumentFragment from +tags+, using +encoding+
|
6
|
-
def self.parse
|
7
|
-
doc =
|
7
|
+
def self.parse(tags, encoding = nil)
|
8
|
+
doc = HTML4::Document.new
|
8
9
|
|
9
10
|
encoding ||= if tags.respond_to?(:encoding)
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
11
|
+
encoding = tags.encoding
|
12
|
+
if encoding == ::Encoding::ASCII_8BIT
|
13
|
+
'UTF-8'
|
14
|
+
else
|
15
|
+
encoding.name
|
16
|
+
end
|
17
|
+
else
|
18
|
+
'UTF-8'
|
19
|
+
end
|
19
20
|
|
20
21
|
doc.encoding = encoding
|
21
22
|
|
22
23
|
new(doc, tags)
|
23
24
|
end
|
24
25
|
|
25
|
-
def initialize
|
26
|
+
def initialize(document, tags = nil, ctx = nil)
|
26
27
|
return self unless tags
|
27
28
|
|
28
29
|
if ctx
|
@@ -32,13 +33,13 @@ module Nokogiri
|
|
32
33
|
self.errors = document.errors - preexisting_errors
|
33
34
|
else
|
34
35
|
# This is a horrible hack, but I don't care
|
35
|
-
|
36
|
-
|
36
|
+
path = if /^\s*?<body/i.match?(tags)
|
37
|
+
"/html/body"
|
37
38
|
else
|
38
|
-
|
39
|
+
"/html/body/node()"
|
39
40
|
end
|
40
41
|
|
41
|
-
temp_doc =
|
42
|
+
temp_doc = HTML4::Document.parse("<html><body>#{tags}", nil, document.encoding)
|
42
43
|
temp_doc.xpath(path).each { |child| child.parent = self }
|
43
44
|
self.errors = temp_doc.errors
|
44
45
|
end
|