nokogiri 1.10.7 → 1.16.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +42 -0
- data/LICENSE-DEPENDENCIES.md +1632 -1022
- data/LICENSE.md +1 -1
- data/README.md +188 -96
- data/bin/nokogiri +63 -50
- data/dependencies.yml +34 -66
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +862 -421
- data/ext/nokogiri/gumbo.c +594 -0
- data/ext/nokogiri/html4_document.c +165 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +108 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +251 -105
- data/ext/nokogiri/nokogiri.h +222 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +17 -17
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +39 -31
- data/ext/nokogiri/xml_comment.c +20 -27
- data/ext/nokogiri/xml_document.c +408 -243
- data/ext/nokogiri/xml_document_fragment.c +13 -17
- data/ext/nokogiri/xml_dtd.c +64 -58
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +131 -61
- data/ext/nokogiri/xml_node.c +1343 -674
- data/ext/nokogiri/xml_node_set.c +246 -216
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +305 -213
- data/ext/nokogiri/xml_relax_ng.c +87 -78
- data/ext/nokogiri/xml_sax_parser.c +149 -124
- data/ext/nokogiri/xml_sax_parser_context.c +149 -103
- data/ext/nokogiri/xml_sax_push_parser.c +65 -37
- data/ext/nokogiri/xml_schema.c +138 -82
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +35 -26
- data/ext/nokogiri/xml_xpath_context.c +363 -178
- data/ext/nokogiri/xslt_stylesheet.c +335 -189
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +126 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +630 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
- data/gumbo-parser/src/parser.c +4891 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3464 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -8
- data/lib/nokogiri/css/parser.rb +397 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +54 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +5 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +205 -96
- data/lib/nokogiri/css.rb +56 -17
- data/lib/nokogiri/decorators/slop.rb +9 -7
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +214 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +47 -0
- data/lib/nokogiri/html5/document.rb +168 -0
- data/lib/nokogiri/html5/document_fragment.rb +90 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +326 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +75 -34
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +312 -127
- data/lib/nokogiri/xml/document_fragment.rb +93 -48
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1096 -419
- data/lib/nokogiri/xml/node_set.rb +137 -61
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +42 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +21 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +39 -36
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +120 -72
- data/lib/nokogiri/xml/syntax_error.rb +7 -5
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +39 -38
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +101 -22
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +29 -25
- data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.12.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +121 -291
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,3 +1,6 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
1
4
|
module Nokogiri
|
2
5
|
module XML
|
3
6
|
####
|
@@ -11,10 +14,10 @@ module Nokogiri
|
|
11
14
|
# The Document this NodeSet is associated with
|
12
15
|
attr_accessor :document
|
13
16
|
|
14
|
-
|
17
|
+
alias_method :clone, :dup
|
15
18
|
|
16
19
|
# Create a NodeSet associated with +document+, containing the nodes in +list+ (empty by default)
|
17
|
-
def initialize
|
20
|
+
def initialize(document, list = [])
|
18
21
|
@document = document
|
19
22
|
document.decorate(self)
|
20
23
|
list.each { |x| self << x }
|
@@ -23,8 +26,9 @@ module Nokogiri
|
|
23
26
|
|
24
27
|
###
|
25
28
|
# Get the first element of the NodeSet.
|
26
|
-
def first
|
29
|
+
def first(n = nil)
|
27
30
|
return self[0] unless n
|
31
|
+
|
28
32
|
list = []
|
29
33
|
[n, length].min.times { |i| list << self[i] }
|
30
34
|
list
|
@@ -46,7 +50,7 @@ module Nokogiri
|
|
46
50
|
# Returns the index of the first node in self that is == to +node+ or meets the given block. Returns nil if no match is found.
|
47
51
|
def index(node = nil)
|
48
52
|
if node
|
49
|
-
warn
|
53
|
+
warn("given block not used") if block_given?
|
50
54
|
each_with_index { |member, j| return j if member == node }
|
51
55
|
elsif block_given?
|
52
56
|
each_with_index { |member, j| return j if yield(member) }
|
@@ -56,18 +60,18 @@ module Nokogiri
|
|
56
60
|
|
57
61
|
###
|
58
62
|
# Insert +datum+ before the first Node in this NodeSet
|
59
|
-
def before
|
60
|
-
first.before
|
63
|
+
def before(datum)
|
64
|
+
first.before(datum)
|
61
65
|
end
|
62
66
|
|
63
67
|
###
|
64
68
|
# Insert +datum+ after the last Node in this NodeSet
|
65
|
-
def after
|
66
|
-
last.after
|
69
|
+
def after(datum)
|
70
|
+
last.after(datum)
|
67
71
|
end
|
68
72
|
|
69
|
-
|
70
|
-
|
73
|
+
alias_method :<<, :push
|
74
|
+
alias_method :remove, :unlink
|
71
75
|
|
72
76
|
###
|
73
77
|
# call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
|
@@ -76,7 +80,7 @@ module Nokogiri
|
|
76
80
|
# selectors. For example:
|
77
81
|
#
|
78
82
|
# For more information see Nokogiri::XML::Searchable#css
|
79
|
-
def css
|
83
|
+
def css(*args)
|
80
84
|
rules, handler, ns, _ = extract_params(args)
|
81
85
|
paths = css_rules_to_xpath(rules, ns)
|
82
86
|
|
@@ -92,7 +96,7 @@ module Nokogiri
|
|
92
96
|
# queries.
|
93
97
|
#
|
94
98
|
# For more information see Nokogiri::XML::Searchable#xpath
|
95
|
-
def xpath
|
99
|
+
def xpath(*args)
|
96
100
|
paths, handler, ns, binds = extract_params(args)
|
97
101
|
|
98
102
|
inject(NodeSet.new(document)) do |set, node|
|
@@ -100,13 +104,6 @@ module Nokogiri
|
|
100
104
|
end
|
101
105
|
end
|
102
106
|
|
103
|
-
###
|
104
|
-
# Search this NodeSet's nodes' immediate children using CSS selector +selector+
|
105
|
-
def > selector
|
106
|
-
ns = document.root.namespaces
|
107
|
-
xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
|
108
|
-
end
|
109
|
-
|
110
107
|
###
|
111
108
|
# call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
|
112
109
|
#
|
@@ -119,18 +116,18 @@ module Nokogiri
|
|
119
116
|
#
|
120
117
|
# node_set.at(3) # same as node_set[3]
|
121
118
|
#
|
122
|
-
def at
|
119
|
+
def at(*args)
|
123
120
|
if args.length == 1 && args.first.is_a?(Numeric)
|
124
121
|
return self[args.first]
|
125
122
|
end
|
126
123
|
|
127
124
|
super(*args)
|
128
125
|
end
|
129
|
-
|
126
|
+
alias_method :%, :at
|
130
127
|
|
131
128
|
###
|
132
129
|
# Filter this list for nodes that match +expr+
|
133
|
-
def filter
|
130
|
+
def filter(expr)
|
134
131
|
find_all { |node| node.matches?(expr) }
|
135
132
|
end
|
136
133
|
|
@@ -139,7 +136,7 @@ module Nokogiri
|
|
139
136
|
# NodeSet.
|
140
137
|
#
|
141
138
|
# See Nokogiri::XML::Node#add_class for more information.
|
142
|
-
def add_class
|
139
|
+
def add_class(name)
|
143
140
|
each do |el|
|
144
141
|
el.add_class(name)
|
145
142
|
end
|
@@ -151,7 +148,7 @@ module Nokogiri
|
|
151
148
|
# NodeSet.
|
152
149
|
#
|
153
150
|
# See Nokogiri::XML::Node#append_class for more information.
|
154
|
-
def append_class
|
151
|
+
def append_class(name)
|
155
152
|
each do |el|
|
156
153
|
el.append_class(name)
|
157
154
|
end
|
@@ -163,7 +160,7 @@ module Nokogiri
|
|
163
160
|
# NodeSet.
|
164
161
|
#
|
165
162
|
# See Nokogiri::XML::Node#remove_class for more information.
|
166
|
-
def remove_class
|
163
|
+
def remove_class(name = nil)
|
167
164
|
each do |el|
|
168
165
|
el.remove_class(name)
|
169
166
|
end
|
@@ -203,31 +200,31 @@ module Nokogiri
|
|
203
200
|
#
|
204
201
|
# node_set.attr("class") { |node| node.name }
|
205
202
|
#
|
206
|
-
def attr
|
203
|
+
def attr(key, value = nil, &block)
|
207
204
|
unless key.is_a?(Hash) || (key && (value || block))
|
208
|
-
return first
|
205
|
+
return first&.attribute(key)
|
209
206
|
end
|
210
207
|
|
211
208
|
hash = key.is_a?(Hash) ? key : { key => value }
|
212
209
|
|
213
|
-
hash.each do |k,v|
|
210
|
+
hash.each do |k, v|
|
214
211
|
each do |node|
|
215
|
-
node[k] = v ||
|
212
|
+
node[k] = v || yield(node)
|
216
213
|
end
|
217
214
|
end
|
218
215
|
|
219
216
|
self
|
220
217
|
end
|
221
|
-
|
222
|
-
|
218
|
+
alias_method :set, :attr
|
219
|
+
alias_method :attribute, :attr
|
223
220
|
|
224
221
|
###
|
225
222
|
# Remove the attribute named +name+ from all Node objects in the NodeSet
|
226
|
-
def remove_attr
|
227
|
-
each { |el| el.delete
|
223
|
+
def remove_attr(name)
|
224
|
+
each { |el| el.delete(name) }
|
228
225
|
self
|
229
226
|
end
|
230
|
-
|
227
|
+
alias_method :remove_attribute, :remove_attr
|
231
228
|
|
232
229
|
###
|
233
230
|
# Iterate over each node, yielding to +block+
|
@@ -254,20 +251,83 @@ module Nokogiri
|
|
254
251
|
#
|
255
252
|
# See Nokogiri::XML::Node#content for more information.
|
256
253
|
def inner_text
|
257
|
-
collect(&:inner_text).join(
|
254
|
+
collect(&:inner_text).join("")
|
258
255
|
end
|
259
|
-
|
256
|
+
alias_method :text, :inner_text
|
260
257
|
|
261
258
|
###
|
262
259
|
# Get the inner html of all contained Node objects
|
263
|
-
def inner_html
|
264
|
-
collect{|j| j.inner_html(*args) }.join(
|
260
|
+
def inner_html(*args)
|
261
|
+
collect { |j| j.inner_html(*args) }.join("")
|
265
262
|
end
|
266
263
|
|
267
|
-
|
268
|
-
#
|
269
|
-
|
270
|
-
|
264
|
+
# :call-seq:
|
265
|
+
# wrap(markup) -> self
|
266
|
+
# wrap(node) -> self
|
267
|
+
#
|
268
|
+
# Wrap each member of this NodeSet with the node parsed from +markup+ or a dup of the +node+.
|
269
|
+
#
|
270
|
+
# [Parameters]
|
271
|
+
# - *markup* (String)
|
272
|
+
# Markup that is parsed, once per member of the NodeSet, and used as the wrapper. Each
|
273
|
+
# node's parent, if it exists, is used as the context node for parsing; otherwise the
|
274
|
+
# associated document is used. If the parsed fragment has multiple roots, the first root
|
275
|
+
# node is used as the wrapper.
|
276
|
+
# - *node* (Nokogiri::XML::Node)
|
277
|
+
# An element that is `#dup`ed and used as the wrapper.
|
278
|
+
#
|
279
|
+
# [Returns] +self+, to support chaining.
|
280
|
+
#
|
281
|
+
# ⚠ Note that if a +String+ is passed, the markup will be parsed <b>once per node</b> in the
|
282
|
+
# NodeSet. You can avoid this overhead in cases where you know exactly the wrapper you wish to
|
283
|
+
# use by passing a +Node+ instead.
|
284
|
+
#
|
285
|
+
# Also see Node#wrap
|
286
|
+
#
|
287
|
+
# *Example* with a +String+ argument:
|
288
|
+
#
|
289
|
+
# doc = Nokogiri::HTML5(<<~HTML)
|
290
|
+
# <html><body>
|
291
|
+
# <a>a</a>
|
292
|
+
# <a>b</a>
|
293
|
+
# <a>c</a>
|
294
|
+
# <a>d</a>
|
295
|
+
# </body></html>
|
296
|
+
# HTML
|
297
|
+
# doc.css("a").wrap("<div></div>")
|
298
|
+
# doc.to_html
|
299
|
+
# # => <html><head></head><body>
|
300
|
+
# # <div><a>a</a></div>
|
301
|
+
# # <div><a>b</a></div>
|
302
|
+
# # <div><a>c</a></div>
|
303
|
+
# # <div><a>d</a></div>
|
304
|
+
# # </body></html>
|
305
|
+
#
|
306
|
+
# *Example* with a +Node+ argument
|
307
|
+
#
|
308
|
+
# 💡 Note that this is faster than the equivalent call passing a +String+ because it avoids
|
309
|
+
# having to reparse the wrapper markup for each node.
|
310
|
+
#
|
311
|
+
# doc = Nokogiri::HTML5(<<~HTML)
|
312
|
+
# <html><body>
|
313
|
+
# <a>a</a>
|
314
|
+
# <a>b</a>
|
315
|
+
# <a>c</a>
|
316
|
+
# <a>d</a>
|
317
|
+
# </body></html>
|
318
|
+
# HTML
|
319
|
+
# doc.css("a").wrap(doc.create_element("div"))
|
320
|
+
# doc.to_html
|
321
|
+
# # => <html><head></head><body>
|
322
|
+
# # <div><a>a</a></div>
|
323
|
+
# # <div><a>b</a></div>
|
324
|
+
# # <div><a>c</a></div>
|
325
|
+
# # <div><a>d</a></div>
|
326
|
+
# # </body></html>
|
327
|
+
#
|
328
|
+
def wrap(node_or_tags)
|
329
|
+
map { |node| node.wrap(node_or_tags) }
|
330
|
+
self
|
271
331
|
end
|
272
332
|
|
273
333
|
###
|
@@ -278,55 +338,62 @@ module Nokogiri
|
|
278
338
|
|
279
339
|
###
|
280
340
|
# Convert this NodeSet to HTML
|
281
|
-
def to_html
|
341
|
+
def to_html(*args)
|
282
342
|
if Nokogiri.jruby?
|
283
343
|
options = args.first.is_a?(Hash) ? args.shift : {}
|
284
|
-
|
285
|
-
options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_HTML
|
286
|
-
end
|
344
|
+
options[:save_with] ||= Node::SaveOptions::DEFAULT_HTML
|
287
345
|
args.insert(0, options)
|
288
346
|
end
|
289
|
-
|
347
|
+
if empty?
|
348
|
+
encoding = (args.first.is_a?(Hash) ? args.first[:encoding] : nil)
|
349
|
+
encoding ||= document.encoding
|
350
|
+
encoding.nil? ? "" : "".encode(encoding)
|
351
|
+
else
|
352
|
+
map { |x| x.to_html(*args) }.join
|
353
|
+
end
|
290
354
|
end
|
291
355
|
|
292
356
|
###
|
293
357
|
# Convert this NodeSet to XHTML
|
294
|
-
def to_xhtml
|
358
|
+
def to_xhtml(*args)
|
295
359
|
map { |x| x.to_xhtml(*args) }.join
|
296
360
|
end
|
297
361
|
|
298
362
|
###
|
299
363
|
# Convert this NodeSet to XML
|
300
|
-
def to_xml
|
364
|
+
def to_xml(*args)
|
301
365
|
map { |x| x.to_xml(*args) }.join
|
302
366
|
end
|
303
367
|
|
304
|
-
|
305
|
-
|
368
|
+
alias_method :size, :length
|
369
|
+
alias_method :to_ary, :to_a
|
306
370
|
|
307
371
|
###
|
308
372
|
# Removes the last element from set and returns it, or +nil+ if
|
309
373
|
# the set is empty
|
310
374
|
def pop
|
311
|
-
return
|
312
|
-
|
375
|
+
return if length == 0
|
376
|
+
|
377
|
+
delete(last)
|
313
378
|
end
|
314
379
|
|
315
380
|
###
|
316
381
|
# Returns the first element of the NodeSet and removes it. Returns
|
317
382
|
# +nil+ if the set is empty.
|
318
383
|
def shift
|
319
|
-
return
|
320
|
-
|
384
|
+
return if length == 0
|
385
|
+
|
386
|
+
delete(first)
|
321
387
|
end
|
322
388
|
|
323
389
|
###
|
324
390
|
# Equality -- Two NodeSets are equal if they contain the same number
|
325
391
|
# of elements and if each element is equal to the corresponding
|
326
392
|
# element in the other NodeSet
|
327
|
-
def ==
|
393
|
+
def ==(other)
|
328
394
|
return false unless other.is_a?(Nokogiri::XML::NodeSet)
|
329
395
|
return false unless length == other.length
|
396
|
+
|
330
397
|
each_with_index do |node, i|
|
331
398
|
return false unless node == other[i]
|
332
399
|
end
|
@@ -350,7 +417,7 @@ module Nokogiri
|
|
350
417
|
def reverse
|
351
418
|
node_set = NodeSet.new(document)
|
352
419
|
(length - 1).downto(0) do |x|
|
353
|
-
node_set.push
|
420
|
+
node_set.push(self[x])
|
354
421
|
end
|
355
422
|
node_set
|
356
423
|
end
|
@@ -358,14 +425,23 @@ module Nokogiri
|
|
358
425
|
###
|
359
426
|
# Return a nicely formatted string representation
|
360
427
|
def inspect
|
361
|
-
"[#{map(&:inspect).join
|
428
|
+
"[#{map(&:inspect).join(", ")}]"
|
362
429
|
end
|
363
430
|
|
364
|
-
|
431
|
+
alias_method :+, :|
|
365
432
|
|
366
|
-
#
|
367
|
-
|
433
|
+
#
|
434
|
+
# :call-seq: deconstruct() → Array
|
435
|
+
#
|
436
|
+
# Returns the members of this NodeSet as an array, to use in pattern matching.
|
437
|
+
#
|
438
|
+
# Since v1.14.0
|
439
|
+
#
|
440
|
+
def deconstruct
|
441
|
+
to_a
|
442
|
+
end
|
368
443
|
|
444
|
+
IMPLIED_XPATH_CONTEXTS = [".//", "self::"].freeze # :nodoc:
|
369
445
|
end
|
370
446
|
end
|
371
447
|
end
|
@@ -1,6 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Nokogiri
|
2
4
|
module XML
|
5
|
+
# Struct representing an {XML Schema Notation}[https://www.w3.org/TR/xml/#Notations]
|
3
6
|
class Notation < Struct.new(:name, :public_id, :system_id)
|
7
|
+
# dead comment to ensure rdoc processing
|
8
|
+
|
9
|
+
# :attr: name (String)
|
10
|
+
# The name for the element.
|
11
|
+
|
12
|
+
# :attr: public_id (String)
|
13
|
+
# The URI corresponding to the public identifier
|
14
|
+
|
15
|
+
# :attr: system_id (String,nil)
|
16
|
+
# The URI corresponding to the system identifier
|
4
17
|
end
|
5
18
|
end
|
6
19
|
end
|
@@ -1,86 +1,175 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
1
4
|
module Nokogiri
|
2
5
|
module XML
|
3
|
-
|
4
|
-
#
|
5
|
-
#
|
6
|
-
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
# You can
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
6
|
+
# Options that control the parsing behavior for XML::Document, XML::DocumentFragment,
|
7
|
+
# HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
8
|
+
#
|
9
|
+
# These options directly expose libxml2's parse options, which are all boolean in the sense that
|
10
|
+
# an option is "on" or "off".
|
11
|
+
#
|
12
|
+
# 💡 Note that HTML5 parsing has a separate, orthogonal set of options due to the nature of the
|
13
|
+
# HTML5 specification. See Nokogiri::HTML5.
|
14
|
+
#
|
15
|
+
# ⚠ Not all parse options are supported on JRuby. Nokogiri will attempt to invoke the equivalent
|
16
|
+
# behavior in Xerces/NekoHTML on JRuby when it's possible.
|
17
|
+
#
|
18
|
+
# == Setting and unsetting parse options
|
19
|
+
#
|
20
|
+
# You can build your own combinations of parse options by using any of the following methods:
|
21
|
+
#
|
22
|
+
# [ParseOptions method chaining]
|
23
|
+
#
|
24
|
+
# Every option has an equivalent method in lowercase. You can chain these methods together to
|
25
|
+
# set various combinations.
|
26
|
+
#
|
27
|
+
# # Set the HUGE & PEDANTIC options
|
28
|
+
# po = Nokogiri::XML::ParseOptions.new.huge.pedantic
|
29
|
+
# doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
|
30
|
+
#
|
31
|
+
# Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these
|
32
|
+
# methods on an instance of ParseOptions to unset the option.
|
33
|
+
#
|
34
|
+
# # Set the HUGE & PEDANTIC options
|
35
|
+
# po = Nokogiri::XML::ParseOptions.new.huge.pedantic
|
36
|
+
#
|
37
|
+
# # later we want to modify the options
|
38
|
+
# po.nohuge # Unset the HUGE option
|
39
|
+
# po.nopedantic # Unset the PEDANTIC option
|
40
|
+
#
|
41
|
+
# 💡 Note that some options begin with "no" leading to the logical but perhaps unintuitive
|
42
|
+
# double negative:
|
43
|
+
#
|
44
|
+
# po.nocdata # Set the NOCDATA parse option
|
45
|
+
# po.nonocdata # Unset the NOCDATA parse option
|
46
|
+
#
|
47
|
+
# 💡 Note that negation is not available for STRICT, which is itself a negation of all other
|
48
|
+
# features.
|
49
|
+
#
|
50
|
+
#
|
51
|
+
# [Using Ruby Blocks]
|
52
|
+
#
|
53
|
+
# Most parsing methods will accept a block for configuration of parse options, and we
|
54
|
+
# recommend chaining the setter methods:
|
55
|
+
#
|
56
|
+
# doc = Nokogiri::XML::Document.parse(xml) { |config| config.huge.pedantic }
|
57
|
+
#
|
58
|
+
#
|
59
|
+
# [ParseOptions constants]
|
60
|
+
#
|
61
|
+
# You can also use the constants declared under Nokogiri::XML::ParseOptions to set various
|
62
|
+
# combinations. They are bits in a bitmask, and so can be combined with bitwise operators:
|
63
|
+
#
|
64
|
+
# po = Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::HUGE | Nokogiri::XML::ParseOptions::PEDANTIC)
|
65
|
+
# doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
|
26
66
|
#
|
27
67
|
class ParseOptions
|
28
68
|
# Strict parsing
|
29
69
|
STRICT = 0
|
30
|
-
|
70
|
+
|
71
|
+
# Recover from errors. On by default for XML::Document, XML::DocumentFragment,
|
72
|
+
# HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
31
73
|
RECOVER = 1 << 0
|
32
|
-
|
74
|
+
|
75
|
+
# Substitute entities. Off by default.
|
76
|
+
#
|
77
|
+
# ⚠ This option enables entity substitution, contrary to what the name implies.
|
78
|
+
#
|
79
|
+
# ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
|
33
80
|
NOENT = 1 << 1
|
34
|
-
|
81
|
+
|
82
|
+
# Load external subsets. On by default for XSLT::Stylesheet.
|
83
|
+
#
|
84
|
+
# ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
|
35
85
|
DTDLOAD = 1 << 2
|
36
|
-
|
86
|
+
|
87
|
+
# Default DTD attributes. On by default for XSLT::Stylesheet.
|
37
88
|
DTDATTR = 1 << 3
|
38
|
-
|
89
|
+
|
90
|
+
# Validate with the DTD. Off by default.
|
39
91
|
DTDVALID = 1 << 4
|
40
|
-
|
92
|
+
|
93
|
+
# Suppress error reports. On by default for HTML4::Document and HTML4::DocumentFragment
|
41
94
|
NOERROR = 1 << 5
|
42
|
-
|
95
|
+
|
96
|
+
# Suppress warning reports. On by default for HTML4::Document and HTML4::DocumentFragment
|
43
97
|
NOWARNING = 1 << 6
|
44
|
-
|
98
|
+
|
99
|
+
# Enable pedantic error reporting. Off by default.
|
45
100
|
PEDANTIC = 1 << 7
|
46
|
-
|
101
|
+
|
102
|
+
# Remove blank nodes. Off by default.
|
47
103
|
NOBLANKS = 1 << 8
|
48
|
-
|
104
|
+
|
105
|
+
# Use the SAX1 interface internally. Off by default.
|
49
106
|
SAX1 = 1 << 9
|
50
|
-
|
107
|
+
|
108
|
+
# Implement XInclude substitution. Off by default.
|
51
109
|
XINCLUDE = 1 << 10
|
52
|
-
|
110
|
+
|
111
|
+
# Forbid network access. On by default for XML::Document, XML::DocumentFragment,
|
112
|
+
# HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
113
|
+
#
|
114
|
+
# ⚠ <b>It is UNSAFE to unset this option</b> when parsing untrusted documents.
|
53
115
|
NONET = 1 << 11
|
54
|
-
|
116
|
+
|
117
|
+
# Do not reuse the context dictionary. Off by default.
|
55
118
|
NODICT = 1 << 12
|
56
|
-
|
119
|
+
|
120
|
+
# Remove redundant namespace declarations. Off by default.
|
57
121
|
NSCLEAN = 1 << 13
|
58
|
-
|
122
|
+
|
123
|
+
# Merge CDATA as text nodes. On by default for XSLT::Stylesheet.
|
59
124
|
NOCDATA = 1 << 14
|
60
|
-
|
125
|
+
|
126
|
+
# Do not generate XInclude START/END nodes. Off by default.
|
61
127
|
NOXINCNODE = 1 << 15
|
62
|
-
|
128
|
+
|
129
|
+
# Compact small text nodes. Off by default.
|
130
|
+
#
|
131
|
+
# ⚠ No modification of the DOM tree is allowed after parsing. libxml2 may crash if you try to
|
132
|
+
# modify the tree.
|
63
133
|
COMPACT = 1 << 16
|
64
|
-
|
134
|
+
|
135
|
+
# Parse using XML-1.0 before update 5. Off by default
|
65
136
|
OLD10 = 1 << 17
|
66
|
-
|
137
|
+
|
138
|
+
# Do not fixup XInclude xml:base uris. Off by default
|
67
139
|
NOBASEFIX = 1 << 18
|
68
|
-
|
140
|
+
|
141
|
+
# Relax any hardcoded limit from the parser. Off by default.
|
142
|
+
#
|
143
|
+
# ⚠ There may be a performance penalty when this option is set.
|
69
144
|
HUGE = 1 << 19
|
70
145
|
|
71
|
-
#
|
72
|
-
|
73
|
-
#
|
74
|
-
|
146
|
+
# Support line numbers up to <code>long int</code> (default is a <code>short int</code>). On
|
147
|
+
# by default for XML::Document, XML::DocumentFragment, HTML4::Document,
|
148
|
+
# HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
149
|
+
BIG_LINES = 1 << 22
|
150
|
+
|
151
|
+
# The options mask used by default for parsing XML::Document and XML::DocumentFragment
|
152
|
+
DEFAULT_XML = RECOVER | NONET | BIG_LINES
|
153
|
+
|
154
|
+
# The options mask used by default for parsing XSLT::Stylesheet
|
155
|
+
DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA | BIG_LINES
|
156
|
+
|
157
|
+
# The options mask used by default for parsing HTML4::Document and HTML4::DocumentFragment
|
158
|
+
DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET | BIG_LINES
|
159
|
+
|
160
|
+
# The options mask used by default for parsing XML::Schema
|
161
|
+
DEFAULT_SCHEMA = NONET | BIG_LINES
|
75
162
|
|
76
163
|
attr_accessor :options
|
77
|
-
|
164
|
+
|
165
|
+
def initialize(options = STRICT)
|
78
166
|
@options = options
|
79
167
|
end
|
80
168
|
|
81
169
|
constants.each do |constant|
|
82
170
|
next if constant.to_sym == :STRICT
|
83
|
-
|
171
|
+
|
172
|
+
class_eval <<~RUBY, __FILE__, __LINE__ + 1
|
84
173
|
def #{constant.downcase}
|
85
174
|
@options |= #{constant}
|
86
175
|
self
|
@@ -94,7 +183,7 @@ module Nokogiri
|
|
94
183
|
def #{constant.downcase}?
|
95
184
|
#{constant} & @options == #{constant}
|
96
185
|
end
|
97
|
-
|
186
|
+
RUBY
|
98
187
|
end
|
99
188
|
|
100
189
|
def strict
|
@@ -106,14 +195,18 @@ module Nokogiri
|
|
106
195
|
@options & RECOVER == STRICT
|
107
196
|
end
|
108
197
|
|
109
|
-
|
198
|
+
def ==(other)
|
199
|
+
other.to_i == to_i
|
200
|
+
end
|
201
|
+
|
202
|
+
alias_method :to_i, :options
|
110
203
|
|
111
204
|
def inspect
|
112
205
|
options = []
|
113
206
|
self.class.constants.each do |k|
|
114
207
|
options << k.downcase if send(:"#{k.downcase}?")
|
115
208
|
end
|
116
|
-
super.sub(/>$/, " " + options.join(
|
209
|
+
super.sub(/>$/, " " + options.join(", ") + ">")
|
117
210
|
end
|
118
211
|
end
|
119
212
|
end
|