nokogiri 1.10.9 → 1.18.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +1632 -1022
- data/LICENSE.md +1 -1
- data/README.md +190 -95
- data/bin/nokogiri +63 -50
- data/dependencies.yml +34 -66
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +909 -422
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +258 -105
- data/ext/nokogiri/nokogiri.h +207 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +18 -18
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +33 -33
- data/ext/nokogiri/xml_comment.c +19 -31
- data/ext/nokogiri/xml_document.c +499 -323
- data/ext/nokogiri/xml_document_fragment.c +17 -36
- data/ext/nokogiri/xml_dtd.c +65 -59
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +131 -61
- data/ext/nokogiri/xml_node.c +1429 -723
- data/ext/nokogiri/xml_node_set.c +257 -225
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +340 -231
- data/ext/nokogiri/xml_relax_ng.c +87 -99
- data/ext/nokogiri/xml_sax_parser.c +269 -176
- data/ext/nokogiri/xml_sax_parser_context.c +286 -152
- data/ext/nokogiri/xml_sax_push_parser.c +111 -64
- data/ext/nokogiri/xml_schema.c +132 -140
- data/ext/nokogiri/xml_syntax_error.c +52 -23
- data/ext/nokogiri/xml_text.c +37 -30
- data/ext/nokogiri/xml_xpath_context.c +373 -185
- data/ext/nokogiri/xslt_stylesheet.c +342 -191
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +658 -0
- data/gumbo-parser/src/error.h +152 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
- data/gumbo-parser/src/parser.c +4932 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3464 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +14 -8
- data/lib/nokogiri/css/parser.rb +399 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +16 -71
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +7 -5
- data/lib/nokogiri/css/tokenizer.rex +11 -9
- data/lib/nokogiri/css/xpath_visitor.rb +242 -96
- data/lib/nokogiri/css.rb +122 -17
- data/lib/nokogiri/decorators/slop.rb +11 -11
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +83 -35
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +359 -130
- data/lib/nokogiri/xml/document_fragment.rb +170 -54
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1168 -420
- data/lib/nokogiri/xml/node_set.rb +145 -67
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +47 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +68 -41
- data/lib/nokogiri/xml/relax_ng.rb +60 -17
- data/lib/nokogiri/xml/sax/document.rb +198 -111
- data/lib/nokogiri/xml/sax/parser.rb +144 -67
- data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
- data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
- data/lib/nokogiri/xml/sax.rb +54 -4
- data/lib/nokogiri/xml/schema.rb +116 -39
- data/lib/nokogiri/xml/searchable.rb +139 -95
- data/lib/nokogiri/xml/syntax_error.rb +29 -5
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +15 -4
- data/lib/nokogiri/xml.rb +45 -55
- data/lib/nokogiri/xslt/stylesheet.rb +32 -8
- data/lib/nokogiri/xslt.rb +103 -30
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +32 -29
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +123 -295
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser.rb +0 -62
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,9 +1,16 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
1
4
|
module Nokogiri
|
2
5
|
module XML
|
3
6
|
####
|
4
|
-
# A NodeSet contains a list of Nokogiri::XML::Node objects.
|
5
|
-
#
|
6
|
-
#
|
7
|
+
# A NodeSet is an Enumerable that contains a list of Nokogiri::XML::Node objects.
|
8
|
+
#
|
9
|
+
# Typically a NodeSet is returned as a result of searching a Document via
|
10
|
+
# Nokogiri::XML::Searchable#css or Nokogiri::XML::Searchable#xpath.
|
11
|
+
#
|
12
|
+
# Note that the `#dup` and `#clone` methods perform shallow copies; these methods do not copy
|
13
|
+
# the Nodes contained in the NodeSet (similar to how Array and other Enumerable classes work).
|
7
14
|
class NodeSet
|
8
15
|
include Nokogiri::XML::Searchable
|
9
16
|
include Enumerable
|
@@ -11,10 +18,8 @@ module Nokogiri
|
|
11
18
|
# The Document this NodeSet is associated with
|
12
19
|
attr_accessor :document
|
13
20
|
|
14
|
-
alias :clone :dup
|
15
|
-
|
16
21
|
# Create a NodeSet associated with +document+, populated with the nodes in +list+ (empty by default)
|
17
|
-
def initialize
|
22
|
+
def initialize(document, list = [])
|
18
23
|
@document = document
|
19
24
|
document.decorate(self)
|
20
25
|
list.each { |x| self << x }
|
@@ -23,8 +28,9 @@ module Nokogiri
|
|
23
28
|
|
24
29
|
###
|
25
30
|
# Get the first element of the NodeSet.
|
26
|
-
def first
|
31
|
+
def first(n = nil)
|
27
32
|
return self[0] unless n
|
33
|
+
|
28
34
|
list = []
|
29
35
|
[n, length].min.times { |i| list << self[i] }
|
30
36
|
list
|
@@ -46,7 +52,7 @@ module Nokogiri
|
|
46
52
|
# Returns the index of the first node in self that is == to +node+ or meets the given block. Returns nil if no match is found.
|
47
53
|
def index(node = nil)
|
48
54
|
if node
|
49
|
-
warn
|
55
|
+
warn("given block not used") if block_given?
|
50
56
|
each_with_index { |member, j| return j if member == node }
|
51
57
|
elsif block_given?
|
52
58
|
each_with_index { |member, j| return j if yield(member) }
|
@@ -56,18 +62,18 @@ module Nokogiri
|
|
56
62
|
|
57
63
|
###
|
58
64
|
# Insert +datum+ before the first Node in this NodeSet
|
59
|
-
def before
|
60
|
-
first.before
|
65
|
+
def before(datum)
|
66
|
+
first.before(datum)
|
61
67
|
end
|
62
68
|
|
63
69
|
###
|
64
70
|
# Insert +datum+ after the last Node in this NodeSet
|
65
|
-
def after
|
66
|
-
last.after
|
71
|
+
def after(datum)
|
72
|
+
last.after(datum)
|
67
73
|
end
|
68
74
|
|
69
|
-
|
70
|
-
|
75
|
+
alias_method :<<, :push
|
76
|
+
alias_method :remove, :unlink
|
71
77
|
|
72
78
|
###
|
73
79
|
# call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
|
@@ -76,7 +82,7 @@ module Nokogiri
|
|
76
82
|
# selectors. For example:
|
77
83
|
#
|
78
84
|
# For more information see Nokogiri::XML::Searchable#css
|
79
|
-
def css
|
85
|
+
def css(*args)
|
80
86
|
rules, handler, ns, _ = extract_params(args)
|
81
87
|
paths = css_rules_to_xpath(rules, ns)
|
82
88
|
|
@@ -92,7 +98,7 @@ module Nokogiri
|
|
92
98
|
# queries.
|
93
99
|
#
|
94
100
|
# For more information see Nokogiri::XML::Searchable#xpath
|
95
|
-
def xpath
|
101
|
+
def xpath(*args)
|
96
102
|
paths, handler, ns, binds = extract_params(args)
|
97
103
|
|
98
104
|
inject(NodeSet.new(document)) do |set, node|
|
@@ -100,13 +106,6 @@ module Nokogiri
|
|
100
106
|
end
|
101
107
|
end
|
102
108
|
|
103
|
-
###
|
104
|
-
# Search this NodeSet's nodes' immediate children using CSS selector +selector+
|
105
|
-
def > selector
|
106
|
-
ns = document.root.namespaces
|
107
|
-
xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
|
108
|
-
end
|
109
|
-
|
110
109
|
###
|
111
110
|
# call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
|
112
111
|
#
|
@@ -119,18 +118,18 @@ module Nokogiri
|
|
119
118
|
#
|
120
119
|
# node_set.at(3) # same as node_set[3]
|
121
120
|
#
|
122
|
-
def at
|
121
|
+
def at(*args)
|
123
122
|
if args.length == 1 && args.first.is_a?(Numeric)
|
124
123
|
return self[args.first]
|
125
124
|
end
|
126
125
|
|
127
|
-
super
|
126
|
+
super
|
128
127
|
end
|
129
|
-
|
128
|
+
alias_method :%, :at
|
130
129
|
|
131
130
|
###
|
132
131
|
# Filter this list for nodes that match +expr+
|
133
|
-
def filter
|
132
|
+
def filter(expr)
|
134
133
|
find_all { |node| node.matches?(expr) }
|
135
134
|
end
|
136
135
|
|
@@ -139,7 +138,7 @@ module Nokogiri
|
|
139
138
|
# NodeSet.
|
140
139
|
#
|
141
140
|
# See Nokogiri::XML::Node#add_class for more information.
|
142
|
-
def add_class
|
141
|
+
def add_class(name)
|
143
142
|
each do |el|
|
144
143
|
el.add_class(name)
|
145
144
|
end
|
@@ -151,7 +150,7 @@ module Nokogiri
|
|
151
150
|
# NodeSet.
|
152
151
|
#
|
153
152
|
# See Nokogiri::XML::Node#append_class for more information.
|
154
|
-
def append_class
|
153
|
+
def append_class(name)
|
155
154
|
each do |el|
|
156
155
|
el.append_class(name)
|
157
156
|
end
|
@@ -163,7 +162,7 @@ module Nokogiri
|
|
163
162
|
# NodeSet.
|
164
163
|
#
|
165
164
|
# See Nokogiri::XML::Node#remove_class for more information.
|
166
|
-
def remove_class
|
165
|
+
def remove_class(name = nil)
|
167
166
|
each do |el|
|
168
167
|
el.remove_class(name)
|
169
168
|
end
|
@@ -203,31 +202,31 @@ module Nokogiri
|
|
203
202
|
#
|
204
203
|
# node_set.attr("class") { |node| node.name }
|
205
204
|
#
|
206
|
-
def attr
|
205
|
+
def attr(key, value = nil, &block)
|
207
206
|
unless key.is_a?(Hash) || (key && (value || block))
|
208
|
-
return first
|
207
|
+
return first&.attribute(key)
|
209
208
|
end
|
210
209
|
|
211
210
|
hash = key.is_a?(Hash) ? key : { key => value }
|
212
211
|
|
213
|
-
hash.each do |k,v|
|
212
|
+
hash.each do |k, v|
|
214
213
|
each do |node|
|
215
|
-
node[k] = v ||
|
214
|
+
node[k] = v || yield(node)
|
216
215
|
end
|
217
216
|
end
|
218
217
|
|
219
218
|
self
|
220
219
|
end
|
221
|
-
|
222
|
-
|
220
|
+
alias_method :set, :attr
|
221
|
+
alias_method :attribute, :attr
|
223
222
|
|
224
223
|
###
|
225
224
|
# Remove the attribute named +name+ from all Node objects in the NodeSet
|
226
|
-
def remove_attr
|
227
|
-
each { |el| el.delete
|
225
|
+
def remove_attr(name)
|
226
|
+
each { |el| el.delete(name) }
|
228
227
|
self
|
229
228
|
end
|
230
|
-
|
229
|
+
alias_method :remove_attribute, :remove_attr
|
231
230
|
|
232
231
|
###
|
233
232
|
# Iterate over each node, yielding to +block+
|
@@ -254,20 +253,83 @@ module Nokogiri
|
|
254
253
|
#
|
255
254
|
# See Nokogiri::XML::Node#content for more information.
|
256
255
|
def inner_text
|
257
|
-
collect(&:inner_text).join(
|
256
|
+
collect(&:inner_text).join("")
|
258
257
|
end
|
259
|
-
|
258
|
+
alias_method :text, :inner_text
|
260
259
|
|
261
260
|
###
|
262
261
|
# Get the inner html of all contained Node objects
|
263
|
-
def inner_html
|
264
|
-
collect{|j| j.inner_html(*args) }.join(
|
262
|
+
def inner_html(*args)
|
263
|
+
collect { |j| j.inner_html(*args) }.join("")
|
265
264
|
end
|
266
265
|
|
267
|
-
|
268
|
-
#
|
269
|
-
|
270
|
-
|
266
|
+
# :call-seq:
|
267
|
+
# wrap(markup) -> self
|
268
|
+
# wrap(node) -> self
|
269
|
+
#
|
270
|
+
# Wrap each member of this NodeSet with the node parsed from +markup+ or a dup of the +node+.
|
271
|
+
#
|
272
|
+
# [Parameters]
|
273
|
+
# - *markup* (String)
|
274
|
+
# Markup that is parsed, once per member of the NodeSet, and used as the wrapper. Each
|
275
|
+
# node's parent, if it exists, is used as the context node for parsing; otherwise the
|
276
|
+
# associated document is used. If the parsed fragment has multiple roots, the first root
|
277
|
+
# node is used as the wrapper.
|
278
|
+
# - *node* (Nokogiri::XML::Node)
|
279
|
+
# An element that is `#dup`ed and used as the wrapper.
|
280
|
+
#
|
281
|
+
# [Returns] +self+, to support chaining.
|
282
|
+
#
|
283
|
+
# ⚠ Note that if a +String+ is passed, the markup will be parsed <b>once per node</b> in the
|
284
|
+
# NodeSet. You can avoid this overhead in cases where you know exactly the wrapper you wish to
|
285
|
+
# use by passing a +Node+ instead.
|
286
|
+
#
|
287
|
+
# Also see Node#wrap
|
288
|
+
#
|
289
|
+
# *Example* with a +String+ argument:
|
290
|
+
#
|
291
|
+
# doc = Nokogiri::HTML5(<<~HTML)
|
292
|
+
# <html><body>
|
293
|
+
# <a>a</a>
|
294
|
+
# <a>b</a>
|
295
|
+
# <a>c</a>
|
296
|
+
# <a>d</a>
|
297
|
+
# </body></html>
|
298
|
+
# HTML
|
299
|
+
# doc.css("a").wrap("<div></div>")
|
300
|
+
# doc.to_html
|
301
|
+
# # => <html><head></head><body>
|
302
|
+
# # <div><a>a</a></div>
|
303
|
+
# # <div><a>b</a></div>
|
304
|
+
# # <div><a>c</a></div>
|
305
|
+
# # <div><a>d</a></div>
|
306
|
+
# # </body></html>
|
307
|
+
#
|
308
|
+
# *Example* with a +Node+ argument
|
309
|
+
#
|
310
|
+
# 💡 Note that this is faster than the equivalent call passing a +String+ because it avoids
|
311
|
+
# having to reparse the wrapper markup for each node.
|
312
|
+
#
|
313
|
+
# doc = Nokogiri::HTML5(<<~HTML)
|
314
|
+
# <html><body>
|
315
|
+
# <a>a</a>
|
316
|
+
# <a>b</a>
|
317
|
+
# <a>c</a>
|
318
|
+
# <a>d</a>
|
319
|
+
# </body></html>
|
320
|
+
# HTML
|
321
|
+
# doc.css("a").wrap(doc.create_element("div"))
|
322
|
+
# doc.to_html
|
323
|
+
# # => <html><head></head><body>
|
324
|
+
# # <div><a>a</a></div>
|
325
|
+
# # <div><a>b</a></div>
|
326
|
+
# # <div><a>c</a></div>
|
327
|
+
# # <div><a>d</a></div>
|
328
|
+
# # </body></html>
|
329
|
+
#
|
330
|
+
def wrap(node_or_tags)
|
331
|
+
map { |node| node.wrap(node_or_tags) }
|
332
|
+
self
|
271
333
|
end
|
272
334
|
|
273
335
|
###
|
@@ -278,55 +340,62 @@ module Nokogiri
|
|
278
340
|
|
279
341
|
###
|
280
342
|
# Convert this NodeSet to HTML
|
281
|
-
def to_html
|
343
|
+
def to_html(*args)
|
282
344
|
if Nokogiri.jruby?
|
283
345
|
options = args.first.is_a?(Hash) ? args.shift : {}
|
284
|
-
|
285
|
-
options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_HTML
|
286
|
-
end
|
346
|
+
options[:save_with] ||= Node::SaveOptions::DEFAULT_HTML
|
287
347
|
args.insert(0, options)
|
288
348
|
end
|
289
|
-
|
349
|
+
if empty?
|
350
|
+
encoding = (args.first.is_a?(Hash) ? args.first[:encoding] : nil)
|
351
|
+
encoding ||= document.encoding
|
352
|
+
encoding.nil? ? "" : "".encode(encoding)
|
353
|
+
else
|
354
|
+
map { |x| x.to_html(*args) }.join
|
355
|
+
end
|
290
356
|
end
|
291
357
|
|
292
358
|
###
|
293
359
|
# Convert this NodeSet to XHTML
|
294
|
-
def to_xhtml
|
360
|
+
def to_xhtml(*args)
|
295
361
|
map { |x| x.to_xhtml(*args) }.join
|
296
362
|
end
|
297
363
|
|
298
364
|
###
|
299
365
|
# Convert this NodeSet to XML
|
300
|
-
def to_xml
|
366
|
+
def to_xml(*args)
|
301
367
|
map { |x| x.to_xml(*args) }.join
|
302
368
|
end
|
303
369
|
|
304
|
-
|
305
|
-
|
370
|
+
alias_method :size, :length
|
371
|
+
alias_method :to_ary, :to_a
|
306
372
|
|
307
373
|
###
|
308
374
|
# Removes the last element from set and returns it, or +nil+ if
|
309
375
|
# the set is empty
|
310
376
|
def pop
|
311
|
-
return
|
312
|
-
|
377
|
+
return if length == 0
|
378
|
+
|
379
|
+
delete(last)
|
313
380
|
end
|
314
381
|
|
315
382
|
###
|
316
383
|
# Returns the first element of the NodeSet and removes it. Returns
|
317
384
|
# +nil+ if the set is empty.
|
318
385
|
def shift
|
319
|
-
return
|
320
|
-
|
386
|
+
return if length == 0
|
387
|
+
|
388
|
+
delete(first)
|
321
389
|
end
|
322
390
|
|
323
391
|
###
|
324
392
|
# Equality -- Two NodeSets are equal if they contain the same number
|
325
393
|
# of elements and if each element is equal to the corresponding
|
326
394
|
# element in the other NodeSet
|
327
|
-
def ==
|
395
|
+
def ==(other)
|
328
396
|
return false unless other.is_a?(Nokogiri::XML::NodeSet)
|
329
397
|
return false unless length == other.length
|
398
|
+
|
330
399
|
each_with_index do |node, i|
|
331
400
|
return false unless node == other[i]
|
332
401
|
end
|
@@ -350,22 +419,31 @@ module Nokogiri
|
|
350
419
|
def reverse
|
351
420
|
node_set = NodeSet.new(document)
|
352
421
|
(length - 1).downto(0) do |x|
|
353
|
-
node_set.push
|
422
|
+
node_set.push(self[x])
|
354
423
|
end
|
355
424
|
node_set
|
356
425
|
end
|
357
426
|
|
358
427
|
###
|
359
|
-
# Return a nicely
|
428
|
+
# Return a nicely formatted string representation
|
360
429
|
def inspect
|
361
|
-
"[#{map(&:inspect).join
|
430
|
+
"[#{map(&:inspect).join(", ")}]"
|
362
431
|
end
|
363
432
|
|
364
|
-
|
433
|
+
alias_method :+, :|
|
365
434
|
|
366
|
-
#
|
367
|
-
|
435
|
+
#
|
436
|
+
# :call-seq: deconstruct() → Array
|
437
|
+
#
|
438
|
+
# Returns the members of this NodeSet as an array, to use in pattern matching.
|
439
|
+
#
|
440
|
+
# Since v1.14.0
|
441
|
+
#
|
442
|
+
def deconstruct
|
443
|
+
to_a
|
444
|
+
end
|
368
445
|
|
446
|
+
IMPLIED_XPATH_CONTEXTS = [".//", "self::"].freeze # :nodoc:
|
369
447
|
end
|
370
448
|
end
|
371
449
|
end
|
@@ -1,6 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Nokogiri
|
2
4
|
module XML
|
5
|
+
# Struct representing an {XML Schema Notation}[https://www.w3.org/TR/xml/#Notations]
|
3
6
|
class Notation < Struct.new(:name, :public_id, :system_id)
|
7
|
+
# dead comment to ensure rdoc processing
|
8
|
+
|
9
|
+
# :attr: name (String)
|
10
|
+
# The name for the element.
|
11
|
+
|
12
|
+
# :attr: public_id (String)
|
13
|
+
# The URI corresponding to the public identifier
|
14
|
+
|
15
|
+
# :attr: system_id (String,nil)
|
16
|
+
# The URI corresponding to the system identifier
|
4
17
|
end
|
5
18
|
end
|
6
19
|
end
|
@@ -1,86 +1,175 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
1
4
|
module Nokogiri
|
2
5
|
module XML
|
3
|
-
|
4
|
-
#
|
5
|
-
#
|
6
|
-
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
# You can
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
6
|
+
# Options that control the parsing behavior for XML::Document, XML::DocumentFragment,
|
7
|
+
# HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
8
|
+
#
|
9
|
+
# These options directly expose libxml2's parse options, which are all boolean in the sense that
|
10
|
+
# an option is "on" or "off".
|
11
|
+
#
|
12
|
+
# 💡 Note that HTML5 parsing has a separate, orthogonal set of options due to the nature of the
|
13
|
+
# HTML5 specification. See Nokogiri::HTML5.
|
14
|
+
#
|
15
|
+
# ⚠ Not all parse options are supported on JRuby. Nokogiri will attempt to invoke the equivalent
|
16
|
+
# behavior in Xerces/NekoHTML on JRuby when it's possible.
|
17
|
+
#
|
18
|
+
# == Setting and unsetting parse options
|
19
|
+
#
|
20
|
+
# You can build your own combinations of parse options by using any of the following methods:
|
21
|
+
#
|
22
|
+
# [ParseOptions method chaining]
|
23
|
+
#
|
24
|
+
# Every option has an equivalent method in lowercase. You can chain these methods together to
|
25
|
+
# set various combinations.
|
26
|
+
#
|
27
|
+
# # Set the HUGE & PEDANTIC options
|
28
|
+
# po = Nokogiri::XML::ParseOptions.new.huge.pedantic
|
29
|
+
# doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
|
30
|
+
#
|
31
|
+
# Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these
|
32
|
+
# methods on an instance of ParseOptions to unset the option.
|
33
|
+
#
|
34
|
+
# # Set the HUGE & PEDANTIC options
|
35
|
+
# po = Nokogiri::XML::ParseOptions.new.huge.pedantic
|
36
|
+
#
|
37
|
+
# # later we want to modify the options
|
38
|
+
# po.nohuge # Unset the HUGE option
|
39
|
+
# po.nopedantic # Unset the PEDANTIC option
|
40
|
+
#
|
41
|
+
# 💡 Note that some options begin with "no" leading to the logical but perhaps unintuitive
|
42
|
+
# double negative:
|
43
|
+
#
|
44
|
+
# po.nocdata # Set the NOCDATA parse option
|
45
|
+
# po.nonocdata # Unset the NOCDATA parse option
|
46
|
+
#
|
47
|
+
# 💡 Note that negation is not available for STRICT, which is itself a negation of all other
|
48
|
+
# features.
|
49
|
+
#
|
50
|
+
#
|
51
|
+
# [Using Ruby Blocks]
|
52
|
+
#
|
53
|
+
# Most parsing methods will accept a block for configuration of parse options, and we
|
54
|
+
# recommend chaining the setter methods:
|
55
|
+
#
|
56
|
+
# doc = Nokogiri::XML::Document.parse(xml) { |config| config.huge.pedantic }
|
57
|
+
#
|
58
|
+
#
|
59
|
+
# [ParseOptions constants]
|
60
|
+
#
|
61
|
+
# You can also use the constants declared under Nokogiri::XML::ParseOptions to set various
|
62
|
+
# combinations. They are bits in a bitmask, and so can be combined with bitwise operators:
|
63
|
+
#
|
64
|
+
# po = Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::HUGE | Nokogiri::XML::ParseOptions::PEDANTIC)
|
65
|
+
# doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
|
26
66
|
#
|
27
67
|
class ParseOptions
|
28
68
|
# Strict parsing
|
29
69
|
STRICT = 0
|
30
|
-
|
70
|
+
|
71
|
+
# Recover from errors. On by default for XML::Document, XML::DocumentFragment,
|
72
|
+
# HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
31
73
|
RECOVER = 1 << 0
|
32
|
-
|
74
|
+
|
75
|
+
# Substitute entities. Off by default.
|
76
|
+
#
|
77
|
+
# ⚠ This option enables entity substitution, contrary to what the name implies.
|
78
|
+
#
|
79
|
+
# ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
|
33
80
|
NOENT = 1 << 1
|
34
|
-
|
81
|
+
|
82
|
+
# Load external subsets. On by default for XSLT::Stylesheet.
|
83
|
+
#
|
84
|
+
# ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
|
35
85
|
DTDLOAD = 1 << 2
|
36
|
-
|
86
|
+
|
87
|
+
# Default DTD attributes. On by default for XSLT::Stylesheet.
|
37
88
|
DTDATTR = 1 << 3
|
38
|
-
|
89
|
+
|
90
|
+
# Validate with the DTD. Off by default.
|
39
91
|
DTDVALID = 1 << 4
|
40
|
-
|
92
|
+
|
93
|
+
# Suppress error reports. On by default for HTML4::Document and HTML4::DocumentFragment
|
41
94
|
NOERROR = 1 << 5
|
42
|
-
|
95
|
+
|
96
|
+
# Suppress warning reports. On by default for HTML4::Document and HTML4::DocumentFragment
|
43
97
|
NOWARNING = 1 << 6
|
44
|
-
|
98
|
+
|
99
|
+
# Enable pedantic error reporting. Off by default.
|
45
100
|
PEDANTIC = 1 << 7
|
46
|
-
|
101
|
+
|
102
|
+
# Remove blank nodes. Off by default.
|
47
103
|
NOBLANKS = 1 << 8
|
48
|
-
|
104
|
+
|
105
|
+
# Use the SAX1 interface internally. Off by default.
|
49
106
|
SAX1 = 1 << 9
|
50
|
-
|
107
|
+
|
108
|
+
# Implement XInclude substitution. Off by default.
|
51
109
|
XINCLUDE = 1 << 10
|
52
|
-
|
110
|
+
|
111
|
+
# Forbid network access. On by default for XML::Document, XML::DocumentFragment,
|
112
|
+
# HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
113
|
+
#
|
114
|
+
# ⚠ <b>It is UNSAFE to unset this option</b> when parsing untrusted documents.
|
53
115
|
NONET = 1 << 11
|
54
|
-
|
116
|
+
|
117
|
+
# Do not reuse the context dictionary. Off by default.
|
55
118
|
NODICT = 1 << 12
|
56
|
-
|
119
|
+
|
120
|
+
# Remove redundant namespace declarations. Off by default.
|
57
121
|
NSCLEAN = 1 << 13
|
58
|
-
|
122
|
+
|
123
|
+
# Merge CDATA as text nodes. On by default for XSLT::Stylesheet.
|
59
124
|
NOCDATA = 1 << 14
|
60
|
-
|
125
|
+
|
126
|
+
# Do not generate XInclude START/END nodes. Off by default.
|
61
127
|
NOXINCNODE = 1 << 15
|
62
|
-
|
128
|
+
|
129
|
+
# Compact small text nodes. Off by default.
|
130
|
+
#
|
131
|
+
# ⚠ No modification of the DOM tree is allowed after parsing. libxml2 may crash if you try to
|
132
|
+
# modify the tree.
|
63
133
|
COMPACT = 1 << 16
|
64
|
-
|
134
|
+
|
135
|
+
# Parse using XML-1.0 before update 5. Off by default
|
65
136
|
OLD10 = 1 << 17
|
66
|
-
|
137
|
+
|
138
|
+
# Do not fixup XInclude xml:base uris. Off by default
|
67
139
|
NOBASEFIX = 1 << 18
|
68
|
-
|
140
|
+
|
141
|
+
# Relax any hardcoded limit from the parser. Off by default.
|
142
|
+
#
|
143
|
+
# ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
|
69
144
|
HUGE = 1 << 19
|
70
145
|
|
71
|
-
#
|
72
|
-
|
73
|
-
#
|
74
|
-
|
146
|
+
# Support line numbers up to <code>long int</code> (default is a <code>short int</code>). On
|
147
|
+
# by default for XML::Document, XML::DocumentFragment, HTML4::Document,
|
148
|
+
# HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
149
|
+
BIG_LINES = 1 << 22
|
150
|
+
|
151
|
+
# The options mask used by default for parsing XML::Document and XML::DocumentFragment
|
152
|
+
DEFAULT_XML = RECOVER | NONET | BIG_LINES
|
153
|
+
|
154
|
+
# The options mask used by default for parsing XSLT::Stylesheet
|
155
|
+
DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA | BIG_LINES
|
156
|
+
|
157
|
+
# The options mask used by default for parsing HTML4::Document and HTML4::DocumentFragment
|
158
|
+
DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET | BIG_LINES
|
159
|
+
|
160
|
+
# The options mask used by default for parsing XML::Schema
|
161
|
+
DEFAULT_SCHEMA = NONET | BIG_LINES
|
75
162
|
|
76
163
|
attr_accessor :options
|
77
|
-
|
164
|
+
|
165
|
+
def initialize(options = STRICT)
|
78
166
|
@options = options
|
79
167
|
end
|
80
168
|
|
81
169
|
constants.each do |constant|
|
82
170
|
next if constant.to_sym == :STRICT
|
83
|
-
|
171
|
+
|
172
|
+
class_eval <<~RUBY, __FILE__, __LINE__ + 1
|
84
173
|
def #{constant.downcase}
|
85
174
|
@options |= #{constant}
|
86
175
|
self
|
@@ -94,7 +183,7 @@ module Nokogiri
|
|
94
183
|
def #{constant.downcase}?
|
95
184
|
#{constant} & @options == #{constant}
|
96
185
|
end
|
97
|
-
|
186
|
+
RUBY
|
98
187
|
end
|
99
188
|
|
100
189
|
def strict
|
@@ -106,14 +195,18 @@ module Nokogiri
|
|
106
195
|
@options & RECOVER == STRICT
|
107
196
|
end
|
108
197
|
|
109
|
-
|
198
|
+
def ==(other)
|
199
|
+
other.to_i == to_i
|
200
|
+
end
|
201
|
+
|
202
|
+
alias_method :to_i, :options
|
110
203
|
|
111
204
|
def inspect
|
112
205
|
options = []
|
113
206
|
self.class.constants.each do |k|
|
114
207
|
options << k.downcase if send(:"#{k.downcase}?")
|
115
208
|
end
|
116
|
-
super.sub(/>$/, " " + options.join(
|
209
|
+
super.sub(/>$/, " " + options.join(", ") + ">")
|
117
210
|
end
|
118
211
|
end
|
119
212
|
end
|