nokogiri 1.10.10-java → 1.11.0-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/LICENSE-DEPENDENCIES.md +1015 -947
- data/README.md +165 -91
- data/ext/java/nokogiri/HtmlDocument.java +34 -46
- data/ext/java/nokogiri/HtmlSaxParserContext.java +88 -58
- data/ext/java/nokogiri/HtmlSaxPushParser.java +1 -1
- data/ext/java/nokogiri/NokogiriService.java +1 -1
- data/ext/java/nokogiri/XmlAttr.java +13 -20
- data/ext/java/nokogiri/XmlAttributeDecl.java +11 -12
- data/ext/java/nokogiri/XmlCdata.java +3 -4
- data/ext/java/nokogiri/XmlComment.java +1 -1
- data/ext/java/nokogiri/XmlDocument.java +148 -175
- data/ext/java/nokogiri/XmlDocumentFragment.java +13 -31
- data/ext/java/nokogiri/XmlDtd.java +5 -8
- data/ext/java/nokogiri/XmlElement.java +1 -20
- data/ext/java/nokogiri/XmlElementDecl.java +23 -28
- data/ext/java/nokogiri/XmlEntityDecl.java +23 -27
- data/ext/java/nokogiri/XmlEntityReference.java +2 -2
- data/ext/java/nokogiri/XmlNamespace.java +72 -89
- data/ext/java/nokogiri/XmlNode.java +303 -406
- data/ext/java/nokogiri/XmlNodeSet.java +70 -76
- data/ext/java/nokogiri/XmlReader.java +12 -13
- data/ext/java/nokogiri/XmlRelaxng.java +10 -3
- data/ext/java/nokogiri/XmlSaxParserContext.java +15 -10
- data/ext/java/nokogiri/XmlSchema.java +87 -27
- data/ext/java/nokogiri/XmlSyntaxError.java +2 -6
- data/ext/java/nokogiri/XmlText.java +12 -9
- data/ext/java/nokogiri/XmlXpathContext.java +55 -25
- data/ext/java/nokogiri/XsltStylesheet.java +7 -15
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +52 -46
- data/ext/java/nokogiri/internals/NokogiriHandler.java +1 -1
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +71 -135
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +90 -58
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +9 -2
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +67 -10
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +4 -2
- data/ext/java/nokogiri/internals/ParserContext.java +27 -73
- data/ext/java/nokogiri/internals/ReaderNode.java +2 -4
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +18 -33
- data/ext/nokogiri/depend +476 -357
- data/ext/nokogiri/extconf.rb +507 -357
- data/ext/nokogiri/html_document.c +79 -78
- data/ext/nokogiri/html_sax_parser_context.c +2 -2
- data/ext/nokogiri/nokogiri.c +34 -40
- data/ext/nokogiri/xml_document.c +18 -4
- data/ext/nokogiri/xml_io.c +8 -6
- data/ext/nokogiri/xml_node.c +21 -1
- data/ext/nokogiri/xml_node_set.c +1 -1
- data/ext/nokogiri/xml_reader.c +6 -17
- data/ext/nokogiri/xml_relax_ng.c +29 -11
- data/ext/nokogiri/xml_sax_parser.c +2 -7
- data/ext/nokogiri/xml_sax_parser_context.c +2 -2
- data/ext/nokogiri/xml_schema.c +55 -13
- data/ext/nokogiri/xml_xpath_context.c +80 -4
- data/ext/nokogiri/xslt_stylesheet.c +1 -8
- data/lib/nokogiri.rb +22 -22
- data/lib/nokogiri/css.rb +1 -0
- data/lib/nokogiri/css/node.rb +1 -0
- data/lib/nokogiri/css/parser.rb +63 -62
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +39 -36
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +1 -0
- data/lib/nokogiri/css/xpath_visitor.rb +73 -43
- data/lib/nokogiri/decorators/slop.rb +1 -0
- data/lib/nokogiri/html.rb +1 -0
- data/lib/nokogiri/html/builder.rb +1 -0
- data/lib/nokogiri/html/document.rb +13 -26
- data/lib/nokogiri/html/document_fragment.rb +1 -0
- data/lib/nokogiri/html/element_description.rb +1 -0
- data/lib/nokogiri/html/element_description_defaults.rb +1 -0
- data/lib/nokogiri/html/entity_lookup.rb +1 -0
- data/lib/nokogiri/html/sax/parser.rb +1 -0
- data/lib/nokogiri/html/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html/sax/push_parser.rb +1 -0
- data/lib/nokogiri/jruby/dependencies.rb +20 -0
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version.rb +3 -109
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +182 -0
- data/lib/nokogiri/xml.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +1 -0
- data/lib/nokogiri/xml/attribute_decl.rb +1 -0
- data/lib/nokogiri/xml/builder.rb +3 -2
- data/lib/nokogiri/xml/cdata.rb +1 -0
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +20 -15
- data/lib/nokogiri/xml/document_fragment.rb +5 -6
- data/lib/nokogiri/xml/dtd.rb +1 -0
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +1 -0
- data/lib/nokogiri/xml/node.rb +587 -249
- data/lib/nokogiri/xml/node/save_options.rb +1 -0
- data/lib/nokogiri/xml/node_set.rb +1 -0
- data/lib/nokogiri/xml/notation.rb +1 -0
- data/lib/nokogiri/xml/parse_options.rb +10 -3
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/pp/character_data.rb +1 -0
- data/lib/nokogiri/xml/pp/node.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +1 -0
- data/lib/nokogiri/xml/reader.rb +7 -3
- data/lib/nokogiri/xml/relax_ng.rb +7 -2
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +1 -0
- data/lib/nokogiri/xml/sax/parser.rb +1 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +13 -4
- data/lib/nokogiri/xml/searchable.rb +25 -16
- data/lib/nokogiri/xml/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/xpath_context.rb +1 -0
- data/lib/nokogiri/xslt.rb +1 -0
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/xsd/xmlparser/nokogiri.rb +1 -0
- metadata +86 -159
- data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +0 -107
- data/ext/java/nokogiri/internals/UncloseableInputStream.java +0 -102
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/nokogiri.h +0 -121
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
module XML
|
3
4
|
class DocumentFragment < Nokogiri::XML::Node
|
@@ -140,6 +141,10 @@ module Nokogiri
|
|
140
141
|
document.errors = things
|
141
142
|
end
|
142
143
|
|
144
|
+
def fragment(data)
|
145
|
+
document.fragment(data)
|
146
|
+
end
|
147
|
+
|
143
148
|
private
|
144
149
|
|
145
150
|
# fix for issue 770
|
@@ -149,12 +154,6 @@ module Nokogiri
|
|
149
154
|
%Q{xmlns#{prefix}="#{namespace.href}"}
|
150
155
|
end.join ' '
|
151
156
|
end
|
152
|
-
|
153
|
-
def coerce data
|
154
|
-
return super unless String === data
|
155
|
-
|
156
|
-
document.fragment(data).children
|
157
|
-
end
|
158
157
|
end
|
159
158
|
end
|
160
159
|
end
|
data/lib/nokogiri/xml/dtd.rb
CHANGED
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
|
3
|
-
require
|
2
|
+
# frozen_string_literal: true
|
3
|
+
require "stringio"
|
4
|
+
require "nokogiri/xml/node/save_options"
|
4
5
|
|
5
6
|
module Nokogiri
|
6
7
|
module XML
|
@@ -56,49 +57,49 @@ module Nokogiri
|
|
56
57
|
include Enumerable
|
57
58
|
|
58
59
|
# Element node type, see Nokogiri::XML::Node#element?
|
59
|
-
ELEMENT_NODE =
|
60
|
+
ELEMENT_NODE = 1
|
60
61
|
# Attribute node type
|
61
|
-
ATTRIBUTE_NODE =
|
62
|
+
ATTRIBUTE_NODE = 2
|
62
63
|
# Text node type, see Nokogiri::XML::Node#text?
|
63
|
-
TEXT_NODE =
|
64
|
+
TEXT_NODE = 3
|
64
65
|
# CDATA node type, see Nokogiri::XML::Node#cdata?
|
65
66
|
CDATA_SECTION_NODE = 4
|
66
67
|
# Entity reference node type
|
67
|
-
ENTITY_REF_NODE =
|
68
|
+
ENTITY_REF_NODE = 5
|
68
69
|
# Entity node type
|
69
|
-
ENTITY_NODE =
|
70
|
+
ENTITY_NODE = 6
|
70
71
|
# PI node type
|
71
|
-
PI_NODE =
|
72
|
+
PI_NODE = 7
|
72
73
|
# Comment node type, see Nokogiri::XML::Node#comment?
|
73
|
-
COMMENT_NODE =
|
74
|
+
COMMENT_NODE = 8
|
74
75
|
# Document node type, see Nokogiri::XML::Node#xml?
|
75
|
-
DOCUMENT_NODE =
|
76
|
+
DOCUMENT_NODE = 9
|
76
77
|
# Document type node type
|
77
78
|
DOCUMENT_TYPE_NODE = 10
|
78
79
|
# Document fragment node type
|
79
80
|
DOCUMENT_FRAG_NODE = 11
|
80
81
|
# Notation node type
|
81
|
-
NOTATION_NODE =
|
82
|
+
NOTATION_NODE = 12
|
82
83
|
# HTML document node type, see Nokogiri::XML::Node#html?
|
83
84
|
HTML_DOCUMENT_NODE = 13
|
84
85
|
# DTD node type
|
85
|
-
DTD_NODE =
|
86
|
+
DTD_NODE = 14
|
86
87
|
# Element declaration type
|
87
|
-
ELEMENT_DECL =
|
88
|
+
ELEMENT_DECL = 15
|
88
89
|
# Attribute declaration type
|
89
|
-
ATTRIBUTE_DECL =
|
90
|
+
ATTRIBUTE_DECL = 16
|
90
91
|
# Entity declaration type
|
91
|
-
ENTITY_DECL =
|
92
|
+
ENTITY_DECL = 17
|
92
93
|
# Namespace declaration type
|
93
|
-
NAMESPACE_DECL =
|
94
|
+
NAMESPACE_DECL = 18
|
94
95
|
# XInclude start type
|
95
|
-
XINCLUDE_START =
|
96
|
+
XINCLUDE_START = 19
|
96
97
|
# XInclude end type
|
97
|
-
XINCLUDE_END =
|
98
|
+
XINCLUDE_END = 20
|
98
99
|
# DOCB document node type
|
99
100
|
DOCB_DOCUMENT_NODE = 21
|
100
101
|
|
101
|
-
def initialize
|
102
|
+
def initialize(name, document) # :nodoc:
|
102
103
|
# ... Ya. This is empty on purpose.
|
103
104
|
end
|
104
105
|
|
@@ -108,24 +109,18 @@ module Nokogiri
|
|
108
109
|
document.decorate(self)
|
109
110
|
end
|
110
111
|
|
112
|
+
# @!group Searching via XPath or CSS Queries
|
113
|
+
|
111
114
|
###
|
112
115
|
# Search this node's immediate children using CSS selector +selector+
|
113
|
-
def >
|
116
|
+
def >(selector)
|
114
117
|
ns = document.root.namespaces
|
115
118
|
xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
|
116
119
|
end
|
117
120
|
|
118
|
-
|
119
|
-
# Get the attribute value for the attribute +name+
|
120
|
-
def [] name
|
121
|
-
get(name.to_s)
|
122
|
-
end
|
121
|
+
# @!endgroup
|
123
122
|
|
124
|
-
|
125
|
-
# Set the attribute value for the attribute +name+ to +value+
|
126
|
-
def []= name, value
|
127
|
-
set name.to_s, value.to_s
|
128
|
-
end
|
123
|
+
# @!group Manipulating Document Structure
|
129
124
|
|
130
125
|
###
|
131
126
|
# Add +node_or_tags+ as a child of this Node.
|
@@ -134,7 +129,7 @@ module Nokogiri
|
|
134
129
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
135
130
|
#
|
136
131
|
# Also see related method +<<+.
|
137
|
-
def add_child
|
132
|
+
def add_child(node_or_tags)
|
138
133
|
node_or_tags = coerce(node_or_tags)
|
139
134
|
if node_or_tags.is_a?(XML::NodeSet)
|
140
135
|
node_or_tags.each { |n| add_child_node_and_reparent_attrs n }
|
@@ -151,7 +146,7 @@ module Nokogiri
|
|
151
146
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
152
147
|
#
|
153
148
|
# Also see related method +add_child+.
|
154
|
-
def prepend_child
|
149
|
+
def prepend_child(node_or_tags)
|
155
150
|
if first = children.first
|
156
151
|
# Mimic the error add_child would raise.
|
157
152
|
raise RuntimeError, "Document already has a root node" if document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
@@ -161,7 +156,6 @@ module Nokogiri
|
|
161
156
|
end
|
162
157
|
end
|
163
158
|
|
164
|
-
|
165
159
|
###
|
166
160
|
# Add html around this node
|
167
161
|
#
|
@@ -180,7 +174,7 @@ module Nokogiri
|
|
180
174
|
# Returns self, to support chaining of calls (e.g., root << child1 << child2)
|
181
175
|
#
|
182
176
|
# Also see related method +add_child+.
|
183
|
-
def <<
|
177
|
+
def <<(node_or_tags)
|
184
178
|
add_child node_or_tags
|
185
179
|
self
|
186
180
|
end
|
@@ -192,7 +186,7 @@ module Nokogiri
|
|
192
186
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
193
187
|
#
|
194
188
|
# Also see related method +before+.
|
195
|
-
def add_previous_sibling
|
189
|
+
def add_previous_sibling(node_or_tags)
|
196
190
|
raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
197
191
|
|
198
192
|
add_sibling :previous, node_or_tags
|
@@ -205,7 +199,7 @@ module Nokogiri
|
|
205
199
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
206
200
|
#
|
207
201
|
# Also see related method +after+.
|
208
|
-
def add_next_sibling
|
202
|
+
def add_next_sibling(node_or_tags)
|
209
203
|
raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
210
204
|
|
211
205
|
add_sibling :next, node_or_tags
|
@@ -218,7 +212,7 @@ module Nokogiri
|
|
218
212
|
# Returns self, to support chaining of calls.
|
219
213
|
#
|
220
214
|
# Also see related method +add_previous_sibling+.
|
221
|
-
def before
|
215
|
+
def before(node_or_tags)
|
222
216
|
add_previous_sibling node_or_tags
|
223
217
|
self
|
224
218
|
end
|
@@ -230,7 +224,7 @@ module Nokogiri
|
|
230
224
|
# Returns self, to support chaining of calls.
|
231
225
|
#
|
232
226
|
# Also see related method +add_next_sibling+.
|
233
|
-
def after
|
227
|
+
def after(node_or_tags)
|
234
228
|
add_next_sibling node_or_tags
|
235
229
|
self
|
236
230
|
end
|
@@ -242,7 +236,7 @@ module Nokogiri
|
|
242
236
|
# Returns self.
|
243
237
|
#
|
244
238
|
# Also see related method +children=+
|
245
|
-
def inner_html=
|
239
|
+
def inner_html=(node_or_tags)
|
246
240
|
self.children = node_or_tags
|
247
241
|
self
|
248
242
|
end
|
@@ -254,7 +248,7 @@ module Nokogiri
|
|
254
248
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
255
249
|
#
|
256
250
|
# Also see related method +inner_html=+
|
257
|
-
def children=
|
251
|
+
def children=(node_or_tags)
|
258
252
|
node_or_tags = coerce(node_or_tags)
|
259
253
|
children.unlink
|
260
254
|
if node_or_tags.is_a?(XML::NodeSet)
|
@@ -272,19 +266,21 @@ module Nokogiri
|
|
272
266
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
273
267
|
#
|
274
268
|
# Also see related method +swap+.
|
275
|
-
def replace
|
269
|
+
def replace(node_or_tags)
|
270
|
+
raise("Cannot replace a node with no parent") unless parent
|
271
|
+
|
276
272
|
# We cannot replace a text node directly, otherwise libxml will return
|
277
273
|
# an internal error at parser.c:13031, I don't know exactly why
|
278
274
|
# libxml is trying to find a parent node that is an element or document
|
279
275
|
# so I can't tell if this is bug in libxml or not. issue #775.
|
280
276
|
if text?
|
281
|
-
replacee = Nokogiri::XML::Node.new
|
277
|
+
replacee = Nokogiri::XML::Node.new "dummy", document
|
282
278
|
add_previous_sibling_node replacee
|
283
279
|
unlink
|
284
280
|
return replacee.replace node_or_tags
|
285
281
|
end
|
286
282
|
|
287
|
-
node_or_tags = coerce(node_or_tags)
|
283
|
+
node_or_tags = parent.coerce(node_or_tags)
|
288
284
|
|
289
285
|
if node_or_tags.is_a?(XML::NodeSet)
|
290
286
|
node_or_tags.each { |n| add_previous_sibling n }
|
@@ -302,33 +298,98 @@ module Nokogiri
|
|
302
298
|
# Returns self, to support chaining of calls.
|
303
299
|
#
|
304
300
|
# Also see related method +replace+.
|
305
|
-
def swap
|
301
|
+
def swap(node_or_tags)
|
306
302
|
replace node_or_tags
|
307
303
|
self
|
308
304
|
end
|
309
305
|
|
310
|
-
|
311
|
-
|
306
|
+
####
|
307
|
+
# Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
|
308
|
+
def content=(string)
|
309
|
+
self.native_content = encode_special_chars(string.to_s)
|
310
|
+
end
|
312
311
|
|
313
|
-
|
314
|
-
#
|
315
|
-
|
316
|
-
|
312
|
+
###
|
313
|
+
# Set the parent Node for this Node
|
314
|
+
def parent=(parent_node)
|
315
|
+
parent_node.add_child(self)
|
316
|
+
parent_node
|
317
|
+
end
|
317
318
|
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
319
|
+
###
|
320
|
+
# Adds a default namespace supplied as a string +url+ href, to self.
|
321
|
+
# The consequence is as if an xmlns attribute with the supplied argument were
|
322
|
+
# present in parsed XML. A default namespace set with this method will
|
323
|
+
# not show up in #attributes, but when this node is serialized to XML an
|
324
|
+
# "xmlns" attribute will appear. See also #namespace and #namespace=
|
325
|
+
def default_namespace=(url)
|
326
|
+
add_namespace_definition(nil, url)
|
327
|
+
end
|
328
|
+
|
329
|
+
###
|
330
|
+
# Set the default namespace on this node (as would be defined with an
|
331
|
+
# "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
|
332
|
+
# a Namespace added this way will NOT be serialized as an xmlns attribute
|
333
|
+
# for this node. You probably want #default_namespace= instead, or perhaps
|
334
|
+
# #add_namespace_definition with a nil prefix argument.
|
335
|
+
def namespace=(ns)
|
336
|
+
return set_namespace(ns) unless ns
|
337
|
+
|
338
|
+
unless Nokogiri::XML::Namespace === ns
|
339
|
+
raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
|
340
|
+
end
|
341
|
+
if ns.document != document
|
342
|
+
raise ArgumentError, "namespace must be declared on the same document"
|
343
|
+
end
|
344
|
+
|
345
|
+
set_namespace ns
|
346
|
+
end
|
347
|
+
|
348
|
+
###
|
349
|
+
# Do xinclude substitution on the subtree below node. If given a block, a
|
350
|
+
# Nokogiri::XML::ParseOptions object initialized from +options+, will be
|
351
|
+
# passed to it, allowing more convenient modification of the parser options.
|
352
|
+
def do_xinclude(options = XML::ParseOptions::DEFAULT_XML)
|
353
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
354
|
+
|
355
|
+
# give options to user
|
356
|
+
yield options if block_given?
|
357
|
+
|
358
|
+
# call c extension
|
359
|
+
process_xincludes(options.to_i)
|
360
|
+
end
|
361
|
+
|
362
|
+
alias :next :next_sibling
|
363
|
+
alias :previous :previous_sibling
|
364
|
+
alias :next= :add_next_sibling
|
365
|
+
alias :previous= :add_previous_sibling
|
366
|
+
alias :remove :unlink
|
367
|
+
alias :name= :node_name=
|
368
|
+
alias :add_namespace :add_namespace_definition
|
369
|
+
|
370
|
+
# @!endgroup
|
371
|
+
|
372
|
+
alias :text :content
|
373
|
+
alias :inner_text :content
|
374
|
+
alias :name :node_name
|
375
|
+
alias :type :node_type
|
376
|
+
alias :to_str :text
|
377
|
+
alias :clone :dup
|
378
|
+
alias :elements :element_children
|
379
|
+
|
380
|
+
# @!group Working With Node Attributes
|
381
|
+
|
382
|
+
###
|
383
|
+
# Get the attribute value for the attribute +name+
|
384
|
+
def [](name)
|
385
|
+
get(name.to_s)
|
386
|
+
end
|
387
|
+
|
388
|
+
###
|
389
|
+
# Set the attribute value for the attribute +name+ to +value+
|
390
|
+
def []=(name, value)
|
391
|
+
set name.to_s, value.to_s
|
392
|
+
end
|
332
393
|
|
333
394
|
####
|
334
395
|
# Returns a hash containing the node's attributes. The key is
|
@@ -337,9 +398,9 @@ module Nokogiri
|
|
337
398
|
# If you need to distinguish attributes with the same name, with different namespaces
|
338
399
|
# use #attribute_nodes instead.
|
339
400
|
def attributes
|
340
|
-
|
341
|
-
[node.node_name
|
342
|
-
|
401
|
+
attribute_nodes.each_with_object({}) do |node, hash|
|
402
|
+
hash[node.node_name] = node
|
403
|
+
end
|
343
404
|
end
|
344
405
|
|
345
406
|
###
|
@@ -348,6 +409,12 @@ module Nokogiri
|
|
348
409
|
attribute_nodes.map(&:value)
|
349
410
|
end
|
350
411
|
|
412
|
+
###
|
413
|
+
# Do this Node's attributes include <value>?
|
414
|
+
def value?(value)
|
415
|
+
values.include? value
|
416
|
+
end
|
417
|
+
|
351
418
|
###
|
352
419
|
# Get the attribute names for this Node.
|
353
420
|
def keys
|
@@ -363,82 +430,366 @@ module Nokogiri
|
|
363
430
|
end
|
364
431
|
|
365
432
|
###
|
366
|
-
#
|
367
|
-
|
433
|
+
# Remove the attribute named +name+
|
434
|
+
def remove_attribute(name)
|
435
|
+
attr = attributes[name].remove if key? name
|
436
|
+
clear_xpath_context if Nokogiri.jruby?
|
437
|
+
attr
|
438
|
+
end
|
439
|
+
|
440
|
+
# Get the CSS class names of a Node.
|
441
|
+
#
|
442
|
+
# This is a convenience function and is equivalent to:
|
443
|
+
# node.kwattr_values("class")
|
444
|
+
#
|
445
|
+
# @see #kwattr_values
|
446
|
+
# @see #add_class
|
447
|
+
# @see #append_class
|
448
|
+
# @see #remove_class
|
449
|
+
#
|
450
|
+
# @return [Array<String>]
|
451
|
+
#
|
452
|
+
# The CSS classes present in the Node's +class+ attribute. If
|
453
|
+
# the attribute is empty or non-existent, the return value is
|
454
|
+
# an empty array.
|
455
|
+
#
|
456
|
+
# @example
|
457
|
+
# node # => <div class="section title header"></div>
|
458
|
+
# node.classes # => ["section", "title", "header"]
|
459
|
+
#
|
368
460
|
def classes
|
369
|
-
|
461
|
+
kwattr_values("class")
|
370
462
|
end
|
371
463
|
|
372
|
-
|
373
|
-
#
|
374
|
-
#
|
375
|
-
#
|
376
|
-
#
|
464
|
+
# Ensure HTML CSS classes are present on a +Node+. Any CSS
|
465
|
+
# classes in +names+ that already exist in the +Node+'s +class+
|
466
|
+
# attribute are _not_ added. Note that any existing duplicates
|
467
|
+
# in the +class+ attribute are not removed. Compare with
|
468
|
+
# {#append_class}.
|
469
|
+
#
|
470
|
+
# This is a convenience function and is equivalent to:
|
471
|
+
# node.kwattr_add("class", names)
|
472
|
+
#
|
473
|
+
# @see #kwattr_add
|
474
|
+
# @see #classes
|
475
|
+
# @see #append_class
|
476
|
+
# @see #remove_class
|
477
|
+
#
|
478
|
+
# @param names [String, Array<String>]
|
479
|
+
#
|
480
|
+
# CSS class names to be added to the Node's +class+
|
481
|
+
# attribute. May be a string containing whitespace-delimited
|
482
|
+
# names, or an Array of String names. Any class names already
|
483
|
+
# present will not be added. Any class names not present will
|
484
|
+
# be added. If no +class+ attribute exists, one is created.
|
485
|
+
#
|
486
|
+
# @return [Node] Returns +self+ for ease of chaining method calls.
|
487
|
+
#
|
488
|
+
# @example Ensure that a +Node+ has CSS class "section"
|
489
|
+
# node # => <div></div>
|
490
|
+
# node.add_class("section") # => <div class="section"></div>
|
491
|
+
# node.add_class("section") # => <div class="section"></div> # duplicate not added
|
492
|
+
#
|
493
|
+
# @example Ensure that a +Node+ has CSS classes "section" and "header", via a String argument.
|
494
|
+
# node # => <div class="section section"></div>
|
495
|
+
# node.add_class("section header") # => <div class="section section header"></div>
|
496
|
+
# # Note that the CSS class "section" is not added because it is already present.
|
497
|
+
# # Note also that the pre-existing duplicate CSS class "section" is not removed.
|
498
|
+
#
|
499
|
+
# @example Ensure that a +Node+ has CSS classes "section" and "header", via an Array argument.
|
500
|
+
# node # => <div></div>
|
501
|
+
# node.add_class(["section", "header"]) # => <div class="section header"></div>
|
502
|
+
#
|
503
|
+
def add_class(names)
|
504
|
+
kwattr_add("class", names)
|
505
|
+
end
|
506
|
+
|
507
|
+
# Add HTML CSS classes to a +Node+, regardless of
|
508
|
+
# duplication. Compare with {#add_class}.
|
509
|
+
#
|
510
|
+
# This is a convenience function and is equivalent to:
|
511
|
+
# node.kwattr_append("class", names)
|
512
|
+
#
|
513
|
+
# @see #kwattr_append
|
514
|
+
# @see #classes
|
515
|
+
# @see #add_class
|
516
|
+
# @see #remove_class
|
517
|
+
#
|
518
|
+
# @param names [String, Array<String>]
|
519
|
+
#
|
520
|
+
# CSS class names to be appended to the Node's +class+
|
521
|
+
# attribute. May be a string containing whitespace-delimited
|
522
|
+
# names, or an Array of String names. All class names passed
|
523
|
+
# in will be appended to the +class+ attribute even if they
|
524
|
+
# are already present in the attribute value. If no +class+
|
525
|
+
# attribute exists, one is created.
|
526
|
+
#
|
527
|
+
# @return [Node] Returns +self+ for ease of chaining method calls.
|
528
|
+
#
|
529
|
+
# @example Append "section" to a +Node+'s CSS +class+ attribute
|
530
|
+
# node # => <div></div>
|
531
|
+
# node.append_class("section") # => <div class="section"></div>
|
532
|
+
# node.append_class("section") # => <div class="section section"></div> # duplicate added!
|
533
|
+
#
|
534
|
+
# @example Append "section" and "header" to a +Node+'s CSS +class+ attribute, via a String argument.
|
535
|
+
# node # => <div class="section section"></div>
|
536
|
+
# node.append_class("section header") # => <div class="section section section header"></div>
|
537
|
+
# # Note that the CSS class "section" is appended even though it is already present.
|
538
|
+
#
|
539
|
+
# @example Append "section" and "header" to a +Node+'s CSS +class+ attribute, via an Array argument.
|
540
|
+
# node # => <div></div>
|
541
|
+
# node.append_class(["section", "header"]) # => <div class="section header"></div>
|
542
|
+
# node.append_class(["section", "header"]) # => <div class="section header section header"></div>
|
377
543
|
#
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
544
|
+
def append_class(names)
|
545
|
+
kwattr_append("class", names)
|
546
|
+
end
|
547
|
+
|
548
|
+
# Remove HTML CSS classes from a +Node+. Any CSS classes in +names+ that
|
549
|
+
# exist in the +Node+'s +class+ attribute are removed, including any
|
550
|
+
# multiple entries.
|
551
|
+
#
|
552
|
+
# If no CSS classes remain after this operation, or if +names+ is
|
553
|
+
# +nil+, the +class+ attribute is deleted from the node.
|
554
|
+
#
|
555
|
+
# This is a convenience function and is equivalent to:
|
556
|
+
# node.kwattr_remove("class", names)
|
557
|
+
#
|
558
|
+
# @see #kwattr_remove
|
559
|
+
# @see #classes
|
560
|
+
# @see #add_class
|
561
|
+
# @see #append_class
|
562
|
+
#
|
563
|
+
# @param names [String, Array<String>]
|
564
|
+
#
|
565
|
+
# CSS class names to be removed from the Node's +class+ attribute. May
|
566
|
+
# be a string containing whitespace-delimited names, or an Array of
|
567
|
+
# String names. Any class names already present will be removed. If no
|
568
|
+
# CSS classes remain, the +class+ attribute is deleted.
|
569
|
+
#
|
570
|
+
# @return [Node] Returns +self+ for ease of chaining method calls.
|
571
|
+
#
|
572
|
+
# @example
|
573
|
+
# node # => <div class="section header"></div>
|
574
|
+
# node.remove_class("section") # => <div class="header"></div>
|
575
|
+
# node.remove_class("header") # => <div></div> # attribute is deleted when empty
|
576
|
+
#
|
577
|
+
def remove_class(names = nil)
|
578
|
+
kwattr_remove("class", names)
|
579
|
+
end
|
580
|
+
|
581
|
+
# Retrieve values from a keyword attribute of a Node.
|
582
|
+
#
|
583
|
+
# A "keyword attribute" is a node attribute that contains a set
|
584
|
+
# of space-delimited values. Perhaps the most familiar example
|
585
|
+
# of this is the HTML +class+ attribute used to contain CSS
|
586
|
+
# classes. But other keyword attributes exist, for instance
|
587
|
+
# [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
|
588
|
+
#
|
589
|
+
# @see #classes
|
590
|
+
# @see #kwattr_add
|
591
|
+
# @see #kwattr_append
|
592
|
+
# @see #kwattr_remove
|
593
|
+
#
|
594
|
+
# @param attribute_name [String] The name of the keyword attribute to be inspected.
|
595
|
+
#
|
596
|
+
# @return [Array<String>]
|
597
|
+
#
|
598
|
+
# The values present in the Node's +attribute_name+
|
599
|
+
# attribute. If the attribute is empty or non-existent, the
|
600
|
+
# return value is an empty array.
|
601
|
+
#
|
602
|
+
# @example
|
603
|
+
# node # => <a rel="nofollow noopener external">link</a>
|
604
|
+
# node.kwattr_values("rel") # => ["nofollow", "noopener", "external"]
|
605
|
+
#
|
606
|
+
# @since v1.11.0
|
607
|
+
#
|
608
|
+
def kwattr_values(attribute_name)
|
609
|
+
keywordify(get_attribute(attribute_name) || [])
|
610
|
+
end
|
611
|
+
|
612
|
+
# Ensure that values are present in a keyword attribute.
|
613
|
+
#
|
614
|
+
# Any values in +keywords+ that already exist in the +Node+'s
|
615
|
+
# attribute values are _not_ added. Note that any existing
|
616
|
+
# duplicates in the attribute values are not removed. Compare
|
617
|
+
# with {#kwattr_append}.
|
618
|
+
#
|
619
|
+
# A "keyword attribute" is a node attribute that contains a set
|
620
|
+
# of space-delimited values. Perhaps the most familiar example
|
621
|
+
# of this is the HTML +class+ attribute used to contain CSS
|
622
|
+
# classes. But other keyword attributes exist, for instance
|
623
|
+
# [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
|
624
|
+
#
|
625
|
+
# @see #add_class
|
626
|
+
# @see #kwattr_values
|
627
|
+
# @see #kwattr_append
|
628
|
+
# @see #kwattr_remove
|
629
|
+
#
|
630
|
+
# @param attribute_name [String] The name of the keyword attribute to be modified.
|
631
|
+
#
|
632
|
+
# @param keywords [String, Array<String>]
|
633
|
+
#
|
634
|
+
# Keywords to be added to the attribute named
|
635
|
+
# +attribute_name+. May be a string containing
|
636
|
+
# whitespace-delimited values, or an Array of String
|
637
|
+
# values. Any values already present will not be added. Any
|
638
|
+
# values not present will be added. If the named attribute
|
639
|
+
# does not exist, it is created.
|
640
|
+
#
|
641
|
+
# @return [Node] Returns +self+ for ease of chaining method calls.
|
642
|
+
#
|
643
|
+
# @example Ensure that a +Node+ has "nofollow" in its +rel+ attribute.
|
644
|
+
# node # => <a></a>
|
645
|
+
# node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a>
|
646
|
+
# node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a> # duplicate not added
|
647
|
+
#
|
648
|
+
# @example Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via a String argument.
|
649
|
+
# node # => <a rel="nofollow nofollow"></a>
|
650
|
+
# node.kwattr_add("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
|
651
|
+
# # Note that "nofollow" is not added because it is already present.
|
652
|
+
# # Note also that the pre-existing duplicate "nofollow" is not removed.
|
653
|
+
#
|
654
|
+
# @example Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via an Array argument.
|
655
|
+
# node # => <a></a>
|
656
|
+
# node.kwattr_add("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
|
657
|
+
#
|
658
|
+
# @since v1.11.0
|
659
|
+
#
|
660
|
+
def kwattr_add(attribute_name, keywords)
|
661
|
+
keywords = keywordify(keywords)
|
662
|
+
current_kws = kwattr_values(attribute_name)
|
663
|
+
new_kws = (current_kws + (keywords - current_kws)).join(" ")
|
664
|
+
set_attribute(attribute_name, new_kws)
|
383
665
|
self
|
384
666
|
end
|
385
667
|
|
386
|
-
|
387
|
-
#
|
388
|
-
#
|
389
|
-
#
|
390
|
-
#
|
668
|
+
# Add keywords to a Node's keyword attribute, regardless of
|
669
|
+
# duplication. Compare with {#kwattr_add}.
|
670
|
+
#
|
671
|
+
# A "keyword attribute" is a node attribute that contains a set
|
672
|
+
# of space-delimited values. Perhaps the most familiar example
|
673
|
+
# of this is the HTML +class+ attribute used to contain CSS
|
674
|
+
# classes. But other keyword attributes exist, for instance
|
675
|
+
# [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
|
676
|
+
#
|
677
|
+
# @see #append_class
|
678
|
+
# @see #kwattr_values
|
679
|
+
# @see #kwattr_add
|
680
|
+
# @see #kwattr_remove
|
391
681
|
#
|
392
|
-
#
|
393
|
-
#
|
394
|
-
|
395
|
-
|
682
|
+
# @param attribute_name [String] The name of the keyword attribute to be modified.
|
683
|
+
#
|
684
|
+
# @param keywords [String, Array<String>]
|
685
|
+
#
|
686
|
+
# Keywords to be added to the attribute named
|
687
|
+
# +attribute_name+. May be a string containing
|
688
|
+
# whitespace-delimited values, or an Array of String
|
689
|
+
# values. All values passed in will be appended to the named
|
690
|
+
# attribute even if they are already present in the
|
691
|
+
# attribute. If the named attribute does not exist, it is
|
692
|
+
# created.
|
693
|
+
#
|
694
|
+
# @return [Node] Returns +self+ for ease of chaining method calls.
|
695
|
+
#
|
696
|
+
# @example Append "nofollow" to the +rel+ attribute.
|
697
|
+
# node # => <a></a>
|
698
|
+
# node.kwattr_append("rel", "nofollow") # => <a rel="nofollow"></a>
|
699
|
+
# node.kwattr_append("rel", "nofollow") # => <a rel="nofollow nofollow"></a> # duplicate added!
|
700
|
+
#
|
701
|
+
# @example Append "nofollow" and "noreferrer" to the +rel+ attribute, via a String argument.
|
702
|
+
# node # => <a rel="nofollow"></a>
|
703
|
+
# node.kwattr_append("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
|
704
|
+
# # Note that "nofollow" is appended even though it is already present.
|
705
|
+
#
|
706
|
+
# @example Append "nofollow" and "noreferrer" to the +rel+ attribute, via an Array argument.
|
707
|
+
# node # => <a></a>
|
708
|
+
# node.kwattr_append("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
|
709
|
+
#
|
710
|
+
# @since v1.11.0
|
711
|
+
#
|
712
|
+
def kwattr_append(attribute_name, keywords)
|
713
|
+
keywords = keywordify(keywords)
|
714
|
+
current_kws = kwattr_values(attribute_name)
|
715
|
+
new_kws = (current_kws + keywords).join(" ")
|
716
|
+
set_attribute(attribute_name, new_kws)
|
396
717
|
self
|
397
718
|
end
|
398
719
|
|
399
|
-
|
400
|
-
#
|
401
|
-
#
|
402
|
-
# they are all removed.
|
720
|
+
# Remove keywords from a keyword attribute. Any matching
|
721
|
+
# keywords that exist in the named attribute are removed,
|
722
|
+
# including any multiple entries.
|
403
723
|
#
|
404
|
-
#
|
405
|
-
#
|
724
|
+
# If no keywords remain after this operation, or if +keywords+
|
725
|
+
# is +nil+, the attribute is deleted from the node.
|
406
726
|
#
|
407
|
-
#
|
408
|
-
#
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
727
|
+
# A "keyword attribute" is a node attribute that contains a set
|
728
|
+
# of space-delimited values. Perhaps the most familiar example
|
729
|
+
# of this is the HTML +class+ attribute used to contain CSS
|
730
|
+
# classes. But other keyword attributes exist, for instance
|
731
|
+
# [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
|
732
|
+
#
|
733
|
+
# @see #remove_class
|
734
|
+
# @see #kwattr_values
|
735
|
+
# @see #kwattr_add
|
736
|
+
# @see #kwattr_append
|
737
|
+
#
|
738
|
+
# @param attribute_name [String] The name of the keyword attribute to be modified.
|
739
|
+
#
|
740
|
+
# @param keywords [String, Array<String>]
|
741
|
+
#
|
742
|
+
# Keywords to be removed from the attribute named
|
743
|
+
# +attribute_name+. May be a string containing
|
744
|
+
# whitespace-delimited values, or an Array of String
|
745
|
+
# values. Any keywords present in the named attribute will be
|
746
|
+
# removed. If no keywords remain, or if +keywords+ is nil, the
|
747
|
+
# attribute is deleted.
|
748
|
+
#
|
749
|
+
# @return [Node] Returns +self+ for ease of chaining method calls.
|
750
|
+
#
|
751
|
+
# @example
|
752
|
+
# node # => <a rel="nofollow noreferrer">link</a>
|
753
|
+
# node.kwattr_remove("rel", "nofollow") # => <a rel="noreferrer">link</a>
|
754
|
+
# node.kwattr_remove("rel", "noreferrer") # => <a>link</a> # attribute is deleted when empty
|
755
|
+
#
|
756
|
+
# @since v1.11.0
|
757
|
+
#
|
758
|
+
def kwattr_remove(attribute_name, keywords)
|
759
|
+
if keywords.nil?
|
760
|
+
remove_attribute(attribute_name)
|
761
|
+
return self
|
762
|
+
end
|
763
|
+
|
764
|
+
keywords = keywordify(keywords)
|
765
|
+
current_kws = kwattr_values(attribute_name)
|
766
|
+
new_kws = current_kws - keywords
|
767
|
+
if new_kws.empty?
|
768
|
+
remove_attribute(attribute_name)
|
417
769
|
else
|
418
|
-
|
770
|
+
set_attribute(attribute_name, new_kws.join(" "))
|
419
771
|
end
|
420
772
|
self
|
421
773
|
end
|
422
774
|
|
423
|
-
###
|
424
|
-
# Remove the attribute named +name+
|
425
|
-
def remove_attribute name
|
426
|
-
attr = attributes[name].remove if key? name
|
427
|
-
clear_xpath_context if Nokogiri.jruby?
|
428
|
-
attr
|
429
|
-
end
|
430
775
|
alias :delete :remove_attribute
|
776
|
+
alias :get_attribute :[]
|
777
|
+
alias :attr :[]
|
778
|
+
alias :set_attribute :[]=
|
779
|
+
alias :has_attribute? :key?
|
780
|
+
|
781
|
+
# @!endgroup
|
431
782
|
|
432
783
|
###
|
433
784
|
# Returns true if this Node matches +selector+
|
434
|
-
def matches?
|
785
|
+
def matches?(selector)
|
435
786
|
ancestors.last.search(selector).include?(self)
|
436
787
|
end
|
437
788
|
|
438
789
|
###
|
439
790
|
# Create a DocumentFragment containing +tags+ that is relative to _this_
|
440
791
|
# context node.
|
441
|
-
def fragment
|
792
|
+
def fragment(tags)
|
442
793
|
type = document.html? ? Nokogiri::HTML : Nokogiri::XML
|
443
794
|
type::DocumentFragment.new(document, tags, self)
|
444
795
|
end
|
@@ -447,7 +798,7 @@ module Nokogiri
|
|
447
798
|
# Parse +string_or_io+ as a document fragment within the context of
|
448
799
|
# *this* node. Returns a XML::NodeSet containing the nodes parsed from
|
449
800
|
# +string_or_io+.
|
450
|
-
def parse
|
801
|
+
def parse(string_or_io, options = nil)
|
451
802
|
##
|
452
803
|
# When the current node is unparented and not an element node, use the
|
453
804
|
# document as the parsing context instead. Otherwise, the in-context
|
@@ -470,30 +821,34 @@ module Nokogiri
|
|
470
821
|
|
471
822
|
return Nokogiri::XML::NodeSet.new(document) if contents.empty?
|
472
823
|
|
473
|
-
|
474
|
-
#
|
824
|
+
# libxml2 does not obey the `recover` option after encountering errors during `in_context`
|
825
|
+
# parsing, and so this horrible hack is here to try to emulate recovery behavior.
|
826
|
+
#
|
827
|
+
# Unfortunately, this means we're no longer parsing "in context" and so namespaces that
|
828
|
+
# would have been inherited from the context node won't be handled correctly. This hack was
|
829
|
+
# written in 2010, and I regret it, because it's silently degrading functionality in a way
|
830
|
+
# that's not easily prevented (or even detected).
|
831
|
+
#
|
832
|
+
# I think preferable behavior would be to either:
|
833
|
+
#
|
834
|
+
# a. add an error noting that we "fell back" and pointing the user to turning off the `recover` option
|
835
|
+
# b. don't recover, but raise a sensible exception
|
836
|
+
#
|
837
|
+
# For context and background: https://github.com/sparklemotion/nokogiri/issues/313
|
838
|
+
# FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
|
475
839
|
error_count = document.errors.length
|
476
840
|
node_set = in_context(contents, options.to_i)
|
477
|
-
if node_set.empty?
|
478
|
-
|
479
|
-
|
841
|
+
if (node_set.empty? && (document.errors.length > error_count))
|
842
|
+
if options.recover?
|
843
|
+
fragment = Nokogiri::HTML::DocumentFragment.parse contents
|
844
|
+
node_set = fragment.children
|
845
|
+
else
|
846
|
+
raise document.errors[error_count]
|
847
|
+
end
|
480
848
|
end
|
481
849
|
node_set
|
482
850
|
end
|
483
851
|
|
484
|
-
####
|
485
|
-
# Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
|
486
|
-
def content= string
|
487
|
-
self.native_content = encode_special_chars(string.to_s)
|
488
|
-
end
|
489
|
-
|
490
|
-
###
|
491
|
-
# Set the parent Node for this Node
|
492
|
-
def parent= parent_node
|
493
|
-
parent_node.add_child(self)
|
494
|
-
parent_node
|
495
|
-
end
|
496
|
-
|
497
852
|
###
|
498
853
|
# Returns a Hash of +{prefix => value}+ for all namespaces on this
|
499
854
|
# node and its ancestors.
|
@@ -509,10 +864,11 @@ module Nokogiri
|
|
509
864
|
# default namespaces set on ancestor will NOT be, even if self
|
510
865
|
# has no explicit default namespace.
|
511
866
|
def namespaces
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
867
|
+
namespace_scopes.each_with_object({}) do |ns, hash|
|
868
|
+
prefix = ns.prefix
|
869
|
+
key = prefix ? "xmlns:#{prefix}" : "xmlns"
|
870
|
+
hash[key] = ns.href
|
871
|
+
end
|
516
872
|
end
|
517
873
|
|
518
874
|
# Returns true if this is a Comment
|
@@ -574,6 +930,7 @@ module Nokogiri
|
|
574
930
|
def element?
|
575
931
|
type == ELEMENT_NODE
|
576
932
|
end
|
933
|
+
|
577
934
|
alias :elem? :element?
|
578
935
|
|
579
936
|
###
|
@@ -584,7 +941,7 @@ module Nokogiri
|
|
584
941
|
end
|
585
942
|
|
586
943
|
# Get the inner_html for this node's Node#children
|
587
|
-
def inner_html
|
944
|
+
def inner_html(*args)
|
588
945
|
children.map { |x| x.to_html(*args) }.join
|
589
946
|
end
|
590
947
|
|
@@ -592,13 +949,13 @@ module Nokogiri
|
|
592
949
|
def css_path
|
593
950
|
path.split(/\//).map { |part|
|
594
951
|
part.length == 0 ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
|
595
|
-
}.compact.join(
|
952
|
+
}.compact.join(" > ")
|
596
953
|
end
|
597
954
|
|
598
955
|
###
|
599
956
|
# Get a list of ancestor Node for this Node. If +selector+ is given,
|
600
957
|
# the ancestors must match +selector+
|
601
|
-
def ancestors
|
958
|
+
def ancestors(selector = nil)
|
602
959
|
return NodeSet.new(document) unless respond_to?(:parent)
|
603
960
|
return NodeSet.new(document) unless parent
|
604
961
|
|
@@ -619,57 +976,38 @@ module Nokogiri
|
|
619
976
|
})
|
620
977
|
end
|
621
978
|
|
622
|
-
###
|
623
|
-
# Adds a default namespace supplied as a string +url+ href, to self.
|
624
|
-
# The consequence is as an xmlns attribute with supplied argument were
|
625
|
-
# present in parsed XML. A default namespace set with this method will
|
626
|
-
# now show up in #attributes, but when this node is serialized to XML an
|
627
|
-
# "xmlns" attribute will appear. See also #namespace and #namespace=
|
628
|
-
def default_namespace= url
|
629
|
-
add_namespace_definition(nil, url)
|
630
|
-
end
|
631
|
-
alias :add_namespace :add_namespace_definition
|
632
|
-
|
633
|
-
###
|
634
|
-
# Set the default namespace on this node (as would be defined with an
|
635
|
-
# "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
|
636
|
-
# a Namespace added this way will NOT be serialized as an xmlns attribute
|
637
|
-
# for this node. You probably want #default_namespace= instead, or perhaps
|
638
|
-
# #add_namespace_definition with a nil prefix argument.
|
639
|
-
def namespace= ns
|
640
|
-
return set_namespace(ns) unless ns
|
641
|
-
|
642
|
-
unless Nokogiri::XML::Namespace === ns
|
643
|
-
raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
|
644
|
-
end
|
645
|
-
if ns.document != document
|
646
|
-
raise ArgumentError, 'namespace must be declared on the same document'
|
647
|
-
end
|
648
|
-
|
649
|
-
set_namespace ns
|
650
|
-
end
|
651
|
-
|
652
979
|
####
|
653
980
|
# Yields self and all children to +block+ recursively.
|
654
|
-
def traverse
|
655
|
-
children.each{|j| j.traverse(&block) }
|
981
|
+
def traverse(&block)
|
982
|
+
children.each { |j| j.traverse(&block) }
|
656
983
|
block.call(self)
|
657
984
|
end
|
658
985
|
|
659
986
|
###
|
660
987
|
# Accept a visitor. This method calls "visit" on +visitor+ with self.
|
661
|
-
def accept
|
988
|
+
def accept(visitor)
|
662
989
|
visitor.visit(self)
|
663
990
|
end
|
664
991
|
|
665
992
|
###
|
666
993
|
# Test to see if this Node is equal to +other+
|
667
|
-
def ==
|
994
|
+
def ==(other)
|
668
995
|
return false unless other
|
669
996
|
return false unless other.respond_to?(:pointer_id)
|
670
997
|
pointer_id == other.pointer_id
|
671
998
|
end
|
672
999
|
|
1000
|
+
###
|
1001
|
+
# Compare two Node objects with respect to their Document. Nodes from
|
1002
|
+
# different documents cannot be compared.
|
1003
|
+
def <=>(other)
|
1004
|
+
return nil unless other.is_a?(Nokogiri::XML::Node)
|
1005
|
+
return nil unless document == other.document
|
1006
|
+
compare other
|
1007
|
+
end
|
1008
|
+
|
1009
|
+
# @!group Serialization and Generating Output
|
1010
|
+
|
673
1011
|
###
|
674
1012
|
# Serialize Node using +options+. Save options can also be set using a
|
675
1013
|
# block. See SaveOptions.
|
@@ -684,17 +1022,17 @@ module Nokogiri
|
|
684
1022
|
# config.format.as_xml
|
685
1023
|
# end
|
686
1024
|
#
|
687
|
-
def serialize
|
1025
|
+
def serialize(*args, &block)
|
688
1026
|
options = args.first.is_a?(Hash) ? args.shift : {
|
689
|
-
:encoding
|
690
|
-
:save_with
|
1027
|
+
:encoding => args[0],
|
1028
|
+
:save_with => args[1],
|
691
1029
|
}
|
692
1030
|
|
693
1031
|
encoding = options[:encoding] || document.encoding
|
694
1032
|
options[:encoding] = encoding
|
695
1033
|
|
696
1034
|
outstring = String.new
|
697
|
-
outstring.force_encoding(Encoding.find(encoding ||
|
1035
|
+
outstring.force_encoding(Encoding.find(encoding || "utf-8"))
|
698
1036
|
io = StringIO.new(outstring)
|
699
1037
|
write_to io, options, &block
|
700
1038
|
io.string
|
@@ -707,7 +1045,7 @@ module Nokogiri
|
|
707
1045
|
#
|
708
1046
|
# See Node#write_to for a list of +options+. For formatted output,
|
709
1047
|
# use Node#to_xhtml instead.
|
710
|
-
def to_html
|
1048
|
+
def to_html(options = {})
|
711
1049
|
to_format SaveOptions::DEFAULT_HTML, options
|
712
1050
|
end
|
713
1051
|
|
@@ -717,7 +1055,7 @@ module Nokogiri
|
|
717
1055
|
# doc.to_xml(:indent => 5, :encoding => 'UTF-8')
|
718
1056
|
#
|
719
1057
|
# See Node#write_to for a list of +options+
|
720
|
-
def to_xml
|
1058
|
+
def to_xml(options = {})
|
721
1059
|
options[:save_with] ||= SaveOptions::DEFAULT_XML
|
722
1060
|
serialize(options)
|
723
1061
|
end
|
@@ -728,7 +1066,7 @@ module Nokogiri
|
|
728
1066
|
# doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
|
729
1067
|
#
|
730
1068
|
# See Node#write_to for a list of +options+
|
731
|
-
def to_xhtml
|
1069
|
+
def to_xhtml(options = {})
|
732
1070
|
to_format SaveOptions::DEFAULT_XHTML, options
|
733
1071
|
end
|
734
1072
|
|
@@ -749,29 +1087,34 @@ module Nokogiri
|
|
749
1087
|
#
|
750
1088
|
# node.write_to(io, :indent_text => '-', :indent => 2)
|
751
1089
|
#
|
752
|
-
def write_to
|
753
|
-
options
|
754
|
-
encoding
|
1090
|
+
def write_to(io, *options)
|
1091
|
+
options = options.first.is_a?(Hash) ? options.shift : {}
|
1092
|
+
encoding = options[:encoding] || options[0]
|
755
1093
|
if Nokogiri.jruby?
|
756
|
-
save_options
|
757
|
-
indent_times
|
1094
|
+
save_options = options[:save_with] || options[1]
|
1095
|
+
indent_times = options[:indent] || 0
|
758
1096
|
else
|
759
|
-
save_options
|
760
|
-
indent_times
|
1097
|
+
save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
|
1098
|
+
indent_times = options[:indent] || 2
|
761
1099
|
end
|
762
|
-
indent_text
|
1100
|
+
indent_text = options[:indent_text] || " "
|
1101
|
+
|
1102
|
+
# Any string times 0 returns an empty string. Therefore, use the same
|
1103
|
+
# string instead of generating a new empty string for every node with
|
1104
|
+
# zero indentation.
|
1105
|
+
indentation = indent_times.zero? ? "" : (indent_text * indent_times)
|
763
1106
|
|
764
1107
|
config = SaveOptions.new(save_options.to_i)
|
765
1108
|
yield config if block_given?
|
766
1109
|
|
767
|
-
native_write_to(io, encoding,
|
1110
|
+
native_write_to(io, encoding, indentation, config.options)
|
768
1111
|
end
|
769
1112
|
|
770
1113
|
###
|
771
1114
|
# Write Node as HTML to +io+ with +options+
|
772
1115
|
#
|
773
1116
|
# See Node#write_to for a list of +options+
|
774
|
-
def write_html_to
|
1117
|
+
def write_html_to(io, options = {})
|
775
1118
|
write_format_to SaveOptions::DEFAULT_HTML, io, options
|
776
1119
|
end
|
777
1120
|
|
@@ -779,7 +1122,7 @@ module Nokogiri
|
|
779
1122
|
# Write Node as XHTML to +io+ with +options+
|
780
1123
|
#
|
781
1124
|
# See Node#write_to for a list of +options+
|
782
|
-
def write_xhtml_to
|
1125
|
+
def write_xhtml_to(io, options = {})
|
783
1126
|
write_format_to SaveOptions::DEFAULT_XHTML, io, options
|
784
1127
|
end
|
785
1128
|
|
@@ -789,52 +1132,66 @@ module Nokogiri
|
|
789
1132
|
# doc.write_xml_to io, :encoding => 'UTF-8'
|
790
1133
|
#
|
791
1134
|
# See Node#write_to for a list of options
|
792
|
-
def write_xml_to
|
1135
|
+
def write_xml_to(io, options = {})
|
793
1136
|
options[:save_with] ||= SaveOptions::DEFAULT_XML
|
794
1137
|
write_to io, options
|
795
1138
|
end
|
796
1139
|
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
compare other
|
1140
|
+
def canonicalize(mode = XML::XML_C14N_1_0, inclusive_namespaces = nil, with_comments = false)
|
1141
|
+
c14n_root = self
|
1142
|
+
document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
|
1143
|
+
tn = node.is_a?(XML::Node) ? node : parent
|
1144
|
+
tn == c14n_root || tn.ancestors.include?(c14n_root)
|
1145
|
+
end
|
804
1146
|
end
|
805
1147
|
|
806
|
-
|
807
|
-
# Do xinclude substitution on the subtree below node. If given a block, a
|
808
|
-
# Nokogiri::XML::ParseOptions object initialized from +options+, will be
|
809
|
-
# passed to it, allowing more convenient modification of the parser options.
|
810
|
-
def do_xinclude options = XML::ParseOptions::DEFAULT_XML
|
811
|
-
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
1148
|
+
# @!endgroup
|
812
1149
|
|
813
|
-
|
814
|
-
yield options if block_given?
|
1150
|
+
protected
|
815
1151
|
|
816
|
-
|
817
|
-
|
1152
|
+
def coerce(data)
|
1153
|
+
case data
|
1154
|
+
when XML::NodeSet
|
1155
|
+
return data
|
1156
|
+
when XML::DocumentFragment
|
1157
|
+
return data.children
|
1158
|
+
when String
|
1159
|
+
return fragment(data).children
|
1160
|
+
when Document, XML::Attr
|
1161
|
+
# unacceptable
|
1162
|
+
when XML::Node
|
1163
|
+
return data
|
1164
|
+
end
|
1165
|
+
|
1166
|
+
raise ArgumentError, <<-EOERR
|
1167
|
+
Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
|
1168
|
+
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
|
1169
|
+
EOERR
|
818
1170
|
end
|
819
1171
|
|
820
|
-
|
821
|
-
|
822
|
-
|
823
|
-
|
824
|
-
|
1172
|
+
private
|
1173
|
+
|
1174
|
+
def keywordify(keywords)
|
1175
|
+
case keywords
|
1176
|
+
when Enumerable
|
1177
|
+
return keywords
|
1178
|
+
when String
|
1179
|
+
return keywords.scan(/\S+/)
|
1180
|
+
else
|
1181
|
+
raise ArgumentError.new("Keyword attributes must be passed as either a String or an Enumerable, but received #{keywords.class}")
|
825
1182
|
end
|
826
1183
|
end
|
827
1184
|
|
828
|
-
|
1185
|
+
def add_sibling(next_or_previous, node_or_tags)
|
1186
|
+
raise("Cannot add sibling to a node with no parent") unless parent
|
829
1187
|
|
830
|
-
def add_sibling next_or_previous, node_or_tags
|
831
1188
|
impl = (next_or_previous == :next) ? :add_next_sibling_node : :add_previous_sibling_node
|
832
|
-
iter = (next_or_previous == :next) ? :reverse_each
|
1189
|
+
iter = (next_or_previous == :next) ? :reverse_each : :each
|
833
1190
|
|
834
|
-
node_or_tags = coerce
|
1191
|
+
node_or_tags = parent.coerce(node_or_tags)
|
835
1192
|
if node_or_tags.is_a?(XML::NodeSet)
|
836
1193
|
if text?
|
837
|
-
pivot = Nokogiri::XML::Node.new
|
1194
|
+
pivot = Nokogiri::XML::Node.new "dummy", document
|
838
1195
|
send impl, pivot
|
839
1196
|
else
|
840
1197
|
pivot = self
|
@@ -847,17 +1204,18 @@ module Nokogiri
|
|
847
1204
|
node_or_tags
|
848
1205
|
end
|
849
1206
|
|
850
|
-
|
851
|
-
|
852
|
-
|
1207
|
+
USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze
|
1208
|
+
private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
1209
|
+
|
1210
|
+
def to_format(save_option, options)
|
1211
|
+
return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
853
1212
|
|
854
1213
|
options[:save_with] = save_option unless options[:save_with]
|
855
1214
|
serialize(options)
|
856
1215
|
end
|
857
1216
|
|
858
|
-
def write_format_to
|
859
|
-
|
860
|
-
return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
1217
|
+
def write_format_to(save_option, io, options)
|
1218
|
+
return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
861
1219
|
|
862
1220
|
options[:save_with] ||= save_option
|
863
1221
|
write_to io, options
|
@@ -867,30 +1225,10 @@ module Nokogiri
|
|
867
1225
|
[:name, :namespace, :attribute_nodes, :children]
|
868
1226
|
end
|
869
1227
|
|
870
|
-
def coerce data # :nodoc:
|
871
|
-
case data
|
872
|
-
when XML::NodeSet
|
873
|
-
return data
|
874
|
-
when XML::DocumentFragment
|
875
|
-
return data.children
|
876
|
-
when String
|
877
|
-
return fragment(data).children
|
878
|
-
when Document, XML::Attr
|
879
|
-
# unacceptable
|
880
|
-
when XML::Node
|
881
|
-
return data
|
882
|
-
end
|
883
|
-
|
884
|
-
raise ArgumentError, <<-EOERR
|
885
|
-
Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
|
886
|
-
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
|
887
|
-
EOERR
|
888
|
-
end
|
889
|
-
|
890
1228
|
# @private
|
891
|
-
IMPLIED_XPATH_CONTEXTS = [
|
1229
|
+
IMPLIED_XPATH_CONTEXTS = [".//".freeze].freeze
|
892
1230
|
|
893
|
-
def add_child_node_and_reparent_attrs
|
1231
|
+
def add_child_node_and_reparent_attrs(node)
|
894
1232
|
add_child_node node
|
895
1233
|
node.attribute_nodes.find_all { |a| a.name =~ /:/ }.each do |attr_node|
|
896
1234
|
attr_node.remove
|