nokogiri 1.18.0.rc1-arm-linux-gnu
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +2224 -0
- data/LICENSE.md +9 -0
- data/README.md +293 -0
- data/bin/nokogiri +131 -0
- data/dependencies.yml +42 -0
- data/ext/nokogiri/depend +38 -0
- data/ext/nokogiri/extconf.rb +1173 -0
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/include/libexslt/exslt.h +108 -0
- data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
- data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
- data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
- data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
- data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
- data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
- data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
- data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
- data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
- data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
- data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
- data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
- data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
- data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
- data/ext/nokogiri/include/libxslt/attributes.h +39 -0
- data/ext/nokogiri/include/libxslt/documents.h +93 -0
- data/ext/nokogiri/include/libxslt/extensions.h +262 -0
- data/ext/nokogiri/include/libxslt/extra.h +72 -0
- data/ext/nokogiri/include/libxslt/functions.h +78 -0
- data/ext/nokogiri/include/libxslt/imports.h +75 -0
- data/ext/nokogiri/include/libxslt/keys.h +53 -0
- data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
- data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
- data/ext/nokogiri/include/libxslt/pattern.h +84 -0
- data/ext/nokogiri/include/libxslt/preproc.h +43 -0
- data/ext/nokogiri/include/libxslt/security.h +104 -0
- data/ext/nokogiri/include/libxslt/templates.h +77 -0
- data/ext/nokogiri/include/libxslt/transform.h +207 -0
- data/ext/nokogiri/include/libxslt/variables.h +118 -0
- data/ext/nokogiri/include/libxslt/xslt.h +110 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
- data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
- data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
- data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +294 -0
- data/ext/nokogiri/nokogiri.h +238 -0
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_cdata.c +62 -0
- data/ext/nokogiri/xml_comment.c +57 -0
- data/ext/nokogiri/xml_document.c +784 -0
- data/ext/nokogiri/xml_document_fragment.c +29 -0
- data/ext/nokogiri/xml_dtd.c +208 -0
- data/ext/nokogiri/xml_element_content.c +131 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_encoding_handler.c +112 -0
- data/ext/nokogiri/xml_entity_decl.c +112 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_namespace.c +181 -0
- data/ext/nokogiri/xml_node.c +2459 -0
- data/ext/nokogiri/xml_node_set.c +518 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_reader.c +777 -0
- data/ext/nokogiri/xml_relax_ng.c +149 -0
- data/ext/nokogiri/xml_sax_parser.c +403 -0
- data/ext/nokogiri/xml_sax_parser_context.c +390 -0
- data/ext/nokogiri/xml_sax_push_parser.c +206 -0
- data/ext/nokogiri/xml_schema.c +226 -0
- data/ext/nokogiri/xml_syntax_error.c +93 -0
- data/ext/nokogiri/xml_text.c +59 -0
- data/ext/nokogiri/xml_xpath_context.c +502 -0
- data/ext/nokogiri/xslt_stylesheet.c +421 -0
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/3.3/nokogiri.so +0 -0
- data/lib/nokogiri/3.4/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +58 -0
- data/lib/nokogiri/css/parser.rb +772 -0
- data/lib/nokogiri/css/parser.y +277 -0
- data/lib/nokogiri/css/parser_extras.rb +36 -0
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rb +155 -0
- data/lib/nokogiri/css/tokenizer.rex +57 -0
- data/lib/nokogiri/css/xpath_visitor.rb +375 -0
- data/lib/nokogiri/css.rb +132 -0
- data/lib/nokogiri/decorators/slop.rb +42 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +48 -0
- data/lib/nokogiri/html4/builder.rb +37 -0
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/html4/element_description.rb +25 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +15 -0
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +6 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +4 -0
- data/lib/nokogiri/xml/attr.rb +66 -0
- data/lib/nokogiri/xml/attribute_decl.rb +22 -0
- data/lib/nokogiri/xml/builder.rb +494 -0
- data/lib/nokogiri/xml/cdata.rb +13 -0
- data/lib/nokogiri/xml/character_data.rb +9 -0
- data/lib/nokogiri/xml/document.rb +514 -0
- data/lib/nokogiri/xml/document_fragment.rb +276 -0
- data/lib/nokogiri/xml/dtd.rb +34 -0
- data/lib/nokogiri/xml/element_content.rb +46 -0
- data/lib/nokogiri/xml/element_decl.rb +17 -0
- data/lib/nokogiri/xml/entity_decl.rb +23 -0
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +57 -0
- data/lib/nokogiri/xml/node/save_options.rb +76 -0
- data/lib/nokogiri/xml/node.rb +1650 -0
- data/lib/nokogiri/xml/node_set.rb +449 -0
- data/lib/nokogiri/xml/notation.rb +19 -0
- data/lib/nokogiri/xml/parse_options.rb +213 -0
- data/lib/nokogiri/xml/pp/character_data.rb +21 -0
- data/lib/nokogiri/xml/pp/node.rb +73 -0
- data/lib/nokogiri/xml/pp.rb +4 -0
- data/lib/nokogiri/xml/processing_instruction.rb +11 -0
- data/lib/nokogiri/xml/reader.rb +139 -0
- data/lib/nokogiri/xml/relax_ng.rb +75 -0
- data/lib/nokogiri/xml/sax/document.rb +258 -0
- data/lib/nokogiri/xml/sax/parser.rb +199 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
- data/lib/nokogiri/xml/sax.rb +54 -0
- data/lib/nokogiri/xml/schema.rb +140 -0
- data/lib/nokogiri/xml/searchable.rb +297 -0
- data/lib/nokogiri/xml/syntax_error.rb +94 -0
- data/lib/nokogiri/xml/text.rb +11 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
- data/lib/nokogiri/xml/xpath.rb +21 -0
- data/lib/nokogiri/xml/xpath_context.rb +49 -0
- data/lib/nokogiri/xml.rb +65 -0
- data/lib/nokogiri/xslt/stylesheet.rb +49 -0
- data/lib/nokogiri/xslt.rb +129 -0
- data/lib/nokogiri.rb +128 -0
- data/lib/xsd/xmlparser/nokogiri.rb +105 -0
- metadata +324 -0
@@ -0,0 +1,1650 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "stringio"
|
5
|
+
|
6
|
+
module Nokogiri
|
7
|
+
module XML
|
8
|
+
# Nokogiri::XML::Node is the primary API you'll use to interact with your Document.
|
9
|
+
#
|
10
|
+
# == Attributes
|
11
|
+
#
|
12
|
+
# A Nokogiri::XML::Node may be treated similarly to a hash with regard to attributes. For
|
13
|
+
# example:
|
14
|
+
#
|
15
|
+
# node = Nokogiri::XML::DocumentFragment.parse("<a href='#foo' id='link'>link</a>").at_css("a")
|
16
|
+
# node.to_html # => "<a href=\"#foo\" id=\"link\">link</a>"
|
17
|
+
# node['href'] # => "#foo"
|
18
|
+
# node.keys # => ["href", "id"]
|
19
|
+
# node.values # => ["#foo", "link"]
|
20
|
+
# node['class'] = 'green' # => "green"
|
21
|
+
# node.to_html # => "<a href=\"#foo\" id=\"link\" class=\"green\">link</a>"
|
22
|
+
#
|
23
|
+
# See the method group entitled Node@Working+With+Node+Attributes for the full set of methods.
|
24
|
+
#
|
25
|
+
# == Navigation
|
26
|
+
#
|
27
|
+
# Nokogiri::XML::Node also has methods that let you move around your tree:
|
28
|
+
#
|
29
|
+
# [#parent, #children, #next, #previous]
|
30
|
+
# Navigate up, down, or through siblings.
|
31
|
+
#
|
32
|
+
# See the method group entitled Node@Traversing+Document+Structure for the full set of methods.
|
33
|
+
#
|
34
|
+
# == Serialization
|
35
|
+
#
|
36
|
+
# When printing or otherwise emitting a document or a node (and its subtree), there are a few
|
37
|
+
# methods you might want to use:
|
38
|
+
#
|
39
|
+
# [#content, #text, #inner_text, #to_str]
|
40
|
+
# These methods will all **emit plaintext**,
|
41
|
+
# meaning that entities will be replaced (e.g., +&lt;+ will be replaced with +<+), meaning
|
42
|
+
# that any sanitizing will likely be un-done in the output.
|
43
|
+
#
|
44
|
+
# [#to_s, #to_xml, #to_html, #inner_html]
|
45
|
+
# These methods will all **emit properly-escaped markup**, meaning that it's suitable for
|
46
|
+
# consumption by browsers, parsers, etc.
|
47
|
+
#
|
48
|
+
# See the method group entitled Node@Serialization+and+Generating+Output for the full set of methods.
|
49
|
+
#
|
50
|
+
# == Searching
|
51
|
+
#
|
52
|
+
# You may search this node's subtree using methods like #xpath and #css.
|
53
|
+
#
|
54
|
+
# See the method group entitled Node@Searching+via+XPath+or+CSS+Queries for the full set of methods.
|
55
|
+
#
|
56
|
+
class Node
|
57
|
+
include Nokogiri::XML::PP::Node
|
58
|
+
include Nokogiri::XML::Searchable
|
59
|
+
include Nokogiri::ClassResolver
|
60
|
+
include Enumerable
|
61
|
+
|
62
|
+
# Node type: element. See Nokogiri::XML::Node#element?
ELEMENT_NODE = 1
# Node type: attribute.
ATTRIBUTE_NODE = 2
# Node type: text. See Nokogiri::XML::Node#text?
TEXT_NODE = 3
# Node type: CDATA section. See Nokogiri::XML::Node#cdata?
CDATA_SECTION_NODE = 4
# Node type: entity reference.
ENTITY_REF_NODE = 5
# Node type: entity.
ENTITY_NODE = 6
# Node type: PI.
PI_NODE = 7
# Node type: comment. See Nokogiri::XML::Node#comment?
COMMENT_NODE = 8
# Node type: document. See Nokogiri::XML::Node#xml?
DOCUMENT_NODE = 9
# Node type: document type.
DOCUMENT_TYPE_NODE = 10
# Node type: document fragment.
DOCUMENT_FRAG_NODE = 11
# Node type: notation.
NOTATION_NODE = 12
# Node type: HTML document. See Nokogiri::XML::Node#html?
HTML_DOCUMENT_NODE = 13
# Node type: DTD.
DTD_NODE = 14
# Declaration type: element declaration.
ELEMENT_DECL = 15
# Declaration type: attribute declaration.
ATTRIBUTE_DECL = 16
# Declaration type: entity declaration.
ENTITY_DECL = 17
# Declaration type: namespace declaration.
NAMESPACE_DECL = 18
# Marker type: XInclude start.
XINCLUDE_START = 19
# Marker type: XInclude end.
XINCLUDE_END = 20
# Node type: DOCB document.
DOCB_DOCUMENT_NODE = 21
|
104
|
+
|
105
|
+
#
|
106
|
+
# :call-seq:
|
107
|
+
# new(name, document) -> Nokogiri::XML::Node
|
108
|
+
# new(name, document) { |node| ... } -> Nokogiri::XML::Node
|
109
|
+
#
|
110
|
+
# Create a new node with +name+ that belongs to +document+.
|
111
|
+
#
|
112
|
+
# If you intend to add a node to a document tree, it's likely that you will prefer one of the
|
113
|
+
# Nokogiri::XML::Node methods like #add_child, #add_next_sibling, #replace, etc. which will
|
114
|
+
# both create an element (or subtree) and place it in the document tree.
|
115
|
+
#
|
116
|
+
# Another alternative, if you are concerned about performance, is
|
117
|
+
# Nokogiri::XML::Document#create_element which accepts additional arguments for contents or
|
118
|
+
# attributes but (like this method) avoids parsing markup.
|
119
|
+
#
|
120
|
+
# [Parameters]
|
121
|
+
# - +name+ (String)
|
122
|
+
# - +document+ (Nokogiri::XML::Document) The document to which the returned node will belong.
|
123
|
+
# [Yields] Nokogiri::XML::Node
|
124
|
+
# [Returns] Nokogiri::XML::Node
|
125
|
+
#
|
126
|
+
def initialize(name, document)
  # Deliberately a no-op. The definition exists only to establish the
  # (name, document) method signature for subclasses.
end
|
129
|
+
|
130
|
+
#
|
131
|
+
# :call-seq:
|
132
|
+
# dup → Nokogiri::XML::Node
|
133
|
+
# dup(level) → Nokogiri::XML::Node
|
134
|
+
# dup(level, new_parent_doc) → Nokogiri::XML::Node
|
135
|
+
#
|
136
|
+
# Duplicate this node.
|
137
|
+
#
|
138
|
+
# [Parameters]
|
139
|
+
# - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
|
140
|
+
# - +new_parent_doc+ (optional Nokogiri::XML::Document)
|
141
|
+
# The new node's parent Document. Defaults to the Document of the current node.
|
142
|
+
# [Returns] The new Nokogiri::XML::Node
|
143
|
+
#
|
144
|
+
def dup(level = 1, new_parent_doc = document)
  # super() is called with explicit empty parentheses so that +level+ and
  # +new_parent_doc+ are not forwarded to the inherited #dup.
  duplicate = super()
  duplicate.initialize_copy_with_args(self, level, new_parent_doc)
end
|
147
|
+
|
148
|
+
#
|
149
|
+
# :call-seq:
|
150
|
+
# clone → Nokogiri::XML::Node
|
151
|
+
# clone(level) → Nokogiri::XML::Node
|
152
|
+
# clone(level, new_parent_doc) → Nokogiri::XML::Node
|
153
|
+
#
|
154
|
+
# Clone this node.
|
155
|
+
#
|
156
|
+
# [Parameters]
|
157
|
+
# - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
|
158
|
+
# - +new_parent_doc+ (optional Nokogiri::XML::Document)
|
159
|
+
# The new node's parent Document. Defaults to the Document of the current node.
|
160
|
+
# [Returns] The new Nokogiri::XML::Node
|
161
|
+
#
|
162
|
+
def clone(level = 1, new_parent_doc = document)
  # super() is called with explicit empty parentheses so that +level+ and
  # +new_parent_doc+ are not forwarded to the inherited #clone.
  cloned = super()
  cloned.initialize_copy_with_args(self, level, new_parent_doc)
end
|
165
|
+
|
166
|
+
###
|
167
|
+
# Decorate this node with the decorators set up in this node's Document
|
168
|
+
def decorate!
  # Delegates to the owning document, passing this node as the object to decorate.
  owner = document
  owner.decorate(self)
end
|
171
|
+
|
172
|
+
# :section: Manipulating Document Structure
|
173
|
+
|
174
|
+
###
|
175
|
+
# Add +node_or_tags+ as a child of this Node.
|
176
|
+
#
|
177
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
|
178
|
+
# containing markup.
|
179
|
+
#
|
180
|
+
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
|
181
|
+
# a DocumentFragment, NodeSet, or String).
|
182
|
+
#
|
183
|
+
# Also see related method +<<+.
|
184
|
+
def add_child(node_or_tags)
  coerced = coerce(node_or_tags)

  # Anything other than a NodeSet is attached as a single node.
  unless coerced.is_a?(XML::NodeSet)
    add_child_node_and_reparent_attrs(coerced)
    return coerced
  end

  # A NodeSet is attached member by member, in iteration order.
  coerced.each { |member| add_child_node_and_reparent_attrs(member) }
  coerced
end
|
193
|
+
|
194
|
+
###
|
195
|
+
# Add +node_or_tags+ as the first child of this Node.
|
196
|
+
#
|
197
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
|
198
|
+
# containing markup.
|
199
|
+
#
|
200
|
+
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
|
201
|
+
# a DocumentFragment, NodeSet, or String).
|
202
|
+
#
|
203
|
+
# Also see related method +add_child+.
|
204
|
+
def prepend_child(node_or_tags)
  leading_child = children.first

  # With no existing children, prepending is the same as appending.
  return add_child(node_or_tags) unless leading_child

  # Mimic the error add_child would raise.
  if document? && !node_or_tags.comment? && !node_or_tags.processing_instruction?
    raise "Document already has a root node"
  end

  # add_sibling is invoked through __send__, as in the original implementation.
  leading_child.__send__(:add_sibling, :previous, node_or_tags)
end
|
214
|
+
|
215
|
+
# :call-seq:
|
216
|
+
# wrap(markup) -> self
|
217
|
+
# wrap(node) -> self
|
218
|
+
#
|
219
|
+
# Wrap this Node with the node parsed from +markup+ or a dup of the +node+.
|
220
|
+
#
|
221
|
+
# [Parameters]
|
222
|
+
# - *markup* (String)
|
223
|
+
# Markup that is parsed and used as the wrapper. This node's parent, if it exists, is used
|
224
|
+
# as the context node for parsing; otherwise the associated document is used. If the parsed
|
225
|
+
# fragment has multiple roots, the first root node is used as the wrapper.
|
226
|
+
# - *node* (Nokogiri::XML::Node)
|
227
|
+
# An element that is `#dup`ed and used as the wrapper.
|
228
|
+
#
|
229
|
+
# [Returns] +self+, to support chaining.
|
230
|
+
#
|
231
|
+
# Also see NodeSet#wrap
|
232
|
+
#
|
233
|
+
# *Example* with a +String+ argument:
|
234
|
+
#
|
235
|
+
# doc = Nokogiri::HTML5(<<~HTML)
|
236
|
+
# <html><body>
|
237
|
+
# <a>asdf</a>
|
238
|
+
# </body></html>
|
239
|
+
# HTML
|
240
|
+
# doc.at_css("a").wrap("<div></div>")
|
241
|
+
# doc.to_html
|
242
|
+
# # => <html><head></head><body>
|
243
|
+
# # <div><a>asdf</a></div>
|
244
|
+
# # </body></html>
|
245
|
+
#
|
246
|
+
# *Example* with a +Node+ argument:
|
247
|
+
#
|
248
|
+
# doc = Nokogiri::HTML5(<<~HTML)
|
249
|
+
# <html><body>
|
250
|
+
# <a>asdf</a>
|
251
|
+
# </body></html>
|
252
|
+
# HTML
|
253
|
+
# doc.at_css("a").wrap(doc.create_element("div"))
|
254
|
+
# doc.to_html
|
255
|
+
# # => <html><head></head><body>
|
256
|
+
# # <div><a>asdf</a></div>
|
257
|
+
# # </body></html>
|
258
|
+
#
|
259
|
+
def wrap(node_or_tags)
  wrapper =
    case node_or_tags
    when String
      # Markup is parsed in the context of the parent if there is one, else the document.
      parse_context = parent || document
      parsed = parse_context.coerce(node_or_tags).first
      if parsed.nil?
        raise "Failed to parse '#{node_or_tags}' in the context of a '#{parse_context.name}' element"
      end
      parsed
    when Node
      # A Node argument is never used directly; a dup of it becomes the wrapper.
      node_or_tags.dup
    else
      raise ArgumentError, "Requires a String or Node argument, and cannot accept a #{node_or_tags.class}"
    end

  # Put the wrapper next to this node when this node has a parent; otherwise unlink the wrapper.
  parent ? add_next_sibling(wrapper) : wrapper.unlink

  # Finally move this node inside the wrapper.
  wrapper.add_child(self)
  self
end
|
282
|
+
|
283
|
+
###
|
284
|
+
# Add +node_or_tags+ as a child of this Node.
|
285
|
+
#
|
286
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
|
287
|
+
# containing markup.
|
288
|
+
#
|
289
|
+
# Returns +self+, to support chaining of calls (e.g., root << child1 << child2)
|
290
|
+
#
|
291
|
+
# Also see related method +add_child+.
|
292
|
+
def <<(node_or_tags)
  # Same work as #add_child, but the receiver is returned so calls can be chained.
  tap { add_child(node_or_tags) }
end
|
296
|
+
|
297
|
+
###
|
298
|
+
# Insert +node_or_tags+ before this Node (as a sibling).
|
299
|
+
#
|
300
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
|
301
|
+
# containing markup.
|
302
|
+
#
|
303
|
+
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
|
304
|
+
# a DocumentFragment, NodeSet, or String).
|
305
|
+
#
|
306
|
+
# Also see related method +before+.
|
307
|
+
def add_previous_sibling(node_or_tags)
  # When the parent is a document, only a comment or processing instruction may be added
  # as a sibling.
  if parent&.document? && !node_or_tags.comment? && !node_or_tags.processing_instruction?
    raise ArgumentError, "A document may not have multiple root nodes."
  end

  add_sibling(:previous, node_or_tags)
end
|
313
|
+
|
314
|
+
###
|
315
|
+
# Insert +node_or_tags+ after this Node (as a sibling).
|
316
|
+
#
|
317
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
|
318
|
+
# containing markup.
|
319
|
+
#
|
320
|
+
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
|
321
|
+
# a DocumentFragment, NodeSet, or String).
|
322
|
+
#
|
323
|
+
# Also see related method +after+.
|
324
|
+
def add_next_sibling(node_or_tags)
  # When the parent is a document, only a comment or processing instruction may be added
  # as a sibling.
  if parent&.document? && !node_or_tags.comment? && !node_or_tags.processing_instruction?
    raise ArgumentError, "A document may not have multiple root nodes."
  end

  add_sibling(:next, node_or_tags)
end
|
330
|
+
|
331
|
+
####
|
332
|
+
# Insert +node_or_tags+ before this node (as a sibling).
|
333
|
+
#
|
334
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
|
335
|
+
# containing markup.
|
336
|
+
#
|
337
|
+
# Returns +self+, to support chaining of calls.
|
338
|
+
#
|
339
|
+
# Also see related method +add_previous_sibling+.
|
340
|
+
def before(node_or_tags)
  # Same work as #add_previous_sibling, but the receiver is returned for chaining.
  tap { add_previous_sibling(node_or_tags) }
end
|
344
|
+
|
345
|
+
####
|
346
|
+
# Insert +node_or_tags+ after this node (as a sibling).
|
347
|
+
#
|
348
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a String
|
349
|
+
# containing markup.
|
350
|
+
#
|
351
|
+
# Returns +self+, to support chaining of calls.
|
352
|
+
#
|
353
|
+
# Also see related method +add_next_sibling+.
|
354
|
+
def after(node_or_tags)
  # Same work as #add_next_sibling, but the receiver is returned for chaining.
  tap { add_next_sibling(node_or_tags) }
end
|
358
|
+
|
359
|
+
####
|
360
|
+
# Set the content for this Node to +node_or_tags+.
|
361
|
+
#
|
362
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a String
|
363
|
+
# containing markup.
|
364
|
+
#
|
365
|
+
# ⚠ Please note that despite the name, this method will *not* always parse a String argument
|
366
|
+
# as HTML. A String argument will be parsed with the +DocumentFragment+ parser related to this
|
367
|
+
# node's document.
|
368
|
+
#
|
369
|
+
# For example, if the document is an HTML4::Document then the string will be parsed as HTML4
|
370
|
+
# using HTML4::DocumentFragment; but if the document is an XML::Document then it will
|
371
|
+
# parse the string as XML using XML::DocumentFragment.
|
372
|
+
#
|
373
|
+
# Also see related method +children=+
|
374
|
+
def inner_html=(node_or_tags)
  # Pure delegation: the argument is handed to #children= unchanged.
  self.children = node_or_tags
end
|
377
|
+
|
378
|
+
####
|
379
|
+
# Set the content for this Node to +node_or_tags+.
|
380
|
+
#
|
381
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a String
|
382
|
+
# containing markup.
|
383
|
+
#
|
384
|
+
# Also see related method +inner_html=+
|
385
|
+
def children=(node_or_tags)
  # The argument is coerced before the existing children are unlinked.
  replacement = coerce(node_or_tags)
  children.unlink

  # Anything other than a NodeSet is attached as a single node.
  return add_child_node_and_reparent_attrs(replacement) unless replacement.is_a?(XML::NodeSet)

  # A NodeSet is attached member by member, in iteration order.
  replacement.each { |member| add_child_node_and_reparent_attrs(member) }
end
|
394
|
+
|
395
|
+
####
|
396
|
+
# Replace this Node with +node_or_tags+.
|
397
|
+
#
|
398
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
|
399
|
+
# containing markup.
|
400
|
+
#
|
401
|
+
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
|
402
|
+
# a DocumentFragment, NodeSet, or String).
|
403
|
+
#
|
404
|
+
# Also see related method +swap+.
|
405
|
+
def replace(node_or_tags)
  raise("Cannot replace a node with no parent") unless parent

  if text?
    # A text node is not replaced directly: doing so makes libxml return an internal
    # error at parser.c:13031 (libxml looks for a parent node that is an element or
    # document; it is unclear whether that is a libxml bug). See issue #775.
    # Instead, a throwaway element is inserted before this node, this node is unlinked,
    # and the throwaway element is replaced.
    placeholder = Nokogiri::XML::Node.new("dummy", document)
    add_previous_sibling_node(placeholder)
    unlink
    return placeholder.replace(node_or_tags)
  end

  # Coercion is done by the parent, not by this node.
  replacement = parent.coerce(node_or_tags)

  unless replacement.is_a?(XML::NodeSet)
    replace_node(replacement)
    return replacement
  end

  # For a NodeSet: insert every member before this node, then remove this node.
  replacement.each { |member| add_previous_sibling(member) }
  unlink
  replacement
end
|
429
|
+
|
430
|
+
####
|
431
|
+
# Swap this Node for +node_or_tags+
|
432
|
+
#
|
433
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
|
434
|
+
# containing markup.
|
435
|
+
#
|
436
|
+
# Returns self, to support chaining of calls.
|
437
|
+
#
|
438
|
+
# Also see related method +replace+.
|
439
|
+
def swap(node_or_tags)
  # Same work as #replace, but the receiver is returned for chaining.
  tap { replace(node_or_tags) }
end
|
443
|
+
|
444
|
+
####
|
445
|
+
# call-seq:
|
446
|
+
# content=(input)
|
447
|
+
#
|
448
|
+
# Set the content of this node to +input+.
|
449
|
+
#
|
450
|
+
# [Parameters]
|
451
|
+
# - +input+ (String) The new content for this node. Input is considered to be raw content, and
|
452
|
+
# so will be entity-escaped in the final DOM string.
|
453
|
+
#
|
454
|
+
# [Example]
|
455
|
+
# Note how entities are handled:
|
456
|
+
#
|
457
|
+
# doc = Nokogiri::HTML::Document.parse(<<~HTML)
|
458
|
+
# <html>
|
459
|
+
# <body>
|
460
|
+
# <div id="first">asdf</div>
|
461
|
+
# <div id="second">asdf</div>
|
462
|
+
# HTML
|
463
|
+
#
|
464
|
+
# text_node = doc.at_css("div#first").children.first
|
465
|
+
# div_node = doc.at_css("div#second")
|
466
|
+
#
|
467
|
+
# value = "You &amp; Me"
|
468
|
+
#
|
469
|
+
# text_node.content = value
|
470
|
+
# div_node.content = value
|
471
|
+
#
|
472
|
+
# doc.css("div").to_html
|
473
|
+
# # => "<div id=\"first\">You &amp;amp; Me</div>
|
474
|
+
# # <div id=\"second\">You &amp;amp; Me</div>"
|
475
|
+
#
|
476
|
+
# For content that is already entity-escaped, use CGI::unescapeHTML to decode it:
|
477
|
+
#
|
478
|
+
# text_node.content = CGI::unescapeHTML(value)
|
479
|
+
# div_node.content = CGI::unescapeHTML(value)
|
480
|
+
#
|
481
|
+
# doc.css("div").to_html
|
482
|
+
# # => "<div id=\"first\">You &amp; Me</div>
|
483
|
+
# # <div id=\"second\">You &amp; Me</div>"
|
484
|
+
#
|
485
|
+
# See also: #native_content=
|
486
|
+
#
|
487
|
+
def content=(string)
  # The input is stringified, passed through encode_special_chars, and the result is
  # stored via #native_content=.
  escaped = encode_special_chars(string.to_s)
  self.native_content = escaped
end
|
490
|
+
|
491
|
+
###
|
492
|
+
# Set the parent Node for this Node
|
493
|
+
def parent=(parent_node)
  # Reparenting is expressed from the other side: the new parent adds this node as a child.
  parent_node.add_child(self)
end
|
496
|
+
|
497
|
+
###
|
498
|
+
# Adds a default namespace supplied as a string +url+ href, to self.
|
499
|
+
# The consequence is as if an xmlns attribute with supplied argument were
|
500
|
+
# present in parsed XML. A default namespace set with this method will
|
501
|
+
# not show up in #attributes, but when this node is serialized to XML an
|
502
|
+
# "xmlns" attribute will appear. See also #namespace and #namespace=
|
503
|
+
def default_namespace=(url)
  # A nil prefix is passed together with +url+ as the href.
  add_namespace_definition(nil, url)
end
|
506
|
+
|
507
|
+
###
|
508
|
+
# Set the default namespace on this node (as would be defined with an
|
509
|
+
# "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
|
510
|
+
# a Namespace added this way will NOT be serialized as an xmlns attribute
|
511
|
+
# for this node. You probably want #default_namespace= instead, or perhaps
|
512
|
+
# #add_namespace_definition with a nil prefix argument.
|
513
|
+
def namespace=(ns)
  # A nil/false +ns+ skips validation and is passed straight to set_namespace.
  if ns
    unless Nokogiri::XML::Namespace === ns
      raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
    end

    # The namespace must belong to the same document as this node.
    raise ArgumentError, "namespace must be declared on the same document" if ns.document != document
  end

  set_namespace(ns)
end
|
525
|
+
|
526
|
+
###
|
527
|
+
# Do xinclude substitution on the subtree below node. If given a block, a
|
528
|
+
# Nokogiri::XML::ParseOptions object initialized from +options+, will be
|
529
|
+
# passed to it, allowing more convenient modification of the parser options.
|
530
|
+
def do_xinclude(options = XML::ParseOptions::DEFAULT_XML)
  # An Integer is wrapped in a ParseOptions object; any other value is used as given.
  parse_options = Integer === options ? Nokogiri::XML::ParseOptions.new(options) : options

  # An optional block receives the options object before it is used.
  yield(parse_options) if block_given?

  # call c extension
  process_xincludes(parse_options.to_i)
end
|
537
|
+
|
538
|
+
alias_method :next, :next_sibling
alias_method :previous, :previous_sibling
alias_method :next=, :add_next_sibling
alias_method :previous=, :add_previous_sibling
alias_method :remove, :unlink
alias_method :name=, :node_name=
alias_method :add_namespace, :add_namespace_definition

# :section:

alias_method :inner_text, :content
alias_method :text, :content
alias_method :to_str, :content
alias_method :name, :node_name
alias_method :type, :node_type
alias_method :elements, :element_children
|
554
|
+
|
555
|
+
# :section: Working With Node Attributes
|
556
|
+
|
557
|
+
# :call-seq: [](name) → (String, nil)
|
558
|
+
#
|
559
|
+
# Fetch an attribute from this node.
|
560
|
+
#
|
561
|
+
# ⚠ Note that attributes with namespaces cannot be accessed with this method. To access
|
562
|
+
# namespaced attributes, use #attribute_with_ns.
|
563
|
+
#
|
564
|
+
# [Returns] (String, nil) value of the attribute +name+, or +nil+ if no matching attribute exists
|
565
|
+
#
|
566
|
+
# *Example*
|
567
|
+
#
|
568
|
+
# doc = Nokogiri::XML("<root><child size='large' class='big wide tall'/></root>")
|
569
|
+
# child = doc.at_css("child")
|
570
|
+
# child["size"] # => "large"
|
571
|
+
# child["class"] # => "big wide tall"
|
572
|
+
#
|
573
|
+
# *Example:* Namespaced attributes will not be returned.
|
574
|
+
#
|
575
|
+
# ⚠ Note namespaced attributes may be accessed with #attribute or #attribute_with_ns
|
576
|
+
#
|
577
|
+
# doc = Nokogiri::XML(<<~EOF)
|
578
|
+
# <root xmlns:width='http://example.com/widths'>
|
579
|
+
# <child width:size='broad'/>
|
580
|
+
# </root>
|
581
|
+
# EOF
|
582
|
+
# doc.at_css("child")["size"] # => nil
|
583
|
+
# doc.at_css("child").attribute("size").value # => "broad"
|
584
|
+
# doc.at_css("child").attribute_with_ns("size", "http://example.com/widths").value
|
585
|
+
# # => "broad"
|
586
|
+
#
|
587
|
+
def [](name)
  # The attribute name is stringified first, so Symbols and other objects are accepted.
  attribute_key = name.to_s
  get(attribute_key)
end
|
590
|
+
|
591
|
+
# :call-seq: []=(name, value) → value
|
592
|
+
#
|
593
|
+
# Update the attribute +name+ to +value+, or create the attribute if it does not exist.
|
594
|
+
#
|
595
|
+
# ⚠ Note that attributes with namespaces cannot be accessed with this method. To access
|
596
|
+
# namespaced attributes for update, use #attribute_with_ns. To add a namespaced attribute,
|
597
|
+
# see the example below.
|
598
|
+
#
|
599
|
+
# [Returns] +value+
|
600
|
+
#
|
601
|
+
# *Example*
|
602
|
+
#
|
603
|
+
# doc = Nokogiri::XML("<root><child/></root>")
|
604
|
+
# child = doc.at_css("child")
|
605
|
+
# child["size"] = "broad"
|
606
|
+
# child.to_html
|
607
|
+
# # => "<child size=\"broad\"></child>"
|
608
|
+
#
|
609
|
+
# *Example:* Add a namespaced attribute.
|
610
|
+
#
|
611
|
+
# doc = Nokogiri::XML(<<~EOF)
|
612
|
+
# <root xmlns:width='http://example.com/widths'>
|
613
|
+
# <child/>
|
614
|
+
# </root>
|
615
|
+
# EOF
|
616
|
+
# child = doc.at_css("child")
|
617
|
+
# child["size"] = "broad"
|
618
|
+
# ns = doc.root.namespace_definitions.find { |ns| ns.prefix == "width" }
|
619
|
+
# child.attribute("size").namespace = ns
|
620
|
+
# doc.to_html
|
621
|
+
# # => "<root xmlns:width=\"http://example.com/widths\">\n" +
|
622
|
+
# # " <child width:size=\"broad\"></child>\n" +
|
623
|
+
# # "</root>\n"
|
624
|
+
#
|
625
|
+
def []=(name, value)
  # Both the attribute name and the value are stringified before being handed to #set.
  attribute_key = name.to_s
  attribute_value = value.to_s
  set(attribute_key, attribute_value)
end
|
628
|
+
|
629
|
+
#
|
630
|
+
# :call-seq: attributes() → Hash<String ⇒ Nokogiri::XML::Attr>
|
631
|
+
#
|
632
|
+
# Fetch this node's attributes.
|
633
|
+
#
|
634
|
+
# ⚠ Because the keys do not include any namespace information for the attribute, in case of a
|
635
|
+
# simple name collision, not all attributes will be returned. In this case, you will need to
|
636
|
+
# use #attribute_nodes.
|
637
|
+
#
|
638
|
+
# [Returns]
|
639
|
+
# Hash containing attributes belonging to +self+. The hash keys are String attribute
|
640
|
+
# names (without the namespace), and the hash values are Nokogiri::XML::Attr.
|
641
|
+
#
|
642
|
+
# *Example* with no namespaces:
|
643
|
+
#
|
644
|
+
# doc = Nokogiri::XML("<root><child size='large' class='big wide tall'/></root>")
|
645
|
+
# doc.at_css("child").attributes
|
646
|
+
# # => {"size"=>#(Attr:0x550 { name = "size", value = "large" }),
|
647
|
+
# # "class"=>#(Attr:0x564 { name = "class", value = "big wide tall" })}
|
648
|
+
#
|
649
|
+
# *Example* with a namespace:
|
650
|
+
#
|
651
|
+
# doc = Nokogiri::XML("<root xmlns:desc='http://example.com/sizes'><child desc:size='large'/></root>")
|
652
|
+
# doc.at_css("child").attributes
|
653
|
+
# # => {"size"=>
|
654
|
+
# # #(Attr:0x550 {
|
655
|
+
# # name = "size",
|
656
|
+
# # namespace = #(Namespace:0x564 {
|
657
|
+
# # prefix = "desc",
|
658
|
+
# # href = "http://example.com/sizes"
|
659
|
+
# # }),
|
660
|
+
# # value = "large"
|
661
|
+
# # })}
|
662
|
+
#
|
663
|
+
# *Example* with an attribute name collision:
|
664
|
+
#
|
665
|
+
# ⚠ Note that only one of the attributes is returned in the Hash.
|
666
|
+
#
|
667
|
+
# doc = Nokogiri::XML(<<~EOF)
|
668
|
+
# <root xmlns:width='http://example.com/widths'
|
669
|
+
# xmlns:height='http://example.com/heights'>
|
670
|
+
# <child width:size='broad' height:size='tall'/>
|
671
|
+
# </root>
|
672
|
+
# EOF
|
673
|
+
# doc.at_css("child").attributes
|
674
|
+
# # => {"size"=>
|
675
|
+
# # #(Attr:0x550 {
|
676
|
+
# # name = "size",
|
677
|
+
# # namespace = #(Namespace:0x564 {
|
678
|
+
# # prefix = "height",
|
679
|
+
# # href = "http://example.com/heights"
|
680
|
+
# # }),
|
681
|
+
# # value = "tall"
|
682
|
+
# # })}
|
683
|
+
#
|
684
|
+
def attributes
  # Keyed by node_name only, so a later attribute replaces an earlier one with the same name.
  attribute_nodes.to_h { |attr_node| [attr_node.node_name, attr_node] }
end
|
689
|
+
|
690
|
+
###
|
691
|
+
# Get the attribute values for this Node.
|
692
|
+
def values
  # One entry per attribute node, in attribute_nodes order.
  attribute_nodes.map { |attr_node| attr_node.value }
end
|
695
|
+
|
696
|
+
###
|
697
|
+
# Do this Node's attribute values include +value+?
|
698
|
+
def value?(value)
  # True when any attribute value of this node equals +value+.
  values.any? { |attribute_value| attribute_value == value }
end
|
701
|
+
|
702
|
+
###
|
703
|
+
# Get the attribute names for this Node.
|
704
|
+
def keys
  # One name per attribute node, in attribute_nodes order.
  attribute_nodes.map { |attr_node| attr_node.node_name }
end
|
707
|
+
|
708
|
+
###
|
709
|
+
# Iterate over each attribute name and value pair for this Node.
|
710
|
+
def each
  # Without a block, return an Enumerator (the usual #each convention) instead of
  # hitting +yield+ with no block and raising LocalJumpError.
  return to_enum(:each) unless block_given?

  # Each attribute is yielded as a two-element [name, value] Array.
  attribute_nodes.each do |node|
    yield [node.node_name, node.value]
  end
end
|
715
|
+
|
716
|
+
###
|
717
|
+
# Remove the attribute named +name+
|
718
|
+
def remove_attribute(name)
  # Result is whatever Attr#remove returns, or nil when no such attribute exists.
  removed = key?(name) ? attributes[name].remove : nil
  # On JRuby the XPath context is cleared after the attribute set may have changed.
  clear_xpath_context if Nokogiri.jruby?
  removed
end
|
723
|
+
|
724
|
+
#
|
725
|
+
# :call-seq: classes() → Array<String>
|
726
|
+
#
|
727
|
+
# Fetch CSS class names of a Node.
|
728
|
+
#
|
729
|
+
# This is a convenience function and is equivalent to:
|
730
|
+
#
|
731
|
+
# node.kwattr_values("class")
|
732
|
+
#
|
733
|
+
# See related: #kwattr_values, #add_class, #append_class, #remove_class
|
734
|
+
#
|
735
|
+
# [Returns]
|
736
|
+
# The CSS classes (Array of String) present in the Node's "class" attribute. If the
|
737
|
+
# attribute is empty or non-existent, the return value is an empty array.
|
738
|
+
#
|
739
|
+
# *Example*
|
740
|
+
#
|
741
|
+
# node # => <div class="section title header"></div>
|
742
|
+
# node.classes # => ["section", "title", "header"]
|
743
|
+
#
|
744
|
+
def classes
  # The CSS class list is the keyword list of the "class" attribute.
  class_attribute = "class"
  kwattr_values(class_attribute)
end
|
747
|
+
|
748
|
+
#
|
749
|
+
# :call-seq: add_class(names) → self
|
750
|
+
#
|
751
|
+
# Ensure HTML CSS classes are present on +self+. Any CSS classes in +names+ that already exist
|
752
|
+
# in the "class" attribute are _not_ added. Note that any existing duplicates in the
|
753
|
+
# "class" attribute are not removed. Compare with #append_class.
|
754
|
+
#
|
755
|
+
# This is a convenience function and is equivalent to:
|
756
|
+
#
|
757
|
+
# node.kwattr_add("class", names)
|
758
|
+
#
|
759
|
+
# See related: #kwattr_add, #classes, #append_class, #remove_class
|
760
|
+
#
|
761
|
+
# [Parameters]
|
762
|
+
# - +names+ (String, Array<String>)
|
763
|
+
#
|
764
|
+
# CSS class names to be added to the Node's "class" attribute. May be a string containing
|
765
|
+
# whitespace-delimited names, or an Array of String names. Any class names already present
|
766
|
+
# will not be added. Any class names not present will be added. If no "class" attribute
|
767
|
+
# exists, one is created.
|
768
|
+
#
|
769
|
+
# [Returns] +self+ (Node) for ease of chaining method calls.
|
770
|
+
#
|
771
|
+
# *Example:* Ensure that the node has CSS class "section"
|
772
|
+
#
|
773
|
+
# node # => <div></div>
|
774
|
+
# node.add_class("section") # => <div class="section"></div>
|
775
|
+
# node.add_class("section") # => <div class="section"></div> # duplicate not added
|
776
|
+
#
|
777
|
+
# *Example:* Ensure that the node has CSS classes "section" and "header", via a String argument
|
778
|
+
#
|
779
|
+
# Note that the CSS class "section" is not added because it is already present.
|
780
|
+
# Note also that the pre-existing duplicate CSS class "section" is not removed.
|
781
|
+
#
|
782
|
+
# node # => <div class="section section"></div>
|
783
|
+
# node.add_class("section header") # => <div class="section section header"></div>
|
784
|
+
#
|
785
|
+
# *Example:* Ensure that the node has CSS classes "section" and "header", via an Array argument
|
786
|
+
#
|
787
|
+
# node # => <div></div>
|
788
|
+
# node.add_class(["section", "header"]) # => <div class="section header"></div>
|
789
|
+
#
|
790
|
+
def add_class(names)
  # Delegates to the generic keyword-attribute helper for the "class" attribute.
  class_attribute = "class"
  kwattr_add(class_attribute, names)
end
|
793
|
+
|
794
|
+
#
|
795
|
+
# :call-seq: append_class(names) → self
|
796
|
+
#
|
797
|
+
# Add HTML CSS classes to +self+, regardless of duplication. Compare with #add_class.
|
798
|
+
#
|
799
|
+
# This is a convenience function and is equivalent to:
|
800
|
+
#
|
801
|
+
# node.kwattr_append("class", names)
|
802
|
+
#
|
803
|
+
# See related: #kwattr_append, #classes, #add_class, #remove_class
|
804
|
+
#
|
805
|
+
# [Parameters]
|
806
|
+
# - +names+ (String, Array<String>)
|
807
|
+
#
|
808
|
+
# CSS class names to be appended to the Node's "class" attribute. May be a string containing
|
809
|
+
# whitespace-delimited names, or an Array of String names. All class names passed in will be
|
810
|
+
# appended to the "class" attribute even if they are already present in the attribute
|
811
|
+
# value. If no "class" attribute exists, one is created.
|
812
|
+
#
|
813
|
+
# [Returns] +self+ (Node) for ease of chaining method calls.
|
814
|
+
#
|
815
|
+
# *Example:* Append "section" to the node's CSS "class" attribute
|
816
|
+
#
|
817
|
+
# node # => <div></div>
|
818
|
+
# node.append_class("section") # => <div class="section"></div>
|
819
|
+
# node.append_class("section") # => <div class="section section"></div> # duplicate added!
|
820
|
+
#
|
821
|
+
# *Example:* Append "section" and "header" to the node's CSS "class" attribute, via a String argument
|
822
|
+
#
|
823
|
+
# Note that the CSS class "section" is appended even though it is already present
|
824
|
+
#
|
825
|
+
# node # => <div class="section section"></div>
|
826
|
+
# node.append_class("section header") # => <div class="section section section header"></div>
|
827
|
+
#
|
828
|
+
# *Example:* Append "section" and "header" to the node's CSS "class" attribute, via an Array argument
|
829
|
+
#
|
830
|
+
# node # => <div></div>
|
831
|
+
# node.append_class(["section", "header"]) # => <div class="section header"></div>
|
832
|
+
# node.append_class(["section", "header"]) # => <div class="section header section header"></div>
|
833
|
+
#
|
834
|
+
def append_class(names)
  # Delegates to the generic keyword-attribute helper for the "class" attribute.
  class_attribute = "class"
  kwattr_append(class_attribute, names)
end
|
837
|
+
|
838
|
+
# :call-seq:
|
839
|
+
# remove_class(css_classes) → self
|
840
|
+
#
|
841
|
+
# Remove HTML CSS classes from this node. Any CSS class names in +css_classes+ that exist in
|
842
|
+
# this node's "class" attribute are removed, including any multiple entries.
|
843
|
+
#
|
844
|
+
# If no CSS classes remain after this operation, or if +css_classes+ is +nil+, the "class"
|
845
|
+
# attribute is deleted from the node.
|
846
|
+
#
|
847
|
+
# This is a convenience function and is equivalent to:
|
848
|
+
#
|
849
|
+
# node.kwattr_remove("class", css_classes)
|
850
|
+
#
|
851
|
+
# Also see #kwattr_remove, #classes, #add_class, #append_class
|
852
|
+
#
|
853
|
+
# [Parameters]
|
854
|
+
# - +css_classes+ (String, Array<String>)
|
855
|
+
#
|
856
|
+
# CSS class names to be removed from the Node's
|
857
|
+
# "class" attribute. May be a string containing whitespace-delimited names, or an Array of
|
858
|
+
# String names. Any class names already present will be removed. If no CSS classes remain,
|
859
|
+
# the "class" attribute is deleted.
|
860
|
+
#
|
861
|
+
# [Returns] +self+ (Nokogiri::XML::Node) for ease of chaining method calls.
|
862
|
+
#
|
863
|
+
# *Example*: Deleting a CSS class
|
864
|
+
#
|
865
|
+
# Note that all instances of the class "section" are removed from the "class" attribute.
|
866
|
+
#
|
867
|
+
# node # => <div class="section header section"></div>
|
868
|
+
# node.remove_class("section") # => <div class="header"></div>
|
869
|
+
#
|
870
|
+
# *Example*: Deleting the only remaining CSS class
|
871
|
+
#
|
872
|
+
# Note that the attribute is removed once there are no remaining classes.
|
873
|
+
#
|
874
|
+
# node # => <div class="section"></div>
|
875
|
+
# node.remove_class("section") # => <div></div>
|
876
|
+
#
|
877
|
+
# *Example*: Deleting multiple CSS classes
|
878
|
+
#
|
879
|
+
# Note that the "class" attribute is deleted once it's empty.
|
880
|
+
#
|
881
|
+
# node # => <div class="section header float"></div>
|
882
|
+
# node.remove_class(["section", "float"]) # => <div class="header"></div>
|
883
|
+
#
|
884
|
+
def remove_class(names = nil)
  # +names+ defaults to nil and is passed through unchanged to #kwattr_remove.
  class_attribute = "class"
  kwattr_remove(class_attribute, names)
end
|
887
|
+
|
888
|
+
# :call-seq:
|
889
|
+
# kwattr_values(attribute_name) → Array<String>
|
890
|
+
#
|
891
|
+
# Fetch values from a keyword attribute of a Node.
|
892
|
+
#
|
893
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
894
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
895
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
896
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
897
|
+
#
|
898
|
+
# See also #classes, #kwattr_add, #kwattr_append, #kwattr_remove
|
899
|
+
#
|
900
|
+
# [Parameters]
|
901
|
+
# - +attribute_name+ (String) The name of the keyword attribute to be inspected.
|
902
|
+
#
|
903
|
+
# [Returns]
|
904
|
+
# (Array<String>) The values present in the Node's +attribute_name+ attribute. If the
|
905
|
+
# attribute is empty or non-existent, the return value is an empty array.
|
906
|
+
#
|
907
|
+
# *Example:*
|
908
|
+
#
|
909
|
+
# node # => <a rel="nofollow noopener external">link</a>
|
910
|
+
# node.kwattr_values("rel") # => ["nofollow", "noopener", "external"]
|
911
|
+
#
|
912
|
+
# Since v1.11.0
|
913
|
+
def kwattr_values(attribute_name)
  raw_value = get_attribute(attribute_name)
  # A missing attribute (nil) is handed to keywordify as an empty Array.
  keywordify(raw_value || [])
end
|
916
|
+
|
917
|
+
# :call-seq:
|
918
|
+
# kwattr_add(attribute_name, keywords) → self
|
919
|
+
#
|
920
|
+
# Ensure that values are present in a keyword attribute.
|
921
|
+
#
|
922
|
+
# Any values in +keywords+ that already exist in the Node's attribute values are _not_
|
923
|
+
# added. Note that any existing duplicates in the attribute values are not removed. Compare
|
924
|
+
# with #kwattr_append.
|
925
|
+
#
|
926
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
927
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
928
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
929
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
930
|
+
#
|
931
|
+
# See also #add_class, #kwattr_values, #kwattr_append, #kwattr_remove
|
932
|
+
#
|
933
|
+
# [Parameters]
|
934
|
+
# - +attribute_name+ (String) The name of the keyword attribute to be modified.
|
935
|
+
# - +keywords+ (String, Array<String>)
|
936
|
+
# Keywords to be added to the attribute named +attribute_name+. May be a string containing
|
937
|
+
# whitespace-delimited values, or an Array of String values. Any values already present will
|
938
|
+
# not be added. Any values not present will be added. If the named attribute does not exist,
|
939
|
+
# it is created.
|
940
|
+
#
|
941
|
+
# [Returns] +self+ (Nokogiri::XML::Node) for ease of chaining method calls.
|
942
|
+
#
|
943
|
+
# *Example:* Ensure that a +Node+ has "nofollow" in its +rel+ attribute.
|
944
|
+
#
|
945
|
+
# Note that duplicates are not added.
|
946
|
+
#
|
947
|
+
# node # => <a></a>
|
948
|
+
# node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a>
|
949
|
+
# node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a>
|
950
|
+
#
|
951
|
+
# *Example:* Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via a
|
952
|
+
# String argument.
|
953
|
+
#
|
954
|
+
# Note that "nofollow" is not added because it is already present. Note also that the
|
955
|
+
# pre-existing duplicate "nofollow" is not removed.
|
956
|
+
#
|
957
|
+
# node # => <a rel="nofollow nofollow"></a>
|
958
|
+
# node.kwattr_add("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
|
959
|
+
#
|
960
|
+
# *Example:* Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via
|
961
|
+
# an Array argument.
|
962
|
+
#
|
963
|
+
# node # => <a></a>
|
964
|
+
# node.kwattr_add("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
|
965
|
+
#
|
966
|
+
# Since v1.11.0
|
967
|
+
def kwattr_add(attribute_name, keywords)
  requested = keywordify(keywords)
  existing = kwattr_values(attribute_name)
  # Existing values are kept as-is; only keywords not already present are appended.
  missing = requested - existing
  set_attribute(attribute_name, (existing + missing).join(" "))
  self
end
|
974
|
+
|
975
|
+
# :call-seq:
|
976
|
+
# kwattr_append(attribute_name, keywords) → self
|
977
|
+
#
|
978
|
+
# Add keywords to a Node's keyword attribute, regardless of duplication. Compare with
|
979
|
+
# #kwattr_add.
|
980
|
+
#
|
981
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
982
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
983
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
984
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
985
|
+
#
|
986
|
+
# See also #append_class, #kwattr_values, #kwattr_add, #kwattr_remove
|
987
|
+
#
|
988
|
+
# [Parameters]
|
989
|
+
# - +attribute_name+ (String) The name of the keyword attribute to be modified.
|
990
|
+
# - +keywords+ (String, Array<String>)
|
991
|
+
# Keywords to be added to the attribute named +attribute_name+. May be a string containing
|
992
|
+
# whitespace-delimited values, or an Array of String values. All values passed in will be
|
993
|
+
# appended to the named attribute even if they are already present in the attribute. If the
|
994
|
+
# named attribute does not exist, it is created.
|
995
|
+
#
|
996
|
+
# [Returns] +self+ (Node) for ease of chaining method calls.
|
997
|
+
#
|
998
|
+
# *Example:* Append "nofollow" to the +rel+ attribute.
|
999
|
+
#
|
1000
|
+
# Note that duplicates are added.
|
1001
|
+
#
|
1002
|
+
# node # => <a></a>
|
1003
|
+
# node.kwattr_append("rel", "nofollow") # => <a rel="nofollow"></a>
|
1004
|
+
# node.kwattr_append("rel", "nofollow") # => <a rel="nofollow nofollow"></a>
|
1005
|
+
#
|
1006
|
+
# *Example:* Append "nofollow" and "noreferrer" to the +rel+ attribute, via a String argument.
|
1007
|
+
#
|
1008
|
+
# Note that "nofollow" is appended even though it is already present.
|
1009
|
+
#
|
1010
|
+
# node # => <a rel="nofollow"></a>
|
1011
|
+
# node.kwattr_append("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
|
1012
|
+
#
|
1013
|
+
#
|
1014
|
+
# *Example:* Append "nofollow" and "noreferrer" to the +rel+ attribute, via an Array argument.
|
1015
|
+
#
|
1016
|
+
# node # => <a></a>
|
1017
|
+
# node.kwattr_append("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
|
1018
|
+
#
|
1019
|
+
# Since v1.11.0
|
1020
|
+
def kwattr_append(attribute_name, keywords)
  appended = keywordify(keywords)
  existing = kwattr_values(attribute_name)
  # No de-duplication here: every requested keyword is appended.
  set_attribute(attribute_name, (existing + appended).join(" "))
  self
end
|
1027
|
+
|
1028
|
+
# :call-seq:
|
1029
|
+
# kwattr_remove(attribute_name, keywords) → self
|
1030
|
+
#
|
1031
|
+
# Remove keywords from a keyword attribute. Any matching keywords that exist in the named
|
1032
|
+
# attribute are removed, including any multiple entries.
|
1033
|
+
#
|
1034
|
+
# If no keywords remain after this operation, or if +keywords+ is +nil+, the attribute is
|
1035
|
+
# deleted from the node.
|
1036
|
+
#
|
1037
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
1038
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
1039
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
1040
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
1041
|
+
#
|
1042
|
+
# See also #remove_class, #kwattr_values, #kwattr_add, #kwattr_append
|
1043
|
+
#
|
1044
|
+
# [Parameters]
|
1045
|
+
# - +attribute_name+ (String) The name of the keyword attribute to be modified.
|
1046
|
+
# - +keywords+ (String, Array<String>)
|
1047
|
+
# Keywords to be removed from the attribute named +attribute_name+. May be a string
|
1048
|
+
# containing whitespace-delimited values, or an Array of String values. Any keywords present
|
1049
|
+
# in the named attribute will be removed. If no keywords remain, or if +keywords+ is nil,
|
1050
|
+
# the attribute is deleted.
|
1051
|
+
#
|
1052
|
+
# [Returns] +self+ (Node) for ease of chaining method calls.
|
1053
|
+
#
|
1054
|
+
# *Example:*
|
1055
|
+
#
|
1056
|
+
# Note that the +rel+ attribute is deleted when empty.
|
1057
|
+
#
|
1058
|
+
# node # => <a rel="nofollow noreferrer">link</a>
|
1059
|
+
# node.kwattr_remove("rel", "nofollow") # => <a rel="noreferrer">link</a>
|
1060
|
+
# node.kwattr_remove("rel", "noreferrer") # => <a>link</a>
|
1061
|
+
#
|
1062
|
+
# Since v1.11.0
|
1063
|
+
def kwattr_remove(attribute_name, keywords)
  # A nil +keywords+ skips straight to deleting the whole attribute.
  unless keywords.nil?
    doomed = keywordify(keywords)
    remaining = kwattr_values(attribute_name) - doomed
    unless remaining.empty?
      set_attribute(attribute_name, remaining.join(" "))
      return self
    end
  end

  # Nothing left (or nil was passed): remove the attribute itself.
  remove_attribute(attribute_name)
  self
end
|
1079
|
+
|
1080
|
+
alias_method :delete, :remove_attribute
|
1081
|
+
alias_method :get_attribute, :[]
|
1082
|
+
alias_method :attr, :[]
|
1083
|
+
alias_method :set_attribute, :[]=
|
1084
|
+
alias_method :has_attribute?, :key?
|
1085
|
+
|
1086
|
+
# :section:
|
1087
|
+
|
1088
|
+
###
|
1089
|
+
# Returns true if this Node matches +selector+
|
1090
|
+
def matches?(selector)
  # Search from the last (topmost) ancestor and check whether this node is among the results.
  topmost = ancestors.last
  hits = topmost.search(selector)
  hits.include?(self)
end
|
1093
|
+
|
1094
|
+
###
|
1095
|
+
# Create a DocumentFragment containing +tags+ that is relative to _this_
|
1096
|
+
# context node.
|
1097
|
+
def fragment(tags)
  # The fragment class is resolved through the document; this node is passed as the context.
  fragment_class = document.related_class("DocumentFragment")
  fragment_class.new(document, tags, self)
end
|
1100
|
+
|
1101
|
+
###
|
1102
|
+
# Parse +string_or_io+ as a document fragment within the context of
|
1103
|
+
# *this* node. Returns a XML::NodeSet containing the nodes parsed from
|
1104
|
+
# +string_or_io+.
|
1105
|
+
def parse(string_or_io, options = nil, &block)
  ##
  # When the current node is unparented and not an element node, use the
  # document as the parsing context instead. Otherwise, the in-context
  # parser cannot find an element or a document node.
  # Document Fragments are also not usable by the in-context parser.
  #
  # The caller's block is forwarded so that it can still configure the parse options on this
  # fallback path (previously it was silently dropped).
  if !element? && !document? && (!parent || parent.fragment?)
    return document.parse(string_or_io, options, &block)
  end

  # Default options depend on whether the owning document is HTML or XML; an Integer bitmask
  # is wrapped in a ParseOptions object before the caller's block gets to adjust it.
  options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
  yield options if block_given?

  # Accept either an IO-like object (anything responding to #read) or a String.
  contents = if string_or_io.respond_to?(:read)
    string_or_io.read
  else
    string_or_io
  end

  return Nokogiri::XML::NodeSet.new(document) if contents.empty?

  # Remember how many errors the document already had, so new ones can be detected below.
  error_count = document.errors.length
  node_set = in_context(contents, options.to_i)

  if document.errors.length > error_count
    raise document.errors[error_count] unless options.recover?

    # TODO: remove this block when libxml2 < 2.13 is no longer supported
    if node_set.empty?
      # libxml2 < 2.13 does not obey the +recover+ option after encountering errors during
      # +in_context+ parsing, and so this horrible hack is here to try to emulate recovery
      # behavior.
      #
      # (Note that HTML4 fragment parsing seems to have been fixed in abd74186, and XML
      # fragment parsing is fixed in 1c106edf. Both are in 2.13.)
      #
      # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
      # would have been inherited from the context node won't be handled correctly. This hack
      # was written in 2010, and I regret it, because it's silently degrading functionality in
      # a way that's not easily prevented (or even detected).
      #
      # I think preferable behavior would be to either:
      #
      # a. add an error noting that we "fell back" and pointing the user to turning off the
      #    +recover+ option
      # b. don't recover, but raise a sensible exception
      #
      # For context and background:
      # - https://github.com/sparklemotion/nokogiri/issues/313
      # - https://github.com/sparklemotion/nokogiri/issues/2092
      fragment = document.related_class("DocumentFragment").parse(contents)
      node_set = fragment.children
    end
  end

  node_set
end
|
1163
|
+
|
1164
|
+
# :call-seq:
|
1165
|
+
# namespaces() → Hash<String(Namespace#prefix) ⇒ String(Namespace#href)>
|
1166
|
+
#
|
1167
|
+
# Fetch all the namespaces on this node and its ancestors.
|
1168
|
+
#
|
1169
|
+
# Note that the keys in this hash are the XML attributes that would be used to define this namespace,
|
1170
|
+
# such as "xmlns:prefix", not just the prefix.
|
1171
|
+
#
|
1172
|
+
# The default namespace for this node will be included with key "xmlns".
|
1173
|
+
#
|
1174
|
+
# See also #namespace_scopes
|
1175
|
+
#
|
1176
|
+
# [Returns]
|
1177
|
+
# Hash containing all the namespaces on this node and its ancestors. The hash keys are the
|
1178
|
+
# attribute names that would declare each namespace ("xmlns" or "xmlns:prefix"), and the hash value for each key is the namespace URI.
|
1179
|
+
#
|
1180
|
+
# *Example:*
|
1181
|
+
#
|
1182
|
+
# doc = Nokogiri::XML(<<~EOF)
|
1183
|
+
# <root xmlns="http://example.com/root" xmlns:in_scope="http://example.com/in_scope">
|
1184
|
+
# <first/>
|
1185
|
+
# <second xmlns="http://example.com/child"/>
|
1186
|
+
# <third xmlns:foo="http://example.com/foo"/>
|
1187
|
+
# </root>
|
1188
|
+
# EOF
|
1189
|
+
# doc.at_xpath("//root:first", "root" => "http://example.com/root").namespaces
|
1190
|
+
# # => {"xmlns"=>"http://example.com/root",
|
1191
|
+
# # "xmlns:in_scope"=>"http://example.com/in_scope"}
|
1192
|
+
# doc.at_xpath("//child:second", "child" => "http://example.com/child").namespaces
|
1193
|
+
# # => {"xmlns"=>"http://example.com/child",
|
1194
|
+
# # "xmlns:in_scope"=>"http://example.com/in_scope"}
|
1195
|
+
# doc.at_xpath("//root:third", "root" => "http://example.com/root").namespaces
|
1196
|
+
# # => {"xmlns:foo"=>"http://example.com/foo",
|
1197
|
+
# # "xmlns"=>"http://example.com/root",
|
1198
|
+
# # "xmlns:in_scope"=>"http://example.com/in_scope"}
|
1199
|
+
#
|
1200
|
+
def namespaces
  namespace_scopes.to_h do |scope|
    prefix = scope.prefix
    # A namespace without a prefix is the default namespace, keyed as plain "xmlns".
    attribute_name = prefix ? "xmlns:#{prefix}" : "xmlns"
    [attribute_name, scope.href]
  end
end
|
1207
|
+
|
1208
|
+
# Returns true if this is a Comment
|
1209
|
+
def comment?
  # Compare this node's type against the COMMENT_NODE constant.
  node_kind = type
  node_kind == COMMENT_NODE
end
|
1212
|
+
|
1213
|
+
# Returns true if this is a CDATA
|
1214
|
+
def cdata?
  # Compare this node's type against the CDATA_SECTION_NODE constant.
  node_kind = type
  node_kind == CDATA_SECTION_NODE
end
|
1217
|
+
|
1218
|
+
# Returns true if this is an XML::Document node
|
1219
|
+
def xml?
  # Compare this node's type against the DOCUMENT_NODE constant.
  node_kind = type
  node_kind == DOCUMENT_NODE
end
|
1222
|
+
|
1223
|
+
# Returns true if this is an HTML4::Document or HTML5::Document node
|
1224
|
+
def html?
  # Compare this node's type against the HTML_DOCUMENT_NODE constant.
  node_kind = type
  node_kind == HTML_DOCUMENT_NODE
end
|
1227
|
+
|
1228
|
+
# Returns true if this is a Document
|
1229
|
+
def document?
  # Class-based check (not node-type based): true for XML::Document and its subclasses.
  kind_of?(XML::Document)
end
|
1232
|
+
|
1233
|
+
# Returns true if this is a ProcessingInstruction node
|
1234
|
+
def processing_instruction?
  # Compare this node's type against the PI_NODE constant.
  node_kind = type
  node_kind == PI_NODE
end
|
1237
|
+
|
1238
|
+
# Returns true if this is a Text node
|
1239
|
+
def text?
  # Compare this node's type against the TEXT_NODE constant.
  node_kind = type
  node_kind == TEXT_NODE
end
|
1242
|
+
|
1243
|
+
# Returns true if this is a DocumentFragment
|
1244
|
+
def fragment?
  # Compare this node's type against the DOCUMENT_FRAG_NODE constant.
  node_kind = type
  node_kind == DOCUMENT_FRAG_NODE
end
|
1247
|
+
|
1248
|
+
###
|
1249
|
+
# Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
|
1250
|
+
# nil on XML documents and on unknown tags.
|
1251
|
+
def description
  # XML documents have no HTML element description; otherwise look it up by this node's name.
  if document.xml?
    nil
  else
    Nokogiri::HTML4::ElementDescription[name]
  end
end
|
1256
|
+
|
1257
|
+
###
|
1258
|
+
# Is this a read only node?
|
1259
|
+
def read_only?
  # According to gdome2, these are read-only node types
  case type
  when NOTATION_NODE, ENTITY_NODE, ENTITY_DECL then true
  else false
  end
end
|
1263
|
+
|
1264
|
+
# Returns true if this is an Element node
|
1265
|
+
def element?
  # Compare this node's type against the ELEMENT_NODE constant.
  node_kind = type
  node_kind == ELEMENT_NODE
end
|
1268
|
+
|
1269
|
+
alias_method :elem?, :element?
|
1270
|
+
|
1271
|
+
###
|
1272
|
+
# Turn this node into a string. If the document is HTML, this method
|
1273
|
+
# returns html. If the document is XML, this method returns XML.
|
1274
|
+
def to_s
  # The owning document decides the output format: XML documents serialize as XML,
  # everything else as HTML.
  if document.xml?
    to_xml
  else
    to_html
  end
end
|
1277
|
+
|
1278
|
+
# Get the inner_html for this node's Node#children
|
1279
|
+
def inner_html(*args)
  # Serialize each child with the same arguments, then concatenate the pieces.
  serialized_children = children.map do |child|
    child.to_html(*args)
  end
  serialized_children.join
end
|
1282
|
+
|
1283
|
+
# Get the path to this node as a CSS expression
|
1284
|
+
def css_path
  # Split the slash-separated path, discarding empty segments (e.g. from a leading "/").
  steps = path.split(%r{/}).reject(&:empty?)
  # Positional predicates such as "[2]" become ":nth-of-type(2)".
  css_steps = steps.map { |step| step.gsub(/\[(\d+)\]/, ':nth-of-type(\1)') }
  css_steps.join(" > ")
end
|
1289
|
+
|
1290
|
+
###
|
1291
|
+
# Get a list of ancestor Node for this Node. If +selector+ is given,
|
1292
|
+
# the ancestors must match +selector+
|
1293
|
+
def ancestors(selector = nil)
  # A node that cannot have a parent, or currently has none, has no ancestors.
  return NodeSet.new(document) unless respond_to?(:parent) && parent

  # Walk upwards, collecting each parent until the chain ends.
  chain = [parent]
  loop do
    top = chain.last
    break unless top.respond_to?(:parent)

    next_up = top.parent
    break unless next_up

    chain << next_up
  end

  return NodeSet.new(document, chain) unless selector

  # With a selector, keep only the ancestors that a search from the topmost one finds.
  hits = chain.last.search(selector)
  NodeSet.new(document, chain.select { |candidate| hits.include?(candidate) })
end
|
1314
|
+
|
1315
|
+
####
|
1316
|
+
# Yields self and all children to +block+ recursively.
|
1317
|
+
def traverse(&block)
  # Children are visited first, so this node is yielded after all of its descendants.
  children.each do |child|
    child.traverse(&block)
  end
  yield(self)
end
|
1321
|
+
|
1322
|
+
###
|
1323
|
+
# Accept a visitor. This method calls "visit" on +visitor+ with self.
|
1324
|
+
def accept(visitor)
  # Hand this node to the visitor's #visit and return whatever it returns.
  visited_node = self
  visitor.visit(visited_node)
end
|
1327
|
+
|
1328
|
+
###
|
1329
|
+
# Test to see if this Node is equal to +other+
|
1330
|
+
def ==(other)
  # nil/false, or anything that does not expose a pointer_id, is never equal to this node.
  return false unless other && other.respond_to?(:pointer_id)

  # Equality is decided by comparing pointer_id values.
  pointer_id == other.pointer_id
end
|
1336
|
+
|
1337
|
+
###
|
1338
|
+
# Compare two Node objects with respect to their Document. Nodes from
|
1339
|
+
# different documents cannot be compared.
|
1340
|
+
def <=>(other)
  # Only nodes belonging to the same document are comparable; otherwise the result is nil.
  comparable = other.is_a?(Nokogiri::XML::Node) && document == other.document
  comparable ? compare(other) : nil
end
|
1346
|
+
|
1347
|
+
# :section: Serialization and Generating Output
|
1348
|
+
|
1349
|
+
###
|
1350
|
+
# Serialize Node using +options+. Save options can also be set using a block.
|
1351
|
+
#
|
1352
|
+
# See also Nokogiri::XML::Node::SaveOptions and Node@Serialization+and+Generating+Output.
|
1353
|
+
#
|
1354
|
+
# These two statements are equivalent:
|
1355
|
+
#
|
1356
|
+
# node.serialize(encoding: 'UTF-8', save_with: FORMAT | AS_XML)
|
1357
|
+
#
|
1358
|
+
# or
|
1359
|
+
#
|
1360
|
+
# node.serialize(encoding: 'UTF-8') do |config|
|
1361
|
+
# config.format.as_xml
|
1362
|
+
# end
|
1363
|
+
#
|
1364
|
+
def serialize(*args, &block)
  # TODO: deprecate non-hash options, see 46c68ed 2009-06-20 for context
  #
  # Accepts either one options Hash or the legacy positional form
  # (encoding, save_with). Returns the serialized node as a String.
  #
  # The caller's Hash is copied before defaults are filled in. Writing the
  # document encoding into the caller's own object would raise FrozenError
  # for a frozen Hash and would leak this document's encoding into a later
  # call that reuses the same Hash.
  options = if args.first.is_a?(Hash)
    args.first.dup
  else
    { encoding: args[0], save_with: args[1] }
  end

  options[:encoding] ||= document.encoding
  # UTF-8 is only the fallback for the in-memory buffer; when neither the
  # caller nor the document names an encoding, options[:encoding] stays nil.
  encoding = Encoding.find(options[:encoding] || "UTF-8")

  io = StringIO.new(String.new(encoding: encoding))

  write_to(io, options, &block)
  io.string
end
|
1383
|
+
|
1384
|
+
###
|
1385
|
+
# Serialize this Node to HTML
|
1386
|
+
#
|
1387
|
+
# doc.to_html
|
1388
|
+
#
|
1389
|
+
# See Node#write_to for a list of +options+. For formatted output,
|
1390
|
+
# use Node#to_xhtml instead.
|
1391
|
+
def to_html(options = {})
  # Delegates to the shared formatter with the HTML default flags as the
  # fallback for :save_with.
  html_defaults = SaveOptions::DEFAULT_HTML
  to_format(html_defaults, options)
end
|
1394
|
+
|
1395
|
+
###
|
1396
|
+
# Serialize this Node to XML using +options+
|
1397
|
+
#
|
1398
|
+
# doc.to_xml(indent: 5, encoding: 'UTF-8')
|
1399
|
+
#
|
1400
|
+
# See Node#write_to for a list of +options+
|
1401
|
+
def to_xml(options = {})
  # The XML default flags apply only when the caller gave no (or a falsy)
  # :save_with. The defaults are merged into a new Hash so the caller's
  # object is left untouched: it may be frozen, or reused for a later
  # to_html/to_xhtml call that must not inherit the XML flags.
  save_with = options[:save_with] || SaveOptions::DEFAULT_XML
  serialize(options.merge(save_with: save_with))
end
|
1405
|
+
|
1406
|
+
###
|
1407
|
+
# Serialize this Node to XHTML using +options+
|
1408
|
+
#
|
1409
|
+
# doc.to_xhtml(indent: 5, encoding: 'UTF-8')
|
1410
|
+
#
|
1411
|
+
# See Node#write_to for a list of +options+
|
1412
|
+
def to_xhtml(options = {})
  # Delegates to the shared formatter with the XHTML default flags as the
  # fallback for :save_with.
  xhtml_defaults = SaveOptions::DEFAULT_XHTML
  to_format(xhtml_defaults, options)
end
|
1415
|
+
|
1416
|
+
###
|
1417
|
+
# :call-seq:
|
1418
|
+
# write_to(io, *options)
|
1419
|
+
#
|
1420
|
+
# Serialize this node or document to +io+.
|
1421
|
+
#
|
1422
|
+
# [Parameters]
|
1423
|
+
# - +io+ (IO) An IO-like object to which the serialized content will be written.
|
1424
|
+
# - +options+ (Hash) See below
|
1425
|
+
#
|
1426
|
+
# [Options]
|
1427
|
+
# * +:encoding+ (String or Encoding) specify the encoding of the output (defaults to document encoding)
|
1428
|
+
# * +:indent_text+ (String) the indentation text (defaults to <code>" "</code>)
|
1429
|
+
# * +:indent+ (Integer) the number of +:indent_text+ to use (defaults to +2+)
|
1430
|
+
# * +:save_with+ (Integer) a combination of SaveOptions constants
|
1431
|
+
#
|
1432
|
+
# To save with UTF-8 indented twice:
|
1433
|
+
#
|
1434
|
+
# node.write_to(io, encoding: 'UTF-8', indent: 2)
|
1435
|
+
#
|
1436
|
+
# To save indented with two dashes:
|
1437
|
+
#
|
1438
|
+
# node.write_to(io, indent_text: '-', indent: 2)
|
1439
|
+
#
|
1440
|
+
def write_to(io, *options)
  # Arguments after +io+ are either a single options Hash or the legacy
  # positional form (encoding, save_with). The splat is kept under its own
  # name: once +options+ is rebound to a Hash, indexing it with 0 or 1 is a
  # key lookup that always misses, which silently dropped positional values.
  positional = options
  options = positional.first.is_a?(Hash) ? positional.shift : {}
  encoding = options[:encoding] || positional[0] || document.encoding
  if Nokogiri.jruby?
    # On JRuby no default save flag or indentation is imposed here.
    save_options = options[:save_with] || positional[1]
    indent_times = options[:indent] || 0
  else
    save_options = options[:save_with] || positional[1] || SaveOptions::FORMAT
    indent_times = options[:indent] || 2
  end
  indent_text = options[:indent_text] || " "

  # Any string times 0 returns an empty string. Therefore, use the same
  # string instead of generating a new empty string for every node with
  # zero indentation.
  indentation = indent_times.zero? ? "" : (indent_text * indent_times)

  # to_i turns a missing (nil) flag value into 0; the block, if given, may
  # then adjust the flags before they are handed to the native writer.
  config = SaveOptions.new(save_options.to_i)
  yield config if block_given?

  # The native writer is given the encoding by name.
  encoding = encoding.is_a?(Encoding) ? encoding.name : encoding

  native_write_to(io, encoding, indentation, config.options)
end
|
1464
|
+
|
1465
|
+
###
|
1466
|
+
# Write Node as HTML to +io+ with +options+
|
1467
|
+
#
|
1468
|
+
# See Node#write_to for a list of +options+
|
1469
|
+
def write_html_to(io, options = {})
  # Delegates to the shared writer with the HTML default flags as the
  # fallback for :save_with.
  html_defaults = SaveOptions::DEFAULT_HTML
  write_format_to(html_defaults, io, options)
end
|
1472
|
+
|
1473
|
+
###
|
1474
|
+
# Write Node as XHTML to +io+ with +options+
|
1475
|
+
#
|
1476
|
+
# See Node#write_to for a list of +options+
|
1477
|
+
def write_xhtml_to(io, options = {})
  # Delegates to the shared writer with the XHTML default flags as the
  # fallback for :save_with.
  xhtml_defaults = SaveOptions::DEFAULT_XHTML
  write_format_to(xhtml_defaults, io, options)
end
|
1480
|
+
|
1481
|
+
###
|
1482
|
+
# Write Node as XML to +io+ with +options+
|
1483
|
+
#
|
1484
|
+
# doc.write_xml_to io, :encoding => 'UTF-8'
|
1485
|
+
#
|
1486
|
+
# See Node#write_to for a list of options
|
1487
|
+
def write_xml_to(io, options = {})
  # The XML default flags apply only when the caller gave no (or a falsy)
  # :save_with. The defaults are merged into a new Hash so that a frozen or
  # reused caller Hash is never modified.
  save_with = options[:save_with] || SaveOptions::DEFAULT_XML
  write_to(io, options.merge(save_with: save_with))
end
|
1491
|
+
|
1492
|
+
def canonicalize(mode = XML::XML_C14N_1_0, inclusive_namespaces = nil, with_comments = false)
  # Canonicalize through the owning document, using the block to keep only
  # this node and whatever lies beneath it.
  subtree_root = self
  document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
    # When the yielded object is not an XML::Node, decide based on its parent.
    candidate = if node.is_a?(XML::Node)
      node
    else
      parent
    end
    candidate == subtree_root || candidate.ancestors.include?(subtree_root)
  end
end
|
1499
|
+
|
1500
|
+
# Keys that deconstruct_keys is willing to return, in the order they are reported.
DECONSTRUCT_KEYS = %i[name attributes children namespace content elements inner_html].freeze # :nodoc:
# Keys whose value is read from a method with a different name than the key.
DECONSTRUCT_METHODS = { attributes: :attribute_nodes }.freeze # :nodoc:
|
1502
|
+
|
1503
|
+
#
|
1504
|
+
# :call-seq: deconstruct_keys(array_of_names) → Hash
|
1505
|
+
#
|
1506
|
+
# Returns a hash describing the Node, to use in pattern matching.
|
1507
|
+
#
|
1508
|
+
# Valid keys and their values:
|
1509
|
+
# - +name+ → (String) The name of this node, or "text" if it is a Text node.
|
1510
|
+
# - +namespace+ → (Namespace, nil) The namespace of this node, or nil if there is no namespace.
|
1511
|
+
# - +attributes+ → (Array<Attr>) The attributes of this node.
|
1512
|
+
# - +children+ → (Array<Node>) The children of this node. 💡 Note this includes text nodes.
|
1513
|
+
# - +elements+ → (Array<Node>) The child elements of this node. 💡 Note this does not include text nodes.
|
1514
|
+
# - +content+ → (String) The contents of all the text nodes in this node's subtree. See #content.
|
1515
|
+
# - +inner_html+ → (String) The inner markup for the children of this node. See #inner_html.
|
1516
|
+
#
|
1517
|
+
# *Example*
|
1518
|
+
#
|
1519
|
+
# doc = Nokogiri::XML.parse(<<~XML)
|
1520
|
+
# <?xml version="1.0"?>
|
1521
|
+
# <parent xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
|
1522
|
+
# <child1 foo="abc" noko:bar="def">First</child1>
|
1523
|
+
# <noko:child2 foo="qwe" noko:bar="rty">Second</noko:child2>
|
1524
|
+
# </parent>
|
1525
|
+
# XML
|
1526
|
+
#
|
1527
|
+
# doc.root.deconstruct_keys([:name, :namespace])
|
1528
|
+
# # => {:name=>"parent",
|
1529
|
+
# # :namespace=>
|
1530
|
+
# # #(Namespace:0x35c { href = "http://nokogiri.org/ns/default" })}
|
1531
|
+
#
|
1532
|
+
# doc.root.deconstruct_keys([:inner_html, :content])
|
1533
|
+
# # => {:content=>"\n" + " First\n" + " Second\n",
|
1534
|
+
# # :inner_html=>
|
1535
|
+
# # "\n" +
|
1536
|
+
# # " <child1 foo=\"abc\" noko:bar=\"def\">First</child1>\n" +
|
1537
|
+
# # " <noko:child2 foo=\"qwe\" noko:bar=\"rty\">Second</noko:child2>\n"}
|
1538
|
+
#
|
1539
|
+
# doc.root.elements.first.deconstruct_keys([:attributes])
|
1540
|
+
# # => {:attributes=>
|
1541
|
+
# # [#(Attr:0x370 { name = "foo", value = "abc" }),
|
1542
|
+
# # #(Attr:0x384 {
|
1543
|
+
# # name = "bar",
|
1544
|
+
# # namespace = #(Namespace:0x398 {
|
1545
|
+
# # prefix = "noko",
|
1546
|
+
# # href = "http://nokogiri.org/ns/noko"
|
1547
|
+
# # }),
|
1548
|
+
# # value = "def"
|
1549
|
+
# # })]}
|
1550
|
+
#
|
1551
|
+
# Since v1.14.0
|
1552
|
+
#
|
1553
|
+
def deconstruct_keys(keys)
  # Ruby passes +nil+ instead of an Array when the pattern captures the
  # remaining keys with **rest, meaning every available key is wanted.
  # Intersecting with nil would raise TypeError, so that case maps to the
  # full key list. Unknown keys are ignored.
  requested_keys = keys.nil? ? DECONSTRUCT_KEYS : DECONSTRUCT_KEYS & keys
  requested_keys.each_with_object({}) do |key, values|
    # Most keys are read from the method of the same name; the exceptions
    # are listed in DECONSTRUCT_METHODS.
    method = DECONSTRUCT_METHODS[key] || key
    values[key] = send(method)
  end
end
|
1562
|
+
|
1563
|
+
# :section:
|
1564
|
+
|
1565
|
+
protected
|
1566
|
+
|
1567
|
+
def coerce(data)
  # The checks below use Class#=== in the same order a case/when would.
  # Node sets pass through; fragments and markup strings are reduced to
  # their child nodes.
  return data if XML::NodeSet === data
  return data.children if XML::DocumentFragment === data
  return fragment(data).children if String === data

  # Documents and attributes are rejected before the generic Node check,
  # so they end up at the ArgumentError below.
  unacceptable = Document === data || XML::Attr === data
  return data if !unacceptable && XML::Node === data

  raise ArgumentError, <<~EOERR
    Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
    (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
  EOERR
end
|
1586
|
+
|
1587
|
+
private
|
1588
|
+
|
1589
|
+
def keywordify(keywords)
  # Enumerables are returned as they are; a String is split into its
  # whitespace-separated words; anything else is rejected.
  case keywords
  when Enumerable then keywords
  when String then keywords.scan(/\S+/)
  else
    message = "Keyword attributes must be passed as either a String or an Enumerable, but received #{keywords.class}"
    raise ArgumentError, message
  end
end
|
1600
|
+
|
1601
|
+
def add_sibling(next_or_previous, node_or_tags)
  raise("Cannot add sibling to a node with no parent") unless parent

  inserting_after = next_or_previous == :next
  impl = inserting_after ? :add_next_sibling_node : :add_previous_sibling_node
  # NOTE(review): the set is walked backwards when inserting after the
  # pivot, presumably so the inserted nodes keep their original order.
  iter = inserting_after ? :reverse_each : :each

  node_or_tags = parent.coerce(node_or_tags)

  # A single node is attached directly.
  unless node_or_tags.is_a?(XML::NodeSet)
    send(impl, node_or_tags)
    return node_or_tags
  end

  # For a text receiver, a temporary "dummy" node is inserted beside it and
  # used as the insertion point, then unlinked once every node is placed.
  # NOTE(review): presumably this avoids adjacent text nodes being merged
  # into the receiver; confirm.
  pivot = self
  if text?
    pivot = Nokogiri::XML::Node.new("dummy", document)
    send(impl, pivot)
  end
  node_or_tags.send(iter) { |n| pivot.send(impl, n) }
  pivot.unlink if text?

  node_or_tags
end
|
1622
|
+
|
1623
|
+
def to_format(save_option, options)
  # +save_option+ is only a fallback: a truthy :save_with supplied by the
  # caller wins. The result is merged into a new Hash so the caller's
  # object is never modified. It may be frozen, or reused for a call with
  # a different format that must not inherit this one's flags.
  save_with = options[:save_with] || save_option
  serialize(options.merge(save_with: save_with))
end
|
1627
|
+
|
1628
|
+
def write_format_to(save_option, io, options)
  # +save_option+ is only a fallback: a truthy :save_with supplied by the
  # caller wins. The result is merged into a new Hash so that a frozen or
  # reused caller Hash is never modified.
  save_with = options[:save_with] || save_option
  write_to(io, options.merge(save_with: save_with))
end
|
1632
|
+
|
1633
|
+
def inspect_attributes
  # Names of the reader methods listed for this node, in display order.
  %i[name namespace attribute_nodes children]
end
|
1636
|
+
|
1637
|
+
IMPLIED_XPATH_CONTEXTS = %w[.//].freeze
|
1638
|
+
|
1639
|
+
def add_child_node_and_reparent_attrs(node)
  add_child_node(node)

  # Every attribute whose name contains a colon is removed and then set
  # again by name on the freshly attached node.
  # NOTE(review): presumably this re-resolves the prefix against the
  # namespaces in scope at the node's new position; confirm.
  colon_named = node.attribute_nodes.find_all do |candidate|
    candidate.name.include?(":")
  end
  colon_named.each do |attr_node|
    attr_node.remove
    node[attr_node.name] = attr_node.value
  end
end
|
1646
|
+
end
|
1647
|
+
end
|
1648
|
+
end
|
1649
|
+
|
1650
|
+
require_relative "node/save_options"
|