nokogiri 1.10.3 → 1.13.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +5 -0
- data/LICENSE-DEPENDENCIES.md +1173 -884
- data/LICENSE.md +1 -1
- data/README.md +178 -96
- data/bin/nokogiri +63 -50
- data/dependencies.yml +11 -60
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +752 -423
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +120 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +228 -91
- data/ext/nokogiri/nokogiri.h +191 -89
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +291 -219
- data/ext/nokogiri/xml_document_fragment.c +12 -16
- data/ext/nokogiri/xml_dtd.c +56 -50
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +43 -18
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +60 -51
- data/ext/nokogiri/xml_node.c +1001 -610
- data/ext/nokogiri/xml_node_set.c +174 -162
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +226 -175
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +112 -112
- data/ext/nokogiri/xml_sax_parser_context.c +105 -86
- data/ext/nokogiri/xml_sax_push_parser.c +36 -27
- data/ext/nokogiri/xml_schema.c +112 -33
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +223 -115
- data/ext/nokogiri/xslt_stylesheet.c +265 -173
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4875 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -8
- data/lib/nokogiri/css/parser.rb +397 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +52 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +107 -104
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +218 -91
- data/lib/nokogiri/css.rb +50 -17
- data/lib/nokogiri/decorators/slop.rb +9 -7
- data/lib/nokogiri/extension.rb +31 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/{html → html4}/document.rb +99 -103
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +14 -15
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +46 -0
- data/lib/nokogiri/html5/document.rb +88 -0
- data/lib/nokogiri/html5/document_fragment.rb +83 -0
- data/lib/nokogiri/html5/node.rb +96 -0
- data/lib/nokogiri/html5.rb +477 -0
- data/lib/nokogiri/jruby/dependencies.rb +21 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +221 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +6 -3
- data/lib/nokogiri/xml/attribute_decl.rb +3 -1
- data/lib/nokogiri/xml/builder.rb +95 -53
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +219 -86
- data/lib/nokogiri/xml/document_fragment.rb +46 -44
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +2 -0
- data/lib/nokogiri/xml/element_decl.rb +3 -1
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +3 -0
- data/lib/nokogiri/xml/node/save_options.rb +8 -4
- data/lib/nokogiri/xml/node.rb +876 -376
- data/lib/nokogiri/xml/node_set.rb +47 -54
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +21 -8
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +25 -26
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +3 -1
- data/lib/nokogiri/xml/reader.rb +23 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +37 -34
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +112 -72
- data/lib/nokogiri/xml/syntax_error.rb +5 -4
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +37 -37
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +29 -20
- data/lib/nokogiri.rb +49 -65
- data/lib/xsd/xmlparser/nokogiri.rb +26 -24
- data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
- data/ports/archives/libxml2-2.9.13.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
- metadata +207 -137
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxslt/0001-Fix-security-framework-bypass.patch +0 -120
- data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -1,105 +1,130 @@
|
|
1
|
-
# encoding:
|
2
|
-
|
3
|
-
|
1
|
+
# encoding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "stringio"
|
4
5
|
|
5
6
|
module Nokogiri
|
6
7
|
module XML
|
7
|
-
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
8
|
+
# Nokogiri::XML::Node is the primary API you'll use to interact with your Document.
|
9
|
+
#
|
10
|
+
# == Attributes
|
11
|
+
#
|
12
|
+
# A Nokogiri::XML::Node may be treated similarly to a hash with regard to attributes. For
|
13
|
+
# example:
|
14
|
+
#
|
15
|
+
# node = Nokogiri::XML::DocumentFragment.parse("<a href='#foo' id='link'>link</a>").at_css("a")
|
16
|
+
# node.to_html # => "<a href=\"#foo\" id=\"link\">link</a>"
|
17
|
+
# node['href'] # => "#foo"
|
18
|
+
# node.keys # => ["href", "id"]
|
19
|
+
# node.values # => ["#foo", "link"]
|
20
|
+
# node['class'] = 'green' # => "green"
|
21
|
+
# node.to_html # => "<a href=\"#foo\" id=\"link\" class=\"green\">link</a>"
|
22
|
+
#
|
23
|
+
# See the method group entitled Node@Working+With+Node+Attributes for the full set of methods.
|
24
|
+
#
|
25
|
+
# == Navigation
|
11
26
|
#
|
12
|
-
#
|
13
|
-
# => <a href="#foo" id="link">link</a>
|
14
|
-
# irb(main):005:0> node['href']
|
15
|
-
# => "#foo"
|
16
|
-
# irb(main):006:0> node.keys
|
17
|
-
# => ["href", "id"]
|
18
|
-
# irb(main):007:0> node.values
|
19
|
-
# => ["#foo", "link"]
|
20
|
-
# irb(main):008:0> node['class'] = 'green'
|
21
|
-
# => "green"
|
22
|
-
# irb(main):009:0> node
|
23
|
-
# => <a href="#foo" id="link" class="green">link</a>
|
24
|
-
# irb(main):010:0>
|
27
|
+
# Nokogiri::XML::Node also has methods that let you move around your tree:
|
25
28
|
#
|
26
|
-
#
|
29
|
+
# [#parent, #children, #next, #previous]
|
30
|
+
# Navigate up, down, or through siblings.
|
27
31
|
#
|
28
|
-
#
|
29
|
-
# tree. For navigating your tree, see:
|
32
|
+
# See the method group entitled Node@Traversing+Document+Structure for the full set of methods.
|
30
33
|
#
|
31
|
-
#
|
32
|
-
# * Nokogiri::XML::Node#children
|
33
|
-
# * Nokogiri::XML::Node#next
|
34
|
-
# * Nokogiri::XML::Node#previous
|
34
|
+
# == Serialization
|
35
35
|
#
|
36
|
+
# When printing or otherwise emitting a document or a node (and its subtree), there are a few
|
37
|
+
# methods you might want to use:
|
36
38
|
#
|
37
|
-
#
|
38
|
-
#
|
39
|
+
# [#content, #text, #inner_text, #to_str]
|
40
|
+
# These methods will all **emit plaintext**,
|
41
|
+
# meaning that entities will be replaced (e.g., +&lt;+ will be replaced with +<+), meaning
|
42
|
+
# that any sanitizing will likely be un-done in the output.
|
39
43
|
#
|
40
|
-
#
|
44
|
+
# [#to_s, #to_xml, #to_html, #inner_html]
|
45
|
+
# These methods will all **emit properly-escaped markup**, meaning that it's suitable for
|
46
|
+
# consumption by browsers, parsers, etc.
|
41
47
|
#
|
42
|
-
#
|
43
|
-
# document, meaning that entities will be replaced (e.g., "&lt;"
|
44
|
-
# will be replaced with "<"), meaning that any sanitizing will
|
45
|
-
# likely be un-done in the output.
|
48
|
+
# See the method group entitled Node@Serialization+and+Generating+Output for the full set of methods.
|
46
49
|
#
|
47
|
-
#
|
50
|
+
# == Searching
|
48
51
|
#
|
49
|
-
#
|
50
|
-
#
|
52
|
+
# You may search this node's subtree using methods like #xpath and #css.
|
53
|
+
#
|
54
|
+
# See the method group entitled Node@Searching+via+XPath+or+CSS+Queries for the full set of methods.
|
51
55
|
#
|
52
|
-
# You may search this node's subtree using Searchable#xpath and Searchable#css
|
53
56
|
class Node
|
54
57
|
include Nokogiri::XML::PP::Node
|
55
58
|
include Nokogiri::XML::Searchable
|
59
|
+
include Nokogiri::ClassResolver
|
56
60
|
include Enumerable
|
57
61
|
|
58
62
|
# Element node type, see Nokogiri::XML::Node#element?
|
59
|
-
ELEMENT_NODE =
|
63
|
+
ELEMENT_NODE = 1
|
60
64
|
# Attribute node type
|
61
|
-
ATTRIBUTE_NODE =
|
65
|
+
ATTRIBUTE_NODE = 2
|
62
66
|
# Text node type, see Nokogiri::XML::Node#text?
|
63
|
-
TEXT_NODE =
|
67
|
+
TEXT_NODE = 3
|
64
68
|
# CDATA node type, see Nokogiri::XML::Node#cdata?
|
65
69
|
CDATA_SECTION_NODE = 4
|
66
70
|
# Entity reference node type
|
67
|
-
ENTITY_REF_NODE =
|
71
|
+
ENTITY_REF_NODE = 5
|
68
72
|
# Entity node type
|
69
|
-
ENTITY_NODE =
|
73
|
+
ENTITY_NODE = 6
|
70
74
|
# PI node type
|
71
|
-
PI_NODE =
|
75
|
+
PI_NODE = 7
|
72
76
|
# Comment node type, see Nokogiri::XML::Node#comment?
|
73
|
-
COMMENT_NODE =
|
77
|
+
COMMENT_NODE = 8
|
74
78
|
# Document node type, see Nokogiri::XML::Node#xml?
|
75
|
-
DOCUMENT_NODE =
|
79
|
+
DOCUMENT_NODE = 9
|
76
80
|
# Document type node type
|
77
81
|
DOCUMENT_TYPE_NODE = 10
|
78
82
|
# Document fragment node type
|
79
83
|
DOCUMENT_FRAG_NODE = 11
|
80
84
|
# Notation node type
|
81
|
-
NOTATION_NODE =
|
85
|
+
NOTATION_NODE = 12
|
82
86
|
# HTML document node type, see Nokogiri::XML::Node#html?
|
83
87
|
HTML_DOCUMENT_NODE = 13
|
84
88
|
# DTD node type
|
85
|
-
DTD_NODE =
|
89
|
+
DTD_NODE = 14
|
86
90
|
# Element declaration type
|
87
|
-
ELEMENT_DECL =
|
91
|
+
ELEMENT_DECL = 15
|
88
92
|
# Attribute declaration type
|
89
|
-
ATTRIBUTE_DECL =
|
93
|
+
ATTRIBUTE_DECL = 16
|
90
94
|
# Entity declaration type
|
91
|
-
ENTITY_DECL =
|
95
|
+
ENTITY_DECL = 17
|
92
96
|
# Namespace declaration type
|
93
|
-
NAMESPACE_DECL =
|
97
|
+
NAMESPACE_DECL = 18
|
94
98
|
# XInclude start type
|
95
|
-
XINCLUDE_START =
|
99
|
+
XINCLUDE_START = 19
|
96
100
|
# XInclude end type
|
97
|
-
XINCLUDE_END =
|
101
|
+
XINCLUDE_END = 20
|
98
102
|
# DOCB document node type
|
99
103
|
DOCB_DOCUMENT_NODE = 21
|
100
104
|
|
101
|
-
|
102
|
-
|
105
|
+
#
|
106
|
+
# :call-seq:
|
107
|
+
# new(name, document) -> Nokogiri::XML::Node
|
108
|
+
# new(name, document) { |node| ... } -> Nokogiri::XML::Node
|
109
|
+
#
|
110
|
+
# Create a new node with +name+ that belongs to +document+.
|
111
|
+
#
|
112
|
+
# If you intend to add a node to a document tree, it's likely that you will prefer one of the
|
113
|
+
# Nokogiri::XML::Node methods like #add_child, #add_next_sibling, #replace, etc. which will
|
114
|
+
# both create an element (or subtree) and place it in the document tree.
|
115
|
+
#
|
116
|
+
# Another alternative, if you are concerned about performance, is
|
117
|
+
# Nokogiri::XML::Document#create_element which accepts additional arguments for contents or
|
118
|
+
# attributes but (like this method) avoids parsing markup.
|
119
|
+
#
|
120
|
+
# [Parameters]
|
121
|
+
# - +name+ (String)
|
122
|
+
# - +document+ (Nokogiri::XML::Document) The document to which the returned node will belong.
|
123
|
+
# [Yields] Nokogiri::XML::Node
|
124
|
+
# [Returns] Nokogiri::XML::Node
|
125
|
+
#
|
126
|
+
def initialize(name, document)
|
127
|
+
# This is intentionally empty.
|
103
128
|
end
|
104
129
|
|
105
130
|
###
|
@@ -108,24 +133,7 @@ module Nokogiri
|
|
108
133
|
document.decorate(self)
|
109
134
|
end
|
110
135
|
|
111
|
-
|
112
|
-
# Search this node's immediate children using CSS selector +selector+
|
113
|
-
def > selector
|
114
|
-
ns = document.root.namespaces
|
115
|
-
xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
|
116
|
-
end
|
117
|
-
|
118
|
-
###
|
119
|
-
# Get the attribute value for the attribute +name+
|
120
|
-
def [] name
|
121
|
-
get(name.to_s)
|
122
|
-
end
|
123
|
-
|
124
|
-
###
|
125
|
-
# Set the attribute value for the attribute +name+ to +value+
|
126
|
-
def []= name, value
|
127
|
-
set name.to_s, value.to_s
|
128
|
-
end
|
136
|
+
# :section: Manipulating Document Structure
|
129
137
|
|
130
138
|
###
|
131
139
|
# Add +node_or_tags+ as a child of this Node.
|
@@ -134,12 +142,12 @@ module Nokogiri
|
|
134
142
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
135
143
|
#
|
136
144
|
# Also see related method +<<+.
|
137
|
-
def add_child
|
145
|
+
def add_child(node_or_tags)
|
138
146
|
node_or_tags = coerce(node_or_tags)
|
139
147
|
if node_or_tags.is_a?(XML::NodeSet)
|
140
|
-
node_or_tags.each { |n| add_child_node_and_reparent_attrs
|
148
|
+
node_or_tags.each { |n| add_child_node_and_reparent_attrs(n) }
|
141
149
|
else
|
142
|
-
add_child_node_and_reparent_attrs
|
150
|
+
add_child_node_and_reparent_attrs(node_or_tags)
|
143
151
|
end
|
144
152
|
node_or_tags
|
145
153
|
end
|
@@ -151,17 +159,16 @@ module Nokogiri
|
|
151
159
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
152
160
|
#
|
153
161
|
# Also see related method +add_child+.
|
154
|
-
def prepend_child
|
155
|
-
if first = children.first
|
162
|
+
def prepend_child(node_or_tags)
|
163
|
+
if (first = children.first)
|
156
164
|
# Mimic the error add_child would raise.
|
157
|
-
raise
|
165
|
+
raise "Document already has a root node" if document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
158
166
|
first.__send__(:add_sibling, :previous, node_or_tags)
|
159
167
|
else
|
160
168
|
add_child(node_or_tags)
|
161
169
|
end
|
162
170
|
end
|
163
171
|
|
164
|
-
|
165
172
|
###
|
166
173
|
# Add html around this node
|
167
174
|
#
|
@@ -180,8 +187,8 @@ module Nokogiri
|
|
180
187
|
# Returns self, to support chaining of calls (e.g., root << child1 << child2)
|
181
188
|
#
|
182
189
|
# Also see related method +add_child+.
|
183
|
-
def <<
|
184
|
-
add_child
|
190
|
+
def <<(node_or_tags)
|
191
|
+
add_child(node_or_tags)
|
185
192
|
self
|
186
193
|
end
|
187
194
|
|
@@ -192,10 +199,11 @@ module Nokogiri
|
|
192
199
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
193
200
|
#
|
194
201
|
# Also see related method +before+.
|
195
|
-
def add_previous_sibling
|
196
|
-
raise ArgumentError
|
202
|
+
def add_previous_sibling(node_or_tags)
|
203
|
+
raise ArgumentError,
|
204
|
+
"A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
197
205
|
|
198
|
-
add_sibling
|
206
|
+
add_sibling(:previous, node_or_tags)
|
199
207
|
end
|
200
208
|
|
201
209
|
###
|
@@ -205,10 +213,11 @@ module Nokogiri
|
|
205
213
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
206
214
|
#
|
207
215
|
# Also see related method +after+.
|
208
|
-
def add_next_sibling
|
209
|
-
raise ArgumentError
|
216
|
+
def add_next_sibling(node_or_tags)
|
217
|
+
raise ArgumentError,
|
218
|
+
"A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
210
219
|
|
211
|
-
add_sibling
|
220
|
+
add_sibling(:next, node_or_tags)
|
212
221
|
end
|
213
222
|
|
214
223
|
####
|
@@ -218,8 +227,8 @@ module Nokogiri
|
|
218
227
|
# Returns self, to support chaining of calls.
|
219
228
|
#
|
220
229
|
# Also see related method +add_previous_sibling+.
|
221
|
-
def before
|
222
|
-
add_previous_sibling
|
230
|
+
def before(node_or_tags)
|
231
|
+
add_previous_sibling(node_or_tags)
|
223
232
|
self
|
224
233
|
end
|
225
234
|
|
@@ -230,8 +239,8 @@ module Nokogiri
|
|
230
239
|
# Returns self, to support chaining of calls.
|
231
240
|
#
|
232
241
|
# Also see related method +add_next_sibling+.
|
233
|
-
def after
|
234
|
-
add_next_sibling
|
242
|
+
def after(node_or_tags)
|
243
|
+
add_next_sibling(node_or_tags)
|
235
244
|
self
|
236
245
|
end
|
237
246
|
|
@@ -239,30 +248,24 @@ module Nokogiri
|
|
239
248
|
# Set the inner html for this Node to +node_or_tags+
|
240
249
|
# +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
|
241
250
|
#
|
242
|
-
# Returns self.
|
243
|
-
#
|
244
251
|
# Also see related method +children=+
|
245
|
-
def inner_html=
|
252
|
+
def inner_html=(node_or_tags)
|
246
253
|
self.children = node_or_tags
|
247
|
-
self
|
248
254
|
end
|
249
255
|
|
250
256
|
####
|
251
257
|
# Set the inner html for this Node +node_or_tags+
|
252
258
|
# +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
|
253
259
|
#
|
254
|
-
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
255
|
-
#
|
256
260
|
# Also see related method +inner_html=+
|
257
|
-
def children=
|
261
|
+
def children=(node_or_tags)
|
258
262
|
node_or_tags = coerce(node_or_tags)
|
259
263
|
children.unlink
|
260
264
|
if node_or_tags.is_a?(XML::NodeSet)
|
261
|
-
node_or_tags.each { |n| add_child_node_and_reparent_attrs
|
265
|
+
node_or_tags.each { |n| add_child_node_and_reparent_attrs(n) }
|
262
266
|
else
|
263
|
-
add_child_node_and_reparent_attrs
|
267
|
+
add_child_node_and_reparent_attrs(node_or_tags)
|
264
268
|
end
|
265
|
-
node_or_tags
|
266
269
|
end
|
267
270
|
|
268
271
|
####
|
@@ -272,25 +275,27 @@ module Nokogiri
|
|
272
275
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
273
276
|
#
|
274
277
|
# Also see related method +swap+.
|
275
|
-
def replace
|
278
|
+
def replace(node_or_tags)
|
279
|
+
raise("Cannot replace a node with no parent") unless parent
|
280
|
+
|
276
281
|
# We cannot replace a text node directly, otherwise libxml will return
|
277
282
|
# an internal error at parser.c:13031, I don't know exactly why
|
278
283
|
# libxml is trying to find a parent node that is an element or document
|
279
284
|
# so I can't tell if this is bug in libxml or not. issue #775.
|
280
285
|
if text?
|
281
|
-
replacee = Nokogiri::XML::Node.new
|
282
|
-
add_previous_sibling_node
|
286
|
+
replacee = Nokogiri::XML::Node.new("dummy", document)
|
287
|
+
add_previous_sibling_node(replacee)
|
283
288
|
unlink
|
284
|
-
return replacee.replace
|
289
|
+
return replacee.replace(node_or_tags)
|
285
290
|
end
|
286
291
|
|
287
|
-
node_or_tags = coerce(node_or_tags)
|
292
|
+
node_or_tags = parent.coerce(node_or_tags)
|
288
293
|
|
289
294
|
if node_or_tags.is_a?(XML::NodeSet)
|
290
|
-
node_or_tags.each { |n| add_previous_sibling
|
295
|
+
node_or_tags.each { |n| add_previous_sibling(n) }
|
291
296
|
unlink
|
292
297
|
else
|
293
|
-
replace_node
|
298
|
+
replace_node(node_or_tags)
|
294
299
|
end
|
295
300
|
node_or_tags
|
296
301
|
end
|
@@ -302,44 +307,215 @@ module Nokogiri
|
|
302
307
|
# Returns self, to support chaining of calls.
|
303
308
|
#
|
304
309
|
# Also see related method +replace+.
|
305
|
-
def swap
|
306
|
-
replace
|
310
|
+
def swap(node_or_tags)
|
311
|
+
replace(node_or_tags)
|
307
312
|
self
|
308
313
|
end
|
309
314
|
|
310
|
-
alias :next :next_sibling
|
311
|
-
alias :previous :previous_sibling
|
312
|
-
|
313
|
-
# :stopdoc:
|
314
|
-
# HACK: This is to work around an RDoc bug
|
315
|
-
alias :next= :add_next_sibling
|
316
|
-
# :startdoc:
|
317
|
-
|
318
|
-
alias :previous= :add_previous_sibling
|
319
|
-
alias :remove :unlink
|
320
|
-
alias :get_attribute :[]
|
321
|
-
alias :attr :[]
|
322
|
-
alias :set_attribute :[]=
|
323
|
-
alias :text :content
|
324
|
-
alias :inner_text :content
|
325
|
-
alias :has_attribute? :key?
|
326
|
-
alias :name :node_name
|
327
|
-
alias :name= :node_name=
|
328
|
-
alias :type :node_type
|
329
|
-
alias :to_str :text
|
330
|
-
alias :clone :dup
|
331
|
-
alias :elements :element_children
|
332
|
-
|
333
315
|
####
|
334
|
-
#
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
316
|
+
# Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
|
317
|
+
def content=(string)
|
318
|
+
self.native_content = encode_special_chars(string.to_s)
|
319
|
+
end
|
320
|
+
|
321
|
+
###
|
322
|
+
# Set the parent Node for this Node
|
323
|
+
def parent=(parent_node)
|
324
|
+
parent_node.add_child(self)
|
325
|
+
end
|
326
|
+
|
327
|
+
###
|
328
|
+
# Adds a default namespace supplied as a string +url+ href, to self.
|
329
|
+
# The consequence is as if an xmlns attribute with the supplied argument were
|
330
|
+
# present in parsed XML. A default namespace set with this method will
|
331
|
+
# not show up in #attributes, but when this node is serialized to XML an
|
332
|
+
# "xmlns" attribute will appear. See also #namespace and #namespace=
|
333
|
+
def default_namespace=(url)
|
334
|
+
add_namespace_definition(nil, url)
|
335
|
+
end
|
336
|
+
|
337
|
+
###
|
338
|
+
# Set the default namespace on this node (as would be defined with an
|
339
|
+
# "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
|
340
|
+
# a Namespace added this way will NOT be serialized as an xmlns attribute
|
341
|
+
# for this node. You probably want #default_namespace= instead, or perhaps
|
342
|
+
# #add_namespace_definition with a nil prefix argument.
|
343
|
+
def namespace=(ns)
|
344
|
+
return set_namespace(ns) unless ns
|
345
|
+
|
346
|
+
unless Nokogiri::XML::Namespace === ns
|
347
|
+
raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
|
348
|
+
end
|
349
|
+
if ns.document != document
|
350
|
+
raise ArgumentError, "namespace must be declared on the same document"
|
351
|
+
end
|
352
|
+
|
353
|
+
set_namespace(ns)
|
354
|
+
end
|
355
|
+
|
356
|
+
###
|
357
|
+
# Do xinclude substitution on the subtree below node. If given a block, a
|
358
|
+
# Nokogiri::XML::ParseOptions object initialized from +options+, will be
|
359
|
+
# passed to it, allowing more convenient modification of the parser options.
|
360
|
+
def do_xinclude(options = XML::ParseOptions::DEFAULT_XML)
|
361
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
362
|
+
yield options if block_given?
|
363
|
+
|
364
|
+
# call c extension
|
365
|
+
process_xincludes(options.to_i)
|
366
|
+
end
|
367
|
+
|
368
|
+
alias_method :next, :next_sibling
|
369
|
+
alias_method :previous, :previous_sibling
|
370
|
+
alias_method :next=, :add_next_sibling
|
371
|
+
alias_method :previous=, :add_previous_sibling
|
372
|
+
alias_method :remove, :unlink
|
373
|
+
alias_method :name=, :node_name=
|
374
|
+
alias_method :add_namespace, :add_namespace_definition
|
375
|
+
|
376
|
+
# :section:
|
377
|
+
|
378
|
+
alias_method :inner_text, :content
|
379
|
+
alias_method :text, :content
|
380
|
+
alias_method :to_str, :content
|
381
|
+
alias_method :name, :node_name
|
382
|
+
alias_method :type, :node_type
|
383
|
+
alias_method :clone, :dup
|
384
|
+
alias_method :elements, :element_children
|
385
|
+
|
386
|
+
# :section: Working With Node Attributes
|
387
|
+
|
388
|
+
# :call-seq: [](name) → (String, nil)
|
389
|
+
#
|
390
|
+
# Fetch an attribute from this node.
|
391
|
+
#
|
392
|
+
# ⚠ Note that attributes with namespaces cannot be accessed with this method. To access
|
393
|
+
# namespaced attributes, use #attribute_with_ns.
|
394
|
+
#
|
395
|
+
# [Returns] (String, nil) value of the attribute +name+, or +nil+ if no matching attribute exists
|
396
|
+
#
|
397
|
+
# *Example*
|
398
|
+
#
|
399
|
+
# doc = Nokogiri::XML("<root><child size='large' class='big wide tall'/></root>")
|
400
|
+
# child = doc.at_css("child")
|
401
|
+
# child["size"] # => "large"
|
402
|
+
# child["class"] # => "big wide tall"
|
403
|
+
#
|
404
|
+
# *Example:* Namespaced attributes will not be returned.
|
405
|
+
#
|
406
|
+
# ⚠ Note namespaced attributes may be accessed with #attribute or #attribute_with_ns
|
407
|
+
#
|
408
|
+
# doc = Nokogiri::XML(<<~EOF)
|
409
|
+
# <root xmlns:width='http://example.com/widths'>
|
410
|
+
# <child width:size='broad'/>
|
411
|
+
# </root>
|
412
|
+
# EOF
|
413
|
+
# doc.at_css("child")["size"] # => nil
|
414
|
+
# doc.at_css("child").attribute("size").value # => "broad"
|
415
|
+
# doc.at_css("child").attribute_with_ns("size", "http://example.com/widths").value
|
416
|
+
# # => "broad"
|
417
|
+
#
|
418
|
+
def [](name)
|
419
|
+
get(name.to_s)
|
420
|
+
end
|
421
|
+
|
422
|
+
# :call-seq: []=(name, value) → value
|
423
|
+
#
|
424
|
+
# Update the attribute +name+ to +value+, or create the attribute if it does not exist.
|
425
|
+
#
|
426
|
+
# ⚠ Note that attributes with namespaces cannot be accessed with this method. To access
|
427
|
+
# namespaced attributes for update, use #attribute_with_ns. To add a namespaced attribute,
|
428
|
+
# see the example below.
|
429
|
+
#
|
430
|
+
# [Returns] +value+
|
431
|
+
#
|
432
|
+
# *Example*
|
433
|
+
#
|
434
|
+
# doc = Nokogiri::XML("<root><child/></root>")
|
435
|
+
# child = doc.at_css("child")
|
436
|
+
# child["size"] = "broad"
|
437
|
+
# child.to_html
|
438
|
+
# # => "<child size=\"broad\"></child>"
|
439
|
+
#
|
440
|
+
# *Example:* Add a namespaced attribute.
|
441
|
+
#
|
442
|
+
# doc = Nokogiri::XML(<<~EOF)
|
443
|
+
# <root xmlns:width='http://example.com/widths'>
|
444
|
+
# <child/>
|
445
|
+
# </root>
|
446
|
+
# EOF
|
447
|
+
# child = doc.at_css("child")
|
448
|
+
# child["size"] = "broad"
|
449
|
+
# ns = doc.root.namespace_definitions.find { |ns| ns.prefix == "width" }
|
450
|
+
# child.attribute("size").namespace = ns
|
451
|
+
# doc.to_html
|
452
|
+
# # => "<root xmlns:width=\"http://example.com/widths\">\n" +
|
453
|
+
# # " <child width:size=\"broad\"></child>\n" +
|
454
|
+
# # "</root>\n"
|
455
|
+
#
|
456
|
+
def []=(name, value)
|
457
|
+
set(name.to_s, value.to_s)
|
458
|
+
end
|
459
|
+
|
460
|
+
#
|
461
|
+
# :call-seq: attributes() → Hash<String ⇒ Nokogiri::XML::Attr>
|
462
|
+
#
|
463
|
+
# Fetch this node's attributes.
|
464
|
+
#
|
465
|
+
# ⚠ Because the keys do not include any namespace information for the attribute, in case of a
|
466
|
+
# simple name collision, not all attributes will be returned. In this case, you will need to
|
467
|
+
# use #attribute_nodes.
|
468
|
+
#
|
469
|
+
# [Returns]
|
470
|
+
# Hash containing attributes belonging to +self+. The hash keys are String attribute
|
471
|
+
# names (without the namespace), and the hash values are Nokogiri::XML::Attr.
|
472
|
+
#
|
473
|
+
# *Example* with no namespaces:
|
474
|
+
#
|
475
|
+
# doc = Nokogiri::XML("<root><child size='large' class='big wide tall'/></root>")
|
476
|
+
# doc.at_css("child").attributes
|
477
|
+
# # => {"size"=>#(Attr:0x550 { name = "size", value = "large" }),
|
478
|
+
# # "class"=>#(Attr:0x564 { name = "class", value = "big wide tall" })}
|
479
|
+
#
|
480
|
+
# *Example* with a namespace:
|
481
|
+
#
|
482
|
+
# doc = Nokogiri::XML("<root xmlns:desc='http://example.com/sizes'><child desc:size='large'/></root>")
|
483
|
+
# doc.at_css("child").attributes
|
484
|
+
# # => {"size"=>
|
485
|
+
# # #(Attr:0x550 {
|
486
|
+
# # name = "size",
|
487
|
+
# # namespace = #(Namespace:0x564 {
|
488
|
+
# # prefix = "desc",
|
489
|
+
# # href = "http://example.com/sizes"
|
490
|
+
# # }),
|
491
|
+
# # value = "large"
|
492
|
+
# # })}
|
493
|
+
#
|
494
|
+
# *Example* with an attribute name collision:
|
495
|
+
#
|
496
|
+
# ⚠ Note that only one of the attributes is returned in the Hash.
|
497
|
+
#
|
498
|
+
# doc = Nokogiri::XML(<<~EOF)
|
499
|
+
# <root xmlns:width='http://example.com/widths'
|
500
|
+
# xmlns:height='http://example.com/heights'>
|
501
|
+
# <child width:size='broad' height:size='tall'/>
|
502
|
+
# </root>
|
503
|
+
# EOF
|
504
|
+
# doc.at_css("child").attributes
|
505
|
+
# # => {"size"=>
|
506
|
+
# # #(Attr:0x550 {
|
507
|
+
# # name = "size",
|
508
|
+
# # namespace = #(Namespace:0x564 {
|
509
|
+
# # prefix = "height",
|
510
|
+
# # href = "http://example.com/heights"
|
511
|
+
# # }),
|
512
|
+
# # value = "tall"
|
513
|
+
# # })}
|
514
|
+
#
|
339
515
|
def attributes
|
340
|
-
|
341
|
-
[node.node_name
|
342
|
-
|
516
|
+
attribute_nodes.each_with_object({}) do |node, hash|
|
517
|
+
hash[node.node_name] = node
|
518
|
+
end
|
343
519
|
end
|
344
520
|
|
345
521
|
###
|
@@ -348,6 +524,12 @@ module Nokogiri
|
|
348
524
|
attribute_nodes.map(&:value)
|
349
525
|
end
|
350
526
|
|
527
|
+
###
|
528
|
+
# Do this Node's attributes include +value+?
|
529
|
+
def value?(value)
|
530
|
+
values.include?(value)
|
531
|
+
end
|
532
|
+
|
351
533
|
###
|
352
534
|
# Get the attribute names for this Node.
|
353
535
|
def keys
|
@@ -357,97 +539,401 @@ module Nokogiri
|
|
357
539
|
###
|
358
540
|
# Iterate over each attribute name and value pair for this Node.
|
359
541
|
def each
|
360
|
-
attribute_nodes.each
|
542
|
+
attribute_nodes.each do |node|
|
361
543
|
yield [node.node_name, node.value]
|
362
|
-
|
544
|
+
end
|
363
545
|
end
|
364
546
|
|
365
547
|
###
|
366
|
-
#
|
367
|
-
|
548
|
+
# Remove the attribute named +name+
|
549
|
+
def remove_attribute(name)
|
550
|
+
attr = attributes[name].remove if key?(name)
|
551
|
+
clear_xpath_context if Nokogiri.jruby?
|
552
|
+
attr
|
553
|
+
end
|
554
|
+
|
555
|
+
#
|
556
|
+
# :call-seq: classes() → Array<String>
|
557
|
+
#
|
558
|
+
# Fetch CSS class names of a Node.
|
559
|
+
#
|
560
|
+
# This is a convenience function and is equivalent to:
|
561
|
+
#
|
562
|
+
# node.kwattr_values("class")
|
563
|
+
#
|
564
|
+
# See related: #kwattr_values, #add_class, #append_class, #remove_class
|
565
|
+
#
|
566
|
+
# [Returns]
|
567
|
+
# The CSS classes (Array of String) present in the Node's "class" attribute. If the
|
568
|
+
# attribute is empty or non-existent, the return value is an empty array.
|
569
|
+
#
|
570
|
+
# *Example*
|
571
|
+
#
|
572
|
+
# node # => <div class="section title header"></div>
|
573
|
+
# node.classes # => ["section", "title", "header"]
|
574
|
+
#
|
368
575
|
def classes
|
369
|
-
|
576
|
+
kwattr_values("class")
|
370
577
|
end
|
371
578
|
|
372
|
-
###
|
373
|
-
# Add +name+ to the "class" attribute value of this Node and
|
374
|
-
# return self. If the value is already in the current value, it
|
375
|
-
# is not added. If no "class" attribute exists yet, one is
|
376
|
-
# created with the given value.
|
377
579
|
#
|
378
|
-
#
|
379
|
-
#
|
380
|
-
|
381
|
-
|
382
|
-
|
580
|
+
# :call-seq: add_class(names) → self
|
581
|
+
#
|
582
|
+
# Ensure HTML CSS classes are present on +self+. Any CSS classes in +names+ that already exist
|
583
|
+
# in the "class" attribute are _not_ added. Note that any existing duplicates in the
|
584
|
+
# "class" attribute are not removed. Compare with #append_class.
|
585
|
+
#
|
586
|
+
# This is a convenience function and is equivalent to:
|
587
|
+
#
|
588
|
+
# node.kwattr_add("class", names)
|
589
|
+
#
|
590
|
+
# See related: #kwattr_add, #classes, #append_class, #remove_class
|
591
|
+
#
|
592
|
+
# [Parameters]
|
593
|
+
# - +names+ (String, Array<String>)
|
594
|
+
#
|
595
|
+
# CSS class names to be added to the Node's "class" attribute. May be a string containing
|
596
|
+
# whitespace-delimited names, or an Array of String names. Any class names already present
|
597
|
+
# will not be added. Any class names not present will be added. If no "class" attribute
|
598
|
+
# exists, one is created.
|
599
|
+
#
|
600
|
+
# [Returns] +self+ (Node) for ease of chaining method calls.
|
601
|
+
#
|
602
|
+
# *Example:* Ensure that the node has CSS class "section"
|
603
|
+
#
|
604
|
+
# node # => <div></div>
|
605
|
+
# node.add_class("section") # => <div class="section"></div>
|
606
|
+
# node.add_class("section") # => <div class="section"></div> # duplicate not added
|
607
|
+
#
|
608
|
+
# *Example:* Ensure that the node has CSS classes "section" and "header", via a String argument
|
609
|
+
#
|
610
|
+
# Note that the CSS class "section" is not added because it is already present.
|
611
|
+
# Note also that the pre-existing duplicate CSS class "section" is not removed.
|
612
|
+
#
|
613
|
+
# node # => <div class="section section"></div>
|
614
|
+
# node.add_class("section header") # => <div class="section section header"></div>
|
615
|
+
#
|
616
|
+
# *Example:* Ensure that the node has CSS classes "section" and "header", via an Array argument
|
617
|
+
#
|
618
|
+
# node # => <div></div>
|
619
|
+
# node.add_class(["section", "header"]) # => <div class="section header"></div>
|
620
|
+
#
|
621
|
+
def add_class(names)
|
622
|
+
kwattr_add("class", names)
|
623
|
+
end
|
624
|
+
|
625
|
+
#
|
626
|
+
# :call-seq: append_class(names) → self
|
627
|
+
#
|
628
|
+
# Add HTML CSS classes to +self+, regardless of duplication. Compare with #add_class.
|
629
|
+
#
|
630
|
+
# This is a convenience function and is equivalent to:
|
631
|
+
#
|
632
|
+
# node.kwattr_append("class", names)
|
633
|
+
#
|
634
|
+
# See related: #kwattr_append, #classes, #add_class, #remove_class
|
635
|
+
#
|
636
|
+
# [Parameters]
|
637
|
+
# - +names+ (String, Array<String>)
|
638
|
+
#
|
639
|
+
# CSS class names to be appended to the Node's "class" attribute. May be a string containing
|
640
|
+
# whitespace-delimited names, or an Array of String names. All class names passed in will be
|
641
|
+
# appended to the "class" attribute even if they are already present in the attribute
|
642
|
+
# value. If no "class" attribute exists, one is created.
|
643
|
+
#
|
644
|
+
# [Returns] +self+ (Node) for ease of chaining method calls.
|
645
|
+
#
|
646
|
+
# *Example:* Append "section" to the node's CSS "class" attribute
|
647
|
+
#
|
648
|
+
# node # => <div></div>
|
649
|
+
# node.append_class("section") # => <div class="section"></div>
|
650
|
+
# node.append_class("section") # => <div class="section section"></div> # duplicate added!
|
651
|
+
#
|
652
|
+
# *Example:* Append "section" and "header" to the node's CSS "class" attribute, via a String argument
|
653
|
+
#
|
654
|
+
# Note that the CSS class "section" is appended even though it is already present
|
655
|
+
#
|
656
|
+
# node # => <div class="section section"></div>
|
657
|
+
# node.append_class("section header") # => <div class="section section section header"></div>
|
658
|
+
#
|
659
|
+
# *Example:* Append "section" and "header" to the node's CSS "class" attribute, via an Array argument
|
660
|
+
#
|
661
|
+
# node # => <div></div>
|
662
|
+
# node.append_class(["section", "header"]) # => <div class="section header"></div>
|
663
|
+
# node.append_class(["section", "header"]) # => <div class="section header section header"></div>
|
664
|
+
#
|
665
|
+
def append_class(names)
|
666
|
+
kwattr_append("class", names)
|
667
|
+
end
|
668
|
+
|
669
|
+
# :call-seq:
|
670
|
+
# remove_class(css_classes) → self
|
671
|
+
#
|
672
|
+
# Remove HTML CSS classes from this node. Any CSS class names in +css_classes+ that exist in
|
673
|
+
# this node's "class" attribute are removed, including any multiple entries.
|
674
|
+
#
|
675
|
+
# If no CSS classes remain after this operation, or if +css_classes+ is +nil+, the "class"
|
676
|
+
# attribute is deleted from the node.
|
677
|
+
#
|
678
|
+
# This is a convenience function and is equivalent to:
|
679
|
+
#
|
680
|
+
# node.kwattr_remove("class", css_classes)
|
681
|
+
#
|
682
|
+
# Also see #kwattr_remove, #classes, #add_class, #append_class
|
683
|
+
#
|
684
|
+
# [Parameters]
|
685
|
+
# - +css_classes+ (String, Array<String>)
|
686
|
+
#
|
687
|
+
# CSS class names to be removed from the Node's
|
688
|
+
# "class" attribute. May be a string containing whitespace-delimited names, or an Array of
|
689
|
+
# String names. Any class names already present will be removed. If no CSS classes remain,
|
690
|
+
# the "class" attribute is deleted.
|
691
|
+
#
|
692
|
+
# [Returns] +self+ (Nokogiri::XML::Node) for ease of chaining method calls.
|
693
|
+
#
|
694
|
+
# *Example*: Deleting a CSS class
|
695
|
+
#
|
696
|
+
# Note that all instances of the class "section" are removed from the "class" attribute.
|
697
|
+
#
|
698
|
+
# node # => <div class="section header section"></div>
|
699
|
+
# node.remove_class("section") # => <div class="header"></div>
|
700
|
+
#
|
701
|
+
# *Example*: Deleting the only remaining CSS class
|
702
|
+
#
|
703
|
+
# Note that the attribute is removed once there are no remaining classes.
|
704
|
+
#
|
705
|
+
# node # => <div class="section"></div>
|
706
|
+
# node.remove_class("section") # => <div></div>
|
707
|
+
#
|
708
|
+
# *Example*: Deleting multiple CSS classes
|
709
|
+
#
|
710
|
+
# Note that the "class" attribute is deleted once it's empty.
|
711
|
+
#
|
712
|
+
# node # => <div class="section header float"></div>
|
713
|
+
# node.remove_class(["section", "float"]) # => <div class="header"></div>
|
714
|
+
#
|
715
|
+
def remove_class(names = nil)
|
716
|
+
kwattr_remove("class", names)
|
717
|
+
end
|
718
|
+
|
719
|
+
# :call-seq:
|
720
|
+
# kwattr_values(attribute_name) → Array<String>
|
721
|
+
#
|
722
|
+
# Fetch values from a keyword attribute of a Node.
|
723
|
+
#
|
724
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
725
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
726
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
727
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
728
|
+
#
|
729
|
+
# See also #classes, #kwattr_add, #kwattr_append, #kwattr_remove
|
730
|
+
#
|
731
|
+
# [Parameters]
|
732
|
+
# - +attribute_name+ (String) The name of the keyword attribute to be inspected.
|
733
|
+
#
|
734
|
+
# [Returns]
|
735
|
+
# (Array<String>) The values present in the Node's +attribute_name+ attribute. If the
|
736
|
+
# attribute is empty or non-existent, the return value is an empty array.
|
737
|
+
#
|
738
|
+
# *Example:*
|
739
|
+
#
|
740
|
+
# node # => <a rel="nofollow noopener external">link</a>
|
741
|
+
# node.kwattr_values("rel") # => ["nofollow", "noopener", "external"]
|
742
|
+
#
|
743
|
+
# Since v1.11.0
|
744
|
+
def kwattr_values(attribute_name)
|
745
|
+
keywordify(get_attribute(attribute_name) || [])
|
746
|
+
end
|
747
|
+
|
748
|
+
# :call-seq:
|
749
|
+
# kwattr_add(attribute_name, keywords) → self
|
750
|
+
#
|
751
|
+
# Ensure that values are present in a keyword attribute.
|
752
|
+
#
|
753
|
+
# Any values in +keywords+ that already exist in the Node's attribute values are _not_
|
754
|
+
# added. Note that any existing duplicates in the attribute values are not removed. Compare
|
755
|
+
# with #kwattr_append.
|
756
|
+
#
|
757
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
758
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
759
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
760
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
761
|
+
#
|
762
|
+
# See also #add_class, #kwattr_values, #kwattr_append, #kwattr_remove
|
763
|
+
#
|
764
|
+
# [Parameters]
|
765
|
+
# - +attribute_name+ (String) The name of the keyword attribute to be modified.
|
766
|
+
# - +keywords+ (String, Array<String>)
|
767
|
+
# Keywords to be added to the attribute named +attribute_name+. May be a string containing
|
768
|
+
# whitespace-delimited values, or an Array of String values. Any values already present will
|
769
|
+
# not be added. Any values not present will be added. If the named attribute does not exist,
|
770
|
+
# it is created.
|
771
|
+
#
|
772
|
+
# [Returns] +self+ (Nokogiri::XML::Node) for ease of chaining method calls.
|
773
|
+
#
|
774
|
+
# *Example:* Ensure that a +Node+ has "nofollow" in its +rel+ attribute.
|
775
|
+
#
|
776
|
+
# Note that duplicates are not added.
|
777
|
+
#
|
778
|
+
# node # => <a></a>
|
779
|
+
# node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a>
|
780
|
+
# node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a>
|
781
|
+
#
|
782
|
+
# *Example:* Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via a
|
783
|
+
# String argument.
|
784
|
+
#
|
785
|
+
# Note that "nofollow" is not added because it is already present. Note also that the
|
786
|
+
# pre-existing duplicate "nofollow" is not removed.
|
787
|
+
#
|
788
|
+
# node # => <a rel="nofollow nofollow"></a>
|
789
|
+
# node.kwattr_add("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
|
790
|
+
#
|
791
|
+
# *Example:* Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via
|
792
|
+
# an Array argument.
|
793
|
+
#
|
794
|
+
# node # => <a></a>
|
795
|
+
# node.kwattr_add("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
|
796
|
+
#
|
797
|
+
# Since v1.11.0
|
798
|
+
def kwattr_add(attribute_name, keywords)
|
799
|
+
keywords = keywordify(keywords)
|
800
|
+
current_kws = kwattr_values(attribute_name)
|
801
|
+
new_kws = (current_kws + (keywords - current_kws)).join(" ")
|
802
|
+
set_attribute(attribute_name, new_kws)
|
383
803
|
self
|
384
804
|
end
|
385
805
|
|
386
|
-
|
387
|
-
#
|
388
|
-
#
|
389
|
-
#
|
390
|
-
#
|
806
|
+
# :call-seq:
|
807
|
+
# kwattr_append(attribute_name, keywords) → self
|
808
|
+
#
|
809
|
+
# Add keywords to a Node's keyword attribute, regardless of duplication. Compare with
|
810
|
+
# #kwattr_add.
|
811
|
+
#
|
812
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
813
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
814
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
815
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
816
|
+
#
|
817
|
+
# See also #append_class, #kwattr_values, #kwattr_add, #kwattr_remove
|
818
|
+
#
|
819
|
+
# [Parameters]
|
820
|
+
# - +attribute_name+ (String) The name of the keyword attribute to be modified.
|
821
|
+
# - +keywords+ (String, Array<String>)
|
822
|
+
# Keywords to be added to the attribute named +attribute_name+. May be a string containing
|
823
|
+
# whitespace-delimited values, or an Array of String values. All values passed in will be
|
824
|
+
# appended to the named attribute even if they are already present in the attribute. If the
|
825
|
+
# named attribute does not exist, it is created.
|
826
|
+
#
|
827
|
+
# [Returns] +self+ (Node) for ease of chaining method calls.
|
391
828
|
#
|
392
|
-
#
|
393
|
-
#
|
394
|
-
|
395
|
-
|
829
|
+
# *Example:* Append "nofollow" to the +rel+ attribute.
|
830
|
+
#
|
831
|
+
# Note that duplicates are added.
|
832
|
+
#
|
833
|
+
# node # => <a></a>
|
834
|
+
# node.kwattr_append("rel", "nofollow") # => <a rel="nofollow"></a>
|
835
|
+
# node.kwattr_append("rel", "nofollow") # => <a rel="nofollow nofollow"></a>
|
836
|
+
#
|
837
|
+
# *Example:* Append "nofollow" and "noreferrer" to the +rel+ attribute, via a String argument.
|
838
|
+
#
|
839
|
+
# Note that "nofollow" is appended even though it is already present.
|
840
|
+
#
|
841
|
+
# node # => <a rel="nofollow"></a>
|
842
|
+
# node.kwattr_append("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
|
843
|
+
#
|
844
|
+
#
|
845
|
+
# *Example:* Append "nofollow" and "noreferrer" to the +rel+ attribute, via an Array argument.
|
846
|
+
#
|
847
|
+
# node # => <a></a>
|
848
|
+
# node.kwattr_append("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
|
849
|
+
#
|
850
|
+
# Since v1.11.0
|
851
|
+
def kwattr_append(attribute_name, keywords)
|
852
|
+
keywords = keywordify(keywords)
|
853
|
+
current_kws = kwattr_values(attribute_name)
|
854
|
+
new_kws = (current_kws + keywords).join(" ")
|
855
|
+
set_attribute(attribute_name, new_kws)
|
396
856
|
self
|
397
857
|
end
|
398
858
|
|
399
|
-
|
400
|
-
#
|
401
|
-
# and return self. If there are many occurrences of the name,
|
402
|
-
# they are all removed.
|
859
|
+
# :call-seq:
|
860
|
+
# kwattr_remove(attribute_name, keywords) → self
|
403
861
|
#
|
404
|
-
#
|
405
|
-
#
|
862
|
+
# Remove keywords from a keyword attribute. Any matching keywords that exist in the named
|
863
|
+
# attribute are removed, including any multiple entries.
|
406
864
|
#
|
407
|
-
# If no
|
408
|
-
#
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
865
|
+
# If no keywords remain after this operation, or if +keywords+ is +nil+, the attribute is
|
866
|
+
# deleted from the node.
|
867
|
+
#
|
868
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
869
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
870
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
871
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
872
|
+
#
|
873
|
+
# See also #remove_class, #kwattr_values, #kwattr_add, #kwattr_append
|
874
|
+
#
|
875
|
+
# [Parameters]
|
876
|
+
# - +attribute_name+ (String) The name of the keyword attribute to be modified.
|
877
|
+
# - +keywords+ (String, Array<String>)
|
878
|
+
# Keywords to be removed from the attribute named +attribute_name+. May be a string
|
879
|
+
# containing whitespace-delimited values, or an Array of String values. Any keywords present
|
880
|
+
# in the named attribute will be removed. If no keywords remain, or if +keywords+ is nil,
|
881
|
+
# the attribute is deleted.
|
882
|
+
#
|
883
|
+
# [Returns] +self+ (Node) for ease of chaining method calls.
|
884
|
+
#
|
885
|
+
# *Example:*
|
886
|
+
#
|
887
|
+
# Note that the +rel+ attribute is deleted when empty.
|
888
|
+
#
|
889
|
+
# node # => <a rel="nofollow noreferrer">link</a>
|
890
|
+
# node.kwattr_remove("rel", "nofollow") # => <a rel="noreferrer">link</a>
|
891
|
+
# node.kwattr_remove("rel", "noreferrer") # => <a>link</a>
|
892
|
+
#
|
893
|
+
# Since v1.11.0
|
894
|
+
def kwattr_remove(attribute_name, keywords)
|
895
|
+
if keywords.nil?
|
896
|
+
remove_attribute(attribute_name)
|
897
|
+
return self
|
898
|
+
end
|
899
|
+
|
900
|
+
keywords = keywordify(keywords)
|
901
|
+
current_kws = kwattr_values(attribute_name)
|
902
|
+
new_kws = current_kws - keywords
|
903
|
+
if new_kws.empty?
|
904
|
+
remove_attribute(attribute_name)
|
417
905
|
else
|
418
|
-
|
906
|
+
set_attribute(attribute_name, new_kws.join(" "))
|
419
907
|
end
|
420
908
|
self
|
421
909
|
end
|
422
910
|
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
alias :delete :remove_attribute
|
911
|
+
alias_method :delete, :remove_attribute
|
912
|
+
alias_method :get_attribute, :[]
|
913
|
+
alias_method :attr, :[]
|
914
|
+
alias_method :set_attribute, :[]=
|
915
|
+
alias_method :has_attribute?, :key?
|
916
|
+
|
917
|
+
# :section:
|
431
918
|
|
432
919
|
###
|
433
920
|
# Returns true if this Node matches +selector+
|
434
|
-
def matches?
|
921
|
+
def matches?(selector)
|
435
922
|
ancestors.last.search(selector).include?(self)
|
436
923
|
end
|
437
924
|
|
438
925
|
###
|
439
926
|
# Create a DocumentFragment containing +tags+ that is relative to _this_
|
440
927
|
# context node.
|
441
|
-
def fragment
|
442
|
-
|
443
|
-
type::DocumentFragment.new(document, tags, self)
|
928
|
+
def fragment(tags)
|
929
|
+
document.related_class("DocumentFragment").new(document, tags, self)
|
444
930
|
end
|
445
931
|
|
446
932
|
###
|
447
933
|
# Parse +string_or_io+ as a document fragment within the context of
|
448
934
|
# *this* node. Returns an XML::NodeSet containing the nodes parsed from
|
449
935
|
# +string_or_io+.
|
450
|
-
def parse
|
936
|
+
def parse(string_or_io, options = nil)
|
451
937
|
##
|
452
938
|
# When the current node is unparented and not an element node, use the
|
453
939
|
# document as the parsing context instead. Otherwise, the in-context
|
@@ -458,61 +944,87 @@ module Nokogiri
|
|
458
944
|
end
|
459
945
|
|
460
946
|
options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
|
461
|
-
if Integer === options
|
462
|
-
options = Nokogiri::XML::ParseOptions.new(options)
|
463
|
-
end
|
464
|
-
# Give the options to the user
|
947
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
465
948
|
yield options if block_given?
|
466
949
|
|
467
|
-
contents = string_or_io.respond_to?(:read)
|
468
|
-
string_or_io.read
|
950
|
+
contents = if string_or_io.respond_to?(:read)
|
951
|
+
string_or_io.read
|
952
|
+
else
|
469
953
|
string_or_io
|
954
|
+
end
|
470
955
|
|
471
956
|
return Nokogiri::XML::NodeSet.new(document) if contents.empty?
|
472
957
|
|
473
|
-
|
474
|
-
#
|
958
|
+
# libxml2 does not obey the +recover+ option after encountering errors during +in_context+
|
959
|
+
# parsing, and so this horrible hack is here to try to emulate recovery behavior.
|
960
|
+
#
|
961
|
+
# Unfortunately, this means we're no longer parsing "in context" and so namespaces that
|
962
|
+
# would have been inherited from the context node won't be handled correctly. This hack was
|
963
|
+
# written in 2010, and I regret it, because it's silently degrading functionality in a way
|
964
|
+
# that's not easily prevented (or even detected).
|
965
|
+
#
|
966
|
+
# I think preferable behavior would be to either:
|
967
|
+
#
|
968
|
+
# a. add an error noting that we "fell back" and pointing the user to turning off the +recover+ option
|
969
|
+
# b. don't recover, but raise a sensible exception
|
970
|
+
#
|
971
|
+
# For context and background: https://github.com/sparklemotion/nokogiri/issues/313
|
972
|
+
# FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
|
475
973
|
error_count = document.errors.length
|
476
974
|
node_set = in_context(contents, options.to_i)
|
477
|
-
if node_set.empty?
|
478
|
-
|
479
|
-
|
975
|
+
if node_set.empty? && (document.errors.length > error_count)
|
976
|
+
if options.recover?
|
977
|
+
fragment = document.related_class("DocumentFragment").parse(contents)
|
978
|
+
node_set = fragment.children
|
979
|
+
else
|
980
|
+
raise document.errors[error_count]
|
981
|
+
end
|
480
982
|
end
|
481
983
|
node_set
|
482
984
|
end
|
483
985
|
|
484
|
-
|
485
|
-
#
|
486
|
-
def content= string
|
487
|
-
self.native_content = encode_special_chars(string.to_s)
|
488
|
-
end
|
489
|
-
|
490
|
-
###
|
491
|
-
# Set the parent Node for this Node
|
492
|
-
def parent= parent_node
|
493
|
-
parent_node.add_child(self)
|
494
|
-
parent_node
|
495
|
-
end
|
496
|
-
|
497
|
-
###
|
498
|
-
# Returns a Hash of +{prefix => value}+ for all namespaces on this
|
499
|
-
# node and its ancestors.
|
986
|
+
# :call-seq:
|
987
|
+
# namespaces() → Hash<String(Namespace#prefix) ⇒ String(Namespace#href)>
|
500
988
|
#
|
501
|
-
#
|
989
|
+
# Fetch all the namespaces on this node and its ancestors.
|
990
|
+
#
|
991
|
+
# Note that the keys in this hash are the XML attributes that would be used to define this namespace,
|
992
|
+
# such as "xmlns:prefix", not just the prefix.
|
993
|
+
#
|
994
|
+
# The default namespace for this node will be included with key "xmlns".
|
995
|
+
#
|
996
|
+
# See also #namespace_scopes
|
997
|
+
#
|
998
|
+
# [Returns]
|
999
|
+
# Hash containing all the namespaces on this node and its ancestors. The hash keys are the
|
1000
|
+
# namespace prefix in attribute form ("xmlns:prefix", or "xmlns" for the default namespace), and the hash value for each key is the namespace URI.
|
1001
|
+
#
|
1002
|
+
# *Example:*
|
1003
|
+
#
|
1004
|
+
# doc = Nokogiri::XML(<<~EOF)
|
1005
|
+
# <root xmlns="http://example.com/root" xmlns:in_scope="http://example.com/in_scope">
|
1006
|
+
# <first/>
|
1007
|
+
# <second xmlns="http://example.com/child"/>
|
1008
|
+
# <third xmlns:foo="http://example.com/foo"/>
|
1009
|
+
# </root>
|
1010
|
+
# EOF
|
1011
|
+
# doc.at_xpath("//root:first", "root" => "http://example.com/root").namespaces
|
1012
|
+
# # => {"xmlns"=>"http://example.com/root",
|
1013
|
+
# # "xmlns:in_scope"=>"http://example.com/in_scope"}
|
1014
|
+
# doc.at_xpath("//child:second", "child" => "http://example.com/child").namespaces
|
1015
|
+
# # => {"xmlns"=>"http://example.com/child",
|
1016
|
+
# # "xmlns:in_scope"=>"http://example.com/in_scope"}
|
1017
|
+
# doc.at_xpath("//root:third", "root" => "http://example.com/root").namespaces
|
1018
|
+
# # => {"xmlns:foo"=>"http://example.com/foo",
|
1019
|
+
# # "xmlns"=>"http://example.com/root",
|
1020
|
+
# # "xmlns:in_scope"=>"http://example.com/in_scope"}
|
502
1021
|
#
|
503
|
-
# Returns namespaces in scope for self -- those defined on self
|
504
|
-
# element directly or any ancestor node -- as a Hash of
|
505
|
-
# attribute-name/value pairs. Note that the keys in this hash
|
506
|
-
# XML attributes that would be used to define this namespace,
|
507
|
-
# such as "xmlns:prefix", not just the prefix. Default namespace
|
508
|
-
# set on self will be included with key "xmlns". However,
|
509
|
-
# default namespaces set on ancestor will NOT be, even if self
|
510
|
-
# has no explicit default namespace.
|
511
1022
|
def namespaces
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
1023
|
+
namespace_scopes.each_with_object({}) do |ns, hash|
|
1024
|
+
prefix = ns.prefix
|
1025
|
+
key = prefix ? "xmlns:#{prefix}" : "xmlns"
|
1026
|
+
hash[key] = ns.href
|
1027
|
+
end
|
516
1028
|
end
|
517
1029
|
|
518
1030
|
# Returns true if this is a Comment
|
@@ -530,14 +1042,14 @@ module Nokogiri
|
|
530
1042
|
type == DOCUMENT_NODE
|
531
1043
|
end
|
532
1044
|
|
533
|
-
# Returns true if this is an
|
1045
|
+
# Returns true if this is an HTML4::Document or HTML5::Document node
|
534
1046
|
def html?
|
535
1047
|
type == HTML_DOCUMENT_NODE
|
536
1048
|
end
|
537
1049
|
|
538
1050
|
# Returns true if this is a Document
|
539
1051
|
def document?
|
540
|
-
is_a?
|
1052
|
+
is_a?(XML::Document)
|
541
1053
|
end
|
542
1054
|
|
543
1055
|
# Returns true if this is a ProcessingInstruction node
|
@@ -556,11 +1068,11 @@ module Nokogiri
|
|
556
1068
|
end
|
557
1069
|
|
558
1070
|
###
|
559
|
-
# Fetch the Nokogiri::
|
1071
|
+
# Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
|
560
1072
|
# nil on XML documents and on unknown tags.
|
561
1073
|
def description
|
562
1074
|
return nil if document.xml?
|
563
|
-
Nokogiri::
|
1075
|
+
Nokogiri::HTML4::ElementDescription[name]
|
564
1076
|
end
|
565
1077
|
|
566
1078
|
###
|
@@ -574,7 +1086,8 @@ module Nokogiri
|
|
574
1086
|
def element?
|
575
1087
|
type == ELEMENT_NODE
|
576
1088
|
end
|
577
|
-
|
1089
|
+
|
1090
|
+
alias_method :elem?, :element?
|
578
1091
|
|
579
1092
|
###
|
580
1093
|
# Turn this node into a string. If the document is HTML, this method
|
@@ -584,28 +1097,28 @@ module Nokogiri
|
|
584
1097
|
end
|
585
1098
|
|
586
1099
|
# Get the inner_html for this node's Node#children
|
587
|
-
def inner_html
|
1100
|
+
def inner_html(*args)
|
588
1101
|
children.map { |x| x.to_html(*args) }.join
|
589
1102
|
end
|
590
1103
|
|
591
1104
|
# Get the path to this node as a CSS expression
|
592
1105
|
def css_path
|
593
|
-
path.split(
|
594
|
-
part.
|
595
|
-
|
1106
|
+
path.split(%r{/}).map do |part|
|
1107
|
+
part.empty? ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
|
1108
|
+
end.compact.join(" > ")
|
596
1109
|
end
|
597
1110
|
|
598
1111
|
###
|
599
1112
|
# Get a list of ancestor Nodes for this Node. If +selector+ is given,
|
600
1113
|
# the ancestors must match +selector+
|
601
|
-
def ancestors
|
1114
|
+
def ancestors(selector = nil)
|
602
1115
|
return NodeSet.new(document) unless respond_to?(:parent)
|
603
1116
|
return NodeSet.new(document) unless parent
|
604
1117
|
|
605
1118
|
parents = [parent]
|
606
1119
|
|
607
1120
|
while parents.last.respond_to?(:parent)
|
608
|
-
break unless ctx_parent = parents.last.parent
|
1121
|
+
break unless (ctx_parent = parents.last.parent)
|
609
1122
|
parents << ctx_parent
|
610
1123
|
end
|
611
1124
|
|
@@ -614,62 +1127,43 @@ module Nokogiri
|
|
614
1127
|
root = parents.last
|
615
1128
|
search_results = root.search(selector)
|
616
1129
|
|
617
|
-
NodeSet.new(document, parents.find_all
|
1130
|
+
NodeSet.new(document, parents.find_all do |parent|
|
618
1131
|
search_results.include?(parent)
|
619
|
-
|
620
|
-
end
|
621
|
-
|
622
|
-
###
|
623
|
-
# Adds a default namespace supplied as a string +url+ href, to self.
|
624
|
-
# The consequence is as an xmlns attribute with supplied argument were
|
625
|
-
# present in parsed XML. A default namespace set with this method will
|
626
|
-
# now show up in #attributes, but when this node is serialized to XML an
|
627
|
-
# "xmlns" attribute will appear. See also #namespace and #namespace=
|
628
|
-
def default_namespace= url
|
629
|
-
add_namespace_definition(nil, url)
|
630
|
-
end
|
631
|
-
alias :add_namespace :add_namespace_definition
|
632
|
-
|
633
|
-
###
|
634
|
-
# Set the default namespace on this node (as would be defined with an
|
635
|
-
# "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
|
636
|
-
# a Namespace added this way will NOT be serialized as an xmlns attribute
|
637
|
-
# for this node. You probably want #default_namespace= instead, or perhaps
|
638
|
-
# #add_namespace_definition with a nil prefix argument.
|
639
|
-
def namespace= ns
|
640
|
-
return set_namespace(ns) unless ns
|
641
|
-
|
642
|
-
unless Nokogiri::XML::Namespace === ns
|
643
|
-
raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
|
644
|
-
end
|
645
|
-
if ns.document != document
|
646
|
-
raise ArgumentError, 'namespace must be declared on the same document'
|
647
|
-
end
|
648
|
-
|
649
|
-
set_namespace ns
|
1132
|
+
end)
|
650
1133
|
end
|
651
1134
|
|
652
1135
|
####
|
653
1136
|
# Yields self and all children to +block+ recursively.
|
654
|
-
def traverse
|
655
|
-
children.each{|j| j.traverse(&block) }
|
656
|
-
|
1137
|
+
def traverse(&block)
|
1138
|
+
children.each { |j| j.traverse(&block) }
|
1139
|
+
yield(self)
|
657
1140
|
end
|
658
1141
|
|
659
1142
|
###
|
660
1143
|
# Accept a visitor. This method calls "visit" on +visitor+ with self.
|
661
|
-
def accept
|
1144
|
+
def accept(visitor)
|
662
1145
|
visitor.visit(self)
|
663
1146
|
end
|
664
1147
|
|
665
1148
|
###
|
666
1149
|
# Test to see if this Node is equal to +other+
|
667
|
-
def ==
|
1150
|
+
def ==(other)
|
668
1151
|
return false unless other
|
669
1152
|
return false unless other.respond_to?(:pointer_id)
|
670
1153
|
pointer_id == other.pointer_id
|
671
1154
|
end
|
672
1155
|
|
1156
|
+
###
|
1157
|
+
# Compare two Node objects with respect to their Document. Nodes from
|
1158
|
+
# different documents cannot be compared.
|
1159
|
+
def <=>(other)
|
1160
|
+
return nil unless other.is_a?(Nokogiri::XML::Node)
|
1161
|
+
return nil unless document == other.document
|
1162
|
+
compare(other)
|
1163
|
+
end
|
1164
|
+
|
1165
|
+
# :section: Serialization and Generating Output
|
1166
|
+
|
673
1167
|
###
|
674
1168
|
# Serialize Node using +options+. Save options can also be set using a
|
675
1169
|
# block. See SaveOptions.
|
@@ -684,19 +1178,23 @@ module Nokogiri
|
|
684
1178
|
# config.format.as_xml
|
685
1179
|
# end
|
686
1180
|
#
|
687
|
-
def serialize
|
688
|
-
options = args.first.is_a?(Hash)
|
689
|
-
|
690
|
-
|
691
|
-
|
1181
|
+
def serialize(*args, &block)
|
1182
|
+
options = if args.first.is_a?(Hash)
|
1183
|
+
args.shift
|
1184
|
+
else
|
1185
|
+
{
|
1186
|
+
encoding: args[0],
|
1187
|
+
save_with: args[1],
|
1188
|
+
}
|
1189
|
+
end
|
692
1190
|
|
693
1191
|
encoding = options[:encoding] || document.encoding
|
694
1192
|
options[:encoding] = encoding
|
695
1193
|
|
696
|
-
outstring =
|
697
|
-
outstring.force_encoding(Encoding.find(encoding ||
|
1194
|
+
outstring = +""
|
1195
|
+
outstring.force_encoding(Encoding.find(encoding || "utf-8"))
|
698
1196
|
io = StringIO.new(outstring)
|
699
|
-
write_to
|
1197
|
+
write_to(io, options, &block)
|
700
1198
|
io.string
|
701
1199
|
end
|
702
1200
|
|
@@ -707,8 +1205,8 @@ module Nokogiri
|
|
707
1205
|
#
|
708
1206
|
# See Node#write_to for a list of +options+. For formatted output,
|
709
1207
|
# use Node#to_xhtml instead.
|
710
|
-
def to_html
|
711
|
-
to_format
|
1208
|
+
def to_html(options = {})
|
1209
|
+
to_format(SaveOptions::DEFAULT_HTML, options)
|
712
1210
|
end
|
713
1211
|
|
714
1212
|
###
|
@@ -717,7 +1215,7 @@ module Nokogiri
|
|
717
1215
|
# doc.to_xml(:indent => 5, :encoding => 'UTF-8')
|
718
1216
|
#
|
719
1217
|
# See Node#write_to for a list of +options+
|
720
|
-
def to_xml
|
1218
|
+
def to_xml(options = {})
|
721
1219
|
options[:save_with] ||= SaveOptions::DEFAULT_XML
|
722
1220
|
serialize(options)
|
723
1221
|
end
|
@@ -728,8 +1226,8 @@ module Nokogiri
|
|
728
1226
|
# doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
|
729
1227
|
#
|
730
1228
|
# See Node#write_to for a list of +options+
|
731
|
-
def to_xhtml
|
732
|
-
to_format
|
1229
|
+
def to_xhtml(options = {})
|
1230
|
+
to_format(SaveOptions::DEFAULT_XHTML, options)
|
733
1231
|
end
|
734
1232
|
|
735
1233
|
###
|
@@ -749,38 +1247,43 @@ module Nokogiri
|
|
749
1247
|
#
|
750
1248
|
# node.write_to(io, :indent_text => '-', :indent => 2)
|
751
1249
|
#
|
752
|
-
def write_to
|
753
|
-
options
|
754
|
-
encoding
|
1250
|
+
def write_to(io, *options)
|
1251
|
+
options = options.first.is_a?(Hash) ? options.shift : {}
|
1252
|
+
encoding = options[:encoding] || options[0]
|
755
1253
|
if Nokogiri.jruby?
|
756
|
-
save_options
|
757
|
-
indent_times
|
1254
|
+
save_options = options[:save_with] || options[1]
|
1255
|
+
indent_times = options[:indent] || 0
|
758
1256
|
else
|
759
|
-
save_options
|
760
|
-
indent_times
|
1257
|
+
save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
|
1258
|
+
indent_times = options[:indent] || 2
|
761
1259
|
end
|
762
|
-
indent_text
|
1260
|
+
indent_text = options[:indent_text] || " "
|
1261
|
+
|
1262
|
+
# Any string times 0 returns an empty string. Therefore, use the same
|
1263
|
+
# string instead of generating a new empty string for every node with
|
1264
|
+
# zero indentation.
|
1265
|
+
indentation = indent_times.zero? ? "" : (indent_text * indent_times)
|
763
1266
|
|
764
1267
|
config = SaveOptions.new(save_options.to_i)
|
765
1268
|
yield config if block_given?
|
766
1269
|
|
767
|
-
native_write_to(io, encoding,
|
1270
|
+
native_write_to(io, encoding, indentation, config.options)
|
768
1271
|
end
|
769
1272
|
|
770
1273
|
###
|
771
1274
|
# Write Node as HTML to +io+ with +options+
|
772
1275
|
#
|
773
1276
|
# See Node#write_to for a list of +options+
|
774
|
-
def write_html_to
|
775
|
-
write_format_to
|
1277
|
+
def write_html_to(io, options = {})
|
1278
|
+
write_format_to(SaveOptions::DEFAULT_HTML, io, options)
|
776
1279
|
end
|
777
1280
|
|
778
1281
|
###
|
779
1282
|
# Write Node as XHTML to +io+ with +options+
|
780
1283
|
#
|
781
1284
|
# See Node#write_to for a list of +options+
|
782
|
-
def write_xhtml_to
|
783
|
-
write_format_to
|
1285
|
+
def write_xhtml_to(io, options = {})
|
1286
|
+
write_format_to(SaveOptions::DEFAULT_XHTML, io, options)
|
784
1287
|
end
|
785
1288
|
|
786
1289
|
###
|
@@ -789,110 +1292,105 @@ module Nokogiri
|
|
789
1292
|
# doc.write_xml_to io, :encoding => 'UTF-8'
|
790
1293
|
#
|
791
1294
|
# See Node#write_to for a list of options
|
792
|
-
def write_xml_to
|
1295
|
+
def write_xml_to(io, options = {})
|
793
1296
|
options[:save_with] ||= SaveOptions::DEFAULT_XML
|
794
|
-
write_to
|
1297
|
+
write_to(io, options)
|
795
1298
|
end
|
796
1299
|
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
compare other
|
1300
|
+
def canonicalize(mode = XML::XML_C14N_1_0, inclusive_namespaces = nil, with_comments = false)
|
1301
|
+
c14n_root = self
|
1302
|
+
document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
|
1303
|
+
tn = node.is_a?(XML::Node) ? node : parent
|
1304
|
+
tn == c14n_root || tn.ancestors.include?(c14n_root)
|
1305
|
+
end
|
804
1306
|
end
|
805
1307
|
|
806
|
-
|
807
|
-
# Do xinclude substitution on the subtree below node. If given a block, a
|
808
|
-
# Nokogiri::XML::ParseOptions object initialized from +options+, will be
|
809
|
-
# passed to it, allowing more convenient modification of the parser options.
|
810
|
-
def do_xinclude options = XML::ParseOptions::DEFAULT_XML
|
811
|
-
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
1308
|
+
# :section:
|
812
1309
|
|
813
|
-
|
814
|
-
yield options if block_given?
|
1310
|
+
protected
|
815
1311
|
|
816
|
-
|
817
|
-
|
1312
|
+
def coerce(data)
|
1313
|
+
case data
|
1314
|
+
when XML::NodeSet
|
1315
|
+
return data
|
1316
|
+
when XML::DocumentFragment
|
1317
|
+
return data.children
|
1318
|
+
when String
|
1319
|
+
return fragment(data).children
|
1320
|
+
when Document, XML::Attr
|
1321
|
+
# unacceptable
|
1322
|
+
when XML::Node
|
1323
|
+
return data
|
1324
|
+
end
|
1325
|
+
|
1326
|
+
raise ArgumentError, <<~EOERR
|
1327
|
+
Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
|
1328
|
+
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
|
1329
|
+
EOERR
|
818
1330
|
end
|
819
1331
|
|
820
|
-
|
821
|
-
|
822
|
-
|
823
|
-
|
824
|
-
|
1332
|
+
private
|
1333
|
+
|
1334
|
+
def keywordify(keywords)
|
1335
|
+
case keywords
|
1336
|
+
when Enumerable
|
1337
|
+
keywords
|
1338
|
+
when String
|
1339
|
+
keywords.scan(/\S+/)
|
1340
|
+
else
|
1341
|
+
raise ArgumentError,
|
1342
|
+
"Keyword attributes must be passed as either a String or an Enumerable, but received #{keywords.class}"
|
825
1343
|
end
|
826
1344
|
end
|
827
1345
|
|
828
|
-
|
1346
|
+
def add_sibling(next_or_previous, node_or_tags)
|
1347
|
+
raise("Cannot add sibling to a node with no parent") unless parent
|
829
1348
|
|
830
|
-
|
831
|
-
|
832
|
-
iter = (next_or_previous == :next) ? :reverse_each : :each
|
1349
|
+
impl = next_or_previous == :next ? :add_next_sibling_node : :add_previous_sibling_node
|
1350
|
+
iter = next_or_previous == :next ? :reverse_each : :each
|
833
1351
|
|
834
|
-
node_or_tags = coerce
|
1352
|
+
node_or_tags = parent.coerce(node_or_tags)
|
835
1353
|
if node_or_tags.is_a?(XML::NodeSet)
|
836
1354
|
if text?
|
837
|
-
pivot = Nokogiri::XML::Node.new
|
838
|
-
send
|
1355
|
+
pivot = Nokogiri::XML::Node.new("dummy", document)
|
1356
|
+
send(impl, pivot)
|
839
1357
|
else
|
840
1358
|
pivot = self
|
841
1359
|
end
|
842
|
-
node_or_tags.send(iter) { |n| pivot.send
|
1360
|
+
node_or_tags.send(iter) { |n| pivot.send(impl, n) }
|
843
1361
|
pivot.unlink if text?
|
844
1362
|
else
|
845
|
-
send
|
1363
|
+
send(impl, node_or_tags)
|
846
1364
|
end
|
847
1365
|
node_or_tags
|
848
1366
|
end
|
849
1367
|
|
850
|
-
|
851
|
-
|
852
|
-
|
1368
|
+
USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze
|
1369
|
+
private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
1370
|
+
|
1371
|
+
def to_format(save_option, options)
|
1372
|
+
return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
853
1373
|
|
854
1374
|
options[:save_with] = save_option unless options[:save_with]
|
855
1375
|
serialize(options)
|
856
1376
|
end
|
857
1377
|
|
858
|
-
def write_format_to
|
859
|
-
|
860
|
-
return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
1378
|
+
def write_format_to(save_option, io, options)
|
1379
|
+
return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
861
1380
|
|
862
1381
|
options[:save_with] ||= save_option
|
863
|
-
write_to
|
1382
|
+
write_to(io, options)
|
864
1383
|
end
|
865
1384
|
|
866
1385
|
def inspect_attributes
|
867
1386
|
[:name, :namespace, :attribute_nodes, :children]
|
868
1387
|
end
|
869
1388
|
|
870
|
-
|
871
|
-
case data
|
872
|
-
when XML::NodeSet
|
873
|
-
return data
|
874
|
-
when XML::DocumentFragment
|
875
|
-
return data.children
|
876
|
-
when String
|
877
|
-
return fragment(data).children
|
878
|
-
when Document, XML::Attr
|
879
|
-
# unacceptable
|
880
|
-
when XML::Node
|
881
|
-
return data
|
882
|
-
end
|
1389
|
+
IMPLIED_XPATH_CONTEXTS = [".//"].freeze
|
883
1390
|
|
884
|
-
|
885
|
-
|
886
|
-
|
887
|
-
EOERR
|
888
|
-
end
|
889
|
-
|
890
|
-
# @private
|
891
|
-
IMPLIED_XPATH_CONTEXTS = [ './/'.freeze ].freeze # :nodoc:
|
892
|
-
|
893
|
-
def add_child_node_and_reparent_attrs node # :nodoc:
|
894
|
-
add_child_node node
|
895
|
-
node.attribute_nodes.find_all { |a| a.name =~ /:/ }.each do |attr_node|
|
1391
|
+
def add_child_node_and_reparent_attrs(node)
|
1392
|
+
add_child_node(node)
|
1393
|
+
node.attribute_nodes.find_all { |a| a.name.include?(":") }.each do |attr_node|
|
896
1394
|
attr_node.remove
|
897
1395
|
node[attr_node.name] = attr_node.value
|
898
1396
|
end
|
@@ -900,3 +1398,5 @@ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
|
|
900
1398
|
end
|
901
1399
|
end
|
902
1400
|
end
|
1401
|
+
|
1402
|
+
require_relative "node/save_options"
|