nokogiri 1.11.0.rc1 → 1.11.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/LICENSE-DEPENDENCIES.md +1015 -947
- data/README.md +164 -92
- data/ext/nokogiri/depend +476 -357
- data/ext/nokogiri/extconf.rb +467 -326
- data/ext/nokogiri/html_document.c +79 -78
- data/ext/nokogiri/html_sax_parser_context.c +4 -2
- data/ext/nokogiri/html_sax_push_parser.c +14 -8
- data/ext/nokogiri/nokogiri.c +37 -46
- data/ext/nokogiri/nokogiri.h +25 -17
- data/ext/nokogiri/test_global_handlers.c +41 -0
- data/ext/nokogiri/xml_document.c +8 -3
- data/ext/nokogiri/xml_io.c +8 -6
- data/ext/nokogiri/xml_node.c +1 -1
- data/ext/nokogiri/xml_node_set.c +1 -1
- data/ext/nokogiri/xml_reader.c +6 -17
- data/ext/nokogiri/xml_relax_ng.c +29 -11
- data/ext/nokogiri/xml_sax_parser.c +2 -7
- data/ext/nokogiri/xml_sax_parser_context.c +4 -2
- data/ext/nokogiri/xml_sax_push_parser.c +2 -0
- data/ext/nokogiri/xml_schema.c +84 -13
- data/ext/nokogiri/xml_syntax_error.c +23 -0
- data/ext/nokogiri/xml_syntax_error.h +15 -3
- data/ext/nokogiri/xml_xpath_context.c +80 -4
- data/ext/nokogiri/xslt_stylesheet.c +1 -4
- data/lib/nokogiri.rb +20 -3
- data/lib/nokogiri/css/parser.rb +62 -62
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +38 -36
- data/lib/nokogiri/css/xpath_visitor.rb +70 -42
- data/lib/nokogiri/html/document.rb +12 -26
- data/lib/nokogiri/version.rb +2 -148
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +182 -0
- data/lib/nokogiri/xml/builder.rb +2 -2
- data/lib/nokogiri/xml/document.rb +17 -7
- data/lib/nokogiri/xml/document_fragment.rb +4 -6
- data/lib/nokogiri/xml/node.rb +562 -238
- data/lib/nokogiri/xml/parse_options.rb +6 -0
- data/lib/nokogiri/xml/relax_ng.rb +6 -2
- data/lib/nokogiri/xml/schema.rb +12 -4
- data/lib/nokogiri/xml/searchable.rb +24 -16
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +32 -0
- data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +73 -0
- data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +103 -0
- data/patches/libxml2/0008-use-glibc-strlen.patch +53 -0
- data/patches/libxml2/0009-avoid-isnan-isinf.patch +81 -0
- metadata +84 -114
@@ -0,0 +1,182 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require "singleton"
|
3
|
+
require "shellwords"
|
4
|
+
|
5
|
+
module Nokogiri
|
6
|
+
class VersionInfo # :nodoc:
|
7
|
+
include Singleton
|
8
|
+
|
9
|
+
def jruby?
|
10
|
+
::JRUBY_VERSION if ::RUBY_PLATFORM == "java"
|
11
|
+
end
|
12
|
+
|
13
|
+
def engine
|
14
|
+
defined?(::RUBY_ENGINE) ? ::RUBY_ENGINE : "mri"
|
15
|
+
end
|
16
|
+
|
17
|
+
def loaded_libxml_version
|
18
|
+
Gem::Version.new(Nokogiri::LIBXML_LOADED_VERSION
|
19
|
+
.scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first
|
20
|
+
.collect(&:to_i)
|
21
|
+
.join("."))
|
22
|
+
end
|
23
|
+
|
24
|
+
def compiled_libxml_version
|
25
|
+
Gem::Version.new(Nokogiri::LIBXML_COMPILED_VERSION)
|
26
|
+
end
|
27
|
+
|
28
|
+
def loaded_libxslt_version
|
29
|
+
Gem::Version.new(Nokogiri::LIBXSLT_LOADED_VERSION
|
30
|
+
.scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first
|
31
|
+
.collect(&:to_i)
|
32
|
+
.join("."))
|
33
|
+
end
|
34
|
+
|
35
|
+
def compiled_libxslt_version
|
36
|
+
Gem::Version.new(Nokogiri::LIBXSLT_COMPILED_VERSION)
|
37
|
+
end
|
38
|
+
|
39
|
+
def libxml2?
|
40
|
+
defined?(Nokogiri::LIBXML_COMPILED_VERSION)
|
41
|
+
end
|
42
|
+
|
43
|
+
def libxml2_has_iconv?
|
44
|
+
defined?(Nokogiri::LIBXML_ICONV_ENABLED) && Nokogiri::LIBXML_ICONV_ENABLED
|
45
|
+
end
|
46
|
+
|
47
|
+
def libxml2_using_packaged?
|
48
|
+
libxml2? && Nokogiri::PACKAGED_LIBRARIES
|
49
|
+
end
|
50
|
+
|
51
|
+
def libxml2_using_system?
|
52
|
+
libxml2? && !libxml2_using_packaged?
|
53
|
+
end
|
54
|
+
|
55
|
+
def libxml2_precompiled?
|
56
|
+
libxml2_using_packaged? && Nokogiri::PRECOMPILED_LIBRARIES
|
57
|
+
end
|
58
|
+
|
59
|
+
def warnings
|
60
|
+
warnings = []
|
61
|
+
|
62
|
+
if libxml2?
|
63
|
+
if compiled_libxml_version != loaded_libxml_version
|
64
|
+
warnings << "Nokogiri was built against libxml version #{compiled_libxml_version}, but has dynamically loaded #{loaded_libxml_version}"
|
65
|
+
end
|
66
|
+
|
67
|
+
if compiled_libxslt_version != loaded_libxslt_version
|
68
|
+
warnings << "Nokogiri was built against libxslt version #{compiled_libxslt_version}, but has dynamically loaded #{loaded_libxslt_version}"
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
warnings
|
73
|
+
end
|
74
|
+
|
75
|
+
def to_hash
|
76
|
+
header_directory = File.expand_path(File.join(File.dirname(__FILE__), "../../../ext/nokogiri"))
|
77
|
+
{}.tap do |vi|
|
78
|
+
vi["warnings"] = []
|
79
|
+
vi["nokogiri"] = {}.tap do |nokogiri|
|
80
|
+
nokogiri["version"] = Nokogiri::VERSION
|
81
|
+
|
82
|
+
unless jruby?
|
83
|
+
cppflags = ["-I#{header_directory.shellescape}"]
|
84
|
+
if libxml2_using_packaged?
|
85
|
+
cppflags << "-I#{File.join(header_directory, "include").shellescape}"
|
86
|
+
cppflags << "-I#{File.join(header_directory, "include/libxml2").shellescape}"
|
87
|
+
end
|
88
|
+
nokogiri["cppflags"] = cppflags
|
89
|
+
end
|
90
|
+
end
|
91
|
+
vi["ruby"] = {}.tap do |ruby|
|
92
|
+
ruby["version"] = ::RUBY_VERSION
|
93
|
+
ruby["platform"] = ::RUBY_PLATFORM
|
94
|
+
ruby["gem_platform"] = ::Gem::Platform.local.to_s
|
95
|
+
ruby["description"] = ::RUBY_DESCRIPTION
|
96
|
+
ruby["engine"] = engine
|
97
|
+
ruby["jruby"] = jruby? if jruby?
|
98
|
+
end
|
99
|
+
|
100
|
+
if libxml2?
|
101
|
+
vi["libxml"] = {}.tap do |libxml|
|
102
|
+
if libxml2_using_packaged?
|
103
|
+
libxml["source"] = "packaged"
|
104
|
+
libxml["precompiled"] = libxml2_precompiled?
|
105
|
+
libxml["patches"] = Nokogiri::LIBXML2_PATCHES
|
106
|
+
|
107
|
+
# this is for nokogumbo and shouldn't be forever
|
108
|
+
libxml["libxml2_path"] = header_directory
|
109
|
+
else
|
110
|
+
libxml["source"] = "system"
|
111
|
+
end
|
112
|
+
libxml["iconv_enabled"] = libxml2_has_iconv?
|
113
|
+
libxml["compiled"] = compiled_libxml_version.to_s
|
114
|
+
libxml["loaded"] = loaded_libxml_version.to_s
|
115
|
+
end
|
116
|
+
|
117
|
+
vi["libxslt"] = {}.tap do |libxslt|
|
118
|
+
if libxml2_using_packaged?
|
119
|
+
libxslt["source"] = "packaged"
|
120
|
+
libxslt["precompiled"] = libxml2_precompiled?
|
121
|
+
libxslt["patches"] = Nokogiri::LIBXSLT_PATCHES
|
122
|
+
else
|
123
|
+
libxslt["source"] = "system"
|
124
|
+
end
|
125
|
+
libxslt["compiled"] = compiled_libxslt_version.to_s
|
126
|
+
libxslt["loaded"] = loaded_libxslt_version.to_s
|
127
|
+
end
|
128
|
+
|
129
|
+
vi["warnings"] = warnings
|
130
|
+
end
|
131
|
+
|
132
|
+
if defined?(Nokogiri::OTHER_LIBRARY_VERSIONS)
|
133
|
+
# see extconf for how this string is assembled: "lib1name:lib1version,lib2name:lib2version"
|
134
|
+
vi["other_libraries"] = Hash[*Nokogiri::OTHER_LIBRARY_VERSIONS.split(/[,:]/)]
|
135
|
+
elsif jruby?
|
136
|
+
vi["other_libraries"] = {}.tap do |ol|
|
137
|
+
ol["xerces"] = Nokogiri::XERCES_VERSION
|
138
|
+
ol["nekohtml"] = Nokogiri::NEKO_VERSION
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
def to_markdown
|
145
|
+
begin
|
146
|
+
require "psych"
|
147
|
+
rescue LoadError
|
148
|
+
end
|
149
|
+
require "yaml"
|
150
|
+
"# Nokogiri (#{Nokogiri::VERSION})\n" +
|
151
|
+
YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
|
152
|
+
end
|
153
|
+
|
154
|
+
instance.warnings.each do |warning|
|
155
|
+
warn "WARNING: #{warning}"
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
def self.uses_libxml?(requirement = nil) # :nodoc:
|
160
|
+
return false unless VersionInfo.instance.libxml2?
|
161
|
+
return true unless requirement
|
162
|
+
Gem::Requirement.new(requirement).satisfied_by?(VersionInfo.instance.loaded_libxml_version)
|
163
|
+
end
|
164
|
+
|
165
|
+
def self.jruby? # :nodoc:
|
166
|
+
VersionInfo.instance.jruby?
|
167
|
+
end
|
168
|
+
|
169
|
+
# Ensure constants used in this file are loaded - see #1896
|
170
|
+
if Nokogiri.jruby?
|
171
|
+
require "nokogiri/jruby/dependencies"
|
172
|
+
end
|
173
|
+
begin
|
174
|
+
::RUBY_VERSION =~ /(\d+\.\d+)/
|
175
|
+
require "nokogiri/#{Regexp.last_match(1)}/nokogiri"
|
176
|
+
rescue LoadError
|
177
|
+
require "nokogiri/nokogiri"
|
178
|
+
end
|
179
|
+
|
180
|
+
# More complete version information about libxml
|
181
|
+
VERSION_INFO = VersionInfo.instance.to_hash
|
182
|
+
end
|
data/lib/nokogiri/xml/builder.rb
CHANGED
@@ -245,8 +245,8 @@ module Nokogiri
|
|
245
245
|
#
|
246
246
|
# For example:
|
247
247
|
#
|
248
|
-
# doc = Nokogiri::XML(
|
249
|
-
# Nokogiri::XML::Builder.with(doc.
|
248
|
+
# doc = Nokogiri::XML(File.read('somedoc.xml'))
|
249
|
+
# Nokogiri::XML::Builder.with(doc.at_css('some_tag')) do |xml|
|
250
250
|
# # ... Use normal builder methods here ...
|
251
251
|
# xml.awesome # add the "awesome" tag below "some_tag"
|
252
252
|
# end
|
@@ -1,4 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'pathname'
|
4
|
+
|
2
5
|
module Nokogiri
|
3
6
|
module XML
|
4
7
|
##
|
@@ -44,9 +47,11 @@ module Nokogiri
|
|
44
47
|
#
|
45
48
|
def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML
|
46
49
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
47
|
-
|
50
|
+
|
48
51
|
yield options if block_given?
|
49
52
|
|
53
|
+
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
54
|
+
|
50
55
|
if empty_doc?(string_or_io)
|
51
56
|
if options.strict?
|
52
57
|
raise Nokogiri::XML::SyntaxError.new("Empty document")
|
@@ -56,12 +61,17 @@ module Nokogiri
|
|
56
61
|
end
|
57
62
|
|
58
63
|
doc = if string_or_io.respond_to?(:read)
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
64
|
+
if string_or_io.is_a?(Pathname)
|
65
|
+
# resolve the Pathname to the file and open it as an IO object, see #2110
|
66
|
+
string_or_io = string_or_io.expand_path.open
|
67
|
+
url ||= string_or_io.path
|
68
|
+
end
|
69
|
+
|
70
|
+
read_io(string_or_io, url, encoding, options.to_i)
|
71
|
+
else
|
72
|
+
# read_memory pukes on empty docs
|
73
|
+
read_memory(string_or_io, url, encoding, options.to_i)
|
74
|
+
end
|
65
75
|
|
66
76
|
# do xinclude processing
|
67
77
|
doc.do_xinclude(options) if options.xinclude?
|
@@ -141,6 +141,10 @@ module Nokogiri
|
|
141
141
|
document.errors = things
|
142
142
|
end
|
143
143
|
|
144
|
+
def fragment(data)
|
145
|
+
document.fragment(data)
|
146
|
+
end
|
147
|
+
|
144
148
|
private
|
145
149
|
|
146
150
|
# fix for issue 770
|
@@ -150,12 +154,6 @@ module Nokogiri
|
|
150
154
|
%Q{xmlns#{prefix}="#{namespace.href}"}
|
151
155
|
end.join ' '
|
152
156
|
end
|
153
|
-
|
154
|
-
def coerce data
|
155
|
-
return super unless String === data
|
156
|
-
|
157
|
-
document.fragment(data).children
|
158
|
-
end
|
159
157
|
end
|
160
158
|
end
|
161
159
|
end
|
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
# frozen_string_literal: true
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require "stringio"
|
4
|
+
require "nokogiri/xml/node/save_options"
|
5
5
|
|
6
6
|
module Nokogiri
|
7
7
|
module XML
|
@@ -57,49 +57,49 @@ module Nokogiri
|
|
57
57
|
include Enumerable
|
58
58
|
|
59
59
|
# Element node type, see Nokogiri::XML::Node#element?
|
60
|
-
ELEMENT_NODE =
|
60
|
+
ELEMENT_NODE = 1
|
61
61
|
# Attribute node type
|
62
|
-
ATTRIBUTE_NODE =
|
62
|
+
ATTRIBUTE_NODE = 2
|
63
63
|
# Text node type, see Nokogiri::XML::Node#text?
|
64
|
-
TEXT_NODE =
|
64
|
+
TEXT_NODE = 3
|
65
65
|
# CDATA node type, see Nokogiri::XML::Node#cdata?
|
66
66
|
CDATA_SECTION_NODE = 4
|
67
67
|
# Entity reference node type
|
68
|
-
ENTITY_REF_NODE =
|
68
|
+
ENTITY_REF_NODE = 5
|
69
69
|
# Entity node type
|
70
|
-
ENTITY_NODE =
|
70
|
+
ENTITY_NODE = 6
|
71
71
|
# PI node type
|
72
|
-
PI_NODE =
|
72
|
+
PI_NODE = 7
|
73
73
|
# Comment node type, see Nokogiri::XML::Node#comment?
|
74
|
-
COMMENT_NODE =
|
74
|
+
COMMENT_NODE = 8
|
75
75
|
# Document node type, see Nokogiri::XML::Node#xml?
|
76
|
-
DOCUMENT_NODE =
|
76
|
+
DOCUMENT_NODE = 9
|
77
77
|
# Document type node type
|
78
78
|
DOCUMENT_TYPE_NODE = 10
|
79
79
|
# Document fragment node type
|
80
80
|
DOCUMENT_FRAG_NODE = 11
|
81
81
|
# Notation node type
|
82
|
-
NOTATION_NODE =
|
82
|
+
NOTATION_NODE = 12
|
83
83
|
# HTML document node type, see Nokogiri::XML::Node#html?
|
84
84
|
HTML_DOCUMENT_NODE = 13
|
85
85
|
# DTD node type
|
86
|
-
DTD_NODE =
|
86
|
+
DTD_NODE = 14
|
87
87
|
# Element declaration type
|
88
|
-
ELEMENT_DECL =
|
88
|
+
ELEMENT_DECL = 15
|
89
89
|
# Attribute declaration type
|
90
|
-
ATTRIBUTE_DECL =
|
90
|
+
ATTRIBUTE_DECL = 16
|
91
91
|
# Entity declaration type
|
92
|
-
ENTITY_DECL =
|
92
|
+
ENTITY_DECL = 17
|
93
93
|
# Namespace declaration type
|
94
|
-
NAMESPACE_DECL =
|
94
|
+
NAMESPACE_DECL = 18
|
95
95
|
# XInclude start type
|
96
|
-
XINCLUDE_START =
|
96
|
+
XINCLUDE_START = 19
|
97
97
|
# XInclude end type
|
98
|
-
XINCLUDE_END =
|
98
|
+
XINCLUDE_END = 20
|
99
99
|
# DOCB document node type
|
100
100
|
DOCB_DOCUMENT_NODE = 21
|
101
101
|
|
102
|
-
def initialize
|
102
|
+
def initialize(name, document) # :nodoc:
|
103
103
|
# ... Ya. This is empty on purpose.
|
104
104
|
end
|
105
105
|
|
@@ -109,24 +109,18 @@ module Nokogiri
|
|
109
109
|
document.decorate(self)
|
110
110
|
end
|
111
111
|
|
112
|
+
# @!group Searching via XPath or CSS Queries
|
113
|
+
|
112
114
|
###
|
113
115
|
# Search this node's immediate children using CSS selector +selector+
|
114
|
-
def >
|
116
|
+
def >(selector)
|
115
117
|
ns = document.root.namespaces
|
116
118
|
xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
|
117
119
|
end
|
118
120
|
|
119
|
-
|
120
|
-
# Get the attribute value for the attribute +name+
|
121
|
-
def [] name
|
122
|
-
get(name.to_s)
|
123
|
-
end
|
121
|
+
# @!endgroup
|
124
122
|
|
125
|
-
|
126
|
-
# Set the attribute value for the attribute +name+ to +value+
|
127
|
-
def []= name, value
|
128
|
-
set name.to_s, value.to_s
|
129
|
-
end
|
123
|
+
# @!group Manipulating Document Structure
|
130
124
|
|
131
125
|
###
|
132
126
|
# Add +node_or_tags+ as a child of this Node.
|
@@ -135,7 +129,7 @@ module Nokogiri
|
|
135
129
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
136
130
|
#
|
137
131
|
# Also see related method +<<+.
|
138
|
-
def add_child
|
132
|
+
def add_child(node_or_tags)
|
139
133
|
node_or_tags = coerce(node_or_tags)
|
140
134
|
if node_or_tags.is_a?(XML::NodeSet)
|
141
135
|
node_or_tags.each { |n| add_child_node_and_reparent_attrs n }
|
@@ -152,7 +146,7 @@ module Nokogiri
|
|
152
146
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
153
147
|
#
|
154
148
|
# Also see related method +add_child+.
|
155
|
-
def prepend_child
|
149
|
+
def prepend_child(node_or_tags)
|
156
150
|
if first = children.first
|
157
151
|
# Mimic the error add_child would raise.
|
158
152
|
raise RuntimeError, "Document already has a root node" if document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
@@ -162,7 +156,6 @@ module Nokogiri
|
|
162
156
|
end
|
163
157
|
end
|
164
158
|
|
165
|
-
|
166
159
|
###
|
167
160
|
# Add html around this node
|
168
161
|
#
|
@@ -181,7 +174,7 @@ module Nokogiri
|
|
181
174
|
# Returns self, to support chaining of calls (e.g., root << child1 << child2)
|
182
175
|
#
|
183
176
|
# Also see related method +add_child+.
|
184
|
-
def <<
|
177
|
+
def <<(node_or_tags)
|
185
178
|
add_child node_or_tags
|
186
179
|
self
|
187
180
|
end
|
@@ -193,7 +186,7 @@ module Nokogiri
|
|
193
186
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
194
187
|
#
|
195
188
|
# Also see related method +before+.
|
196
|
-
def add_previous_sibling
|
189
|
+
def add_previous_sibling(node_or_tags)
|
197
190
|
raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
198
191
|
|
199
192
|
add_sibling :previous, node_or_tags
|
@@ -206,7 +199,7 @@ module Nokogiri
|
|
206
199
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
207
200
|
#
|
208
201
|
# Also see related method +after+.
|
209
|
-
def add_next_sibling
|
202
|
+
def add_next_sibling(node_or_tags)
|
210
203
|
raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
211
204
|
|
212
205
|
add_sibling :next, node_or_tags
|
@@ -219,7 +212,7 @@ module Nokogiri
|
|
219
212
|
# Returns self, to support chaining of calls.
|
220
213
|
#
|
221
214
|
# Also see related method +add_previous_sibling+.
|
222
|
-
def before
|
215
|
+
def before(node_or_tags)
|
223
216
|
add_previous_sibling node_or_tags
|
224
217
|
self
|
225
218
|
end
|
@@ -231,7 +224,7 @@ module Nokogiri
|
|
231
224
|
# Returns self, to support chaining of calls.
|
232
225
|
#
|
233
226
|
# Also see related method +add_next_sibling+.
|
234
|
-
def after
|
227
|
+
def after(node_or_tags)
|
235
228
|
add_next_sibling node_or_tags
|
236
229
|
self
|
237
230
|
end
|
@@ -243,7 +236,7 @@ module Nokogiri
|
|
243
236
|
# Returns self.
|
244
237
|
#
|
245
238
|
# Also see related method +children=+
|
246
|
-
def inner_html=
|
239
|
+
def inner_html=(node_or_tags)
|
247
240
|
self.children = node_or_tags
|
248
241
|
self
|
249
242
|
end
|
@@ -255,7 +248,7 @@ module Nokogiri
|
|
255
248
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
256
249
|
#
|
257
250
|
# Also see related method +inner_html=+
|
258
|
-
def children=
|
251
|
+
def children=(node_or_tags)
|
259
252
|
node_or_tags = coerce(node_or_tags)
|
260
253
|
children.unlink
|
261
254
|
if node_or_tags.is_a?(XML::NodeSet)
|
@@ -273,19 +266,21 @@ module Nokogiri
|
|
273
266
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
274
267
|
#
|
275
268
|
# Also see related method +swap+.
|
276
|
-
def replace
|
269
|
+
def replace(node_or_tags)
|
270
|
+
raise("Cannot replace a node with no parent") unless parent
|
271
|
+
|
277
272
|
# We cannot replace a text node directly, otherwise libxml will return
|
278
273
|
# an internal error at parser.c:13031, I don't know exactly why
|
279
274
|
# libxml is trying to find a parent node that is an element or document
|
280
275
|
# so I can't tell if this is bug in libxml or not. issue #775.
|
281
276
|
if text?
|
282
|
-
replacee = Nokogiri::XML::Node.new
|
277
|
+
replacee = Nokogiri::XML::Node.new "dummy", document
|
283
278
|
add_previous_sibling_node replacee
|
284
279
|
unlink
|
285
280
|
return replacee.replace node_or_tags
|
286
281
|
end
|
287
282
|
|
288
|
-
node_or_tags = coerce(node_or_tags)
|
283
|
+
node_or_tags = parent.coerce(node_or_tags)
|
289
284
|
|
290
285
|
if node_or_tags.is_a?(XML::NodeSet)
|
291
286
|
node_or_tags.each { |n| add_previous_sibling n }
|
@@ -303,33 +298,98 @@ module Nokogiri
|
|
303
298
|
# Returns self, to support chaining of calls.
|
304
299
|
#
|
305
300
|
# Also see related method +replace+.
|
306
|
-
def swap
|
301
|
+
def swap(node_or_tags)
|
307
302
|
replace node_or_tags
|
308
303
|
self
|
309
304
|
end
|
310
305
|
|
311
|
-
|
312
|
-
|
306
|
+
####
|
307
|
+
# Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
|
308
|
+
def content=(string)
|
309
|
+
self.native_content = encode_special_chars(string.to_s)
|
310
|
+
end
|
313
311
|
|
314
|
-
|
315
|
-
#
|
316
|
-
|
317
|
-
|
312
|
+
###
|
313
|
+
# Set the parent Node for this Node
|
314
|
+
def parent=(parent_node)
|
315
|
+
parent_node.add_child(self)
|
316
|
+
parent_node
|
317
|
+
end
|
318
318
|
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
319
|
+
###
|
320
|
+
# Adds a default namespace supplied as a string +url+ href, to self.
|
321
|
+
# The consequence is as if an xmlns attribute with the supplied argument were
|
322
|
+
# present in parsed XML. A default namespace set with this method will
|
323
|
+
# not show up in #attributes, but when this node is serialized to XML an
|
324
|
+
# "xmlns" attribute will appear. See also #namespace and #namespace=
|
325
|
+
def default_namespace=(url)
|
326
|
+
add_namespace_definition(nil, url)
|
327
|
+
end
|
328
|
+
|
329
|
+
###
|
330
|
+
# Set the default namespace on this node (as would be defined with an
|
331
|
+
# "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
|
332
|
+
# a Namespace added this way will NOT be serialized as an xmlns attribute
|
333
|
+
# for this node. You probably want #default_namespace= instead, or perhaps
|
334
|
+
# #add_namespace_definition with a nil prefix argument.
|
335
|
+
def namespace=(ns)
|
336
|
+
return set_namespace(ns) unless ns
|
337
|
+
|
338
|
+
unless Nokogiri::XML::Namespace === ns
|
339
|
+
raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
|
340
|
+
end
|
341
|
+
if ns.document != document
|
342
|
+
raise ArgumentError, "namespace must be declared on the same document"
|
343
|
+
end
|
344
|
+
|
345
|
+
set_namespace ns
|
346
|
+
end
|
347
|
+
|
348
|
+
###
|
349
|
+
# Do xinclude substitution on the subtree below node. If given a block, a
|
350
|
+
# Nokogiri::XML::ParseOptions object initialized from +options+ will be
|
351
|
+
# passed to it, allowing more convenient modification of the parser options.
|
352
|
+
def do_xinclude(options = XML::ParseOptions::DEFAULT_XML)
|
353
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
354
|
+
|
355
|
+
# give options to user
|
356
|
+
yield options if block_given?
|
357
|
+
|
358
|
+
# call c extension
|
359
|
+
process_xincludes(options.to_i)
|
360
|
+
end
|
361
|
+
|
362
|
+
alias :next :next_sibling
|
363
|
+
alias :previous :previous_sibling
|
364
|
+
alias :next= :add_next_sibling
|
365
|
+
alias :previous= :add_previous_sibling
|
366
|
+
alias :remove :unlink
|
367
|
+
alias :name= :node_name=
|
368
|
+
alias :add_namespace :add_namespace_definition
|
369
|
+
|
370
|
+
# @!endgroup
|
371
|
+
|
372
|
+
alias :text :content
|
373
|
+
alias :inner_text :content
|
374
|
+
alias :name :node_name
|
375
|
+
alias :type :node_type
|
376
|
+
alias :to_str :text
|
377
|
+
alias :clone :dup
|
378
|
+
alias :elements :element_children
|
379
|
+
|
380
|
+
# @!group Working With Node Attributes
|
381
|
+
|
382
|
+
###
|
383
|
+
# Get the attribute value for the attribute +name+
|
384
|
+
def [](name)
|
385
|
+
get(name.to_s)
|
386
|
+
end
|
387
|
+
|
388
|
+
###
|
389
|
+
# Set the attribute value for the attribute +name+ to +value+
|
390
|
+
def []=(name, value)
|
391
|
+
set name.to_s, value.to_s
|
392
|
+
end
|
333
393
|
|
334
394
|
####
|
335
395
|
# Returns a hash containing the node's attributes. The key is
|
@@ -370,82 +430,366 @@ module Nokogiri
|
|
370
430
|
end
|
371
431
|
|
372
432
|
###
|
373
|
-
#
|
374
|
-
|
433
|
+
# Remove the attribute named +name+
|
434
|
+
def remove_attribute(name)
|
435
|
+
attr = attributes[name].remove if key? name
|
436
|
+
clear_xpath_context if Nokogiri.jruby?
|
437
|
+
attr
|
438
|
+
end
|
439
|
+
|
440
|
+
# Get the CSS class names of a Node.
|
441
|
+
#
|
442
|
+
# This is a convenience function and is equivalent to:
|
443
|
+
# node.kwattr_values("class")
|
444
|
+
#
|
445
|
+
# @see #kwattr_values
|
446
|
+
# @see #add_class
|
447
|
+
# @see #append_class
|
448
|
+
# @see #remove_class
|
449
|
+
#
|
450
|
+
# @return [Array<String>]
|
451
|
+
#
|
452
|
+
# The CSS classes present in the Node's +class+ attribute. If
|
453
|
+
# the attribute is empty or non-existent, the return value is
|
454
|
+
# an empty array.
|
455
|
+
#
|
456
|
+
# @example
|
457
|
+
# node # => <div class="section title header"></div>
|
458
|
+
# node.classes # => ["section", "title", "header"]
|
459
|
+
#
|
375
460
|
def classes
|
376
|
-
|
461
|
+
kwattr_values("class")
|
377
462
|
end
|
378
463
|
|
379
|
-
|
380
|
-
#
|
381
|
-
#
|
382
|
-
#
|
383
|
-
#
|
464
|
+
# Ensure HTML CSS classes are present on a +Node+. Any CSS
|
465
|
+
# classes in +names+ that already exist in the +Node+'s +class+
|
466
|
+
# attribute are _not_ added. Note that any existing duplicates
|
467
|
+
# in the +class+ attribute are not removed. Compare with
|
468
|
+
# {#append_class}.
|
469
|
+
#
|
470
|
+
# This is a convenience function and is equivalent to:
|
471
|
+
# node.kwattr_add("class", names)
|
472
|
+
#
|
473
|
+
# @see #kwattr_add
|
474
|
+
# @see #classes
|
475
|
+
# @see #append_class
|
476
|
+
# @see #remove_class
|
477
|
+
#
|
478
|
+
# @param names [String, Array<String>]
|
479
|
+
#
|
480
|
+
# CSS class names to be added to the Node's +class+
|
481
|
+
# attribute. May be a string containing whitespace-delimited
|
482
|
+
# names, or an Array of String names. Any class names already
|
483
|
+
# present will not be added. Any class names not present will
|
484
|
+
# be added. If no +class+ attribute exists, one is created.
|
485
|
+
#
|
486
|
+
# @return [Node] Returns +self+ for ease of chaining method calls.
|
487
|
+
#
|
488
|
+
# @example Ensure that a +Node+ has CSS class "section"
|
489
|
+
# node # => <div></div>
|
490
|
+
# node.add_class("section") # => <div class="section"></div>
|
491
|
+
# node.add_class("section") # => <div class="section"></div> # duplicate not added
|
492
|
+
#
|
493
|
+
# @example Ensure that a +Node+ has CSS classes "section" and "header", via a String argument.
|
494
|
+
# node # => <div class="section section"></div>
|
495
|
+
# node.add_class("section header") # => <div class="section section header"></div>
|
496
|
+
# # Note that the CSS class "section" is not added because it is already present.
|
497
|
+
# # Note also that the pre-existing duplicate CSS class "section" is not removed.
|
498
|
+
#
|
499
|
+
# @example Ensure that a +Node+ has CSS classes "section" and "header", via an Array argument.
|
500
|
+
# node # => <div></div>
|
501
|
+
# node.add_class(["section", "header"]) # => <div class="section header"></div>
|
502
|
+
#
|
503
|
+
def add_class(names)
|
504
|
+
kwattr_add("class", names)
|
505
|
+
end
|
506
|
+
|
507
|
+
# Add HTML CSS classes to a +Node+, regardless of
|
508
|
+
# duplication. Compare with {#add_class}.
|
509
|
+
#
|
510
|
+
# This is a convenience function and is equivalent to:
|
511
|
+
# node.kwattr_append("class", names)
|
512
|
+
#
|
513
|
+
# @see #kwattr_append
|
514
|
+
# @see #classes
|
515
|
+
# @see #add_class
|
516
|
+
# @see #remove_class
|
517
|
+
#
|
518
|
+
# @param names [String, Array<String>]
|
519
|
+
#
|
520
|
+
# CSS class names to be appended to the Node's +class+
|
521
|
+
# attribute. May be a string containing whitespace-delimited
|
522
|
+
# names, or an Array of String names. All class names passed
|
523
|
+
# in will be appended to the +class+ attribute even if they
|
524
|
+
# are already present in the attribute value. If no +class+
|
525
|
+
# attribute exists, one is created.
|
526
|
+
#
|
527
|
+
# @return [Node] Returns +self+ for ease of chaining method calls.
|
528
|
+
#
|
529
|
+
# @example Append "section" to a +Node+'s CSS +class+ attribute
|
530
|
+
# node # => <div></div>
|
531
|
+
# node.append_class("section") # => <div class="section"></div>
|
532
|
+
# node.append_class("section") # => <div class="section section"></div> # duplicate added!
|
533
|
+
#
|
534
|
+
# @example Append "section" and "header" to a +Node+'s CSS +class+ attribute, via a String argument.
|
535
|
+
# node # => <div class="section section"></div>
|
536
|
+
# node.append_class("section header") # => <div class="section section section header"></div>
|
537
|
+
# # Note that the CSS class "section" is appended even though it is already present.
|
538
|
+
#
|
539
|
+
# @example Append "section" and "header" to a +Node+'s CSS +class+ attribute, via an Array argument.
|
540
|
+
# node # => <div></div>
|
541
|
+
# node.append_class(["section", "header"]) # => <div class="section header"></div>
|
542
|
+
# node.append_class(["section", "header"]) # => <div class="section header section header"></div>
|
543
|
+
#
|
544
|
+
def append_class(names)
|
545
|
+
kwattr_append("class", names)
|
546
|
+
end
|
547
|
+
|
548
|
+
# Remove HTML CSS classes from a +Node+. Any CSS classes in +names+ that
|
549
|
+
# exist in the +Node+'s +class+ attribute are removed, including any
|
550
|
+
# multiple entries.
|
551
|
+
#
|
552
|
+
# If no CSS classes remain after this operation, or if +names+ is
|
553
|
+
# +nil+, the +class+ attribute is deleted from the node.
|
554
|
+
#
|
555
|
+
# This is a convenience function and is equivalent to:
|
556
|
+
# node.kwattr_remove("class", names)
|
557
|
+
#
|
558
|
+
# @see #kwattr_remove
|
559
|
+
# @see #classes
|
560
|
+
# @see #add_class
|
561
|
+
# @see #append_class
|
562
|
+
#
|
563
|
+
# @param names [String, Array<String>]
|
564
|
+
#
|
565
|
+
# CSS class names to be removed from the Node's +class+ attribute. May
|
566
|
+
# be a string containing whitespace-delimited names, or an Array of
|
567
|
+
# String names. Any class names already present will be removed. If no
|
568
|
+
# CSS classes remain, the +class+ attribute is deleted.
|
569
|
+
#
|
570
|
+
# @return [Node] Returns +self+ for ease of chaining method calls.
|
571
|
+
#
|
572
|
+
# @example
|
573
|
+
# node # => <div class="section header"></div>
|
574
|
+
# node.remove_class("section") # => <div class="header"></div>
|
575
|
+
# node.remove_class("header") # => <div></div> # attribute is deleted when empty
|
576
|
+
#
|
577
|
+
def remove_class(names = nil)
|
578
|
+
kwattr_remove("class", names)
|
579
|
+
end
|
580
|
+
|
581
|
+
# Retrieve values from a keyword attribute of a Node.
|
582
|
+
#
|
583
|
+
# A "keyword attribute" is a node attribute that contains a set
|
584
|
+
# of space-delimited values. Perhaps the most familiar example
|
585
|
+
# of this is the HTML +class+ attribute used to contain CSS
|
586
|
+
# classes. But other keyword attributes exist, for instance
|
587
|
+
# [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
|
588
|
+
#
|
589
|
+
# @see #classes
|
590
|
+
# @see #kwattr_add
|
591
|
+
# @see #kwattr_append
|
592
|
+
# @see #kwattr_remove
|
593
|
+
#
|
594
|
+
# @param attribute_name [String] The name of the keyword attribute to be inspected.
|
595
|
+
#
|
596
|
+
# @return [Array<String>]
|
597
|
+
#
|
598
|
+
# The values present in the Node's +attribute_name+
|
599
|
+
# attribute. If the attribute is empty or non-existent, the
|
600
|
+
# return value is an empty array.
|
601
|
+
#
|
602
|
+
# @example
|
603
|
+
# node # => <a rel="nofollow noopener external">link</a>
|
604
|
+
# node.kwattr_values("rel") # => ["nofollow", "noopener", "external"]
|
605
|
+
#
|
606
|
+
# @since v1.11.0
|
384
607
|
#
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
608
|
+
def kwattr_values(attribute_name)
|
609
|
+
keywordify(get_attribute(attribute_name) || [])
|
610
|
+
end
|
611
|
+
|
612
|
+
# Ensure that values are present in a keyword attribute.
|
613
|
+
#
|
614
|
+
# Any values in +keywords+ that already exist in the +Node+'s
|
615
|
+
# attribute values are _not_ added. Note that any existing
|
616
|
+
# duplicates in the attribute values are not removed. Compare
|
617
|
+
# with {#kwattr_append}.
|
618
|
+
#
|
619
|
+
# A "keyword attribute" is a node attribute that contains a set
|
620
|
+
# of space-delimited values. Perhaps the most familiar example
|
621
|
+
# of this is the HTML +class+ attribute used to contain CSS
|
622
|
+
# classes. But other keyword attributes exist, for instance
|
623
|
+
# [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
|
624
|
+
#
|
625
|
+
# @see #add_class
|
626
|
+
# @see #kwattr_values
|
627
|
+
# @see #kwattr_append
|
628
|
+
# @see #kwattr_remove
|
629
|
+
#
|
630
|
+
# @param attribute_name [String] The name of the keyword attribute to be modified.
|
631
|
+
#
|
632
|
+
# @param keywords [String, Array<String>]
|
633
|
+
#
|
634
|
+
# Keywords to be added to the attribute named
|
635
|
+
# +attribute_name+. May be a string containing
|
636
|
+
# whitespace-delimited values, or an Array of String
|
637
|
+
# values. Any values already present will not be added. Any
|
638
|
+
# values not present will be added. If the named attribute
|
639
|
+
# does not exist, it is created.
|
640
|
+
#
|
641
|
+
# @return [Node] Returns +self+ for ease of chaining method calls.
|
642
|
+
#
|
643
|
+
# @example Ensure that a +Node+ has "nofollow" in its +rel+ attribute.
|
644
|
+
# node # => <a></a>
|
645
|
+
# node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a>
|
646
|
+
# node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a> # duplicate not added
|
647
|
+
#
|
648
|
+
# @example Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via a String argument.
|
649
|
+
# node # => <a rel="nofollow nofollow"></a>
|
650
|
+
# node.kwattr_add("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
|
651
|
+
# # Note that "nofollow" is not added because it is already present.
|
652
|
+
# # Note also that the pre-existing duplicate "nofollow" is not removed.
|
653
|
+
#
|
654
|
+
# @example Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via an Array argument.
|
655
|
+
# node # => <a></a>
|
656
|
+
# node.kwattr_add("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
|
657
|
+
#
|
658
|
+
# @since v1.11.0
|
659
|
+
#
|
660
|
+
def kwattr_add(attribute_name, keywords)
|
661
|
+
keywords = keywordify(keywords)
|
662
|
+
current_kws = kwattr_values(attribute_name)
|
663
|
+
new_kws = (current_kws + (keywords - current_kws)).join(" ")
|
664
|
+
set_attribute(attribute_name, new_kws)
|
390
665
|
self
|
391
666
|
end
|
392
667
|
|
393
|
-
|
394
|
-
#
|
395
|
-
#
|
396
|
-
#
|
397
|
-
#
|
668
|
+
# Add keywords to a Node's keyword attribute, regardless of
|
669
|
+
# duplication. Compare with {#kwattr_add}.
|
670
|
+
#
|
671
|
+
# A "keyword attribute" is a node attribute that contains a set
|
672
|
+
# of space-delimited values. Perhaps the most familiar example
|
673
|
+
# of this is the HTML +class+ attribute used to contain CSS
|
674
|
+
# classes. But other keyword attributes exist, for instance
|
675
|
+
# [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
|
676
|
+
#
|
677
|
+
# @see #append_class
|
678
|
+
# @see #kwattr_values
|
679
|
+
# @see #kwattr_add
|
680
|
+
# @see #kwattr_remove
|
681
|
+
#
|
682
|
+
# @param attribute_name [String] The name of the keyword attribute to be modified.
|
398
683
|
#
|
399
|
-
#
|
400
|
-
#
|
401
|
-
|
402
|
-
|
684
|
+
# @param keywords [String, Array<String>]
|
685
|
+
#
|
686
|
+
# Keywords to be added to the attribute named
|
687
|
+
# +attribute_name+. May be a string containing
|
688
|
+
# whitespace-delimited values, or an Array of String
|
689
|
+
# values. All values passed in will be appended to the named
|
690
|
+
# attribute even if they are already present in the
|
691
|
+
# attribute. If the named attribute does not exist, it is
|
692
|
+
# created.
|
693
|
+
#
|
694
|
+
# @return [Node] Returns +self+ for ease of chaining method calls.
|
695
|
+
#
|
696
|
+
# @example Append "nofollow" to the +rel+ attribute.
|
697
|
+
# node # => <a></a>
|
698
|
+
# node.kwattr_append("rel", "nofollow") # => <a rel="nofollow"></a>
|
699
|
+
# node.kwattr_append("rel", "nofollow") # => <a rel="nofollow nofollow"></a> # duplicate added!
|
700
|
+
#
|
701
|
+
# @example Append "nofollow" and "noreferrer" to the +rel+ attribute, via a String argument.
|
702
|
+
# node # => <a rel="nofollow"></a>
|
703
|
+
# node.kwattr_append("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
|
704
|
+
# # Note that "nofollow" is appended even though it is already present.
|
705
|
+
#
|
706
|
+
# @example Append "nofollow" and "noreferrer" to the +rel+ attribute, via an Array argument.
|
707
|
+
# node # => <a></a>
|
708
|
+
# node.kwattr_append("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
|
709
|
+
#
|
710
|
+
# @since v1.11.0
|
711
|
+
#
|
712
|
+
def kwattr_append(attribute_name, keywords)
|
713
|
+
keywords = keywordify(keywords)
|
714
|
+
current_kws = kwattr_values(attribute_name)
|
715
|
+
new_kws = (current_kws + keywords).join(" ")
|
716
|
+
set_attribute(attribute_name, new_kws)
|
403
717
|
self
|
404
718
|
end
|
405
719
|
|
406
|
-
|
407
|
-
#
|
408
|
-
#
|
409
|
-
# they are all removed.
|
720
|
+
# Remove keywords from a keyword attribute. Any matching
|
721
|
+
# keywords that exist in the named attribute are removed,
|
722
|
+
# including any multiple entries.
|
410
723
|
#
|
411
|
-
#
|
412
|
-
#
|
724
|
+
# If no keywords remain after this operation, or if +keywords+
|
725
|
+
# is +nil+, the attribute is deleted from the node.
|
413
726
|
#
|
414
|
-
#
|
415
|
-
#
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
727
|
+
# A "keyword attribute" is a node attribute that contains a set
|
728
|
+
# of space-delimited values. Perhaps the most familiar example
|
729
|
+
# of this is the HTML +class+ attribute used to contain CSS
|
730
|
+
# classes. But other keyword attributes exist, for instance
|
731
|
+
# [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
|
732
|
+
#
|
733
|
+
# @see #remove_class
|
734
|
+
# @see #kwattr_values
|
735
|
+
# @see #kwattr_add
|
736
|
+
# @see #kwattr_append
|
737
|
+
#
|
738
|
+
# @param attribute_name [String] The name of the keyword attribute to be modified.
|
739
|
+
#
|
740
|
+
# @param keywords [String, Array<String>]
|
741
|
+
#
|
742
|
+
# Keywords to be removed from the attribute named
|
743
|
+
# +attribute_name+. May be a string containing
|
744
|
+
# whitespace-delimited values, or an Array of String
|
745
|
+
# values. Any keywords present in the named attribute will be
|
746
|
+
# removed. If no keywords remain, or if +keywords+ is nil, the
|
747
|
+
# attribute is deleted.
|
748
|
+
#
|
749
|
+
# @return [Node] Returns +self+ for ease of chaining method calls.
|
750
|
+
#
|
751
|
+
# @example
|
752
|
+
# node # => <a rel="nofollow noreferrer">link</a>
|
753
|
+
# node.kwattr_remove("rel", "nofollow") # => <a rel="noreferrer">link</a>
|
754
|
+
# node.kwattr_remove("rel", "noreferrer") # => <a>link</a> # attribute is deleted when empty
|
755
|
+
#
|
756
|
+
# @since v1.11.0
|
757
|
+
#
|
758
|
+
def kwattr_remove(attribute_name, keywords)
|
759
|
+
if keywords.nil?
|
760
|
+
remove_attribute(attribute_name)
|
761
|
+
return self
|
762
|
+
end
|
763
|
+
|
764
|
+
keywords = keywordify(keywords)
|
765
|
+
current_kws = kwattr_values(attribute_name)
|
766
|
+
new_kws = current_kws - keywords
|
767
|
+
if new_kws.empty?
|
768
|
+
remove_attribute(attribute_name)
|
424
769
|
else
|
425
|
-
|
770
|
+
set_attribute(attribute_name, new_kws.join(" "))
|
426
771
|
end
|
427
772
|
self
|
428
773
|
end
|
429
774
|
|
430
|
-
###
|
431
|
-
# Remove the attribute named +name+
|
432
|
-
def remove_attribute name
|
433
|
-
attr = attributes[name].remove if key? name
|
434
|
-
clear_xpath_context if Nokogiri.jruby?
|
435
|
-
attr
|
436
|
-
end
|
437
775
|
alias :delete :remove_attribute
|
776
|
+
alias :get_attribute :[]
|
777
|
+
alias :attr :[]
|
778
|
+
alias :set_attribute :[]=
|
779
|
+
alias :has_attribute? :key?
|
780
|
+
|
781
|
+
# @!endgroup
|
438
782
|
|
439
783
|
###
|
440
784
|
# Returns true if this Node matches +selector+
|
441
|
-
def matches?
|
785
|
+
def matches?(selector)
|
442
786
|
ancestors.last.search(selector).include?(self)
|
443
787
|
end
|
444
788
|
|
445
789
|
###
|
446
790
|
# Create a DocumentFragment containing +tags+ that is relative to _this_
|
447
791
|
# context node.
|
448
|
-
def fragment
|
792
|
+
def fragment(tags)
|
449
793
|
type = document.html? ? Nokogiri::HTML : Nokogiri::XML
|
450
794
|
type::DocumentFragment.new(document, tags, self)
|
451
795
|
end
|
@@ -454,7 +798,7 @@ module Nokogiri
|
|
454
798
|
# Parse +string_or_io+ as a document fragment within the context of
|
455
799
|
# *this* node. Returns a XML::NodeSet containing the nodes parsed from
|
456
800
|
# +string_or_io+.
|
457
|
-
def parse
|
801
|
+
def parse(string_or_io, options = nil)
|
458
802
|
##
|
459
803
|
# When the current node is unparented and not an element node, use the
|
460
804
|
# document as the parsing context instead. Otherwise, the in-context
|
@@ -477,30 +821,34 @@ module Nokogiri
|
|
477
821
|
|
478
822
|
return Nokogiri::XML::NodeSet.new(document) if contents.empty?
|
479
823
|
|
480
|
-
|
481
|
-
#
|
824
|
+
# libxml2 does not obey the `recover` option after encountering errors during `in_context`
|
825
|
+
# parsing, and so this horrible hack is here to try to emulate recovery behavior.
|
826
|
+
#
|
827
|
+
# Unfortunately, this means we're no longer parsing "in context" and so namespaces that
|
828
|
+
# would have been inherited from the context node won't be handled correctly. This hack was
|
829
|
+
# written in 2010, and I regret it, because it's silently degrading functionality in a way
|
830
|
+
# that's not easily prevented (or even detected).
|
831
|
+
#
|
832
|
+
# I think preferable behavior would be to either:
|
833
|
+
#
|
834
|
+
# a. add an error noting that we "fell back" and pointing the user to turning off the `recover` option
|
835
|
+
# b. don't recover, but raise a sensible exception
|
836
|
+
#
|
837
|
+
# For context and background: https://github.com/sparklemotion/nokogiri/issues/313
|
838
|
+
# FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
|
482
839
|
error_count = document.errors.length
|
483
840
|
node_set = in_context(contents, options.to_i)
|
484
|
-
if node_set.empty?
|
485
|
-
|
486
|
-
|
841
|
+
if (node_set.empty? && (document.errors.length > error_count))
|
842
|
+
if options.recover?
|
843
|
+
fragment = Nokogiri::HTML::DocumentFragment.parse contents
|
844
|
+
node_set = fragment.children
|
845
|
+
else
|
846
|
+
raise document.errors[error_count]
|
847
|
+
end
|
487
848
|
end
|
488
849
|
node_set
|
489
850
|
end
|
490
851
|
|
491
|
-
####
|
492
|
-
# Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
|
493
|
-
def content= string
|
494
|
-
self.native_content = encode_special_chars(string.to_s)
|
495
|
-
end
|
496
|
-
|
497
|
-
###
|
498
|
-
# Set the parent Node for this Node
|
499
|
-
def parent= parent_node
|
500
|
-
parent_node.add_child(self)
|
501
|
-
parent_node
|
502
|
-
end
|
503
|
-
|
504
852
|
###
|
505
853
|
# Returns a Hash of +{prefix => value}+ for all namespaces on this
|
506
854
|
# node and its ancestors.
|
@@ -582,6 +930,7 @@ module Nokogiri
|
|
582
930
|
def element?
|
583
931
|
type == ELEMENT_NODE
|
584
932
|
end
|
933
|
+
|
585
934
|
alias :elem? :element?
|
586
935
|
|
587
936
|
###
|
@@ -592,7 +941,7 @@ module Nokogiri
|
|
592
941
|
end
|
593
942
|
|
594
943
|
# Get the inner_html for this node's Node#children
|
595
|
-
def inner_html
|
944
|
+
def inner_html(*args)
|
596
945
|
children.map { |x| x.to_html(*args) }.join
|
597
946
|
end
|
598
947
|
|
@@ -600,13 +949,13 @@ module Nokogiri
|
|
600
949
|
def css_path
|
601
950
|
path.split(/\//).map { |part|
|
602
951
|
part.length == 0 ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
|
603
|
-
}.compact.join(
|
952
|
+
}.compact.join(" > ")
|
604
953
|
end
|
605
954
|
|
606
955
|
###
|
607
956
|
# Get a list of ancestor Node for this Node. If +selector+ is given,
|
608
957
|
# the ancestors must match +selector+
|
609
|
-
def ancestors
|
958
|
+
def ancestors(selector = nil)
|
610
959
|
return NodeSet.new(document) unless respond_to?(:parent)
|
611
960
|
return NodeSet.new(document) unless parent
|
612
961
|
|
@@ -627,57 +976,38 @@ module Nokogiri
|
|
627
976
|
})
|
628
977
|
end
|
629
978
|
|
630
|
-
###
|
631
|
-
# Adds a default namespace supplied as a string +url+ href, to self.
|
632
|
-
# The consequence is as an xmlns attribute with supplied argument were
|
633
|
-
# present in parsed XML. A default namespace set with this method will
|
634
|
-
# now show up in #attributes, but when this node is serialized to XML an
|
635
|
-
# "xmlns" attribute will appear. See also #namespace and #namespace=
|
636
|
-
def default_namespace= url
|
637
|
-
add_namespace_definition(nil, url)
|
638
|
-
end
|
639
|
-
alias :add_namespace :add_namespace_definition
|
640
|
-
|
641
|
-
###
|
642
|
-
# Set the default namespace on this node (as would be defined with an
|
643
|
-
# "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
|
644
|
-
# a Namespace added this way will NOT be serialized as an xmlns attribute
|
645
|
-
# for this node. You probably want #default_namespace= instead, or perhaps
|
646
|
-
# #add_namespace_definition with a nil prefix argument.
|
647
|
-
def namespace= ns
|
648
|
-
return set_namespace(ns) unless ns
|
649
|
-
|
650
|
-
unless Nokogiri::XML::Namespace === ns
|
651
|
-
raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
|
652
|
-
end
|
653
|
-
if ns.document != document
|
654
|
-
raise ArgumentError, 'namespace must be declared on the same document'
|
655
|
-
end
|
656
|
-
|
657
|
-
set_namespace ns
|
658
|
-
end
|
659
|
-
|
660
979
|
####
|
661
980
|
# Yields self and all children to +block+ recursively.
|
662
|
-
def traverse
|
663
|
-
children.each{|j| j.traverse(&block) }
|
981
|
+
def traverse(&block)
|
982
|
+
children.each { |j| j.traverse(&block) }
|
664
983
|
block.call(self)
|
665
984
|
end
|
666
985
|
|
667
986
|
###
|
668
987
|
# Accept a visitor. This method calls "visit" on +visitor+ with self.
|
669
|
-
def accept
|
988
|
+
def accept(visitor)
|
670
989
|
visitor.visit(self)
|
671
990
|
end
|
672
991
|
|
673
992
|
###
|
674
993
|
# Test to see if this Node is equal to +other+
|
675
|
-
def ==
|
994
|
+
def ==(other)
|
676
995
|
return false unless other
|
677
996
|
return false unless other.respond_to?(:pointer_id)
|
678
997
|
pointer_id == other.pointer_id
|
679
998
|
end
|
680
999
|
|
1000
|
+
###
|
1001
|
+
# Compare two Node objects with respect to their Document. Nodes from
|
1002
|
+
# different documents cannot be compared.
|
1003
|
+
def <=>(other)
|
1004
|
+
return nil unless other.is_a?(Nokogiri::XML::Node)
|
1005
|
+
return nil unless document == other.document
|
1006
|
+
compare other
|
1007
|
+
end
|
1008
|
+
|
1009
|
+
# @!group Serialization and Generating Output
|
1010
|
+
|
681
1011
|
###
|
682
1012
|
# Serialize Node using +options+. Save options can also be set using a
|
683
1013
|
# block. See SaveOptions.
|
@@ -692,17 +1022,17 @@ module Nokogiri
|
|
692
1022
|
# config.format.as_xml
|
693
1023
|
# end
|
694
1024
|
#
|
695
|
-
def serialize
|
1025
|
+
def serialize(*args, &block)
|
696
1026
|
options = args.first.is_a?(Hash) ? args.shift : {
|
697
|
-
:encoding
|
698
|
-
:save_with
|
1027
|
+
:encoding => args[0],
|
1028
|
+
:save_with => args[1],
|
699
1029
|
}
|
700
1030
|
|
701
1031
|
encoding = options[:encoding] || document.encoding
|
702
1032
|
options[:encoding] = encoding
|
703
1033
|
|
704
1034
|
outstring = String.new
|
705
|
-
outstring.force_encoding(Encoding.find(encoding ||
|
1035
|
+
outstring.force_encoding(Encoding.find(encoding || "utf-8"))
|
706
1036
|
io = StringIO.new(outstring)
|
707
1037
|
write_to io, options, &block
|
708
1038
|
io.string
|
@@ -715,7 +1045,7 @@ module Nokogiri
|
|
715
1045
|
#
|
716
1046
|
# See Node#write_to for a list of +options+. For formatted output,
|
717
1047
|
# use Node#to_xhtml instead.
|
718
|
-
def to_html
|
1048
|
+
def to_html(options = {})
|
719
1049
|
to_format SaveOptions::DEFAULT_HTML, options
|
720
1050
|
end
|
721
1051
|
|
@@ -725,7 +1055,7 @@ module Nokogiri
|
|
725
1055
|
# doc.to_xml(:indent => 5, :encoding => 'UTF-8')
|
726
1056
|
#
|
727
1057
|
# See Node#write_to for a list of +options+
|
728
|
-
def to_xml
|
1058
|
+
def to_xml(options = {})
|
729
1059
|
options[:save_with] ||= SaveOptions::DEFAULT_XML
|
730
1060
|
serialize(options)
|
731
1061
|
end
|
@@ -736,7 +1066,7 @@ module Nokogiri
|
|
736
1066
|
# doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
|
737
1067
|
#
|
738
1068
|
# See Node#write_to for a list of +options+
|
739
|
-
def to_xhtml
|
1069
|
+
def to_xhtml(options = {})
|
740
1070
|
to_format SaveOptions::DEFAULT_XHTML, options
|
741
1071
|
end
|
742
1072
|
|
@@ -757,22 +1087,22 @@ module Nokogiri
|
|
757
1087
|
#
|
758
1088
|
# node.write_to(io, :indent_text => '-', :indent => 2)
|
759
1089
|
#
|
760
|
-
def write_to
|
761
|
-
options
|
762
|
-
encoding
|
1090
|
+
def write_to(io, *options)
|
1091
|
+
options = options.first.is_a?(Hash) ? options.shift : {}
|
1092
|
+
encoding = options[:encoding] || options[0]
|
763
1093
|
if Nokogiri.jruby?
|
764
|
-
save_options
|
765
|
-
indent_times
|
1094
|
+
save_options = options[:save_with] || options[1]
|
1095
|
+
indent_times = options[:indent] || 0
|
766
1096
|
else
|
767
|
-
save_options
|
768
|
-
indent_times
|
1097
|
+
save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
|
1098
|
+
indent_times = options[:indent] || 2
|
769
1099
|
end
|
770
|
-
indent_text
|
1100
|
+
indent_text = options[:indent_text] || " "
|
771
1101
|
|
772
1102
|
# Any string times 0 returns an empty string. Therefore, use the same
|
773
1103
|
# string instead of generating a new empty string for every node with
|
774
1104
|
# zero indentation.
|
775
|
-
indentation = indent_times.zero? ?
|
1105
|
+
indentation = indent_times.zero? ? "" : (indent_text * indent_times)
|
776
1106
|
|
777
1107
|
config = SaveOptions.new(save_options.to_i)
|
778
1108
|
yield config if block_given?
|
@@ -784,7 +1114,7 @@ module Nokogiri
|
|
784
1114
|
# Write Node as HTML to +io+ with +options+
|
785
1115
|
#
|
786
1116
|
# See Node#write_to for a list of +options+
|
787
|
-
def write_html_to
|
1117
|
+
def write_html_to(io, options = {})
|
788
1118
|
write_format_to SaveOptions::DEFAULT_HTML, io, options
|
789
1119
|
end
|
790
1120
|
|
@@ -792,7 +1122,7 @@ module Nokogiri
|
|
792
1122
|
# Write Node as XHTML to +io+ with +options+
|
793
1123
|
#
|
794
1124
|
# See Node#write_to for a list of +options+
|
795
|
-
def write_xhtml_to
|
1125
|
+
def write_xhtml_to(io, options = {})
|
796
1126
|
write_format_to SaveOptions::DEFAULT_XHTML, io, options
|
797
1127
|
end
|
798
1128
|
|
@@ -802,52 +1132,66 @@ module Nokogiri
|
|
802
1132
|
# doc.write_xml_to io, :encoding => 'UTF-8'
|
803
1133
|
#
|
804
1134
|
# See Node#write_to for a list of options
|
805
|
-
def write_xml_to
|
1135
|
+
def write_xml_to(io, options = {})
|
806
1136
|
options[:save_with] ||= SaveOptions::DEFAULT_XML
|
807
1137
|
write_to io, options
|
808
1138
|
end
|
809
1139
|
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
compare other
|
1140
|
+
def canonicalize(mode = XML::XML_C14N_1_0, inclusive_namespaces = nil, with_comments = false)
|
1141
|
+
c14n_root = self
|
1142
|
+
document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
|
1143
|
+
tn = node.is_a?(XML::Node) ? node : parent
|
1144
|
+
tn == c14n_root || tn.ancestors.include?(c14n_root)
|
1145
|
+
end
|
817
1146
|
end
|
818
1147
|
|
819
|
-
|
820
|
-
# Do xinclude substitution on the subtree below node. If given a block, a
|
821
|
-
# Nokogiri::XML::ParseOptions object initialized from +options+, will be
|
822
|
-
# passed to it, allowing more convenient modification of the parser options.
|
823
|
-
def do_xinclude options = XML::ParseOptions::DEFAULT_XML
|
824
|
-
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
1148
|
+
# @!endgroup
|
825
1149
|
|
826
|
-
|
827
|
-
yield options if block_given?
|
1150
|
+
protected
|
828
1151
|
|
829
|
-
|
830
|
-
|
1152
|
+
def coerce(data)
|
1153
|
+
case data
|
1154
|
+
when XML::NodeSet
|
1155
|
+
return data
|
1156
|
+
when XML::DocumentFragment
|
1157
|
+
return data.children
|
1158
|
+
when String
|
1159
|
+
return fragment(data).children
|
1160
|
+
when Document, XML::Attr
|
1161
|
+
# unacceptable
|
1162
|
+
when XML::Node
|
1163
|
+
return data
|
1164
|
+
end
|
1165
|
+
|
1166
|
+
raise ArgumentError, <<-EOERR
|
1167
|
+
Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
|
1168
|
+
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
|
1169
|
+
EOERR
|
831
1170
|
end
|
832
1171
|
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
1172
|
+
private
|
1173
|
+
|
1174
|
+
def keywordify(keywords)
|
1175
|
+
case keywords
|
1176
|
+
when Enumerable
|
1177
|
+
return keywords
|
1178
|
+
when String
|
1179
|
+
return keywords.scan(/\S+/)
|
1180
|
+
else
|
1181
|
+
raise ArgumentError.new("Keyword attributes must be passed as either a String or an Enumerable, but received #{keywords.class}")
|
838
1182
|
end
|
839
1183
|
end
|
840
1184
|
|
841
|
-
|
1185
|
+
def add_sibling(next_or_previous, node_or_tags)
|
1186
|
+
raise("Cannot add sibling to a node with no parent") unless parent
|
842
1187
|
|
843
|
-
def add_sibling next_or_previous, node_or_tags
|
844
1188
|
impl = (next_or_previous == :next) ? :add_next_sibling_node : :add_previous_sibling_node
|
845
|
-
iter = (next_or_previous == :next) ? :reverse_each
|
1189
|
+
iter = (next_or_previous == :next) ? :reverse_each : :each
|
846
1190
|
|
847
|
-
node_or_tags = coerce
|
1191
|
+
node_or_tags = parent.coerce(node_or_tags)
|
848
1192
|
if node_or_tags.is_a?(XML::NodeSet)
|
849
1193
|
if text?
|
850
|
-
pivot = Nokogiri::XML::Node.new
|
1194
|
+
pivot = Nokogiri::XML::Node.new "dummy", document
|
851
1195
|
send impl, pivot
|
852
1196
|
else
|
853
1197
|
pivot = self
|
@@ -863,14 +1207,14 @@ module Nokogiri
|
|
863
1207
|
USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze
|
864
1208
|
private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
865
1209
|
|
866
|
-
def to_format
|
1210
|
+
def to_format(save_option, options)
|
867
1211
|
return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
868
1212
|
|
869
1213
|
options[:save_with] = save_option unless options[:save_with]
|
870
1214
|
serialize(options)
|
871
1215
|
end
|
872
1216
|
|
873
|
-
def write_format_to
|
1217
|
+
def write_format_to(save_option, io, options)
|
874
1218
|
return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
875
1219
|
|
876
1220
|
options[:save_with] ||= save_option
|
@@ -881,30 +1225,10 @@ module Nokogiri
|
|
881
1225
|
[:name, :namespace, :attribute_nodes, :children]
|
882
1226
|
end
|
883
1227
|
|
884
|
-
def coerce data # :nodoc:
|
885
|
-
case data
|
886
|
-
when XML::NodeSet
|
887
|
-
return data
|
888
|
-
when XML::DocumentFragment
|
889
|
-
return data.children
|
890
|
-
when String
|
891
|
-
return fragment(data).children
|
892
|
-
when Document, XML::Attr
|
893
|
-
# unacceptable
|
894
|
-
when XML::Node
|
895
|
-
return data
|
896
|
-
end
|
897
|
-
|
898
|
-
raise ArgumentError, <<-EOERR
|
899
|
-
Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
|
900
|
-
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
|
901
|
-
EOERR
|
902
|
-
end
|
903
|
-
|
904
1228
|
# @private
|
905
|
-
IMPLIED_XPATH_CONTEXTS = [
|
1229
|
+
IMPLIED_XPATH_CONTEXTS = [".//".freeze].freeze
|
906
1230
|
|
907
|
-
def add_child_node_and_reparent_attrs
|
1231
|
+
def add_child_node_and_reparent_attrs(node)
|
908
1232
|
add_child_node node
|
909
1233
|
node.attribute_nodes.find_all { |a| a.name =~ /:/ }.each do |attr_node|
|
910
1234
|
attr_node.remove
|