nokogiri 1.11.0.rc1 → 1.11.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/README.md +164 -92
  5. data/ext/nokogiri/depend +476 -357
  6. data/ext/nokogiri/extconf.rb +467 -326
  7. data/ext/nokogiri/html_document.c +79 -78
  8. data/ext/nokogiri/html_sax_parser_context.c +4 -2
  9. data/ext/nokogiri/html_sax_push_parser.c +14 -8
  10. data/ext/nokogiri/nokogiri.c +37 -46
  11. data/ext/nokogiri/nokogiri.h +25 -17
  12. data/ext/nokogiri/test_global_handlers.c +41 -0
  13. data/ext/nokogiri/xml_document.c +8 -3
  14. data/ext/nokogiri/xml_io.c +8 -6
  15. data/ext/nokogiri/xml_node.c +1 -1
  16. data/ext/nokogiri/xml_node_set.c +1 -1
  17. data/ext/nokogiri/xml_reader.c +6 -17
  18. data/ext/nokogiri/xml_relax_ng.c +29 -11
  19. data/ext/nokogiri/xml_sax_parser.c +2 -7
  20. data/ext/nokogiri/xml_sax_parser_context.c +4 -2
  21. data/ext/nokogiri/xml_sax_push_parser.c +2 -0
  22. data/ext/nokogiri/xml_schema.c +84 -13
  23. data/ext/nokogiri/xml_syntax_error.c +23 -0
  24. data/ext/nokogiri/xml_syntax_error.h +15 -3
  25. data/ext/nokogiri/xml_xpath_context.c +80 -4
  26. data/ext/nokogiri/xslt_stylesheet.c +1 -4
  27. data/lib/nokogiri.rb +20 -3
  28. data/lib/nokogiri/css/parser.rb +62 -62
  29. data/lib/nokogiri/css/parser.y +2 -2
  30. data/lib/nokogiri/css/parser_extras.rb +38 -36
  31. data/lib/nokogiri/css/xpath_visitor.rb +70 -42
  32. data/lib/nokogiri/html/document.rb +12 -26
  33. data/lib/nokogiri/version.rb +2 -148
  34. data/lib/nokogiri/version/constant.rb +5 -0
  35. data/lib/nokogiri/version/info.rb +182 -0
  36. data/lib/nokogiri/xml/builder.rb +2 -2
  37. data/lib/nokogiri/xml/document.rb +17 -7
  38. data/lib/nokogiri/xml/document_fragment.rb +4 -6
  39. data/lib/nokogiri/xml/node.rb +562 -238
  40. data/lib/nokogiri/xml/parse_options.rb +6 -0
  41. data/lib/nokogiri/xml/relax_ng.rb +6 -2
  42. data/lib/nokogiri/xml/schema.rb +12 -4
  43. data/lib/nokogiri/xml/searchable.rb +24 -16
  44. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +32 -0
  45. data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +73 -0
  46. data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +103 -0
  47. data/patches/libxml2/0008-use-glibc-strlen.patch +53 -0
  48. data/patches/libxml2/0009-avoid-isnan-isinf.patch +81 -0
  49. metadata +84 -114
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ # The version of Nokogiri you are using
4
+ VERSION = "1.11.1"
5
+ end
@@ -0,0 +1,182 @@
1
+ # frozen_string_literal: true
2
+ require "singleton"
3
+ require "shellwords"
4
+
5
+ module Nokogiri
6
+ class VersionInfo # :nodoc:
7
+ include Singleton
8
+
9
+ def jruby?
10
+ ::JRUBY_VERSION if ::RUBY_PLATFORM == "java"
11
+ end
12
+
13
+ def engine
14
+ defined?(::RUBY_ENGINE) ? ::RUBY_ENGINE : "mri"
15
+ end
16
+
17
+ def loaded_libxml_version
18
+ Gem::Version.new(Nokogiri::LIBXML_LOADED_VERSION
19
+ .scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first
20
+ .collect(&:to_i)
21
+ .join("."))
22
+ end
23
+
24
+ def compiled_libxml_version
25
+ Gem::Version.new(Nokogiri::LIBXML_COMPILED_VERSION)
26
+ end
27
+
28
+ def loaded_libxslt_version
29
+ Gem::Version.new(Nokogiri::LIBXSLT_LOADED_VERSION
30
+ .scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first
31
+ .collect(&:to_i)
32
+ .join("."))
33
+ end
34
+
35
+ def compiled_libxslt_version
36
+ Gem::Version.new(Nokogiri::LIBXSLT_COMPILED_VERSION)
37
+ end
38
+
39
+ def libxml2?
40
+ defined?(Nokogiri::LIBXML_COMPILED_VERSION)
41
+ end
42
+
43
+ def libxml2_has_iconv?
44
+ defined?(Nokogiri::LIBXML_ICONV_ENABLED) && Nokogiri::LIBXML_ICONV_ENABLED
45
+ end
46
+
47
+ def libxml2_using_packaged?
48
+ libxml2? && Nokogiri::PACKAGED_LIBRARIES
49
+ end
50
+
51
+ def libxml2_using_system?
52
+ libxml2? && !libxml2_using_packaged?
53
+ end
54
+
55
+ def libxml2_precompiled?
56
+ libxml2_using_packaged? && Nokogiri::PRECOMPILED_LIBRARIES
57
+ end
58
+
59
+ def warnings
60
+ warnings = []
61
+
62
+ if libxml2?
63
+ if compiled_libxml_version != loaded_libxml_version
64
+ warnings << "Nokogiri was built against libxml version #{compiled_libxml_version}, but has dynamically loaded #{loaded_libxml_version}"
65
+ end
66
+
67
+ if compiled_libxslt_version != loaded_libxslt_version
68
+ warnings << "Nokogiri was built against libxslt version #{compiled_libxslt_version}, but has dynamically loaded #{loaded_libxslt_version}"
69
+ end
70
+ end
71
+
72
+ warnings
73
+ end
74
+
75
+ def to_hash
76
+ header_directory = File.expand_path(File.join(File.dirname(__FILE__), "../../../ext/nokogiri"))
77
+ {}.tap do |vi|
78
+ vi["warnings"] = []
79
+ vi["nokogiri"] = {}.tap do |nokogiri|
80
+ nokogiri["version"] = Nokogiri::VERSION
81
+
82
+ unless jruby?
83
+ cppflags = ["-I#{header_directory.shellescape}"]
84
+ if libxml2_using_packaged?
85
+ cppflags << "-I#{File.join(header_directory, "include").shellescape}"
86
+ cppflags << "-I#{File.join(header_directory, "include/libxml2").shellescape}"
87
+ end
88
+ nokogiri["cppflags"] = cppflags
89
+ end
90
+ end
91
+ vi["ruby"] = {}.tap do |ruby|
92
+ ruby["version"] = ::RUBY_VERSION
93
+ ruby["platform"] = ::RUBY_PLATFORM
94
+ ruby["gem_platform"] = ::Gem::Platform.local.to_s
95
+ ruby["description"] = ::RUBY_DESCRIPTION
96
+ ruby["engine"] = engine
97
+ ruby["jruby"] = jruby? if jruby?
98
+ end
99
+
100
+ if libxml2?
101
+ vi["libxml"] = {}.tap do |libxml|
102
+ if libxml2_using_packaged?
103
+ libxml["source"] = "packaged"
104
+ libxml["precompiled"] = libxml2_precompiled?
105
+ libxml["patches"] = Nokogiri::LIBXML2_PATCHES
106
+
107
+ # this is for nokogumbo and shouldn't be forever
108
+ libxml["libxml2_path"] = header_directory
109
+ else
110
+ libxml["source"] = "system"
111
+ end
112
+ libxml["iconv_enabled"] = libxml2_has_iconv?
113
+ libxml["compiled"] = compiled_libxml_version.to_s
114
+ libxml["loaded"] = loaded_libxml_version.to_s
115
+ end
116
+
117
+ vi["libxslt"] = {}.tap do |libxslt|
118
+ if libxml2_using_packaged?
119
+ libxslt["source"] = "packaged"
120
+ libxslt["precompiled"] = libxml2_precompiled?
121
+ libxslt["patches"] = Nokogiri::LIBXSLT_PATCHES
122
+ else
123
+ libxslt["source"] = "system"
124
+ end
125
+ libxslt["compiled"] = compiled_libxslt_version.to_s
126
+ libxslt["loaded"] = loaded_libxslt_version.to_s
127
+ end
128
+
129
+ vi["warnings"] = warnings
130
+ end
131
+
132
+ if defined?(Nokogiri::OTHER_LIBRARY_VERSIONS)
133
+ # see extconf for how this string is assembled: "lib1name:lib1version,lib2name:lib2version"
134
+ vi["other_libraries"] = Hash[*Nokogiri::OTHER_LIBRARY_VERSIONS.split(/[,:]/)]
135
+ elsif jruby?
136
+ vi["other_libraries"] = {}.tap do |ol|
137
+ ol["xerces"] = Nokogiri::XERCES_VERSION
138
+ ol["nekohtml"] = Nokogiri::NEKO_VERSION
139
+ end
140
+ end
141
+ end
142
+ end
143
+
144
+ def to_markdown
145
+ begin
146
+ require "psych"
147
+ rescue LoadError
148
+ end
149
+ require "yaml"
150
+ "# Nokogiri (#{Nokogiri::VERSION})\n" +
151
+ YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
152
+ end
153
+
154
+ instance.warnings.each do |warning|
155
+ warn "WARNING: #{warning}"
156
+ end
157
+ end
158
+
159
+ def self.uses_libxml?(requirement = nil) # :nodoc:
160
+ return false unless VersionInfo.instance.libxml2?
161
+ return true unless requirement
162
+ Gem::Requirement.new(requirement).satisfied_by?(VersionInfo.instance.loaded_libxml_version)
163
+ end
164
+
165
+ def self.jruby? # :nodoc:
166
+ VersionInfo.instance.jruby?
167
+ end
168
+
169
+ # Ensure constants used in this file are loaded - see #1896
170
+ if Nokogiri.jruby?
171
+ require "nokogiri/jruby/dependencies"
172
+ end
173
+ begin
174
+ ::RUBY_VERSION =~ /(\d+\.\d+)/
175
+ require "nokogiri/#{Regexp.last_match(1)}/nokogiri"
176
+ rescue LoadError
177
+ require "nokogiri/nokogiri"
178
+ end
179
+
180
+ # More complete version information about libxml
181
+ VERSION_INFO = VersionInfo.instance.to_hash
182
+ end
@@ -245,8 +245,8 @@ module Nokogiri
245
245
  #
246
246
  # For example:
247
247
  #
248
- # doc = Nokogiri::XML(open('somedoc.xml'))
249
- # Nokogiri::XML::Builder.with(doc.at('some_tag')) do |xml|
248
+ # doc = Nokogiri::XML(File.read('somedoc.xml'))
249
+ # Nokogiri::XML::Builder.with(doc.at_css('some_tag')) do |xml|
250
250
  # # ... Use normal builder methods here ...
251
251
  # xml.awesome # add the "awesome" tag below "some_tag"
252
252
  # end
@@ -1,4 +1,7 @@
1
1
  # frozen_string_literal: true
2
+
3
+ require 'pathname'
4
+
2
5
  module Nokogiri
3
6
  module XML
4
7
  ##
@@ -44,9 +47,11 @@ module Nokogiri
44
47
  #
45
48
  def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML
46
49
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
47
- # Give the options to the user
50
+
48
51
  yield options if block_given?
49
52
 
53
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
54
+
50
55
  if empty_doc?(string_or_io)
51
56
  if options.strict?
52
57
  raise Nokogiri::XML::SyntaxError.new("Empty document")
@@ -56,12 +61,17 @@ module Nokogiri
56
61
  end
57
62
 
58
63
  doc = if string_or_io.respond_to?(:read)
59
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
60
- read_io(string_or_io, url, encoding, options.to_i)
61
- else
62
- # read_memory pukes on empty docs
63
- read_memory(string_or_io, url, encoding, options.to_i)
64
- end
64
+ if string_or_io.is_a?(Pathname)
65
+ # resolve the Pathname to the file and open it as an IO object, see #2110
66
+ string_or_io = string_or_io.expand_path.open
67
+ url ||= string_or_io.path
68
+ end
69
+
70
+ read_io(string_or_io, url, encoding, options.to_i)
71
+ else
72
+ # read_memory pukes on empty docs
73
+ read_memory(string_or_io, url, encoding, options.to_i)
74
+ end
65
75
 
66
76
  # do xinclude processing
67
77
  doc.do_xinclude(options) if options.xinclude?
@@ -141,6 +141,10 @@ module Nokogiri
141
141
  document.errors = things
142
142
  end
143
143
 
144
+ def fragment(data)
145
+ document.fragment(data)
146
+ end
147
+
144
148
  private
145
149
 
146
150
  # fix for issue 770
@@ -150,12 +154,6 @@ module Nokogiri
150
154
  %Q{xmlns#{prefix}="#{namespace.href}"}
151
155
  end.join ' '
152
156
  end
153
-
154
- def coerce data
155
- return super unless String === data
156
-
157
- document.fragment(data).children
158
- end
159
157
  end
160
158
  end
161
159
  end
@@ -1,7 +1,7 @@
1
1
  # encoding: UTF-8
2
2
  # frozen_string_literal: true
3
- require 'stringio'
4
- require 'nokogiri/xml/node/save_options'
3
+ require "stringio"
4
+ require "nokogiri/xml/node/save_options"
5
5
 
6
6
  module Nokogiri
7
7
  module XML
@@ -57,49 +57,49 @@ module Nokogiri
57
57
  include Enumerable
58
58
 
59
59
  # Element node type, see Nokogiri::XML::Node#element?
60
- ELEMENT_NODE = 1
60
+ ELEMENT_NODE = 1
61
61
  # Attribute node type
62
- ATTRIBUTE_NODE = 2
62
+ ATTRIBUTE_NODE = 2
63
63
  # Text node type, see Nokogiri::XML::Node#text?
64
- TEXT_NODE = 3
64
+ TEXT_NODE = 3
65
65
  # CDATA node type, see Nokogiri::XML::Node#cdata?
66
66
  CDATA_SECTION_NODE = 4
67
67
  # Entity reference node type
68
- ENTITY_REF_NODE = 5
68
+ ENTITY_REF_NODE = 5
69
69
  # Entity node type
70
- ENTITY_NODE = 6
70
+ ENTITY_NODE = 6
71
71
  # PI node type
72
- PI_NODE = 7
72
+ PI_NODE = 7
73
73
  # Comment node type, see Nokogiri::XML::Node#comment?
74
- COMMENT_NODE = 8
74
+ COMMENT_NODE = 8
75
75
  # Document node type, see Nokogiri::XML::Node#xml?
76
- DOCUMENT_NODE = 9
76
+ DOCUMENT_NODE = 9
77
77
  # Document type node type
78
78
  DOCUMENT_TYPE_NODE = 10
79
79
  # Document fragment node type
80
80
  DOCUMENT_FRAG_NODE = 11
81
81
  # Notation node type
82
- NOTATION_NODE = 12
82
+ NOTATION_NODE = 12
83
83
  # HTML document node type, see Nokogiri::XML::Node#html?
84
84
  HTML_DOCUMENT_NODE = 13
85
85
  # DTD node type
86
- DTD_NODE = 14
86
+ DTD_NODE = 14
87
87
  # Element declaration type
88
- ELEMENT_DECL = 15
88
+ ELEMENT_DECL = 15
89
89
  # Attribute declaration type
90
- ATTRIBUTE_DECL = 16
90
+ ATTRIBUTE_DECL = 16
91
91
  # Entity declaration type
92
- ENTITY_DECL = 17
92
+ ENTITY_DECL = 17
93
93
  # Namespace declaration type
94
- NAMESPACE_DECL = 18
94
+ NAMESPACE_DECL = 18
95
95
  # XInclude start type
96
- XINCLUDE_START = 19
96
+ XINCLUDE_START = 19
97
97
  # XInclude end type
98
- XINCLUDE_END = 20
98
+ XINCLUDE_END = 20
99
99
  # DOCB document node type
100
100
  DOCB_DOCUMENT_NODE = 21
101
101
 
102
- def initialize name, document # :nodoc:
102
+ def initialize(name, document) # :nodoc:
103
103
  # ... Ya. This is empty on purpose.
104
104
  end
105
105
 
@@ -109,24 +109,18 @@ module Nokogiri
109
109
  document.decorate(self)
110
110
  end
111
111
 
112
+ # @!group Searching via XPath or CSS Queries
113
+
112
114
  ###
113
115
  # Search this node's immediate children using CSS selector +selector+
114
- def > selector
116
+ def >(selector)
115
117
  ns = document.root.namespaces
116
118
  xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
117
119
  end
118
120
 
119
- ###
120
- # Get the attribute value for the attribute +name+
121
- def [] name
122
- get(name.to_s)
123
- end
121
+ # @!endgroup
124
122
 
125
- ###
126
- # Set the attribute value for the attribute +name+ to +value+
127
- def []= name, value
128
- set name.to_s, value.to_s
129
- end
123
+ # @!group Manipulating Document Structure
130
124
 
131
125
  ###
132
126
  # Add +node_or_tags+ as a child of this Node.
@@ -135,7 +129,7 @@ module Nokogiri
135
129
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
136
130
  #
137
131
  # Also see related method +<<+.
138
- def add_child node_or_tags
132
+ def add_child(node_or_tags)
139
133
  node_or_tags = coerce(node_or_tags)
140
134
  if node_or_tags.is_a?(XML::NodeSet)
141
135
  node_or_tags.each { |n| add_child_node_and_reparent_attrs n }
@@ -152,7 +146,7 @@ module Nokogiri
152
146
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
153
147
  #
154
148
  # Also see related method +add_child+.
155
- def prepend_child node_or_tags
149
+ def prepend_child(node_or_tags)
156
150
  if first = children.first
157
151
  # Mimic the error add_child would raise.
158
152
  raise RuntimeError, "Document already has a root node" if document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
@@ -162,7 +156,6 @@ module Nokogiri
162
156
  end
163
157
  end
164
158
 
165
-
166
159
  ###
167
160
  # Add html around this node
168
161
  #
@@ -181,7 +174,7 @@ module Nokogiri
181
174
  # Returns self, to support chaining of calls (e.g., root << child1 << child2)
182
175
  #
183
176
  # Also see related method +add_child+.
184
- def << node_or_tags
177
+ def <<(node_or_tags)
185
178
  add_child node_or_tags
186
179
  self
187
180
  end
@@ -193,7 +186,7 @@ module Nokogiri
193
186
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
194
187
  #
195
188
  # Also see related method +before+.
196
- def add_previous_sibling node_or_tags
189
+ def add_previous_sibling(node_or_tags)
197
190
  raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
198
191
 
199
192
  add_sibling :previous, node_or_tags
@@ -206,7 +199,7 @@ module Nokogiri
206
199
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
207
200
  #
208
201
  # Also see related method +after+.
209
- def add_next_sibling node_or_tags
202
+ def add_next_sibling(node_or_tags)
210
203
  raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
211
204
 
212
205
  add_sibling :next, node_or_tags
@@ -219,7 +212,7 @@ module Nokogiri
219
212
  # Returns self, to support chaining of calls.
220
213
  #
221
214
  # Also see related method +add_previous_sibling+.
222
- def before node_or_tags
215
+ def before(node_or_tags)
223
216
  add_previous_sibling node_or_tags
224
217
  self
225
218
  end
@@ -231,7 +224,7 @@ module Nokogiri
231
224
  # Returns self, to support chaining of calls.
232
225
  #
233
226
  # Also see related method +add_next_sibling+.
234
- def after node_or_tags
227
+ def after(node_or_tags)
235
228
  add_next_sibling node_or_tags
236
229
  self
237
230
  end
@@ -243,7 +236,7 @@ module Nokogiri
243
236
  # Returns self.
244
237
  #
245
238
  # Also see related method +children=+
246
- def inner_html= node_or_tags
239
+ def inner_html=(node_or_tags)
247
240
  self.children = node_or_tags
248
241
  self
249
242
  end
@@ -255,7 +248,7 @@ module Nokogiri
255
248
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
256
249
  #
257
250
  # Also see related method +inner_html=+
258
- def children= node_or_tags
251
+ def children=(node_or_tags)
259
252
  node_or_tags = coerce(node_or_tags)
260
253
  children.unlink
261
254
  if node_or_tags.is_a?(XML::NodeSet)
@@ -273,19 +266,21 @@ module Nokogiri
273
266
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
274
267
  #
275
268
  # Also see related method +swap+.
276
- def replace node_or_tags
269
+ def replace(node_or_tags)
270
+ raise("Cannot replace a node with no parent") unless parent
271
+
277
272
  # We cannot replace a text node directly, otherwise libxml will return
278
273
  # an internal error at parser.c:13031, I don't know exactly why
279
274
  # libxml is trying to find a parent node that is an element or document
280
275
  # so I can't tell if this is a bug in libxml or not. issue #775.
281
276
  if text?
282
- replacee = Nokogiri::XML::Node.new 'dummy', document
277
+ replacee = Nokogiri::XML::Node.new "dummy", document
283
278
  add_previous_sibling_node replacee
284
279
  unlink
285
280
  return replacee.replace node_or_tags
286
281
  end
287
282
 
288
- node_or_tags = coerce(node_or_tags)
283
+ node_or_tags = parent.coerce(node_or_tags)
289
284
 
290
285
  if node_or_tags.is_a?(XML::NodeSet)
291
286
  node_or_tags.each { |n| add_previous_sibling n }
@@ -303,33 +298,98 @@ module Nokogiri
303
298
  # Returns self, to support chaining of calls.
304
299
  #
305
300
  # Also see related method +replace+.
306
- def swap node_or_tags
301
+ def swap(node_or_tags)
307
302
  replace node_or_tags
308
303
  self
309
304
  end
310
305
 
311
- alias :next :next_sibling
312
- alias :previous :previous_sibling
306
+ ####
307
+ # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
308
+ def content=(string)
309
+ self.native_content = encode_special_chars(string.to_s)
310
+ end
313
311
 
314
- # :stopdoc:
315
- # HACK: This is to work around an RDoc bug
316
- alias :next= :add_next_sibling
317
- # :startdoc:
312
+ ###
313
+ # Set the parent Node for this Node
314
+ def parent=(parent_node)
315
+ parent_node.add_child(self)
316
+ parent_node
317
+ end
318
318
 
319
- alias :previous= :add_previous_sibling
320
- alias :remove :unlink
321
- alias :get_attribute :[]
322
- alias :attr :[]
323
- alias :set_attribute :[]=
324
- alias :text :content
325
- alias :inner_text :content
326
- alias :has_attribute? :key?
327
- alias :name :node_name
328
- alias :name= :node_name=
329
- alias :type :node_type
330
- alias :to_str :text
331
- alias :clone :dup
332
- alias :elements :element_children
319
+ ###
320
+ # Adds a default namespace supplied as a string +url+ href, to self.
321
+ # The consequence is as if an xmlns attribute with the supplied argument were
322
+ # present in parsed XML. A default namespace set with this method will
323
+ # not show up in #attributes, but when this node is serialized to XML an
324
+ # "xmlns" attribute will appear. See also #namespace and #namespace=
325
+ def default_namespace=(url)
326
+ add_namespace_definition(nil, url)
327
+ end
328
+
329
+ ###
330
+ # Set the default namespace on this node (as would be defined with an
331
+ # "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
332
+ # a Namespace added this way will NOT be serialized as an xmlns attribute
333
+ # for this node. You probably want #default_namespace= instead, or perhaps
334
+ # #add_namespace_definition with a nil prefix argument.
335
+ def namespace=(ns)
336
+ return set_namespace(ns) unless ns
337
+
338
+ unless Nokogiri::XML::Namespace === ns
339
+ raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
340
+ end
341
+ if ns.document != document
342
+ raise ArgumentError, "namespace must be declared on the same document"
343
+ end
344
+
345
+ set_namespace ns
346
+ end
347
+
348
+ ###
349
+ # Do xinclude substitution on the subtree below node. If given a block, a
350
+ # Nokogiri::XML::ParseOptions object initialized from +options+ will be
351
+ # passed to it, allowing more convenient modification of the parser options.
352
+ def do_xinclude(options = XML::ParseOptions::DEFAULT_XML)
353
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
354
+
355
+ # give options to user
356
+ yield options if block_given?
357
+
358
+ # call c extension
359
+ process_xincludes(options.to_i)
360
+ end
361
+
362
+ alias :next :next_sibling
363
+ alias :previous :previous_sibling
364
+ alias :next= :add_next_sibling
365
+ alias :previous= :add_previous_sibling
366
+ alias :remove :unlink
367
+ alias :name= :node_name=
368
+ alias :add_namespace :add_namespace_definition
369
+
370
+ # @!endgroup
371
+
372
+ alias :text :content
373
+ alias :inner_text :content
374
+ alias :name :node_name
375
+ alias :type :node_type
376
+ alias :to_str :text
377
+ alias :clone :dup
378
+ alias :elements :element_children
379
+
380
+ # @!group Working With Node Attributes
381
+
382
+ ###
383
+ # Get the attribute value for the attribute +name+
384
+ def [](name)
385
+ get(name.to_s)
386
+ end
387
+
388
+ ###
389
+ # Set the attribute value for the attribute +name+ to +value+
390
+ def []=(name, value)
391
+ set name.to_s, value.to_s
392
+ end
333
393
 
334
394
  ####
335
395
  # Returns a hash containing the node's attributes. The key is
@@ -370,82 +430,366 @@ module Nokogiri
370
430
  end
371
431
 
372
432
  ###
373
- # Get the list of class names of this Node, without
374
- # deduplication or sorting.
433
+ # Remove the attribute named +name+
434
+ def remove_attribute(name)
435
+ attr = attributes[name].remove if key? name
436
+ clear_xpath_context if Nokogiri.jruby?
437
+ attr
438
+ end
439
+
440
+ # Get the CSS class names of a Node.
441
+ #
442
+ # This is a convenience function and is equivalent to:
443
+ # node.kwattr_values("class")
444
+ #
445
+ # @see #kwattr_values
446
+ # @see #add_class
447
+ # @see #append_class
448
+ # @see #remove_class
449
+ #
450
+ # @return [Array<String>]
451
+ #
452
+ # The CSS classes present in the Node's +class+ attribute. If
453
+ # the attribute is empty or non-existent, the return value is
454
+ # an empty array.
455
+ #
456
+ # @example
457
+ # node # => <div class="section title header"></div>
458
+ # node.classes # => ["section", "title", "header"]
459
+ #
375
460
  def classes
376
- self['class'].to_s.scan(/\S+/)
461
+ kwattr_values("class")
377
462
  end
378
463
 
379
- ###
380
- # Add +name+ to the "class" attribute value of this Node and
381
- # return self. If the value is already in the current value, it
382
- # is not added. If no "class" attribute exists yet, one is
383
- # created with the given value.
464
+ # Ensure HTML CSS classes are present on a +Node+. Any CSS
465
+ # classes in +names+ that already exist in the +Node+'s +class+
466
+ # attribute are _not_ added. Note that any existing duplicates
467
+ # in the +class+ attribute are not removed. Compare with
468
+ # {#append_class}.
469
+ #
470
+ # This is a convenience function and is equivalent to:
471
+ # node.kwattr_add("class", names)
472
+ #
473
+ # @see #kwattr_add
474
+ # @see #classes
475
+ # @see #append_class
476
+ # @see #remove_class
477
+ #
478
+ # @param names [String, Array<String>]
479
+ #
480
+ # CSS class names to be added to the Node's +class+
481
+ # attribute. May be a string containing whitespace-delimited
482
+ # names, or an Array of String names. Any class names already
483
+ # present will not be added. Any class names not present will
484
+ # be added. If no +class+ attribute exists, one is created.
485
+ #
486
+ # @return [Node] Returns +self+ for ease of chaining method calls.
487
+ #
488
+ # @example Ensure that a +Node+ has CSS class "section"
489
+ # node # => <div></div>
490
+ # node.add_class("section") # => <div class="section"></div>
491
+ # node.add_class("section") # => <div class="section"></div> # duplicate not added
492
+ #
493
+ # @example Ensure that a +Node+ has CSS classes "section" and "header", via a String argument.
494
+ # node # => <div class="section section"></div>
495
+ # node.add_class("section header") # => <div class="section section header"></div>
496
+ # # Note that the CSS class "section" is not added because it is already present.
497
+ # # Note also that the pre-existing duplicate CSS class "section" is not removed.
498
+ #
499
+ # @example Ensure that a +Node+ has CSS classes "section" and "header", via an Array argument.
500
+ # node # => <div></div>
501
+ # node.add_class(["section", "header"]) # => <div class="section header"></div>
502
+ #
503
+ def add_class(names)
504
+ kwattr_add("class", names)
505
+ end
506
+
507
+ # Add HTML CSS classes to a +Node+, regardless of
508
+ # duplication. Compare with {#add_class}.
509
+ #
510
+ # This is a convenience function and is equivalent to:
511
+ # node.kwattr_append("class", names)
512
+ #
513
+ # @see #kwattr_append
514
+ # @see #classes
515
+ # @see #add_class
516
+ # @see #remove_class
517
+ #
518
+ # @param names [String, Array<String>]
519
+ #
520
+ # CSS class names to be appended to the Node's +class+
521
+ # attribute. May be a string containing whitespace-delimited
522
+ # names, or an Array of String names. All class names passed
523
+ # in will be appended to the +class+ attribute even if they
524
+ # are already present in the attribute value. If no +class+
525
+ # attribute exists, one is created.
526
+ #
527
+ # @return [Node] Returns +self+ for ease of chaining method calls.
528
+ #
529
+ # @example Append "section" to a +Node+'s CSS +class+ attribute
530
+ # node # => <div></div>
531
+ # node.append_class("section") # => <div class="section"></div>
532
+ # node.append_class("section") # => <div class="section section"></div> # duplicate added!
533
+ #
534
+ # @example Append "section" and "header" to a +Node+'s CSS +class+ attribute, via a String argument.
535
+ # node # => <div class="section section"></div>
536
+ # node.append_class("section header") # => <div class="section section section header"></div>
537
+ # # Note that the CSS class "section" is appended even though it is already present.
538
+ #
539
+ # @example Append "section" and "header" to a +Node+'s CSS +class+ attribute, via an Array argument.
540
+ # node # => <div></div>
541
+ # node.append_class(["section", "header"]) # => <div class="section header"></div>
542
+ # node.append_class(["section", "header"]) # => <div class="section header section header"></div>
543
+ #
544
+ def append_class(names)
545
+ kwattr_append("class", names)
546
+ end
547
+
548
+ # Remove HTML CSS classes from a +Node+. Any CSS classes in +names+ that
549
+ # exist in the +Node+'s +class+ attribute are removed, including any
550
+ # multiple entries.
551
+ #
552
+ # If no CSS classes remain after this operation, or if +names+ is
553
+ # +nil+, the +class+ attribute is deleted from the node.
554
+ #
555
+ # This is a convenience function and is equivalent to:
556
+ # node.kwattr_remove("class", names)
557
+ #
558
+ # @see #kwattr_remove
559
+ # @see #classes
560
+ # @see #add_class
561
+ # @see #append_class
562
+ #
563
+ # @param names [String, Array<String>]
564
+ #
565
+ # CSS class names to be removed from the Node's +class+ attribute. May
566
+ # be a string containing whitespace-delimited names, or an Array of
567
+ # String names. Any class names already present will be removed. If no
568
+ # CSS classes remain, the +class+ attribute is deleted.
569
+ #
570
+ # @return [Node] Returns +self+ for ease of chaining method calls.
571
+ #
572
+ # @example
573
+ # node # => <div class="section header"></div>
574
+ # node.remove_class("section") # => <div class="header"></div>
575
+ # node.remove_class("header") # => <div></div> # attribute is deleted when empty
576
+ #
577
+ def remove_class(names = nil)
578
+ kwattr_remove("class", names)
579
+ end
580
+
581
+ # Retrieve values from a keyword attribute of a Node.
582
+ #
583
+ # A "keyword attribute" is a node attribute that contains a set
584
+ # of space-delimited values. Perhaps the most familiar example
585
+ # of this is the HTML +class+ attribute used to contain CSS
586
+ # classes. But other keyword attributes exist, for instance
587
+ # [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
588
+ #
589
+ # @see #classes
590
+ # @see #kwattr_add
591
+ # @see #kwattr_append
592
+ # @see #kwattr_remove
593
+ #
594
+ # @param attribute_name [String] The name of the keyword attribute to be inspected.
595
+ #
596
+ # @return [Array<String>]
597
+ #
598
+ # The values present in the Node's +attribute_name+
599
+ # attribute. If the attribute is empty or non-existent, the
600
+ # return value is an empty array.
601
+ #
602
+ # @example
603
+ # node # => <a rel="nofollow noopener external">link</a>
604
+ # node.kwattr_values("rel") # => ["nofollow", "noopener", "external"]
605
+ #
606
+ # @since v1.11.0
384
607
  #
385
- # More than one class may be added at a time, separated by a
386
- # space.
387
- def add_class name
388
- names = classes
389
- self['class'] = (names + (name.scan(/\S+/) - names)).join(' ')
608
+ def kwattr_values(attribute_name)
609
+ keywordify(get_attribute(attribute_name) || [])
610
+ end
611
+
612
+ # Ensure that values are present in a keyword attribute.
613
+ #
614
+ # Any values in +keywords+ that already exist in the +Node+'s
615
+ # attribute values are _not_ added. Note that any existing
616
+ # duplicates in the attribute values are not removed. Compare
617
+ # with {#kwattr_append}.
618
+ #
619
+ # A "keyword attribute" is a node attribute that contains a set
620
+ # of space-delimited values. Perhaps the most familiar example
621
+ # of this is the HTML +class+ attribute used to contain CSS
622
+ # classes. But other keyword attributes exist, for instance
623
+ # [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
624
+ #
625
+ # @see #add_class
626
+ # @see #kwattr_values
627
+ # @see #kwattr_append
628
+ # @see #kwattr_remove
629
+ #
630
+ # @param attribute_name [String] The name of the keyword attribute to be modified.
631
+ #
632
+ # @param keywords [String, Array<String>]
633
+ #
634
+ # Keywords to be added to the attribute named
635
+ # +attribute_name+. May be a string containing
636
+ # whitespace-delimited values, or an Array of String
637
+ # values. Any values already present will not be added. Any
638
+ # values not present will be added. If the named attribute
639
+ # does not exist, it is created.
640
+ #
641
+ # @return [Node] Returns +self+ for ease of chaining method calls.
642
+ #
643
+ # @example Ensure that a +Node+ has "nofollow" in its +rel+ attribute.
644
+ # node # => <a></a>
645
+ # node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a>
646
+ # node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a> # duplicate not added
647
+ #
648
+ # @example Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via a String argument.
649
+ # node # => <a rel="nofollow nofollow"></a>
650
+ # node.kwattr_add("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
651
+ # # Note that "nofollow" is not added because it is already present.
652
+ # # Note also that the pre-existing duplicate "nofollow" is not removed.
653
+ #
654
+ # @example Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via an Array argument.
655
+ # node # => <a></a>
656
+ # node.kwattr_add("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
657
+ #
658
+ # @since v1.11.0
659
+ #
660
+ def kwattr_add(attribute_name, keywords)
661
+ keywords = keywordify(keywords)
662
+ current_kws = kwattr_values(attribute_name)
663
+ new_kws = (current_kws + (keywords - current_kws)).join(" ")
664
+ set_attribute(attribute_name, new_kws)
390
665
  self
391
666
  end
392
667
 
393
- ###
394
- # Append +name+ to the "class" attribute value of this Node and
395
- # return self. The value is simply appended without checking if
396
- # it is already in the current value. If no "class" attribute
397
- # exists yet, one is created with the given value.
668
+ # Add keywords to a Node's keyword attribute, regardless of
669
+ # duplication. Compare with {#kwattr_add}.
670
+ #
671
+ # A "keyword attribute" is a node attribute that contains a set
672
+ # of space-delimited values. Perhaps the most familiar example
673
+ # of this is the HTML +class+ attribute used to contain CSS
674
+ # classes. But other keyword attributes exist, for instance
675
+ # [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
676
+ #
677
+ # @see #append_class
678
+ # @see #kwattr_values
679
+ # @see #kwattr_add
680
+ # @see #kwattr_remove
681
+ #
682
+ # @param attribute_name [String] The name of the keyword attribute to be modified.
398
683
  #
399
- # More than one class may be appended at a time, separated by a
400
- # space.
401
- def append_class name
402
- self['class'] = (classes + name.scan(/\S+/)).join(' ')
684
+ # @param keywords [String, Array<String>]
685
+ #
686
+ # Keywords to be added to the attribute named
687
+ # +attribute_name+. May be a string containing
688
+ # whitespace-delimited values, or an Array of String
689
+ # values. All values passed in will be appended to the named
690
+ # attribute even if they are already present in the
691
+ # attribute. If the named attribute does not exist, it is
692
+ # created.
693
+ #
694
+ # @return [Node] Returns +self+ for ease of chaining method calls.
695
+ #
696
+ # @example Append "nofollow" to the +rel+ attribute.
697
+ # node # => <a></a>
698
+ # node.kwattr_append("rel", "nofollow") # => <a rel="nofollow"></a>
699
+ # node.kwattr_append("rel", "nofollow") # => <a rel="nofollow nofollow"></a> # duplicate added!
700
+ #
701
+ # @example Append "nofollow" and "noreferrer" to the +rel+ attribute, via a String argument.
702
+ # node # => <a rel="nofollow"></a>
703
+ # node.kwattr_append("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
704
+ # # Note that "nofollow" is appended even though it is already present.
705
+ #
706
+ # @example Append "nofollow" and "noreferrer" to the +rel+ attribute, via an Array argument.
707
+ # node # => <a></a>
708
+ # node.kwattr_append("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
709
+ #
710
+ # @since v1.11.0
711
+ #
712
+ def kwattr_append(attribute_name, keywords)
713
+ keywords = keywordify(keywords)
714
+ current_kws = kwattr_values(attribute_name)
715
+ new_kws = (current_kws + keywords).join(" ")
716
+ set_attribute(attribute_name, new_kws)
403
717
  self
404
718
  end
405
719
 
406
- ###
407
- # Remove +name+ from the "class" attribute value of this Node
408
- # and return self. If there are many occurrences of the name,
409
- # they are all removed.
720
+ # Remove keywords from a keyword attribute. Any matching
721
+ # keywords that exist in the named attribute are removed,
722
+ # including any multiple entries.
410
723
  #
411
- # More than one class may be removed at a time, separated by a
412
- # space.
724
+ # If no keywords remain after this operation, or if +keywords+
725
+ # is +nil+, the attribute is deleted from the node.
413
726
  #
414
- # If no class name is left after removal, or when +name+ is nil,
415
- # the "class" attribute is removed from this Node.
416
- def remove_class name = nil
417
- if name
418
- names = classes - name.scan(/\S+/)
419
- if names.empty?
420
- delete 'class'
421
- else
422
- self['class'] = names.join(' ')
423
- end
727
+ # A "keyword attribute" is a node attribute that contains a set
728
+ # of space-delimited values. Perhaps the most familiar example
729
+ # of this is the HTML +class+ attribute used to contain CSS
730
+ # classes. But other keyword attributes exist, for instance
731
+ # [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
732
+ #
733
+ # @see #remove_class
734
+ # @see #kwattr_values
735
+ # @see #kwattr_add
736
+ # @see #kwattr_append
737
+ #
738
+ # @param attribute_name [String] The name of the keyword attribute to be modified.
739
+ #
740
+ # @param keywords [String, Array<String>]
741
+ #
742
+ # Keywords to be removed from the attribute named
743
+ # +attribute_name+. May be a string containing
744
+ # whitespace-delimited values, or an Array of String
745
+ # values. Any keywords present in the named attribute will be
746
+ # removed. If no keywords remain, or if +keywords+ is nil, the
747
+ # attribute is deleted.
748
+ #
749
+ # @return [Node] Returns +self+ for ease of chaining method calls.
750
+ #
751
+ # @example
752
+ # node # => <a rel="nofollow noreferrer">link</a>
753
+ # node.kwattr_remove("rel", "nofollow") # => <a rel="noreferrer">link</a>
754
+ # node.kwattr_remove("rel", "noreferrer") # => <a>link</a> # attribute is deleted when empty
755
+ #
756
+ # @since v1.11.0
757
+ #
758
+ def kwattr_remove(attribute_name, keywords)
759
+ if keywords.nil?
760
+ remove_attribute(attribute_name)
761
+ return self
762
+ end
763
+
764
+ keywords = keywordify(keywords)
765
+ current_kws = kwattr_values(attribute_name)
766
+ new_kws = current_kws - keywords
767
+ if new_kws.empty?
768
+ remove_attribute(attribute_name)
424
769
  else
425
- delete "class"
770
+ set_attribute(attribute_name, new_kws.join(" "))
426
771
  end
427
772
  self
428
773
  end
429
774
 
430
- ###
431
- # Remove the attribute named +name+
432
- def remove_attribute name
433
- attr = attributes[name].remove if key? name
434
- clear_xpath_context if Nokogiri.jruby?
435
- attr
436
- end
437
775
  alias :delete :remove_attribute
776
+ alias :get_attribute :[]
777
+ alias :attr :[]
778
+ alias :set_attribute :[]=
779
+ alias :has_attribute? :key?
780
+
781
+ # @!endgroup
438
782
 
439
783
  ###
440
784
  # Returns true if this Node matches +selector+
441
- def matches? selector
785
+ def matches?(selector)
442
786
  ancestors.last.search(selector).include?(self)
443
787
  end
444
788
 
445
789
  ###
446
790
  # Create a DocumentFragment containing +tags+ that is relative to _this_
447
791
  # context node.
448
- def fragment tags
792
+ def fragment(tags)
449
793
  type = document.html? ? Nokogiri::HTML : Nokogiri::XML
450
794
  type::DocumentFragment.new(document, tags, self)
451
795
  end
@@ -454,7 +798,7 @@ module Nokogiri
454
798
  # Parse +string_or_io+ as a document fragment within the context of
455
799
  # *this* node. Returns a XML::NodeSet containing the nodes parsed from
456
800
  # +string_or_io+.
457
- def parse string_or_io, options = nil
801
+ def parse(string_or_io, options = nil)
458
802
  ##
459
803
  # When the current node is unparented and not an element node, use the
460
804
  # document as the parsing context instead. Otherwise, the in-context
@@ -477,30 +821,34 @@ module Nokogiri
477
821
 
478
822
  return Nokogiri::XML::NodeSet.new(document) if contents.empty?
479
823
 
480
- ##
481
- # This is a horrible hack, but I don't care. See #313 for background.
824
+ # libxml2 does not obey the `recover` option after encountering errors during `in_context`
825
+ # parsing, and so this horrible hack is here to try to emulate recovery behavior.
826
+ #
827
+ # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
828
+ # would have been inherited from the context node won't be handled correctly. This hack was
829
+ # written in 2010, and I regret it, because it's silently degrading functionality in a way
830
+ # that's not easily prevented (or even detected).
831
+ #
832
+ # I think preferable behavior would be to either:
833
+ #
834
+ # a. add an error noting that we "fell back" and pointing the user to turning off the `recover` option
835
+ # b. don't recover, but raise a sensible exception
836
+ #
837
+ # For context and background: https://github.com/sparklemotion/nokogiri/issues/313
838
+ # FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
482
839
  error_count = document.errors.length
483
840
  node_set = in_context(contents, options.to_i)
484
- if node_set.empty? and document.errors.length > error_count and options.recover?
485
- fragment = Nokogiri::HTML::DocumentFragment.parse contents
486
- node_set = fragment.children
841
+ if (node_set.empty? && (document.errors.length > error_count))
842
+ if options.recover?
843
+ fragment = Nokogiri::HTML::DocumentFragment.parse contents
844
+ node_set = fragment.children
845
+ else
846
+ raise document.errors[error_count]
847
+ end
487
848
  end
488
849
  node_set
489
850
  end
490
851
 
491
- ####
492
- # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
493
- def content= string
494
- self.native_content = encode_special_chars(string.to_s)
495
- end
496
-
497
- ###
498
- # Set the parent Node for this Node
499
- def parent= parent_node
500
- parent_node.add_child(self)
501
- parent_node
502
- end
503
-
504
852
  ###
505
853
  # Returns a Hash of +{prefix => value}+ for all namespaces on this
506
854
  # node and its ancestors.
@@ -582,6 +930,7 @@ module Nokogiri
582
930
  def element?
583
931
  type == ELEMENT_NODE
584
932
  end
933
+
585
934
  alias :elem? :element?
586
935
 
587
936
  ###
@@ -592,7 +941,7 @@ module Nokogiri
592
941
  end
593
942
 
594
943
  # Get the inner_html for this node's Node#children
595
- def inner_html *args
944
+ def inner_html(*args)
596
945
  children.map { |x| x.to_html(*args) }.join
597
946
  end
598
947
 
@@ -600,13 +949,13 @@ module Nokogiri
600
949
  def css_path
601
950
  path.split(/\//).map { |part|
602
951
  part.length == 0 ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
603
- }.compact.join(' > ')
952
+ }.compact.join(" > ")
604
953
  end
605
954
 
606
955
  ###
607
956
  # Get a list of ancestor Node for this Node. If +selector+ is given,
608
957
  # the ancestors must match +selector+
609
- def ancestors selector = nil
958
+ def ancestors(selector = nil)
610
959
  return NodeSet.new(document) unless respond_to?(:parent)
611
960
  return NodeSet.new(document) unless parent
612
961
 
@@ -627,57 +976,38 @@ module Nokogiri
627
976
  })
628
977
  end
629
978
 
630
- ###
631
- # Adds a default namespace supplied as a string +url+ href, to self.
632
- # The consequence is as an xmlns attribute with supplied argument were
633
- # present in parsed XML. A default namespace set with this method will
634
- # now show up in #attributes, but when this node is serialized to XML an
635
- # "xmlns" attribute will appear. See also #namespace and #namespace=
636
- def default_namespace= url
637
- add_namespace_definition(nil, url)
638
- end
639
- alias :add_namespace :add_namespace_definition
640
-
641
- ###
642
- # Set the default namespace on this node (as would be defined with an
643
- # "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
644
- # a Namespace added this way will NOT be serialized as an xmlns attribute
645
- # for this node. You probably want #default_namespace= instead, or perhaps
646
- # #add_namespace_definition with a nil prefix argument.
647
- def namespace= ns
648
- return set_namespace(ns) unless ns
649
-
650
- unless Nokogiri::XML::Namespace === ns
651
- raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
652
- end
653
- if ns.document != document
654
- raise ArgumentError, 'namespace must be declared on the same document'
655
- end
656
-
657
- set_namespace ns
658
- end
659
-
660
979
  ####
661
980
  # Yields self and all children to +block+ recursively.
662
- def traverse &block
663
- children.each{|j| j.traverse(&block) }
981
+ def traverse(&block)
982
+ children.each { |j| j.traverse(&block) }
664
983
  block.call(self)
665
984
  end
666
985
 
667
986
  ###
668
987
  # Accept a visitor. This method calls "visit" on +visitor+ with self.
669
- def accept visitor
988
+ def accept(visitor)
670
989
  visitor.visit(self)
671
990
  end
672
991
 
673
992
  ###
674
993
  # Test to see if this Node is equal to +other+
675
- def == other
994
+ def ==(other)
676
995
  return false unless other
677
996
  return false unless other.respond_to?(:pointer_id)
678
997
  pointer_id == other.pointer_id
679
998
  end
680
999
 
1000
+ ###
1001
+ # Compare two Node objects with respect to their Document. Nodes from
1002
+ # different documents cannot be compared.
1003
+ def <=>(other)
1004
+ return nil unless other.is_a?(Nokogiri::XML::Node)
1005
+ return nil unless document == other.document
1006
+ compare other
1007
+ end
1008
+
1009
+ # @!group Serialization and Generating Output
1010
+
681
1011
  ###
682
1012
  # Serialize Node using +options+. Save options can also be set using a
683
1013
  # block. See SaveOptions.
@@ -692,17 +1022,17 @@ module Nokogiri
692
1022
  # config.format.as_xml
693
1023
  # end
694
1024
  #
695
- def serialize *args, &block
1025
+ def serialize(*args, &block)
696
1026
  options = args.first.is_a?(Hash) ? args.shift : {
697
- :encoding => args[0],
698
- :save_with => args[1]
1027
+ :encoding => args[0],
1028
+ :save_with => args[1],
699
1029
  }
700
1030
 
701
1031
  encoding = options[:encoding] || document.encoding
702
1032
  options[:encoding] = encoding
703
1033
 
704
1034
  outstring = String.new
705
- outstring.force_encoding(Encoding.find(encoding || 'utf-8'))
1035
+ outstring.force_encoding(Encoding.find(encoding || "utf-8"))
706
1036
  io = StringIO.new(outstring)
707
1037
  write_to io, options, &block
708
1038
  io.string
@@ -715,7 +1045,7 @@ module Nokogiri
715
1045
  #
716
1046
  # See Node#write_to for a list of +options+. For formatted output,
717
1047
  # use Node#to_xhtml instead.
718
- def to_html options = {}
1048
+ def to_html(options = {})
719
1049
  to_format SaveOptions::DEFAULT_HTML, options
720
1050
  end
721
1051
 
@@ -725,7 +1055,7 @@ module Nokogiri
725
1055
  # doc.to_xml(:indent => 5, :encoding => 'UTF-8')
726
1056
  #
727
1057
  # See Node#write_to for a list of +options+
728
- def to_xml options = {}
1058
+ def to_xml(options = {})
729
1059
  options[:save_with] ||= SaveOptions::DEFAULT_XML
730
1060
  serialize(options)
731
1061
  end
@@ -736,7 +1066,7 @@ module Nokogiri
736
1066
  # doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
737
1067
  #
738
1068
  # See Node#write_to for a list of +options+
739
- def to_xhtml options = {}
1069
+ def to_xhtml(options = {})
740
1070
  to_format SaveOptions::DEFAULT_XHTML, options
741
1071
  end
742
1072
 
@@ -757,22 +1087,22 @@ module Nokogiri
757
1087
  #
758
1088
  # node.write_to(io, :indent_text => '-', :indent => 2)
759
1089
  #
760
- def write_to io, *options
761
- options = options.first.is_a?(Hash) ? options.shift : {}
762
- encoding = options[:encoding] || options[0]
1090
+ def write_to(io, *options)
1091
+ options = options.first.is_a?(Hash) ? options.shift : {}
1092
+ encoding = options[:encoding] || options[0]
763
1093
  if Nokogiri.jruby?
764
- save_options = options[:save_with] || options[1]
765
- indent_times = options[:indent] || 0
1094
+ save_options = options[:save_with] || options[1]
1095
+ indent_times = options[:indent] || 0
766
1096
  else
767
- save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
768
- indent_times = options[:indent] || 2
1097
+ save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
1098
+ indent_times = options[:indent] || 2
769
1099
  end
770
- indent_text = options[:indent_text] || ' '
1100
+ indent_text = options[:indent_text] || " "
771
1101
 
772
1102
  # Any string times 0 returns an empty string. Therefore, use the same
773
1103
  # string instead of generating a new empty string for every node with
774
1104
  # zero indentation.
775
- indentation = indent_times.zero? ? '' : (indent_text * indent_times)
1105
+ indentation = indent_times.zero? ? "" : (indent_text * indent_times)
776
1106
 
777
1107
  config = SaveOptions.new(save_options.to_i)
778
1108
  yield config if block_given?
@@ -784,7 +1114,7 @@ module Nokogiri
784
1114
  # Write Node as HTML to +io+ with +options+
785
1115
  #
786
1116
  # See Node#write_to for a list of +options+
787
- def write_html_to io, options = {}
1117
+ def write_html_to(io, options = {})
788
1118
  write_format_to SaveOptions::DEFAULT_HTML, io, options
789
1119
  end
790
1120
 
@@ -792,7 +1122,7 @@ module Nokogiri
792
1122
  # Write Node as XHTML to +io+ with +options+
793
1123
  #
794
1124
  # See Node#write_to for a list of +options+
795
- def write_xhtml_to io, options = {}
1125
+ def write_xhtml_to(io, options = {})
796
1126
  write_format_to SaveOptions::DEFAULT_XHTML, io, options
797
1127
  end
798
1128
 
@@ -802,52 +1132,66 @@ module Nokogiri
802
1132
  # doc.write_xml_to io, :encoding => 'UTF-8'
803
1133
  #
804
1134
  # See Node#write_to for a list of options
805
- def write_xml_to io, options = {}
1135
+ def write_xml_to(io, options = {})
806
1136
  options[:save_with] ||= SaveOptions::DEFAULT_XML
807
1137
  write_to io, options
808
1138
  end
809
1139
 
810
- ###
811
- # Compare two Node objects with respect to their Document. Nodes from
812
- # different documents cannot be compared.
813
- def <=> other
814
- return nil unless other.is_a?(Nokogiri::XML::Node)
815
- return nil unless document == other.document
816
- compare other
1140
+ def canonicalize(mode = XML::XML_C14N_1_0, inclusive_namespaces = nil, with_comments = false)
1141
+ c14n_root = self
1142
+ document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
1143
+ tn = node.is_a?(XML::Node) ? node : parent
1144
+ tn == c14n_root || tn.ancestors.include?(c14n_root)
1145
+ end
817
1146
  end
818
1147
 
819
- ###
820
- # Do xinclude substitution on the subtree below node. If given a block, a
821
- # Nokogiri::XML::ParseOptions object initialized from +options+, will be
822
- # passed to it, allowing more convenient modification of the parser options.
823
- def do_xinclude options = XML::ParseOptions::DEFAULT_XML
824
- options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
1148
+ # @!endgroup
825
1149
 
826
- # give options to user
827
- yield options if block_given?
1150
+ protected
828
1151
 
829
- # call c extension
830
- process_xincludes(options.to_i)
1152
+ def coerce(data)
1153
+ case data
1154
+ when XML::NodeSet
1155
+ return data
1156
+ when XML::DocumentFragment
1157
+ return data.children
1158
+ when String
1159
+ return fragment(data).children
1160
+ when Document, XML::Attr
1161
+ # unacceptable
1162
+ when XML::Node
1163
+ return data
1164
+ end
1165
+
1166
+ raise ArgumentError, <<-EOERR
1167
+ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
1168
+ (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
1169
+ EOERR
831
1170
  end
832
1171
 
833
- def canonicalize(mode=XML::XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
834
- c14n_root = self
835
- document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
836
- tn = node.is_a?(XML::Node) ? node : parent
837
- tn == c14n_root || tn.ancestors.include?(c14n_root)
1172
+ private
1173
+
1174
+ def keywordify(keywords)
1175
+ case keywords
1176
+ when Enumerable
1177
+ return keywords
1178
+ when String
1179
+ return keywords.scan(/\S+/)
1180
+ else
1181
+ raise ArgumentError.new("Keyword attributes must be passed as either a String or an Enumerable, but received #{keywords.class}")
838
1182
  end
839
1183
  end
840
1184
 
841
- private
1185
+ def add_sibling(next_or_previous, node_or_tags)
1186
+ raise("Cannot add sibling to a node with no parent") unless parent
842
1187
 
843
- def add_sibling next_or_previous, node_or_tags
844
1188
  impl = (next_or_previous == :next) ? :add_next_sibling_node : :add_previous_sibling_node
845
- iter = (next_or_previous == :next) ? :reverse_each : :each
1189
+ iter = (next_or_previous == :next) ? :reverse_each : :each
846
1190
 
847
- node_or_tags = coerce node_or_tags
1191
+ node_or_tags = parent.coerce(node_or_tags)
848
1192
  if node_or_tags.is_a?(XML::NodeSet)
849
1193
  if text?
850
- pivot = Nokogiri::XML::Node.new 'dummy', document
1194
+ pivot = Nokogiri::XML::Node.new "dummy", document
851
1195
  send impl, pivot
852
1196
  else
853
1197
  pivot = self
@@ -863,14 +1207,14 @@ module Nokogiri
863
1207
  USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze
864
1208
  private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
865
1209
 
866
- def to_format save_option, options
1210
+ def to_format(save_option, options)
867
1211
  return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION
868
1212
 
869
1213
  options[:save_with] = save_option unless options[:save_with]
870
1214
  serialize(options)
871
1215
  end
872
1216
 
873
- def write_format_to save_option, io, options
1217
+ def write_format_to(save_option, io, options)
874
1218
  return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION
875
1219
 
876
1220
  options[:save_with] ||= save_option
@@ -881,30 +1225,10 @@ module Nokogiri
881
1225
  [:name, :namespace, :attribute_nodes, :children]
882
1226
  end
883
1227
 
884
- def coerce data # :nodoc:
885
- case data
886
- when XML::NodeSet
887
- return data
888
- when XML::DocumentFragment
889
- return data.children
890
- when String
891
- return fragment(data).children
892
- when Document, XML::Attr
893
- # unacceptable
894
- when XML::Node
895
- return data
896
- end
897
-
898
- raise ArgumentError, <<-EOERR
899
- Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
900
- (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
901
- EOERR
902
- end
903
-
904
1228
  # @private
905
- IMPLIED_XPATH_CONTEXTS = [ './/'.freeze ].freeze # :nodoc:
1229
+ IMPLIED_XPATH_CONTEXTS = [".//".freeze].freeze
906
1230
 
907
- def add_child_node_and_reparent_attrs node # :nodoc:
1231
+ def add_child_node_and_reparent_attrs(node)
908
1232
  add_child_node node
909
1233
  node.attribute_nodes.find_all { |a| a.name =~ /:/ }.each do |attr_node|
910
1234
  attr_node.remove