nokogiri 1.1.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

@@ -12,8 +12,8 @@ module Nokogiri
12
12
  if hash[:css]
13
13
  list = css("#{name}#{hash[:css]}")
14
14
  elsif hash[:xpath]
15
- conds = Array(hash[:xpath]).collect{|j| "[#{j}]"}
16
- list = xpath("./#{name}#{conds}")
15
+ conds = Array(hash[:xpath]).join(' and ')
16
+ list = xpath("./#{name}[#{conds}]")
17
17
  end
18
18
  else
19
19
  CSS::Parser.without_cache do
@@ -1,3 +1,3 @@
1
1
  module Nokogiri
2
- VERSION = '1.1.0'
2
+ VERSION = '1.1.1'
3
3
  end
@@ -1,8 +1,8 @@
1
1
  require 'nokogiri/xml/sax'
2
- require 'nokogiri/xml/xpath_handler'
3
2
  require 'nokogiri/xml/before_handler'
4
3
  require 'nokogiri/xml/after_handler'
5
4
  require 'nokogiri/xml/node'
5
+ require 'nokogiri/xml/attr'
6
6
  require 'nokogiri/xml/dtd'
7
7
  require 'nokogiri/xml/text'
8
8
  require 'nokogiri/xml/cdata'
@@ -0,0 +1,10 @@
1
+ module Nokogiri
2
+ module XML
3
+ class Attr < Node
4
+ def value
5
+ children.first.to_s
6
+ end
7
+ alias :to_s :value
8
+ end
9
+ end
10
+ end
@@ -16,7 +16,6 @@ module Nokogiri
16
16
 
17
17
  ###
18
18
  # Explore a document with shortcut methods.
19
-
20
19
  def slop!
21
20
  unless decorators(XML::Node).include? Nokogiri::Decorators::Slop
22
21
  decorators(XML::Node) << Nokogiri::Decorators::Slop
@@ -40,9 +40,9 @@ module Nokogiri
40
40
  first = self.child
41
41
  return list unless first # Empty list
42
42
 
43
- list << first unless first.blank?
43
+ list << first
44
44
  while first = first.next
45
- list << first unless first.blank?
45
+ list << first
46
46
  end
47
47
  list
48
48
  end
@@ -54,6 +54,7 @@ module Nokogiri
54
54
  def search *paths
55
55
  ns = paths.last.is_a?(Hash) ? paths.pop : {}
56
56
  xpath(*(paths.map { |path|
57
+ path = path.to_s
57
58
  path =~ /^(\.\/|\/)/ ? path : CSS.xpath_for(path, :prefix => ".//")
58
59
  }.flatten.uniq) + [ns])
59
60
  end
@@ -68,17 +69,21 @@ module Nokogiri
68
69
  # node.xpath('.//xmlns:name', node.root.namespaces)
69
70
  #
70
71
  # Custom XPath functions may also be defined. To define custom functions
71
- # create a class which subclasses XPathHandler and implement the
72
- # function you want to define. For example:
72
+ # create a class and implement the function you want to define.
73
+ # For example:
73
74
  #
74
- # node.xpath('.//title[regex(., "\w+")]', Class.new(XPathHandler) {
75
+ # node.xpath('.//title[regex(., "\w+")]', Class.new {
75
76
  # def regex node_set, regex
76
77
  # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
77
78
  # end
78
79
  # })
79
80
  #
80
81
  def xpath *paths
81
- handler = paths.last.is_a?(XPathHandler) ? paths.pop : nil
82
+ # Pop off our custom function handler if it exists
83
+ handler = ![
84
+ Hash, String, Symbol
85
+ ].include?(paths.last.class) ? paths.pop : nil
86
+
82
87
  ns = paths.last.is_a?(Hash) ? paths.pop : {}
83
88
 
84
89
  return NodeSet.new(document) unless document.root
@@ -112,19 +117,23 @@ module Nokogiri
112
117
  # node.css('div + p.green', 'div#one')
113
118
  #
114
119
  # Custom CSS pseudo classes may also be defined. To define custom pseudo
115
- # classes, create a class which subclasses SelectorHandler and implement
116
- # the the custom pseudo class you want defined. The first argument to
117
- # the method will be the current matching NodeSet. Any other arguments
118
- # are ones that you pass in. For example:
120
+ # classes, create a class and implement the custom pseudo class you
121
+ # want defined. The first argument to the method will be the current
122
+ # matching NodeSet. Any other arguments are ones that you pass in.
123
+ # For example:
119
124
  #
120
- # node.css('title:regex("\w+")', Class.new(SelectorHandler) {
125
+ # node.css('title:regex("\w+")', Class.new {
121
126
  # def regex node_set, regex
122
127
  # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
123
128
  # end
124
129
  # })
125
130
  #
126
131
  def css *rules
127
- handler = rules.last.is_a?(XPathHandler) ? rules.pop : nil
132
+ # Pop off our custom function handler if it exists
133
+ handler = ![
134
+ Hash, String, Symbol
135
+ ].include?(rules.last.class) ? rules.pop : nil
136
+
128
137
  ns = rules.last.is_a?(Hash) ? rules.pop : {}
129
138
 
130
139
  rules = rules.map { |rule|
@@ -135,7 +144,7 @@ module Nokogiri
135
144
  end
136
145
 
137
146
  def at path, ns = {}
138
- search("#{path}", ns).first
147
+ search(path, ns).first
139
148
  end
140
149
 
141
150
  def [](property)
@@ -147,10 +156,30 @@ module Nokogiri
147
156
  next_sibling
148
157
  end
149
158
 
159
+ def previous
160
+ previous_sibling
161
+ end
162
+
150
163
  def remove
151
164
  unlink
152
165
  end
153
166
 
167
+ ####
168
+ # Returns a hash containing the node's attributes. The key is the
169
+ # attribute name, the value is the attribute node (call to_s or value on it for the string).
170
+ def attributes
171
+ Hash[*(attribute_nodes.map { |node|
172
+ [node.name, node]
173
+ }.flatten)]
174
+ end
175
+
176
+ ###
177
+ # Remove the attribute named +name+
178
+ def remove_attribute name
179
+ attributes[name].remove if key? name
180
+ end
181
+ alias :delete :remove_attribute
182
+
154
183
  ####
155
184
  # Create nodes from +data+ and insert them before this node
156
185
  # (as a sibling).
@@ -225,10 +254,23 @@ module Nokogiri
225
254
  type == HTML_DOCUMENT_NODE
226
255
  end
227
256
 
228
- def to_html
229
- to_xml
257
+ def text?
258
+ type == TEXT_NODE
259
+ end
260
+
261
+ def read_only?
262
+ # According to gdome2, these are read-only node types
263
+ [NOTATION_NODE, ENTITY_NODE, ENTITY_DECL].include?(type)
264
+ end
265
+
266
+ def element?
267
+ type == ELEMENT_NODE
268
+ end
269
+ alias :elem? :element?
270
+
271
+ def to_s
272
+ document.xml? ? to_xml : to_html
230
273
  end
231
- alias :to_s :to_html
232
274
 
233
275
  def inner_html
234
276
  children.map { |x| x.to_html }.join
@@ -280,6 +322,12 @@ Node.replace requires a Node argument, and cannot accept a Document.
280
322
  end
281
323
  replace_with_node new_node
282
324
  end
325
+
326
+ def == other
327
+ return false unless other
328
+ return false unless other.respond_to?(:pointer_id)
329
+ pointer_id == other.pointer_id
330
+ end
283
331
  end
284
332
  end
285
333
  end
@@ -152,6 +152,12 @@ module Nokogiri
152
152
  end
153
153
  alias :text :inner_text
154
154
 
155
+ ###
156
+ # Get the inner html of all contained Node objects
157
+ def inner_html
158
+ collect{|j| j.inner_html}.join('')
159
+ end
160
+
155
161
  ###
156
162
  # Wrap this NodeSet with +html+ or the results of the builder in +blk+
157
163
  def wrap(html, &blk)
@@ -175,6 +181,10 @@ module Nokogiri
175
181
  map { |x| x.to_html }.join('')
176
182
  end
177
183
 
184
+ def to_xml *args
185
+ map { |x| x.to_xml(*args) }.join('')
186
+ end
187
+
178
188
  def size
179
189
  length
180
190
  end
@@ -3,6 +3,12 @@ module Nokogiri
3
3
  class Reader
4
4
  include Enumerable
5
5
 
6
+ def attributes
7
+ Hash[*(attribute_nodes.map { |node|
8
+ [node.name, node.to_s]
9
+ }.flatten)].merge(namespaces || {})
10
+ end
11
+
6
12
  def each(&block)
7
13
  while node = self.read
8
14
  block.call(node)
@@ -11,13 +11,13 @@ class TestAlter < Nokogiri::TestCase
11
11
  def test_before
12
12
  test0 = "<link rel='stylesheet' href='test0.css' />"
13
13
  @basic.at("link").before(test0)
14
- assert_equal 'test0.css', @basic.at("link").attributes['href']
14
+ assert_equal 'test0.css', @basic.at("link").attributes['href'].to_s
15
15
  end
16
16
 
17
17
  def test_after
18
18
  test_inf = "<link rel='stylesheet' href='test_inf.css' />"
19
19
  @basic.search("link")[-1].after(test_inf)
20
- assert_equal 'test_inf.css', @basic.search("link")[-1].attributes['href']
20
+ assert_equal 'test_inf.css', @basic.search("link")[-1]['href']
21
21
  end
22
22
 
23
23
  def test_wrap
@@ -29,10 +29,10 @@ class TestAlter < Nokogiri::TestCase
29
29
  def test_add_class
30
30
  first_p = (@basic/"p:first").add_class("testing123")
31
31
  assert first_p[0].get_attribute("class").split(" ").include?("testing123")
32
- assert((Nokogiri.Hpricot(@basic.to_html)/"p:first")[0].attributes["class"].split(" ").include?("testing123"))
32
+ assert((Nokogiri.Hpricot(@basic.to_html)/"p:first")[0]["class"].split(" ").include?("testing123"))
33
33
  ####
34
34
  # Modified. We do not support OB1 bug.
35
- assert !(Nokogiri.Hpricot(@basic.to_html)/"p:gt(1)")[0].attributes["class"].split(" ").include?("testing123")
35
+ assert !(Nokogiri.Hpricot(@basic.to_html)/"p:gt(1)")[0]["class"].split(" ").include?("testing123")
36
36
  end
37
37
 
38
38
  def test_change_attributes
@@ -40,9 +40,9 @@ class TestAlter < Nokogiri::TestCase
40
40
  all_as = (@basic/"a").attr("href", "http://my_new_href.com")
41
41
  all_lb = (@basic/"link").attr("href") { |e| e.name }
42
42
  GC.start # try to shake out GC bugs with xpath and node sets.
43
- assert_changed(@basic, "p", all_ps) {|p| p.attributes["title"] == "Some Title"}
44
- assert_changed(@basic, "a", all_as) {|a| a.attributes["href"] == "http://my_new_href.com"}
45
- assert_changed(@basic, "link", all_lb) {|a| a.attributes["href"] == "link" }
43
+ assert_changed(@basic, "p", all_ps) {|p| p.attributes["title"].to_s == "Some Title"}
44
+ assert_changed(@basic, "a", all_as) {|a| a.attributes["href"].to_s == "http://my_new_href.com"}
45
+ assert_changed(@basic, "link", all_lb) {|a| a.attributes["href"].to_s == "link" }
46
46
  end
47
47
 
48
48
  def test_remove_attr
@@ -33,7 +33,7 @@ class TestParser < Nokogiri::TestCase
33
33
 
34
34
  def test_filter_contains
35
35
  @basic = Hpricot.parse(TestFiles::BASIC)
36
- assert_equal '<title>Sample XHTML</title>', @basic.search("title:contains('Sample')").to_s
36
+ assert_equal '<title>Sample XHTML</title>', @basic.search("title:contains('Sample')").to_s.chomp
37
37
  end
38
38
 
39
39
  def test_get_element_by_id
@@ -98,9 +98,9 @@ class TestParser < Nokogiri::TestCase
98
98
 
99
99
  def test_positional
100
100
  h = Nokogiri.Hpricot( "<div><br/><p>one</p><p>two</p></div>" )
101
- assert_equal "<p>one</p>", h.search("div/p:eq(1)").to_s # MODIFIED: eq(0) -> eq(1), and removed initial '//'
102
- assert_equal "<p>one</p>", h.search("div/p:first").to_s # MODIFIED: removed initial '//'
103
- assert_equal "<p>one</p>", h.search("div/p:first()").to_s # MODIFIED: removed initial '//'
101
+ assert_equal "<p>one</p>", h.search("div/p:eq(1)").to_s.chomp # MODIFIED: eq(0) -> eq(1), and removed initial '//'
102
+ assert_equal "<p>one</p>", h.search("div/p:first").to_s.chomp # MODIFIED: removed initial '//'
103
+ assert_equal "<p>one</p>", h.search("div/p:first()").to_s.chomp # MODIFIED: removed initial '//'
104
104
  end
105
105
 
106
106
  def test_pace
@@ -209,7 +209,7 @@ class TestParser < Nokogiri::TestCase
209
209
  @basic.search('p:eq(3)').to_html # under Hpricot this was eq(2)
210
210
  ##
211
211
  # MODIFIED: to be blank-agnostic, because Nokogiri's to_html is slightly different compared to Hpricot.
212
- assert_equal '<p class="last final"> <b>THE FINAL PARAGRAPH</b> </p>',
212
+ assert_equal '<p class="last final"><b>THE FINAL PARAGRAPH</b></p>',
213
213
  @basic.search('p:last').to_html.gsub(/\s+/,' ')
214
214
  assert_equal 'last final', @basic.search('p:last-of-type').first.get_attribute('class') # MODIFIED to not have '//' prefix
215
215
  end
@@ -409,7 +409,8 @@ class TestParser < Nokogiri::TestCase
409
409
  def test_keep_cdata
410
410
  str = %{<script> /*<![CDATA[*/
411
411
  /*]]>*/ </script>}
412
- assert_match str, Nokogiri.Hpricot(str).to_html
412
+ # MODIFIED: if you want the cdata, to_xml it
413
+ assert_match str, Nokogiri.Hpricot(str).to_xml
413
414
  end
414
415
 
415
416
  def test_namespace
@@ -66,7 +66,6 @@ class TestPreserved < Nokogiri::TestCase
66
66
  str = %{<a href="http://google.com/search?q=nokogiri&amp;l=en">Google</a>}
67
67
  link = (doc = Nokogiri.Hpricot(str)).at(:a)
68
68
  assert_equal "http://google.com/search?q=nokogiri&l=en", link['href']
69
- assert_equal "http://google.com/search?q=nokogiri&l=en", link.attributes['href']
70
69
  assert_equal "http://google.com/search?q=nokogiri&l=en", link.get_attribute('href')
71
70
  assert_equal "http://google.com/search?q=nokogiri&l=en", link.raw_attributes['href']
72
71
  assert_equal str, link.to_html
@@ -0,0 +1,21 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+
3
+ require 'nkf'
4
+
5
+ module Nokogiri
6
+ module HTML
7
+ class TestNode < Nokogiri::TestCase
8
+ def test_to_html_does_not_contain_entities
9
+ html = NKF.nkf("-e --msdos", <<-EOH)
10
+ <html><body>
11
+ <p> test paragraph
12
+ foo bar </p>
13
+ </body></html>
14
+ EOH
15
+ nokogiri = Nokogiri::HTML.parse(html)
16
+ assert_equal "<p>testparagraph\r\nfoobar</p>",
17
+ nokogiri.at("p").to_html.gsub(/ /, '')
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,15 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+
3
+ module Nokogiri
4
+ module XML
5
+ class TestAttr < Nokogiri::TestCase
6
+ def test_unlink
7
+ xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE)
8
+ address = xml.xpath('/staff/employee/address').first
9
+ assert_equal 'Yes', address['domestic']
10
+ address.attribute_nodes.first.unlink
11
+ assert_nil address['domestic']
12
+ end
13
+ end
14
+ end
15
+ end
@@ -11,6 +11,44 @@ module Nokogiri
11
11
  address.ancestors.map { |x| x.name }
12
12
  end
13
13
 
14
+ def test_read_only?
15
+ xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE)
16
+ assert entity_decl = xml.internal_subset.children.find { |x|
17
+ x.type == Node::ENTITY_DECL
18
+ }
19
+ assert entity_decl.read_only?
20
+ end
21
+
22
+ def test_remove_attribute
23
+ xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE)
24
+ address = xml.xpath('/staff/employee/address').first
25
+ assert_equal 'Yes', address['domestic']
26
+ address.remove_attribute 'domestic'
27
+ assert_nil address['domestic']
28
+ end
29
+
30
+ def test_delete
31
+ xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE)
32
+ address = xml.xpath('/staff/employee/address').first
33
+ assert_equal 'Yes', address['domestic']
34
+ address.delete 'domestic'
35
+ assert_nil address['domestic']
36
+ end
37
+
38
+ def test_angry_add_child
39
+ xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE)
40
+ child = xml.css('employee').first
41
+
42
+ assert new_child = child.children.first
43
+
44
+ last = child.children.last
45
+
46
+ # Magic! Don't try this at home folks
47
+ child.add_child(new_child)
48
+ assert_equal new_child, child.children.last
49
+ assert_equal last, child.children.last
50
+ end
51
+
14
52
  def test_add_child
15
53
  xml = Nokogiri::XML(<<-eoxml)
16
54
  <root>
@@ -18,6 +56,7 @@ module Nokogiri
18
56
  </root>
19
57
  eoxml
20
58
  text_node = Nokogiri::XML::Text.new('hello', xml)
59
+ assert_equal Nokogiri::XML::Node::TEXT_NODE, text_node.type
21
60
  xml.root.add_child text_node
22
61
  assert_match 'hello', xml.to_s
23
62
  end
@@ -40,12 +79,49 @@ module Nokogiri
40
79
  </root>
41
80
  eoxml
42
81
  b_node = Nokogiri::XML::Node.new('a', xml)
82
+ assert_equal Nokogiri::XML::Node::ELEMENT_NODE, b_node.type
43
83
  b_node.content = 'first'
44
84
  a_node = xml.xpath('//a').first
45
85
  a_node.add_previous_sibling(b_node)
46
86
  assert_equal('first', xml.xpath('//a').first.text)
47
87
  end
48
88
 
89
+ def test_add_previous_sibling_merge
90
+ xml = Nokogiri::XML(<<-eoxml)
91
+ <root>
92
+ <a>Hello world</a>
93
+ </root>
94
+ eoxml
95
+
96
+ assert a_tag = xml.css('a').first
97
+
98
+ left_space = a_tag.previous
99
+ right_space = a_tag.next
100
+ assert left_space.text?
101
+ assert right_space.text?
102
+
103
+ left_space.add_previous_sibling(right_space)
104
+ assert_equal left_space, right_space
105
+ end
106
+
107
+ def test_add_next_sibling_merge
108
+ xml = Nokogiri::XML(<<-eoxml)
109
+ <root>
110
+ <a>Hello world</a>
111
+ </root>
112
+ eoxml
113
+
114
+ assert a_tag = xml.css('a').first
115
+
116
+ left_space = a_tag.previous
117
+ right_space = a_tag.next
118
+ assert left_space.text?
119
+ assert right_space.text?
120
+
121
+ right_space.add_next_sibling(left_space)
122
+ assert_equal left_space, right_space
123
+ end
124
+
49
125
  def test_find_by_css_with_tilde_eql
50
126
  xml = Nokogiri::XML.parse(<<-eoxml)
51
127
  <root>
@@ -177,8 +253,9 @@ module Nokogiri
177
253
  assert node = xml.search('//address')[2]
178
254
  attr = node.attributes
179
255
  assert_equal 2, attr.size
180
- assert_equal 'Yes', attr['domestic']
181
- assert_equal 'No', attr['street']
256
+ assert_equal 'Yes', attr['domestic'].value
257
+ assert_equal 'Yes', attr['domestic'].to_s
258
+ assert_equal 'No', attr['street'].value
182
259
  end
183
260
 
184
261
  def test_path
@@ -188,6 +265,15 @@ module Nokogiri
188
265
  assert_equal('/staff/employee[1]', node.path)
189
266
  end
190
267
 
268
+ def test_search_by_symbol
269
+ xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE)
270
+ assert set = xml.search(:employee)
271
+ assert 5, set.length
272
+
273
+ assert node = xml.at(:employee)
274
+ assert node.text =~ /EMP0001/
275
+ end
276
+
191
277
  def test_new_node
192
278
  xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE)
193
279
  node = Nokogiri::XML::Node.new('form', xml)
@@ -204,6 +290,13 @@ module Nokogiri
204
290
  assert_equal('hello world!', node.content)
205
291
  end
206
292
 
293
+ def test_whitespace_nodes
294
+ doc = Nokogiri::XML.parse("<root><b>Foo</b>\n<i>Bar</i> <p>Bazz</p></root>")
295
+ children = doc.at('//root').children.collect{|j| j.to_s}
296
+ assert_equal "\n", children[1]
297
+ assert_equal " ", children[3]
298
+ end
299
+
207
300
  def test_replace
208
301
  xml = Nokogiri::XML.parse(File.read(XML_FILE))
209
302
  set = xml.search('//employee')
@@ -278,6 +371,24 @@ EOF
278
371
  assert_equal "hello c", xml.search("//c:div", xml.namespaces).first.inner_text
279
372
  end
280
373
 
374
+ def test_namespace
375
+ xml = Nokogiri::XML.parse(<<-EOF)
376
+ <x xmlns:a='http://foo.com/' xmlns:b='http://bar.com/'>
377
+ <y xmlns:c='http://bazz.com/'>
378
+ <a:div>hello a</a:div>
379
+ <b:div>hello b</b:div>
380
+ <c:div>hello c</c:div>
381
+ <div>hello moon</div>
382
+ </y>
383
+ </x>
384
+ EOF
385
+ set = xml.search("//y/*")
386
+ assert_equal "a", set[0].namespace
387
+ assert_equal "b", set[1].namespace
388
+ assert_equal "c", set[2].namespace
389
+ assert_equal nil, set[3].namespace
390
+ end
391
+
281
392
  end
282
393
  end
283
394
  end