nokogiri 1.2.1 → 1.2.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (75) hide show
  1. data/.autotest +15 -0
  2. data/{History.ja.txt → CHANGELOG.ja.rdoc} +30 -2
  3. data/{History.txt → CHANGELOG.rdoc} +28 -2
  4. data/Manifest.txt +13 -7
  5. data/{README.ja.txt → README.ja.rdoc} +3 -1
  6. data/{README.txt → README.rdoc} +7 -1
  7. data/Rakefile +8 -25
  8. data/ext/nokogiri/extconf.rb +4 -4
  9. data/ext/nokogiri/html_entity_lookup.c +30 -0
  10. data/ext/nokogiri/html_entity_lookup.h +8 -0
  11. data/ext/nokogiri/native.c +22 -0
  12. data/ext/nokogiri/native.h +27 -4
  13. data/ext/nokogiri/xml_document.c +31 -4
  14. data/ext/nokogiri/xml_document.h +11 -0
  15. data/ext/nokogiri/xml_document_fragment.c +1 -1
  16. data/ext/nokogiri/xml_node.c +71 -58
  17. data/ext/nokogiri/xml_node_set.c +26 -0
  18. data/ext/nokogiri/xml_reader.c +4 -2
  19. data/ext/nokogiri/xml_sax_parser.c +0 -37
  20. data/ext/nokogiri/xml_sax_push_parser.c +2 -2
  21. data/ext/nokogiri/xml_xpath_context.c +34 -7
  22. data/lib/nokogiri.rb +25 -0
  23. data/lib/nokogiri/css/generated_tokenizer.rb +2 -2
  24. data/lib/nokogiri/css/node.rb +2 -0
  25. data/lib/nokogiri/css/parser.rb +3 -2
  26. data/lib/nokogiri/html.rb +9 -52
  27. data/lib/nokogiri/html/document.rb +2 -0
  28. data/lib/nokogiri/html/entity_lookup.rb +11 -0
  29. data/lib/nokogiri/version.rb +1 -1
  30. data/lib/nokogiri/xml.rb +1 -2
  31. data/lib/nokogiri/xml/builder.rb +18 -5
  32. data/lib/nokogiri/xml/document.rb +15 -1
  33. data/lib/nokogiri/xml/fragment_handler.rb +34 -0
  34. data/lib/nokogiri/xml/node.rb +104 -29
  35. data/lib/nokogiri/xml/node_set.rb +12 -10
  36. data/lib/nokogiri/xml/sax/parser.rb +3 -3
  37. data/lib/xsd/xmlparser/nokogiri.rb +53 -0
  38. data/tasks/test.rb +7 -5
  39. data/test/css/test_nthiness.rb +1 -0
  40. data/test/css/test_parser.rb +1 -0
  41. data/test/css/test_tokenizer.rb +1 -0
  42. data/test/css/test_xpath_visitor.rb +1 -0
  43. data/test/helper.rb +4 -0
  44. data/test/hpricot/test_alter.rb +1 -0
  45. data/test/html/sax/test_parser.rb +13 -0
  46. data/test/html/test_builder.rb +21 -0
  47. data/test/html/test_document.rb +36 -0
  48. data/test/html/test_document_encoding.rb +46 -0
  49. data/test/html/test_named_characters.rb +14 -0
  50. data/test/html/test_node.rb +80 -0
  51. data/test/test_convert_xpath.rb +1 -0
  52. data/test/test_css_cache.rb +1 -0
  53. data/test/test_nokogiri.rb +8 -0
  54. data/test/xml/sax/test_parser.rb +6 -0
  55. data/test/xml/sax/test_push_parser.rb +1 -0
  56. data/test/xml/test_builder.rb +9 -0
  57. data/test/xml/test_cdata.rb +1 -0
  58. data/test/xml/test_comment.rb +1 -0
  59. data/test/xml/test_document.rb +58 -0
  60. data/test/xml/test_document_encoding.rb +15 -14
  61. data/test/xml/test_document_fragment.rb +6 -0
  62. data/test/xml/test_dtd.rb +1 -0
  63. data/test/xml/test_dtd_encoding.rb +1 -0
  64. data/test/xml/test_entity_reference.rb +1 -0
  65. data/test/xml/test_node.rb +52 -4
  66. data/test/xml/test_node_encoding.rb +1 -0
  67. data/test/xml/test_node_set.rb +21 -1
  68. data/test/xml/test_processing_instruction.rb +1 -0
  69. data/test/xml/test_reader_encoding.rb +1 -0
  70. data/test/xml/test_unparented_node.rb +381 -0
  71. data/test/xml/test_xpath.rb +1 -0
  72. metadata +34 -16
  73. data/lib/nokogiri/xml/after_handler.rb +0 -18
  74. data/lib/nokogiri/xml/before_handler.rb +0 -33
  75. data/vendor/hoe.rb +0 -1020
@@ -1,10 +1,16 @@
1
1
  module Nokogiri
2
2
  module XML
3
+ ####
4
+ # A NodeSet contains a list of Nokogiri::XML::Node objects. Typically
5
+ # a NodeSet is returned as a result of searching a Document via
6
+ # Nokogiri::XML::Node#css or Nokogiri::XML::Node#xpath
3
7
  class NodeSet
4
8
  include Enumerable
5
9
 
10
+ # The Document this NodeSet is associated with
6
11
  attr_accessor :document
7
12
 
13
+ # Create a NodeSet associated with +document+, populated from +list+ (empty by default)
8
14
  def initialize document, list = []
9
15
  @document = document
10
16
  list.each { |x| self << x }
@@ -42,23 +48,19 @@ module Nokogiri
42
48
  end
43
49
 
44
50
  alias :<< :push
45
-
46
- ###
47
- # Unlink this NodeSet and all Node objects it contains from their
48
- # current context.
49
- def unlink
50
- each { |node| node.unlink }
51
- self
52
- end
53
51
  alias :remove :unlink
54
52
 
55
53
  ###
56
54
  # Search this document for +paths+
55
+ #
56
+ # For more information see Nokogiri::XML::Node#css and
57
+ # Nokogiri::XML::Node#xpath
57
58
  def search *paths
59
+ ns = paths.last.is_a?(Hash) ? paths.pop : document.root.namespaces
58
60
  sub_set = NodeSet.new(document)
59
61
  document.decorate(sub_set)
60
62
  each do |node|
61
- node.search(*paths).each do |sub_node|
63
+ node.search(*(paths + [ns])).each do |sub_node|
62
64
  sub_set << sub_node
63
65
  end
64
66
  end
@@ -71,7 +73,7 @@ module Nokogiri
71
73
  ###
72
74
  # If path is a string, search this document for +path+ returning the
73
75
  # first Node. Otherwise, index in to the array with +path+.
74
- def at path, ns = {}
76
+ def at path, ns = document.root ? document.root.namespaces : {}
75
77
  return self[path] if path.is_a?(Numeric)
76
78
  search(path, ns).first
77
79
  end
@@ -28,9 +28,9 @@ module Nokogiri
28
28
  'ASCII' => 22, # pure ASCII
29
29
  }
30
30
 
31
- attr_accessor :document
32
- def initialize(doc = XML::SAX::Document.new)
33
- @encoding = 'ASCII'
31
+ attr_accessor :document, :encoding
32
+ def initialize(doc = XML::SAX::Document.new, encoding = 'ASCII')
33
+ @encoding = encoding
34
34
  @document = doc
35
35
  end
36
36
 
@@ -0,0 +1,53 @@
1
+ require 'nokogiri'
2
+
3
+ module XSD
4
+ module XMLParser
5
+ ###
6
+ # Nokogiri XML parser for soap4r.
7
+ #
8
+ # Nokogiri may be used as the XML parser in soap4r. Simply require
9
+ # 'xsd/xmlparser/nokogiri' in your soap4r applications, and soap4r
10
+ # will use Nokogiri as its XML parser. No other changes should be
11
+ # required to use Nokogiri as the XML parser.
12
+ #
13
+ # Example (using UW ITS Web Services):
14
+ #
15
+ # require 'rubygems'
16
+ # gem 'soap4r'
17
+ # require 'xsd/xmlparser/nokogiri'
18
+ # require 'defaultDriver'
19
+ #
20
+ # obj = AvlPortType.new
21
+ # obj.getLatestByRout(obj.getAgencies, 8).each do |event|
22
+ # ...
23
+ # end
24
+ class Nokogiri < XSD::XMLParser::Parser
25
+ def initialize host, opt = {}
26
+ super
27
+ @parser = ::Nokogiri::XML::SAX::Parser.new(self, @charset || 'UTF-8')
28
+ end
29
+
30
+ def do_parse string_or_readable
31
+ @parser.parse(string_or_readable)
32
+ end
33
+
34
+ def start_element name, attrs = []
35
+ super(name, Hash[*attrs])
36
+ end
37
+
38
+ def error msg
39
+ raise ParseError.new(msg)
40
+ end
41
+ alias :warning :error
42
+
43
+ def cdata_block string
44
+ characters string
45
+ end
46
+
47
+ %w{ start_document end_document comment }.each do |name|
48
+ class_eval %{ def #{name}(*args); end }
49
+ end
50
+ add_factory(self)
51
+ end
52
+ end
53
+ end
@@ -11,7 +11,7 @@ class NokogiriTestTask < Rake::TestTask
11
11
  end
12
12
  self.test_files = FileList['test/**/test_*.rb'] +
13
13
  FileList['test/**/*_test.rb']
14
- self.verbose = true
14
+ self.verbose = "verbose"
15
15
  self.warning = true
16
16
  end
17
17
  end
@@ -19,7 +19,7 @@ end
19
19
  desc "run test suite under valgrind with basic ruby options"
20
20
  NokogiriTestTask.new('test:valgrind').extend(Module.new {
21
21
  def ruby *args
22
- cmd = "valgrind #{VALGRIND_BASIC_OPTS} #{RUBY} #{args.join(' ')}"
22
+ cmd = "valgrind #{VALGRIND_BASIC_OPTS} #{RUBY} #{args.join(' ')} test/test_nokogiri.rb --verbose=verbose"
23
23
  puts cmd
24
24
  system cmd
25
25
  end
@@ -28,7 +28,7 @@ NokogiriTestTask.new('test:valgrind').extend(Module.new {
28
28
  desc "run test suite under valgrind with memory-fill ruby options"
29
29
  NokogiriTestTask.new('test:valgrind_mem').extend(Module.new {
30
30
  def ruby *args
31
- cmd = "valgrind #{VALGRIND_BASIC_OPTS} --freelist-vol=100000000 --malloc-fill=6D --free-fill=66 #{RUBY} #{args.join(' ')}"
31
+ cmd = "valgrind #{VALGRIND_BASIC_OPTS} --freelist-vol=100000000 --malloc-fill=6D --free-fill=66 #{RUBY} #{args.join(' ')} test/test_nokogiri.rb --verbose=verbose"
32
32
  puts cmd
33
33
  system cmd
34
34
  end
@@ -37,7 +37,7 @@ NokogiriTestTask.new('test:valgrind_mem').extend(Module.new {
37
37
  desc "run test suite under valgrind with memory-zero ruby options"
38
38
  NokogiriTestTask.new('test:valgrind_mem0').extend(Module.new {
39
39
  def ruby *args
40
- cmd = "valgrind #{VALGRIND_BASIC_OPTS} --freelist-vol=100000000 --malloc-fill=00 --free-fill=00 #{RUBY} #{args.join(' ')}"
40
+ cmd = "valgrind #{VALGRIND_BASIC_OPTS} --freelist-vol=100000000 --malloc-fill=00 --free-fill=00 #{RUBY} #{args.join(' ')} test/test_nokogiri.rb --verbose=verbose"
41
41
  puts cmd
42
42
  system cmd
43
43
  end
@@ -116,8 +116,10 @@ namespace :test do
116
116
  end
117
117
 
118
118
  test_results = {}
119
+ libxslt = Dir[File.join(MULTI_XML, 'install', 'libxslt*')].first
119
120
  Dir[File.join(MULTI_XML, 'install', '*')].each do |xml2_version|
120
- extopts = "--with-xml2-include=#{xml2_version}/include/libxml2 --with-xml2-lib=#{xml2_version}/lib --with-xslt-dir=/usr/local"
121
+ next unless xml2_version =~ /libxml2/
122
+ extopts = "--with-xml2-include=#{xml2_version}/include/libxml2 --with-xml2-lib=#{xml2_version}/lib --with-xslt-dir=#{libxslt}"
121
123
  cmd = "#{$0} clean test EXTOPTS='#{extopts}'"
122
124
 
123
125
  version = File.basename(xml2_version)
@@ -4,6 +4,7 @@ module Nokogiri
4
4
  module CSS
5
5
  class TestNthiness < Nokogiri::TestCase
6
6
  def setup
7
+ super
7
8
  doc = <<EOF
8
9
  <html>
9
10
  <table>
@@ -4,6 +4,7 @@ module Nokogiri
4
4
  module CSS
5
5
  class TestParser < Nokogiri::TestCase
6
6
  def setup
7
+ super
7
8
  @parser = Nokogiri::CSS::Parser.new
8
9
  end
9
10
 
@@ -4,6 +4,7 @@ module Nokogiri
4
4
  module CSS
5
5
  class TestTokenizer < Nokogiri::TestCase
6
6
  def setup
7
+ super
7
8
  @scanner = Nokogiri::CSS::Tokenizer.new
8
9
  end
9
10
 
@@ -4,6 +4,7 @@ module Nokogiri
4
4
  module CSS
5
5
  class TestXPathVisitor < Nokogiri::TestCase
6
6
  def setup
7
+ super
7
8
  @parser = Nokogiri::CSS::Parser.new
8
9
  end
9
10
 
@@ -20,6 +20,10 @@ module Nokogiri
20
20
  undef :default_test
21
21
  end
22
22
 
23
+ def setup
24
+ warn "#{name}" if ENV['TESTOPTS'] == '-v'
25
+ end
26
+
23
27
  def teardown
24
28
  if ENV['NOKOGIRI_GC']
25
29
  STDOUT.putc '!'
@@ -5,6 +5,7 @@ class TestAlter < Nokogiri::TestCase
5
5
  include Nokogiri
6
6
 
7
7
  def setup
8
+ super
8
9
  @basic = Hpricot.parse(TestFiles::BASIC)
9
10
  end
10
11
 
@@ -5,6 +5,7 @@ module Nokogiri
5
5
  module SAX
6
6
  class TestParser < Nokogiri::SAX::TestCase
7
7
  def setup
8
+ super
8
9
  @parser = HTML::SAX::Parser.new(Doc.new)
9
10
  end
10
11
 
@@ -13,6 +14,18 @@ module Nokogiri
13
14
  assert_equal 1110, @parser.document.end_elements.length
14
15
  end
15
16
 
17
+ def test_parse_file_non_existant
18
+ assert_raise Errno::ENOENT do
19
+ @parser.parse_file('foo')
20
+ end
21
+ end
22
+
23
+ def test_parse_file_with_dir
24
+ assert_raise Errno::EISDIR do
25
+ @parser.parse_file(File.dirname(__FILE__))
26
+ end
27
+ end
28
+
16
29
  def test_parse_document
17
30
  @parser.parse_memory(<<-eoxml)
18
31
  <p>Paragraph 1</p>
@@ -3,6 +3,15 @@ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
3
3
  module Nokogiri
4
4
  module HTML
5
5
  class TestBuilder < Nokogiri::TestCase
6
+ def test_hash_as_attributes_for_attribute_method
7
+ html = Nokogiri::HTML::Builder.new {
8
+ div.slide(:class => 'another_class') {
9
+ span 'Slide 1'
10
+ }
11
+ }.to_html
12
+ assert_match 'class="slide another_class"', html
13
+ end
14
+
6
15
  def test_hash_as_attributes
7
16
  builder = Nokogiri::HTML::Builder.new do
8
17
  div(:id => 'awesome') {
@@ -13,6 +22,18 @@ module Nokogiri
13
22
  builder.doc.root.to_html.gsub(/\n/, '').gsub(/>\s*</, '><'))
14
23
  end
15
24
 
25
+ def test_tag_nesting
26
+ builder = Nokogiri::HTML::Builder.new do
27
+ span.left ''
28
+ span.middle {
29
+ div.icon ''
30
+ }
31
+ span.right ''
32
+ end
33
+ assert node = builder.doc.css('span.right').first
34
+ assert_equal 'middle', node.previous_sibling['class']
35
+ end
36
+
16
37
  def test_has_ampersand
17
38
  builder = Nokogiri::HTML::Builder.new do
18
39
  div.rad.thing! {
@@ -4,9 +4,35 @@ module Nokogiri
4
4
  module HTML
5
5
  class TestDocument < Nokogiri::TestCase
6
6
  def setup
7
+ super
7
8
  @html = Nokogiri::HTML.parse(File.read(HTML_FILE))
8
9
  end
9
10
 
11
+ def test_swap_should_not_exist
12
+ assert_raises(NoMethodError) {
13
+ @html.swap
14
+ }
15
+ end
16
+
17
+ def test_namespace_should_not_exist
18
+ assert_raises(NoMethodError) {
19
+ @html.namespace
20
+ }
21
+ end
22
+
23
+ def test_root_node_parent_is_document
24
+ parent = @html.root.parent
25
+ assert_equal @html, parent
26
+ assert_instance_of Nokogiri::HTML::Document, parent
27
+ end
28
+
29
+ def test_parse_empty_document
30
+ doc = Nokogiri::HTML("\n")
31
+ assert_equal 0, doc.css('a').length
32
+ assert_equal 0, doc.xpath('//a').length
33
+ assert_equal 0, doc.search('//a').length
34
+ end
35
+
10
36
  def test_HTML_function
11
37
  html = Nokogiri::HTML(File.read(HTML_FILE))
12
38
  assert html.html?
@@ -136,6 +162,10 @@ module Nokogiri
136
162
  def test_dup_document
137
163
  assert dup = @html.dup
138
164
  assert_not_equal dup, @html
165
+ assert @html.html?
166
+ assert_instance_of Nokogiri::HTML::Document, dup
167
+ assert dup.html?, 'duplicate should be html'
168
+ assert_equal @html.to_s, dup.to_s
139
169
  end
140
170
 
141
171
  def test_dup_document_shallow
@@ -167,6 +197,12 @@ module Nokogiri
167
197
  assert_equal('<p>Helloworld!</p>', node.inner_html.gsub(/\s/, ''))
168
198
  end
169
199
 
200
+ def test_fragment_contains_text_node
201
+ fragment = Nokogiri::HTML.fragment('fooo')
202
+ assert_equal 1, fragment.children.length
203
+ assert_equal 'fooo', fragment.inner_text
204
+ end
205
+
170
206
  def test_fragment_includes_two_tags
171
207
  assert_equal 2, Nokogiri::HTML.fragment("<br/><hr/>").children.length
172
208
  end
@@ -0,0 +1,46 @@
1
+ # -*- coding: utf-8 -*-
2
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
3
+
4
+ module Nokogiri
5
+ module HTML
6
+ if RUBY_VERSION =~ /^1\.9/
7
+ class TestDocumentEncoding < Nokogiri::TestCase
8
+ def test_default_to_encoding_from_string
9
+ bad_charset = <<-eohtml
10
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
11
+ <html>
12
+ <head>
13
+ <meta http-equiv="Content-Type" content="text/html; charset=charset=UTF-8">
14
+ </head>
15
+ <body>
16
+ <a href="http://tenderlovemaking.com/">blah!</a>
17
+ </body>
18
+ </html>
19
+ eohtml
20
+ doc = Nokogiri::HTML(bad_charset)
21
+ assert_equal bad_charset.encoding.name, doc.encoding
22
+
23
+ doc = Nokogiri.parse(bad_charset)
24
+ assert_equal bad_charset.encoding.name, doc.encoding
25
+ end
26
+
27
+ def test_encoding_with_a_bad_name
28
+ bad_charset = <<-eohtml
29
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
30
+ <html>
31
+ <head>
32
+ <meta http-equiv="Content-Type" content="text/html; charset=charset=UTF-8">
33
+ </head>
34
+ <body>
35
+ <a href="http://tenderlovemaking.com/">blah!</a>
36
+ </body>
37
+ </html>
38
+ eohtml
39
+ doc = Nokogiri::HTML(bad_charset, nil, 'askldjfhalsdfjhlkasdfjh')
40
+ assert_equal ['http://tenderlovemaking.com/'],
41
+ doc.css('a').map { |a| a['href'] }
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,14 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+
3
+ module Nokogiri
4
+ module HTML
5
+ class TestNamedCharacters < Nokogiri::TestCase
6
+ def test_named_character
7
+ copy = NamedCharacters.get('copy')
8
+ assert_equal 169, NamedCharacters['copy']
9
+ assert_equal copy.value, NamedCharacters['copy']
10
+ assert copy.description
11
+ end
12
+ end
13
+ end
14
+ end
@@ -5,6 +5,86 @@ require 'nkf'
5
5
  module Nokogiri
6
6
  module HTML
7
7
  class TestNode < Nokogiri::TestCase
8
+ def setup
9
+ super
10
+ @html = Nokogiri::HTML(<<-eohtml)
11
+ <html>
12
+ <head></head>
13
+ <body>
14
+ <div>first</div>
15
+ </body>
16
+ </html>
17
+ eohtml
18
+ end
19
+
20
+ def test_swap
21
+ @html.at('div').swap('<a href="foo">bar</a>')
22
+ a_tag = @html.css('a').first
23
+ assert_equal 'body', a_tag.parent.name
24
+ assert_equal 0, @html.css('div').length
25
+ end
26
+
27
+ def test_attribute_decodes_entities
28
+ node = @html.at('div')
29
+ node['href'] = 'foo&bar'
30
+ assert_equal 'foo&bar', node['href']
31
+ node['href'] += '&baz'
32
+ assert_equal 'foo&bar&baz', node['href']
33
+ end
34
+
35
+
36
+ def test_before_will_prepend_text_nodes
37
+ assert node = @html.at('//body').children.first
38
+ node.before "some text"
39
+ assert_equal 'some text', @html.at('//body').children[0].content.strip
40
+ end
41
+
42
+ def test_inner_html=
43
+ assert div = @html.at('//div')
44
+ div.inner_html = '<span>testing</span>'
45
+ assert_equal 'span', div.children.first.name
46
+
47
+ div.inner_html = 'testing'
48
+ assert_equal 'testing', div.content
49
+ end
50
+
51
+ def test_fragment
52
+ fragment = @html.fragment(<<-eohtml)
53
+ hello
54
+ <div class="foo">
55
+ <p>bar</p>
56
+ </div>
57
+ world
58
+ eohtml
59
+ assert_match(/^hello/, fragment.inner_html.strip)
60
+ assert_equal 3, fragment.children.length
61
+ assert p_tag = fragment.css('p').first
62
+ assert_equal 'div', p_tag.parent.name
63
+ assert_equal 'foo', p_tag.parent['class']
64
+ end
65
+
66
+ def test_after_will_append_text_nodes
67
+ assert node = @html.at('//body/div')
68
+ node.after "some text"
69
+ assert_equal 'some text', node.next.text.strip
70
+ end
71
+
72
+ def test_replace
73
+ doc = Nokogiri::HTML(<<-eohtml)
74
+ <html>
75
+ <head></head>
76
+ <body>
77
+ <center><img src='logo.gif' /></center>
78
+ </body>
79
+ </html>
80
+ eohtml
81
+ center = doc.at("//center")
82
+ img = center.search("//img")
83
+ assert_raises ArgumentError do
84
+ center.replace img
85
+ end
86
+ end
87
+
8
88
  def test_to_html_does_not_contain_entities
9
89
  html = NKF.nkf("-e --msdos", <<-EOH)
10
90
  <html><body>