nokogiri 1.1.1-java → 1.2.0-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (88)
  1. data/History.ja.txt +34 -0
  2. data/History.txt +36 -0
  3. data/Manifest.txt +21 -0
  4. data/README.ja.txt +1 -1
  5. data/README.txt +1 -1
  6. data/Rakefile +26 -88
  7. data/ext/nokogiri/extconf.rb +48 -63
  8. data/ext/nokogiri/html_document.c +90 -29
  9. data/ext/nokogiri/html_sax_parser.c +23 -2
  10. data/ext/nokogiri/native.c +18 -8
  11. data/ext/nokogiri/native.h +22 -0
  12. data/ext/nokogiri/xml_attr.c +83 -0
  13. data/ext/nokogiri/xml_attr.h +9 -0
  14. data/ext/nokogiri/xml_cdata.c +1 -1
  15. data/ext/nokogiri/xml_document.c +84 -18
  16. data/ext/nokogiri/xml_document_fragment.c +38 -0
  17. data/ext/nokogiri/xml_document_fragment.h +10 -0
  18. data/ext/nokogiri/xml_dtd.c +2 -22
  19. data/ext/nokogiri/xml_entity_reference.c +41 -0
  20. data/ext/nokogiri/xml_entity_reference.h +9 -0
  21. data/ext/nokogiri/xml_io.c +10 -3
  22. data/ext/nokogiri/xml_io.h +1 -0
  23. data/ext/nokogiri/xml_node.c +116 -66
  24. data/ext/nokogiri/xml_node_set.c +5 -1
  25. data/ext/nokogiri/xml_processing_instruction.c +44 -0
  26. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  27. data/ext/nokogiri/xml_reader.c +20 -4
  28. data/ext/nokogiri/xml_sax_parser.c +51 -15
  29. data/ext/nokogiri/xml_sax_push_parser.c +85 -0
  30. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  31. data/ext/nokogiri/xml_syntax_error.c +12 -8
  32. data/ext/nokogiri/xml_syntax_error.h +2 -1
  33. data/ext/nokogiri/xml_xpath_context.c +11 -2
  34. data/ext/nokogiri/xslt_stylesheet.c +1 -6
  35. data/lib/nokogiri.rb +10 -13
  36. data/lib/nokogiri/css.rb +1 -1
  37. data/lib/nokogiri/css/generated_parser.rb +287 -295
  38. data/lib/nokogiri/css/generated_tokenizer.rb +36 -51
  39. data/lib/nokogiri/css/node.rb +1 -3
  40. data/lib/nokogiri/css/parser.rb +21 -12
  41. data/lib/nokogiri/css/parser.y +55 -44
  42. data/lib/nokogiri/css/syntax_error.rb +2 -1
  43. data/lib/nokogiri/css/tokenizer.rex +23 -32
  44. data/lib/nokogiri/decorators/hpricot/node_set.rb +1 -1
  45. data/lib/nokogiri/html.rb +10 -4
  46. data/lib/nokogiri/html/document.rb +6 -2
  47. data/lib/nokogiri/syntax_error.rb +4 -0
  48. data/lib/nokogiri/version.rb +2 -1
  49. data/lib/nokogiri/xml.rb +3 -1
  50. data/lib/nokogiri/xml/attr.rb +3 -4
  51. data/lib/nokogiri/xml/cdata.rb +1 -1
  52. data/lib/nokogiri/xml/document.rb +4 -7
  53. data/lib/nokogiri/xml/document_fragment.rb +9 -0
  54. data/lib/nokogiri/xml/dtd.rb +3 -0
  55. data/lib/nokogiri/xml/node.rb +144 -40
  56. data/lib/nokogiri/xml/node/save_options.rb +32 -0
  57. data/lib/nokogiri/xml/node_set.rb +11 -20
  58. data/lib/nokogiri/xml/processing_instruction.rb +6 -0
  59. data/lib/nokogiri/xml/reader.rb +5 -0
  60. data/lib/nokogiri/xml/sax.rb +1 -0
  61. data/lib/nokogiri/xml/sax/push_parser.rb +47 -0
  62. data/lib/nokogiri/xml/syntax_error.rb +3 -1
  63. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  64. data/tasks/test.rb +136 -0
  65. data/test/css/test_parser.rb +4 -0
  66. data/test/css/test_tokenizer.rb +30 -17
  67. data/test/css/test_xpath_visitor.rb +11 -0
  68. data/test/helper.rb +11 -0
  69. data/test/hpricot/test_builder.rb +2 -9
  70. data/test/hpricot/test_parser.rb +4 -4
  71. data/test/html/test_builder.rb +7 -7
  72. data/test/html/test_document.rb +90 -4
  73. data/test/html/test_node.rb +1 -0
  74. data/test/test_css_cache.rb +1 -3
  75. data/test/test_reader.rb +19 -1
  76. data/test/test_xslt_transforms.rb +1 -1
  77. data/test/xml/node/test_save_options.rb +20 -0
  78. data/test/xml/sax/test_parser.rb +17 -0
  79. data/test/xml/sax/test_push_parser.rb +67 -0
  80. data/test/xml/test_attr.rb +16 -0
  81. data/test/xml/test_cdata.rb +1 -1
  82. data/test/xml/test_document.rb +45 -0
  83. data/test/xml/test_document_fragment.rb +18 -0
  84. data/test/xml/test_dtd.rb +2 -4
  85. data/test/xml/test_entity_reference.rb +16 -0
  86. data/test/xml/test_node.rb +149 -80
  87. data/test/xml/test_processing_instruction.rb +24 -0
  88. metadata +28 -2
data/test/helper.rb CHANGED
@@ -34,6 +34,7 @@ module Nokogiri
34
34
  attr_reader :start_elements, :start_document_called
35
35
  attr_reader :end_elements, :end_document_called
36
36
  attr_reader :data, :comments, :cdata_blocks
37
+ attr_reader :errors, :warnings
37
38
 
38
39
  def start_document
39
40
  @start_document_called = true
@@ -45,6 +46,16 @@ module Nokogiri
45
46
  super
46
47
  end
47
48
 
49
+ def error error
50
+ (@errors ||= []) << error
51
+ super
52
+ end
53
+
54
+ def warning warning
55
+ (@warning ||= []) << warning
56
+ super
57
+ end
58
+
48
59
  def start_element *args
49
60
  (@start_elements ||= []) << args
50
61
  super
@@ -5,7 +5,7 @@ class TestBuilder < Nokogiri::TestCase
5
5
  # Modified
6
6
  def test_escaping_text
7
7
  doc = Nokogiri.Hpricot() { b "<a\"b>" }
8
- assert_equal "<b>&lt;a\"b&gt;</b>", doc.to_html.chomp
8
+ assert_match "<b>&lt;a\"b&gt;</b>", doc.to_html.chomp
9
9
  assert_equal %{&lt;a\"b&gt;}, doc.at("text()").to_s
10
10
  end
11
11
 
@@ -13,15 +13,8 @@ class TestBuilder < Nokogiri::TestCase
13
13
  # Modified
14
14
  def test_no_escaping_text
15
15
  doc = Nokogiri.Hpricot() { div.test.me! { text "<a\"b>" } }
16
- assert_equal %{<div class="test" id="me">&lt;a"b&gt;</div>},
16
+ assert_match %{<div class="test" id="me">&lt;a"b&gt;</div>},
17
17
  doc.to_html.chomp
18
18
  assert_equal %{&lt;a\"b&gt;}, doc.at("text()").to_s
19
19
  end
20
-
21
- ####
22
- # Modified
23
- def test_latin1_entities
24
- doc = Nokogiri.Hpricot() { b "\200\225" }
25
- assert_equal "<b>&#21;</b>", doc.to_html.chomp
26
- end
27
20
  end
@@ -206,11 +206,11 @@ class TestParser < Nokogiri::TestCase
206
206
  # MODIFIED:
207
207
  # hpricot has an off-by-one bug with eq-and-friends.
208
208
  assert_equal "<p>The third paragraph</p>",
209
- @basic.search('p:eq(3)').to_html # under Hpricot this was eq(2)
209
+ @basic.search('p:eq(3)').to_html.chomp # under Hpricot this was eq(2)
210
210
  ##
211
211
  # MODIFIED: to be blank-agnostic, because Nokogiri's to_html is slightly different compared to Hpricot.
212
212
  assert_equal '<p class="last final"><b>THE FINAL PARAGRAPH</b></p>',
213
- @basic.search('p:last').to_html.gsub(/\s+/,' ')
213
+ @basic.search('p:last').to_html.gsub(/\s+/,' ').gsub(/>\s*</, '><')
214
214
  assert_equal 'last final', @basic.search('p:last-of-type').first.get_attribute('class') # MODIFIED to not have '//' prefix
215
215
  end
216
216
 
@@ -220,7 +220,7 @@ class TestParser < Nokogiri::TestCase
220
220
  element.after('<p>Paragraph 1</p><p>Paragraph 2</p>')
221
221
  end
222
222
  assert_match '<div id="a-div"></div><p>Paragraph 1</p><p>Paragraph 2</p>',
223
- doc.to_html.gsub(/\n/, '')
223
+ doc.to_html.gsub(/\n/, '').gsub(/>\s*</, '><')
224
224
  end
225
225
 
226
226
  def test_insert_before # ticket #61
@@ -229,7 +229,7 @@ class TestParser < Nokogiri::TestCase
229
229
  element.before('<p>Paragraph 1</p><p>Paragraph 2</p>')
230
230
  end
231
231
  assert_match '<p>Paragraph 1</p><p>Paragraph 2</p><div id="a-div"></div>',
232
- doc.to_html.gsub(/\n/, '')
232
+ doc.to_html.gsub(/\n/, '').gsub(/>\s*</, '><')
233
233
  end
234
234
 
235
235
  def test_many_paths
@@ -10,7 +10,7 @@ module Nokogiri
10
10
  }
11
11
  end
12
12
  assert_equal('<div id="awesome"><h1>america</h1></div>',
13
- builder.to_html.gsub(/\n/, ''))
13
+ builder.doc.root.to_html.gsub(/\n/, '').gsub(/>\s*</, '><'))
14
14
  end
15
15
 
16
16
  def test_has_ampersand
@@ -22,7 +22,7 @@ module Nokogiri
22
22
  end
23
23
  assert_equal(
24
24
  '<div class="rad" id="thing">&lt;awe&amp;some&gt;<b>hello &amp; world</b></div>',
25
- builder.to_html.gsub(/\n/, ''))
25
+ builder.doc.root.to_html.gsub(/\n/, ''))
26
26
  end
27
27
 
28
28
  def test_multi_tags
@@ -34,7 +34,7 @@ module Nokogiri
34
34
  end
35
35
  assert_equal(
36
36
  '<div class="rad" id="thing">&lt;awesome&gt;<b>hello</b></div>',
37
- builder.doc.to_html.gsub(/\n/, ''))
37
+ builder.doc.root.to_html.gsub(/\n/, ''))
38
38
  end
39
39
 
40
40
  def test_attributes_plus_block
@@ -44,7 +44,7 @@ module Nokogiri
44
44
  }
45
45
  end
46
46
  assert_equal('<div class="rad" id="thing">&lt;awesome&gt;</div>',
47
- builder.doc.to_html.chomp)
47
+ builder.doc.root.to_html.chomp)
48
48
  end
49
49
 
50
50
  def test_builder_adds_attributes
@@ -52,14 +52,14 @@ module Nokogiri
52
52
  div.rad.thing! "tender div"
53
53
  end
54
54
  assert_equal('<div class="rad" id="thing">tender div</div>',
55
- builder.doc.to_html.chomp)
55
+ builder.doc.root.to_html.chomp)
56
56
  end
57
57
 
58
58
  def test_bold_tag
59
59
  builder = Nokogiri::HTML::Builder.new do
60
60
  b "bold tag"
61
61
  end
62
- assert_equal('<b>bold tag</b>', builder.doc.to_html.chomp)
62
+ assert_equal('<b>bold tag</b>', builder.doc.root.to_html.chomp)
63
63
  end
64
64
 
65
65
  def test_html_then_body_tag
@@ -71,7 +71,7 @@ module Nokogiri
71
71
  }
72
72
  end
73
73
  assert_equal('<html><body><b>bold tag</b></body></html>',
74
- builder.doc.to_html.chomp)
74
+ builder.doc.root.to_html.chomp.gsub(/>\s*</, '><'))
75
75
  end
76
76
 
77
77
  def test_instance_eval_with_delegation_to_block_context
@@ -12,6 +12,41 @@ module Nokogiri
12
12
  assert html.html?
13
13
  end
14
14
 
15
+ def test_parse_io
16
+ assert doc = File.open(HTML_FILE, 'rb') { |f|
17
+ Document.read_io(f, nil, 'UTF-8', PARSE_NOERROR | PARSE_NOWARNING)
18
+ }
19
+ end
20
+
21
+ def test_to_xhtml
22
+ assert_match 'XHTML', @html.to_xhtml
23
+ assert_match 'XHTML', @html.to_xhtml('UTF-8')
24
+ assert_match 'UTF-8', @html.to_xhtml('UTF-8')
25
+ end
26
+
27
+ def test_no_xml_header
28
+ html = Nokogiri::HTML(<<-eohtml)
29
+ <html>
30
+ </html>
31
+ eohtml
32
+ assert html.to_html.length > 0, 'html length is too short'
33
+ assert_no_match(/^<\?xml/, html.to_html)
34
+ end
35
+
36
+ def test_document_has_error
37
+ html = Nokogiri::HTML(<<-eohtml)
38
+ <html>
39
+ <body>
40
+ <div awesome="asdf>
41
+ <p>inside div tag</p>
42
+ </div>
43
+ <p>outside div tag</p>
44
+ </body>
45
+ </html>
46
+ eohtml
47
+ assert html.errors.length > 0
48
+ end
49
+
15
50
  def test_relative_css
16
51
  html = Nokogiri::HTML(<<-eohtml)
17
52
  <html>
@@ -60,6 +95,48 @@ module Nokogiri
60
95
  assert_equal('Hello world!', node.inner_text.strip)
61
96
  end
62
97
 
98
+ def test_find_by_xpath
99
+ found = @html.xpath('//div/a')
100
+ assert_equal 3, found.length
101
+ end
102
+
103
+ def test_find_by_css
104
+ found = @html.css('div > a')
105
+ assert_equal 3, found.length
106
+ end
107
+
108
+ def test_dup_shallow
109
+ found = @html.search('//div/a').first
110
+ dup = found.dup(0)
111
+ assert dup
112
+ assert_equal '', dup.content
113
+ end
114
+
115
+ def test_search_can_handle_xpath_and_css
116
+ found = @html.search('//div/a', 'div > p')
117
+ length = @html.xpath('//div/a').length +
118
+ @html.css('div > p').length
119
+ assert_equal length, found.length
120
+ end
121
+
122
+ def test_dup_document
123
+ assert dup = @html.dup
124
+ assert_not_equal dup, @html
125
+ end
126
+
127
+ def test_dup_document_shallow
128
+ assert dup = @html.dup(0)
129
+ assert_not_equal dup, @html
130
+ end
131
+
132
+ def test_dup
133
+ found = @html.search('//div/a').first
134
+ dup = found.dup
135
+ assert dup
136
+ assert_equal found.content, dup.content
137
+ assert_equal found.document, dup.document
138
+ end
139
+
63
140
  def test_inner_html
64
141
  html = Nokogiri::HTML(<<-eohtml)
65
142
  <html>
@@ -76,15 +153,24 @@ module Nokogiri
76
153
  assert_equal('<p>Helloworld!</p>', node.inner_html.gsub(/\s/, ''))
77
154
  end
78
155
 
156
+ def test_fragment_includes_two_tags
157
+ assert_equal 2, Nokogiri::HTML.fragment("<br/><hr/>").children.length
158
+ end
159
+
79
160
  def test_fragment
80
- node_set = Nokogiri::HTML.fragment(<<-eohtml)
161
+ fragment = Nokogiri::HTML.fragment(<<-eohtml)
81
162
  <div>
82
163
  <b>Hello World</b>
83
164
  </div>
84
165
  eohtml
85
- assert_equal 1, node_set.length
86
- assert_equal 'div', node_set.first.name
87
- assert_match(/Hello World/, node_set.to_html)
166
+ assert_equal 1, fragment.children.length
167
+ assert_equal 'div', fragment.children.first.name
168
+ assert_match(/Hello World/, fragment.to_html)
169
+
170
+ # libxml2 is broken in 2.6.16 and 2.6.17
171
+ unless [16, 17].include?(Nokogiri::LIBXML_VERSION.split('.').last.to_i)
172
+ assert_equal 1, fragment.css('div').length
173
+ end
88
174
  end
89
175
 
90
176
  def test_relative_css_finder
@@ -13,6 +13,7 @@ module Nokogiri
13
13
  </body></html>
14
14
  EOH
15
15
  nokogiri = Nokogiri::HTML.parse(html)
16
+
16
17
  assert_equal "<p>testparagraph\r\nfoobar</p>",
17
18
  nokogiri.at("p").to_html.gsub(/ /, '')
18
19
  end
@@ -27,14 +27,12 @@ class TestCssCache < Nokogiri::TestCase
27
27
 
28
28
  [ false, true ].each do |cache_setting|
29
29
  define_method "test_css_cache_#{cache_setting ? "true" : "false"}" do
30
- times = cache_setting ? 6 : nil
30
+ times = cache_setting ? 4 : nil
31
31
 
32
32
  Nokogiri::CSS::Parser.set_cache cache_setting
33
33
 
34
34
  Nokogiri::CSS.xpath_for(@css)
35
35
  Nokogiri::CSS.xpath_for(@css)
36
- Nokogiri::CSS::Parser.xpath_for(@css)
37
- Nokogiri::CSS::Parser.xpath_for(@css)
38
36
  Nokogiri::CSS::Parser.new.xpath_for(@css)
39
37
  Nokogiri::CSS::Parser.new.xpath_for(@css)
40
38
 
data/test/test_reader.rb CHANGED
@@ -24,6 +24,23 @@ class TestReader < Nokogiri::TestCase
24
24
  reader.map { |x| x.value? }
25
25
  end
26
26
 
27
+ def test_read_error_document
28
+ reader = Nokogiri::XML::Reader.from_memory(<<-eoxml)
29
+ <x xmlns:tenderlove='http://tenderlovemaking.com/'>
30
+ <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
31
+ <foo>
32
+ </x>
33
+ eoxml
34
+ error_happened = false
35
+ begin
36
+ reader.each { |node| }
37
+ rescue Nokogiri::XML::SyntaxError => ex
38
+ error_happened = true
39
+ end
40
+ assert error_happened
41
+ assert 1, reader.errors.length
42
+ end
43
+
27
44
  def test_attributes?
28
45
  reader = Nokogiri::XML::Reader.from_memory(<<-eoxml)
29
46
  <x xmlns:tenderlove='http://tenderlovemaking.com/'>
@@ -112,12 +129,13 @@ class TestReader < Nokogiri::TestCase
112
129
  end
113
130
 
114
131
  def test_encoding
115
- reader = Nokogiri::XML::Reader.from_memory(<<-eoxml)
132
+ string = <<-eoxml
116
133
  <awesome>
117
134
  <p xml:lang="en">The quick brown fox jumps over the lazy dog.</p>
118
135
  <p xml:lang="ja">日本語が上手です</p>
119
136
  </awesome>
120
137
  eoxml
138
+ reader = Nokogiri::XML::Reader.from_memory(string, nil, 'UTF-8')
121
139
  assert_nil reader.encoding
122
140
  assert_equal [nil], reader.map { |x| x.encoding }.uniq
123
141
  end
@@ -73,7 +73,7 @@ class TestXsltTransforms < Nokogiri::TestCase
73
73
  assert_equal 'func-result', result_doc.at('/root/function').content
74
74
  assert_equal 3, result_doc.at('/root/max').content.to_i
75
75
  assert_match(
76
- /\d{4}-\d\d-\d\d-\d\d:\d\d/,
76
+ /\d{4}-\d\d-\d\d[-|+]\d\d:\d\d/,
77
77
  result_doc.at('/root/date').content
78
78
  )
79
79
  result_doc.xpath('/root/params/*').each do |p|
@@ -0,0 +1,20 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', '..',"helper"))
2
+
3
+ module Nokogiri
4
+ module XML
5
+ class Node
6
+ class TestSaveOptions < Nokogiri::TestCase
7
+ SaveOptions.constants.each do |constant|
8
+ class_eval %{
9
+ def test_predicate_#{constant.downcase}
10
+ options = SaveOptions.new(SaveOptions::#{constant})
11
+ assert options.#{constant.downcase}?
12
+
13
+ assert SaveOptions.new.#{constant.downcase}.#{constant.downcase}?
14
+ end
15
+ }
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -8,6 +8,23 @@ module Nokogiri
8
8
  @parser = XML::SAX::Parser.new(Doc.new)
9
9
  end
10
10
 
11
+ def test_bad_document_calls_error_handler
12
+ @parser.parse('<foo><bar></foo>')
13
+ assert @parser.document.errors
14
+ assert @parser.document.errors.length > 0
15
+ end
16
+
17
+ def test_errors_set_after_parsing_bad_dom
18
+ doc = Nokogiri::XML('<foo><bar></foo>')
19
+ assert doc.errors
20
+
21
+ @parser.parse('<foo><bar></foo>')
22
+ assert @parser.document.errors
23
+ assert @parser.document.errors.length > 0
24
+
25
+ assert_equal doc.errors.length, @parser.document.errors.length
26
+ end
27
+
11
28
  def test_parse
12
29
  File.open(XML_FILE, 'rb') { |f|
13
30
  @parser.parse(f)
@@ -0,0 +1,67 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', "helper"))
2
+
3
+ module Nokogiri
4
+ module XML
5
+ module SAX
6
+ class TestPushParser < Nokogiri::SAX::TestCase
7
+ def setup
8
+ @parser = XML::SAX::PushParser.new(Doc.new)
9
+ end
10
+
11
+ def test_end_document_called
12
+ @parser.<<(<<-eoxml)
13
+ <p id="asdfasdf">
14
+ <!-- This is a comment -->
15
+ Paragraph 1
16
+ </p>
17
+ eoxml
18
+ assert ! @parser.document.end_document_called
19
+ @parser.finish
20
+ assert @parser.document.end_document_called
21
+ end
22
+
23
+ def test_start_element
24
+ @parser.<<(<<-eoxml)
25
+ <p id="asdfasdf">
26
+ eoxml
27
+
28
+ assert_equal [["p", ["id", "asdfasdf"]]],
29
+ @parser.document.start_elements
30
+
31
+ @parser.<<(<<-eoxml)
32
+ <!-- This is a comment -->
33
+ Paragraph 1
34
+ </p>
35
+ eoxml
36
+ assert_equal [' This is a comment '], @parser.document.comments
37
+ @parser.finish
38
+ end
39
+
40
+ def test_chevron_partial_xml
41
+ @parser.<<(<<-eoxml)
42
+ <p id="asdfasdf">
43
+ eoxml
44
+
45
+ @parser.<<(<<-eoxml)
46
+ <!-- This is a comment -->
47
+ Paragraph 1
48
+ </p>
49
+ eoxml
50
+ assert_equal [' This is a comment '], @parser.document.comments
51
+ @parser.finish
52
+ end
53
+
54
+ def test_chevron
55
+ @parser.<<(<<-eoxml)
56
+ <p id="asdfasdf">
57
+ <!-- This is a comment -->
58
+ Paragraph 1
59
+ </p>
60
+ eoxml
61
+ @parser.finish
62
+ assert_equal [' This is a comment '], @parser.document.comments
63
+ end
64
+ end
65
+ end
66
+ end
67
+ end