nokogiri 1.5.6.rc3-java → 1.5.7-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (50)
  1. data/CHANGELOG.ja.rdoc +87 -26
  2. data/CHANGELOG.rdoc +94 -32
  3. data/Manifest.txt +1 -0
  4. data/Rakefile +28 -15
  5. data/build_all +13 -5
  6. data/ext/java/nokogiri/NokogiriService.java +8 -1
  7. data/ext/java/nokogiri/XmlDocument.java +4 -4
  8. data/ext/java/nokogiri/XmlDtd.java +13 -2
  9. data/ext/java/nokogiri/XmlElement.java +3 -12
  10. data/ext/java/nokogiri/XmlEntityReference.java +11 -31
  11. data/ext/java/nokogiri/XmlNode.java +76 -32
  12. data/ext/java/nokogiri/XmlReader.java +257 -181
  13. data/ext/java/nokogiri/XmlSaxPushParser.java +17 -2
  14. data/ext/java/nokogiri/internals/NokogiriHelpers.java +23 -16
  15. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +18 -1
  16. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +9 -0
  17. data/ext/java/nokogiri/internals/ReaderNode.java +37 -37
  18. data/ext/java/nokogiri/internals/SaveContextVisitor.java +23 -16
  19. data/ext/java/nokogiri/internals/UncloseableInputStream.java +102 -0
  20. data/ext/java/nokogiri/internals/XmlDomParserContext.java +7 -4
  21. data/ext/nokogiri/extconf.rb +1 -0
  22. data/ext/nokogiri/nokogiri.h +4 -0
  23. data/ext/nokogiri/xml_node.c +33 -1
  24. data/ext/nokogiri/xml_reader.c +0 -3
  25. data/ext/nokogiri/xml_sax_parser.c +4 -1
  26. data/lib/nekodtd.jar +0 -0
  27. data/lib/nokogiri.rb +1 -0
  28. data/lib/nokogiri/css/xpath_visitor.rb +1 -1
  29. data/lib/nokogiri/nokogiri.jar +0 -0
  30. data/lib/nokogiri/version.rb +4 -1
  31. data/lib/nokogiri/xml/builder.rb +12 -2
  32. data/lib/nokogiri/xml/document.rb +3 -1
  33. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  34. data/tasks/cross_compile.rb +15 -15
  35. data/test/css/test_parser.rb +9 -9
  36. data/test/css/test_xpath_visitor.rb +1 -1
  37. data/test/helper.rb +1 -0
  38. data/test/html/sax/test_parser.rb +5 -2
  39. data/test/html/test_document_fragment.rb +4 -2
  40. data/test/namespaces/test_namespaces_in_builder_doc.rb +60 -0
  41. data/test/namespaces/test_namespaces_in_created_doc.rb +62 -0
  42. data/test/namespaces/test_namespaces_in_parsed_doc.rb +60 -0
  43. data/test/test_reader.rb +38 -4
  44. data/test/xml/sax/test_parser.rb +10 -1
  45. data/test/xml/test_builder.rb +40 -1
  46. data/test/xml/test_document.rb +50 -2
  47. data/test/xml/test_entity_reference.rb +2 -4
  48. data/test/xml/test_node.rb +30 -1
  49. data/test_all +2 -2
  50. metadata +142 -232
@@ -45,7 +45,7 @@ module Nokogiri
45
45
  end
46
46
 
47
47
  def test_class_selectors
48
- assert_xpath "//*[contains(concat(' ', @class, ' '), ' red ')]",
48
+ assert_xpath "//*[contains(concat(' ', normalize-space(@class), ' '), ' red ')]",
49
49
  @parser.parse(".red")
50
50
  end
51
51
 
data/test/helper.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  #Process.setrlimit(Process::RLIMIT_CORE, Process::RLIM_INFINITY) unless RUBY_PLATFORM =~ /(java|mswin|mingw)/i
2
2
  $VERBOSE = true
3
3
  require 'minitest/autorun'
4
+ require 'minitest/pride'
4
5
  require 'fileutils'
5
6
  require 'tempfile'
6
7
  require 'pp'
@@ -17,8 +17,7 @@ module Nokogiri
17
17
 
18
18
  def test_parse_empty_file
19
19
  # Make sure empty files don't break stuff
20
- empty_file_name = File.join(Dir.tmpdir, 'bogus.xml')
21
- FileUtils.touch empty_file_name
20
+ empty_file_name = File.join(ASSETS_DIR, 'bogus.xml')
22
21
  # assert_nothing_raised do
23
22
  @parser.parse_file empty_file_name
24
23
  # end
@@ -132,6 +131,10 @@ module Nokogiri
132
131
  ]]
133
132
  ], @parser.document.start_elements
134
133
  end
134
+
135
+ def test_empty_processing_instruction
136
+ @parser.parse_memory("<strong>this will segfault<?strong>")
137
+ end
135
138
  end
136
139
  end
137
140
  end
@@ -26,7 +26,7 @@ module Nokogiri
26
26
 
27
27
  def test_colons_are_not_removed
28
28
  doc = Nokogiri::HTML::DocumentFragment.parse("<span>3:30pm</span>")
29
- assert_match /3:30/, doc.to_s
29
+ assert_match(/3:30/, doc.to_s)
30
30
  end
31
31
 
32
32
  def test_parse_encoding
@@ -195,9 +195,11 @@ module Nokogiri
195
195
  def test_to_xhtml
196
196
  doc = "<span>foo<br></span><span>bar</span>"
197
197
  fragment = Nokogiri::HTML::Document.new.fragment(doc)
198
- if !Nokogiri.jruby? && Nokogiri::VERSION_INFO['libxml']['loaded'] >= "2.7.0"
198
+ if Nokogiri.jruby? || Nokogiri::VERSION_INFO['libxml']['loaded'] >= "2.7.0"
199
199
  assert_equal "<span>foo<br /></span><span>bar</span>", fragment.to_xhtml
200
200
  else
201
+ # FIXME: why are we doing this? This violates the spec,
202
+ # see http://www.w3.org/TR/xhtml1/#C_2
201
203
  assert_equal "<span>foo<br></span><span>bar</span>", fragment.to_xhtml
202
204
  end
203
205
  end
@@ -0,0 +1,60 @@
1
+ require "helper"
2
+
3
+ module Nokogiri
4
+ module XML
5
+ class TestNamespacesInBuilderDoc < Nokogiri::TestCase
6
+ def setup
7
+ super
8
+ b = Nokogiri::XML::Builder.new do |x|
9
+ x.fruit(:xmlns => 'ns:fruit', :'xmlns:veg' => 'ns:veg', :'xmlns:xlink' => 'http://www.w3.org/1999/xlink') do
10
+ x.pear { x.bosc }
11
+ x.orange
12
+ x[:veg].carrot do
13
+ x.cheese(:xmlns => 'ns:dairy', :'xlink:href' => 'http://example.com/cheese/')
14
+ end
15
+ x[:meat].bacon(:'xmlns:meat' => 'ns:meat') do
16
+ x.apple :count => 2
17
+ x[:veg].tomato
18
+ end
19
+ end
20
+ end
21
+
22
+ @doc = b.doc
23
+ end
24
+
25
+ def check_namespace e
26
+ e.namespace.nil? ? nil : e.namespace.href
27
+ end
28
+
29
+ def test_builder_default_ns
30
+ assert_equal 'ns:fruit', check_namespace(@doc.root)
31
+ end
32
+ def test_builder_parent_default_ns
33
+ assert_equal 'ns:fruit', check_namespace(@doc.root.elements[0])
34
+ assert_equal 'ns:fruit', check_namespace(@doc.root.elements[1])
35
+ end
36
+ def test_builder_grandparent_default_ns
37
+ assert_equal 'ns:fruit', check_namespace(@doc.root.elements[0].elements[0])
38
+ end
39
+ def test_builder_parent_nondefault_ns
40
+ assert_equal 'ns:veg', check_namespace(@doc.root.elements[2])
41
+ end
42
+ def test_builder_single_decl_ns_1
43
+ assert_equal 'ns:dairy', check_namespace(@doc.root.elements[2].elements[0])
44
+ end
45
+ def test_builder_nondefault_attr_ns
46
+ assert_equal 'http://www.w3.org/1999/xlink',
47
+ check_namespace(@doc.root.elements[2].elements[0].attribute_nodes.find { |a| a.name =~ /href/ })
48
+ end
49
+ def test_builder_single_decl_ns_2
50
+ assert_equal 'ns:meat', check_namespace(@doc.root.elements[3])
51
+ end
52
+ def test_builder_buried_default_ns
53
+ assert_equal 'ns:fruit', check_namespace(@doc.root.elements[3].elements[0])
54
+ end
55
+ def test_builder_buried_decl_ns
56
+ assert_equal 'ns:veg', check_namespace(@doc.root.elements[3].elements[1])
57
+ end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,62 @@
1
+ require "helper"
2
+
3
+ module Nokogiri
4
+ module XML
5
+ class TestNamespacesInCreatedDoc < Nokogiri::TestCase
6
+ def setup
7
+ super
8
+ @doc = Nokogiri::XML('<fruit xmlns="ns:fruit" xmlns:veg="ns:veg" xmlns:xlink="http://www.w3.org/1999/xlink"/>')
9
+ pear = @doc.create_element('pear')
10
+ bosc = @doc.create_element('bosc')
11
+ pear.add_child(bosc)
12
+ @doc.root << pear
13
+ @doc.root.add_child('<orange/>')
14
+ carrot = @doc.create_element('veg:carrot')
15
+ @doc.root << carrot
16
+ cheese = @doc.create_element('cheese', :xmlns => 'ns:dairy', :'xlink:href' => 'http://example.com/cheese/')
17
+ carrot << cheese
18
+ bacon = @doc.create_element('meat:bacon', :'xmlns:meat' => 'ns:meat')
19
+ apple = @doc.create_element('apple')
20
+ apple['count'] = 2
21
+ bacon << apple
22
+ tomato = @doc.create_element('veg:tomato')
23
+ bacon << tomato
24
+ @doc.root << bacon
25
+ end
26
+
27
+ def check_namespace e
28
+ e.namespace.nil? ? nil : e.namespace.href
29
+ end
30
+
31
+ def test_created_default_ns
32
+ assert_equal 'ns:fruit', check_namespace(@doc.root)
33
+ end
34
+ def test_created_parent_default_ns
35
+ assert_equal 'ns:fruit', check_namespace(@doc.root.elements[0])
36
+ assert_equal 'ns:fruit', check_namespace(@doc.root.elements[1])
37
+ end
38
+ def test_created_grandparent_default_ns
39
+ assert_equal 'ns:fruit', check_namespace(@doc.root.elements[0].elements[0])
40
+ end
41
+ def test_created_parent_nondefault_ns
42
+ assert_equal 'ns:veg', check_namespace(@doc.root.elements[2])
43
+ end
44
+ def test_created_single_decl_ns_1
45
+ assert_equal 'ns:dairy', check_namespace(@doc.root.elements[2].elements[0])
46
+ end
47
+ def test_created_nondefault_attr_ns
48
+ assert_equal 'http://www.w3.org/1999/xlink',
49
+ check_namespace(@doc.root.elements[2].elements[0].attribute_nodes.find { |a| a.name =~ /href/ })
50
+ end
51
+ def test_created_single_decl_ns_2
52
+ assert_equal 'ns:meat', check_namespace(@doc.root.elements[3])
53
+ end
54
+ def test_created_buried_default_ns
55
+ assert_equal 'ns:fruit', check_namespace(@doc.root.elements[3].elements[0])
56
+ end
57
+ def test_created_buried_decl_ns
58
+ assert_equal 'ns:veg', check_namespace(@doc.root.elements[3].elements[1])
59
+ end
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,60 @@
1
+ require "helper"
2
+
3
+ module Nokogiri
4
+ module XML
5
+ class TestNamespacesInParsedDoc < Nokogiri::TestCase
6
+ def setup
7
+ super
8
+ @doc = Nokogiri::XML <<-eoxml
9
+ <fruit xmlns="ns:fruit" xmlns:veg="ns:veg" xmlns:xlink="http://www.w3.org/1999/xlink">
10
+ <pear>
11
+ <bosc/>
12
+ </pear>
13
+ <orange/>
14
+ <veg:carrot>
15
+ <cheese xmlns="ns:dairy" xlink:href="http://example.com/cheese/"/>
16
+ </veg:carrot>
17
+ <meat:bacon xmlns:meat="ns:meat">
18
+ <apple count="2"/>
19
+ <veg:tomato/>
20
+ </meat:bacon>
21
+ </fruit>
22
+ eoxml
23
+ end
24
+
25
+ def check_namespace e
26
+ e.namespace.nil? ? nil : e.namespace.href
27
+ end
28
+
29
+ def test_parsed_default_ns
30
+ assert_equal 'ns:fruit', check_namespace(@doc.root)
31
+ end
32
+ def test_parsed_parent_default_ns
33
+ assert_equal 'ns:fruit', check_namespace(@doc.root.elements[0])
34
+ assert_equal 'ns:fruit', check_namespace(@doc.root.elements[1])
35
+ end
36
+ def test_parsed_grandparent_default_ns
37
+ assert_equal 'ns:fruit', check_namespace(@doc.root.elements[0].elements[0])
38
+ end
39
+ def test_parsed_parent_nondefault_ns
40
+ assert_equal 'ns:veg', check_namespace(@doc.root.elements[2])
41
+ end
42
+ def test_parsed_single_decl_ns_1
43
+ assert_equal 'ns:dairy', check_namespace(@doc.root.elements[2].elements[0])
44
+ end
45
+ def test_parsed_nondefault_attr_ns
46
+ assert_equal 'http://www.w3.org/1999/xlink',
47
+ check_namespace(@doc.root.elements[2].elements[0].attribute_nodes.find { |a| a.name =~ /href/ })
48
+ end
49
+ def test_parsed_single_decl_ns_2
50
+ assert_equal 'ns:meat', check_namespace(@doc.root.elements[3])
51
+ end
52
+ def test_parsed_buried_default_ns
53
+ assert_equal 'ns:fruit', check_namespace(@doc.root.elements[3].elements[0])
54
+ end
55
+ def test_parsed_buried_decl_ns
56
+ assert_equal 'ns:veg', check_namespace(@doc.root.elements[3].elements[1])
57
+ end
58
+ end
59
+ end
60
+ end
data/test/test_reader.rb CHANGED
@@ -34,6 +34,29 @@ class TestReader < Nokogiri::TestCase
34
34
  assert_equal [false, false, nil, nil, true, nil], results
35
35
  end
36
36
 
37
+ # Issue #831
38
+ # Make sure that the reader doesn't block reading the entire input
39
+ def test_reader_blocking
40
+ rd, wr = IO.pipe()
41
+ node_out = nil
42
+ t = Thread.start do
43
+ reader = Nokogiri::XML::Reader(rd, 'UTF-8')
44
+ reader.each do |node|
45
+ node_out = node
46
+ break
47
+ end
48
+ end
49
+ sleep(1) # sleep for one second to make sure the reader will actually block for input
50
+ wr.puts "<foo>"
51
+ wr.puts "<bar/>" * 10000
52
+ wr.flush
53
+ res = t.join(5) # wait 5 seconds for the thread to finish
54
+ wr.close
55
+ rd.close
56
+ refute_nil node_out, "Didn't read any nodes, exclude the trivial case"
57
+ refute_nil res, "Reader blocks trying to read the entire stream"
58
+ end
59
+
37
60
  def test_reader_takes_block
38
61
  options = nil
39
62
  Nokogiri::XML::Reader(File.read(XML_FILE), XML_FILE) do |cfg|
@@ -319,12 +342,23 @@ class TestReader < Nokogiri::TestCase
319
342
  end
320
343
 
321
344
  def test_outer_xml
322
- str = "<x><y>hello</y></x>"
323
- reader = Nokogiri::XML::Reader.from_memory(str)
345
+ str = ["<x><y>hello</y></x>", "<y>hello</y>", "hello", "<y/>", "<x/>"]
346
+ reader = Nokogiri::XML::Reader.from_memory(str.first)
324
347
 
325
- reader.read
348
+ xml = []
349
+ reader.map { |node| xml << node.outer_xml }
350
+
351
+ assert_equal str, xml
352
+ end
353
+
354
+ def test_outer_xml_with_empty_nodes
355
+ str = ["<x><y/></x>", "<y/>", "<x/>"]
356
+ reader = Nokogiri::XML::Reader.from_memory(str.first)
357
+
358
+ xml = []
359
+ reader.map { |node| xml << node.outer_xml }
326
360
 
327
- assert_equal str, reader.outer_xml
361
+ assert_equal str, xml
328
362
  end
329
363
 
330
364
  def test_state
@@ -194,8 +194,17 @@ module Nokogiri
194
194
  end
195
195
 
196
196
  def test_parse_io
197
+ call_parse_io_with_encoding 'UTF-8'
198
+ end
199
+
200
+ # issue #828
201
+ def test_parse_io_lower_case_encoding
202
+ call_parse_io_with_encoding 'utf-8'
203
+ end
204
+
205
+ def call_parse_io_with_encoding encoding
197
206
  File.open(XML_FILE, 'rb') { |f|
198
- @parser.parse_io(f, 'UTF-8')
207
+ @parser.parse_io(f, encoding)
199
208
  }
200
209
  assert(@parser.document.cdata_blocks.length > 0)
201
210
  if RUBY_VERSION =~ /^1\.9/
@@ -1,3 +1,5 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  require "helper"
2
4
 
3
5
  module Nokogiri
@@ -11,6 +13,12 @@ module Nokogiri
11
13
  assert_equal 'world', doc.root['abcDef']
12
14
  end
13
15
 
16
+ def test_builder_with_utf8_text
17
+ text = "test ﺵ "
18
+ doc = Nokogiri::XML::Builder.new(:encoding => "UTF-8") { |xml| xml.test text }.doc
19
+ assert_equal text, doc.content
20
+ end
21
+
14
22
  def test_builder_escape
15
23
  xml = Nokogiri::XML::Builder.new { |x|
16
24
  x.condition "value < 1", :attr => "value < 1"
@@ -103,6 +111,21 @@ module Nokogiri
103
111
  assert_equal 'bar', doc.at('foo|baz', 'foo' => 'bar').namespace.href
104
112
  end
105
113
 
114
+ def test_dtd_in_builder_output
115
+ builder = Nokogiri::XML::Builder.new do |xml|
116
+ xml.doc.create_internal_subset(
117
+ 'html',
118
+ "-//W3C//DTD HTML 4.01 Transitional//EN",
119
+ "http://www.w3.org/TR/html4/loose.dtd"
120
+ )
121
+ xml.root do
122
+ xml.foo
123
+ end
124
+ end
125
+ assert_match(/<!DOCTYPE html PUBLIC "-\/\/W3C\/\/DTD HTML 4.01 Transitional\/\/EN" "http:\/\/www.w3.org\/TR\/html4\/loose.dtd">/,
126
+ builder.to_xml)
127
+ end
128
+
106
129
  def test_specify_namespace_nested
107
130
  b = Nokogiri::XML::Builder.new { |xml|
108
131
  xml.root('xmlns:foo' => 'bar') do
@@ -120,11 +143,27 @@ module Nokogiri
120
143
  assert_equal 'bar', doc.at('foo|baz', 'foo' => 'bar').namespace.href
121
144
  end
122
145
 
146
+ def test_specified_namespace_postdeclared
147
+ doc = Nokogiri::XML::Builder.new { |xml|
148
+ xml.a do
149
+ xml[:foo].b("xmlns:foo" => "bar")
150
+ end
151
+ }.doc
152
+ a = doc.at('a')
153
+ assert_equal({}, a.namespaces)
154
+
155
+ b = doc.at_xpath('//foo:b', {:foo=>'bar'})
156
+ assert b
157
+ assert_equal({"xmlns:foo"=>"bar"}, b.namespaces)
158
+ assert_equal("b", b.name)
159
+ assert_equal("bar", b.namespace.href)
160
+ end
161
+
123
162
  def test_specified_namespace_undeclared
124
163
  Nokogiri::XML::Builder.new { |xml|
125
164
  xml.root do
126
165
  assert_raises(ArgumentError) do
127
- xml[:foo]
166
+ xml[:foo].bar
128
167
  end
129
168
  end
130
169
  }
@@ -16,6 +16,40 @@ module Nokogiri
16
16
  @xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE)
17
17
  end
18
18
 
19
+ def test_dtd_with_empty_internal_subset
20
+ doc = Nokogiri::XML <<-eoxml
21
+ <?xml version="1.0"?>
22
+ <!DOCTYPE people >
23
+ <people>
24
+ </people>
25
+ eoxml
26
+ assert doc.root
27
+ end
28
+
29
+ # issue #838
30
+ def test_document_with_invalid_prolog
31
+ doc = Nokogiri::XML '<? ?>'
32
+ assert_empty doc.content
33
+ end
34
+
35
+ # issue #837
36
+ def test_document_with_refentity
37
+ doc = Nokogiri::XML '&amp;'
38
+ assert_equal '', doc.content
39
+ end
40
+
41
+ # issue #835
42
+ def test_manually_adding_reference_entities
43
+ d = Nokogiri::XML::Document.new
44
+ root = Nokogiri::XML::Element.new('bar', d)
45
+ txt = Nokogiri::XML::Text.new('foo', d)
46
+ ent = Nokogiri::XML::EntityReference.new(d, '#8217')
47
+ root << txt
48
+ root << ent
49
+ d << root
50
+ assert_match d.to_html, /&#8217;/
51
+ end
52
+
19
53
  def test_document_with_initial_space
20
54
  doc = Nokogiri::XML(" <?xml version='1.0' encoding='utf-8' ?><first \>")
21
55
  assert_equal 2, doc.children.size
@@ -29,11 +63,13 @@ module Nokogiri
29
63
  def test_ignore_unknown_namespace
30
64
  doc = Nokogiri::XML(<<-eoxml)
31
65
  <xml>
32
- <unknown:foo xmlns='hello' />
66
+ <unknown:foo xmlns='http://hello.com/' />
33
67
  <bar />
34
68
  </xml>
35
69
  eoxml
36
- refute doc.xpath('//foo').first.namespace # assert that the namespace is nil
70
+ if Nokogiri.jruby?
71
+ refute doc.xpath('//foo').first.namespace # assert that the namespace is nil
72
+ end
37
73
  refute_empty doc.xpath('//bar'), "bar wasn't found in the document" # bar should be part of the doc
38
74
  end
39
75
 
@@ -699,6 +735,18 @@ module Nokogiri
699
735
  assert_match %r{foo attr}, doc.to_xml
700
736
  end
701
737
 
738
+ # issue #785
739
+ def test_attribute_decoration
740
+ decorator = Module.new do
741
+ def test_method
742
+ end
743
+ end
744
+
745
+ util_decorate(@xml, decorator)
746
+
747
+ assert @xml.search('//@street').first.respond_to?(:test_method)
748
+ end
749
+
702
750
  def test_subset_is_decorated
703
751
  x = Module.new do
704
752
  def awesome!