nokogiri 1.5.5.rc3-java → 1.5.6-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (67)
  1. data/CHANGELOG.ja.rdoc +42 -1
  2. data/CHANGELOG.rdoc +41 -1
  3. data/Manifest.txt +8 -1
  4. data/README.ja.rdoc +1 -1
  5. data/README.rdoc +5 -8
  6. data/ROADMAP.md +6 -2
  7. data/Rakefile +29 -7
  8. data/bin/nokogiri +19 -4
  9. data/build_all +56 -17
  10. data/ext/java/nokogiri/HtmlDocument.java +26 -0
  11. data/ext/java/nokogiri/NokogiriService.java +7 -1
  12. data/ext/java/nokogiri/XmlDocument.java +24 -6
  13. data/ext/java/nokogiri/XmlDocumentFragment.java +2 -26
  14. data/ext/java/nokogiri/XmlDtd.java +13 -2
  15. data/ext/java/nokogiri/XmlElement.java +3 -12
  16. data/ext/java/nokogiri/XmlEntityReference.java +32 -8
  17. data/ext/java/nokogiri/XmlNamespace.java +2 -1
  18. data/ext/java/nokogiri/XmlNode.java +83 -31
  19. data/ext/java/nokogiri/XmlSaxPushParser.java +55 -53
  20. data/ext/java/nokogiri/XmlText.java +2 -14
  21. data/ext/java/nokogiri/XsltStylesheet.java +4 -2
  22. data/ext/java/nokogiri/internals/ClosedStreamException.java +10 -0
  23. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +2 -2
  24. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +151 -0
  25. data/ext/java/nokogiri/internals/{XmlDomParser.java → NokogiriDomParser.java} +25 -14
  26. data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +109 -0
  27. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +123 -0
  28. data/ext/java/nokogiri/internals/NokogiriHandler.java +22 -14
  29. data/ext/java/nokogiri/internals/NokogiriHelpers.java +25 -7
  30. data/ext/java/nokogiri/internals/ParserContext.java +2 -1
  31. data/ext/java/nokogiri/internals/ReaderNode.java +2 -1
  32. data/ext/java/nokogiri/internals/SaveContextVisitor.java +100 -102
  33. data/ext/java/nokogiri/internals/XmlDomParserContext.java +10 -4
  34. data/ext/nokogiri/extconf.rb +1 -0
  35. data/ext/nokogiri/xml_document.c +2 -2
  36. data/ext/nokogiri/xml_node.c +31 -14
  37. data/ext/nokogiri/xml_sax_parser.c +16 -0
  38. data/ext/nokogiri/xslt_stylesheet.c +19 -2
  39. data/lib/nekodtd.jar +0 -0
  40. data/lib/nokogiri/nokogiri.jar +0 -0
  41. data/lib/nokogiri/version.rb +4 -1
  42. data/lib/nokogiri/xml/document.rb +8 -6
  43. data/lib/nokogiri/xml/document_fragment.rb +10 -1
  44. data/lib/nokogiri/xml/node.rb +58 -61
  45. data/lib/nokogiri/xml/sax/document.rb +7 -0
  46. data/lib/nokogiri/xml/sax/parser.rb +7 -0
  47. data/lib/nokogiri/xml/xpath_context.rb +1 -1
  48. data/lib/nokogiri/xslt.rb +1 -1
  49. data/tasks/cross_compile.rb +5 -8
  50. data/test/files/test_document_url/bar.xml +2 -0
  51. data/test/files/test_document_url/document.dtd +4 -0
  52. data/test/files/test_document_url/document.xml +6 -0
  53. data/test/helper.rb +6 -0
  54. data/test/html/test_document.rb +23 -0
  55. data/test/html/test_document_fragment.rb +5 -0
  56. data/test/test_xslt_transforms.rb +30 -0
  57. data/test/xml/sax/test_parser.rb +20 -1
  58. data/test/xml/test_builder.rb +42 -0
  59. data/test/xml/test_document.rb +64 -9
  60. data/test/xml/test_document_fragment.rb +7 -0
  61. data/test/xml/test_entity_reference.rb +12 -0
  62. data/test/xml/test_namespace.rb +20 -0
  63. data/test/xml/test_node.rb +79 -0
  64. data/test/xml/test_node_attributes.rb +29 -0
  65. data/test/xml/test_unparented_node.rb +9 -0
  66. data/test_all +11 -14
  67. metadata +744 -560
@@ -158,6 +158,13 @@ module Nokogiri
158
158
  # +string+ contains the cdata content
159
159
  def cdata_block string
160
160
  end
161
+
162
+ ###
163
+ # Called when processing instructions are found
164
+ # +name+ is the target of the instruction
165
+ # +content+ is the value of the instruction
166
+ def processing_instruction name, content
167
+ end
161
168
  end
162
169
  end
163
170
  end
@@ -68,6 +68,7 @@ module Nokogiri
68
68
 
69
69
  # Create a new Parser with +doc+ and +encoding+
70
70
  def initialize doc = Nokogiri::XML::SAX::Document.new, encoding = 'UTF-8'
71
+ check_encoding(encoding)
71
72
  @encoding = encoding
72
73
  @document = doc
73
74
  @warned = false
@@ -87,6 +88,7 @@ module Nokogiri
87
88
  ###
88
89
  # Parse given +io+
89
90
  def parse_io io, encoding = 'ASCII'
91
+ check_encoding(encoding)
90
92
  @encoding = encoding
91
93
  ctx = ParserContext.io(io, ENCODINGS[encoding])
92
94
  yield ctx if block_given?
@@ -109,6 +111,11 @@ module Nokogiri
109
111
  yield ctx if block_given?
110
112
  ctx.parse_with self
111
113
  end
114
+
115
+ private
116
+ def check_encoding(encoding)
117
+ raise ArgumentError.new("'#{encoding}' is not a valid encoding") unless ENCODINGS[encoding]
118
+ end
112
119
  end
113
120
  end
114
121
  end
@@ -6,7 +6,7 @@ module Nokogiri
6
6
  # Register namespaces in +namespaces+
7
7
  def register_namespaces(namespaces)
8
8
  namespaces.each do |k, v|
9
- k = k.gsub(/.*:/,'') # strip off 'xmlns:' or 'xml:'
9
+ k = k.to_s.gsub(/.*:/,'') # strip off 'xmlns:' or 'xml:'
10
10
  register_ns(k, v)
11
11
  end
12
12
  end
@@ -24,7 +24,7 @@ module Nokogiri
24
24
  def parse string, modules = {}
25
25
  modules.each do |url, klass|
26
26
  XSLT.register url, klass
27
- end
27
+ end
28
28
 
29
29
  if Nokogiri.jruby?
30
30
  Stylesheet.parse_stylesheet_doc(XML.parse(string), string)
@@ -4,10 +4,10 @@ HOST = Rake::ExtensionCompiler.mingw_host
4
4
 
5
5
  require 'mini_portile'
6
6
  $recipes = {}
7
- $recipes[:zlib] = MiniPortile.new "zlib", "1.2.5"
7
+ $recipes[:zlib] = MiniPortile.new "zlib", "1.2.7"
8
8
  $recipes[:libiconv] = MiniPortile.new "libiconv", "1.13.1"
9
- $recipes[:libxml2] = MiniPortile.new "libxml2", "2.7.7"
10
- $recipes[:libxslt] = MiniPortile.new "libxslt", "1.1.26"
9
+ $recipes[:libxml2] = MiniPortile.new "libxml2", "2.7.7"
10
+ $recipes[:libxslt] = MiniPortile.new "libxslt", "1.1.26"
11
11
  $recipes.each { |_, recipe| recipe.host = HOST }
12
12
 
13
13
  file "lib/nokogiri/nokogiri.rb" do
@@ -144,10 +144,7 @@ namespace :cross do
144
144
  end
145
145
  end
146
146
 
147
- HOE.clean_globs += [
148
- "#{CROSS_DIR}/*.installed",
149
- "#{CROSS_DIR}/#{HOST}",
150
- "tmp/#{HOST}",
151
- ]
147
+ require 'rake/clean'
148
+ CLOBBER.include("#{CROSS_DIR}/*.installed", "#{CROSS_DIR}/#{HOST}", "tmp/#{HOST}")
152
149
 
153
150
  task :cross => ["cross:libxslt", "lib/nokogiri/nokogiri.rb", "cross:file_list"]
@@ -0,0 +1,2 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <blah>foobar</blah>
@@ -0,0 +1,4 @@
1
+ <!ENTITY bar SYSTEM "bar.xml">
2
+ <!ELEMENT document (body)>
3
+ <!ELEMENT blah ANY>
4
+ <!ELEMENT body ANY>
@@ -0,0 +1,6 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!DOCTYPE document SYSTEM "document.dtd">
3
+
4
+ <document>
5
+ <body>&bar;</body>
6
+ </document>
@@ -78,6 +78,7 @@ module Nokogiri
78
78
  attr_reader :data, :comments, :cdata_blocks, :start_elements_namespace
79
79
  attr_reader :errors, :warnings, :end_elements_namespace
80
80
  attr_reader :xmldecls
81
+ attr_reader :processing_instructions
81
82
 
82
83
  def xmldecl version, encoding, standalone
83
84
  @xmldecls = [version, encoding, standalone].compact
@@ -141,6 +142,11 @@ module Nokogiri
141
142
  @cdata_blocks += [string]
142
143
  super
143
144
  end
145
+
146
+ def processing_instruction name, content
147
+ @processing_instructions ||= []
148
+ @processing_instructions << [name, content]
149
+ end
144
150
  end
145
151
  end
146
152
  end
@@ -369,6 +369,29 @@ eohtml
369
369
  assert_equal('Hello world!', node.inner_text.strip)
370
370
  end
371
371
 
372
+ def test_doc_type
373
+ html = Nokogiri::HTML(<<-eohtml)
374
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
375
+ <html xmlns="http://www.w3.org/1999/xhtml">
376
+ <body>
377
+ <p>Rainbow Dash</p>
378
+ </body>
379
+ </html>
380
+ eohtml
381
+ assert_equal "html", html.internal_subset.name
382
+ assert_equal "-//W3C//DTD XHTML 1.1//EN", html.internal_subset.external_id
383
+ assert_equal "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", html.internal_subset.system_id
384
+ assert_equal "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">", html.to_s[0,97]
385
+ end
386
+
387
+ def test_content_size
388
+ html = Nokogiri::HTML('<div>
389
+ </div>')
390
+ assert_equal 1, html.content.size
391
+ assert_equal 1, html.content.split("").size
392
+ assert_equal "\n", html.content
393
+ end
394
+
372
395
  def test_find_by_xpath
373
396
  found = @html.xpath('//div/a')
374
397
  assert_equal 3, found.length
@@ -24,6 +24,11 @@ module Nokogiri
24
24
  end
25
25
  end
26
26
 
27
+ def test_colons_are_not_removed
28
+ doc = Nokogiri::HTML::DocumentFragment.parse("<span>3:30pm</span>")
29
+ assert_match(/3:30/, doc.to_s)
30
+ end
31
+
27
32
  def test_parse_encoding
28
33
  fragment = "<div>hello world</div>"
29
34
  f = Nokogiri::HTML::DocumentFragment.parse fragment, 'ISO-8859-1'
@@ -189,8 +189,37 @@ encoding="iso-8859-1" indent="yes"/>
189
189
  Nokogiri::XSLT.quote_params(params.to_a.flatten)))
190
190
  check_params result_doc, params
191
191
  end
192
+
193
+ def test_xslt_paramaters
194
+ xslt_str = <<-EOX
195
+ <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" >
196
+ <xsl:template match="/">
197
+ <xsl:value-of select="$foo" />
198
+ </xsl:template>
199
+ </xsl:stylesheet>
200
+ EOX
201
+
202
+ xslt = Nokogiri::XSLT(xslt_str)
203
+ doc = Nokogiri::XML("<root />")
204
+ assert_match %r{bar}, xslt.transform(doc, Nokogiri::XSLT.quote_params('foo' => 'bar')).to_s
205
+ end
206
+
207
+ def test_xslt_transform_error
208
+ xslt_str = <<-EOX
209
+ <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" >
210
+ <xsl:template match="/">
211
+ <xsl:value-of select="$foo" />
212
+ </xsl:template>
213
+ </xsl:stylesheet>
214
+ EOX
215
+
216
+ xslt = Nokogiri::XSLT(xslt_str)
217
+ doc = Nokogiri::XML("<root />")
218
+ assert_raises(RuntimeError) { xslt.transform(doc) }
219
+ end
192
220
  end
193
221
 
222
+
194
223
  def test_xslt_parse_error
195
224
  xslt_str = <<-EOX
196
225
  <xsl:stylesheet version="1.0"
@@ -210,6 +239,7 @@ encoding="iso-8859-1" indent="yes"/>
210
239
  assert_raises(RuntimeError) { Nokogiri::XSLT.parse(xslt_str) }
211
240
  end
212
241
 
242
+
213
243
  def test_passing_a_non_document_to_transform
214
244
  xsl = Nokogiri::XSLT('<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"></xsl:stylesheet>')
215
245
  assert_raises(ArgumentError) { xsl.transform("<div></div>") }
@@ -173,7 +173,12 @@ module Nokogiri
173
173
  end
174
174
  end
175
175
 
176
- assert_equal doc.errors.length, @parser.document.errors.length
176
+ # when using JRuby Nokogiri, more errors will be generated as the DOM
177
+ # parser continues to parse an ill-formed document, while the SAX parser
178
+ # will stop at the first error
179
+ unless Nokogiri.jruby?
180
+ assert_equal doc.errors.length, @parser.document.errors.length
181
+ end
177
182
  end
178
183
 
179
184
  def test_parse_with_memory_argument
@@ -250,6 +255,11 @@ module Nokogiri
250
255
  assert_raises(ArgumentError) { @parser.parse_memory(nil) }
251
256
  end
252
257
 
258
+ def test_bad_encoding_args
259
+ assert_raises(ArgumentError) { XML::SAX::Parser.new(Doc.new, 'not an encoding') }
260
+ assert_raises(ArgumentError) { @parser.parse_io(StringIO.new('<root/>'), 'not an encoding')}
261
+ end
262
+
253
263
  def test_ctag
254
264
  @parser.parse_memory(<<-eoxml)
255
265
  <p id="asdfasdf">
@@ -308,6 +318,15 @@ module Nokogiri
308
318
  @parser.document.start_elements
309
319
  end
310
320
 
321
+ def test_processing_instruction
322
+ @parser.parse_memory(<<-eoxml)
323
+ <?xml-stylesheet href="a.xsl" type="text/xsl"?>
324
+ <?xml version="1.0"?>
325
+ eoxml
326
+ assert_equal [['xml-stylesheet', 'href="a.xsl" type="text/xsl"']],
327
+ @parser.document.processing_instructions
328
+ end
329
+
311
330
  if Nokogiri.uses_libxml? # JRuby SAXParser only parses well-formed XML documents
312
331
  def test_parse_document
313
332
  @parser.parse_memory(<<-eoxml)
@@ -1,3 +1,5 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  require "helper"
2
4
 
3
5
  module Nokogiri
@@ -11,6 +13,12 @@ module Nokogiri
11
13
  assert_equal 'world', doc.root['abcDef']
12
14
  end
13
15
 
16
+ def test_builder_with_utf8_text
17
+ text = "test ﺵ "
18
+ doc = Nokogiri::XML::Builder.new(:encoding => "UTF-8") { |xml| xml.test text }.doc
19
+ assert_equal text, doc.content
20
+ end
21
+
14
22
  def test_builder_escape
15
23
  xml = Nokogiri::XML::Builder.new { |x|
16
24
  x.condition "value < 1", :attr => "value < 1"
@@ -103,6 +111,21 @@ module Nokogiri
103
111
  assert_equal 'bar', doc.at('foo|baz', 'foo' => 'bar').namespace.href
104
112
  end
105
113
 
114
+ def test_dtd_in_builder_output
115
+ builder = Nokogiri::XML::Builder.new do |xml|
116
+ xml.doc.create_internal_subset(
117
+ 'html',
118
+ "-//W3C//DTD HTML 4.01 Transitional//EN",
119
+ "http://www.w3.org/TR/html4/loose.dtd"
120
+ )
121
+ xml.root do
122
+ xml.foo
123
+ end
124
+ end
125
+ assert_match(/<!DOCTYPE html PUBLIC "-\/\/W3C\/\/DTD HTML 4.01 Transitional\/\/EN" "http:\/\/www.w3.org\/TR\/html4\/loose.dtd">/,
126
+ builder.to_xml)
127
+ end
128
+
106
129
  def test_specify_namespace_nested
107
130
  b = Nokogiri::XML::Builder.new { |xml|
108
131
  xml.root('xmlns:foo' => 'bar') do
@@ -209,6 +232,25 @@ module Nokogiri
209
232
  assert_equal ["bbb","ccc"], builder.doc.at_css("aaa").children.collect(&:name)
210
233
  end
211
234
 
235
+ def test_raw_xml_append_with_namespaces
236
+ doc = Nokogiri::XML::Builder.new do |xml|
237
+ xml.root("xmlns:foo" => "x", "xmlns" => "y") do
238
+ xml << '<Element foo:bar="bazz"/>'
239
+ end
240
+ end.doc
241
+
242
+ el = doc.at 'Element'
243
+ assert_not_nil el
244
+
245
+ assert_equal 'y', el.namespace.href
246
+ assert_nil el.namespace.prefix
247
+
248
+ attr = el.attributes["bar"]
249
+ assert_not_nil attr
250
+ assert_not_nil attr.namespace
251
+ assert_equal "foo", attr.namespace.prefix
252
+ end
253
+
212
254
  def test_cdata
213
255
  builder = Nokogiri::XML::Builder.new do
214
256
  root {
@@ -16,11 +16,37 @@ module Nokogiri
16
16
  @xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE)
17
17
  end
18
18
 
19
+ def test_dtd_with_empty_internal_subset
20
+ doc = Nokogiri::XML <<-eoxml
21
+ <?xml version="1.0"?>
22
+ <!DOCTYPE people >
23
+ <people>
24
+ </people>
25
+ eoxml
26
+ assert doc.root
27
+ end
28
+
29
+ def test_document_with_initial_space
30
+ doc = Nokogiri::XML(" <?xml version='1.0' encoding='utf-8' ?><first \>")
31
+ assert_equal 2, doc.children.size
32
+ end
33
+
19
34
  def test_root_set_to_nil
20
35
  @xml.root = nil
21
36
  assert_equal nil, @xml.root
22
37
  end
23
38
 
39
+ def test_ignore_unknown_namespace
40
+ doc = Nokogiri::XML(<<-eoxml)
41
+ <xml>
42
+ <unknown:foo xmlns='hello' />
43
+ <bar />
44
+ </xml>
45
+ eoxml
46
+ refute doc.xpath('//foo').first.namespace # assert that the namespace is nil
47
+ refute_empty doc.xpath('//bar'), "bar wasn't found in the document" # bar should be part of the doc
48
+ end
49
+
24
50
  def test_collect_namespaces
25
51
  doc = Nokogiri::XML(<<-eoxml)
26
52
  <xml>
@@ -683,6 +709,18 @@ module Nokogiri
683
709
  assert_match %r{foo attr}, doc.to_xml
684
710
  end
685
711
 
712
+ # issue #785
713
+ def test_attribute_decoration
714
+ decorator = Module.new do
715
+ def test_method
716
+ end
717
+ end
718
+
719
+ util_decorate(@xml, decorator)
720
+
721
+ assert @xml.search('//@street').first.respond_to?(:test_method)
722
+ end
723
+
686
724
  def test_subset_is_decorated
687
725
  x = Module.new do
688
726
  def awesome!
@@ -716,26 +754,43 @@ module Nokogiri
716
754
  assert @xml.children.respond_to?(:awesome!)
717
755
  end
718
756
 
719
- def test_java_integration
720
- if Nokogiri.jruby?
757
+ if Nokogiri.jruby?
758
+ def wrap_java_document
721
759
  require 'java'
722
760
  factory = javax.xml.parsers.DocumentBuilderFactory.newInstance
723
761
  builder = factory.newDocumentBuilder
724
762
  document = builder.newDocument
725
763
  root = document.createElement("foo")
726
764
  document.appendChild(root)
727
- noko_doc = Nokogiri::XML::Document.wrap(document)
728
- assert_equal 'foo', noko_doc.root.name
765
+ Nokogiri::XML::Document.wrap(document)
766
+ end
767
+ end
729
768
 
730
- noko_doc = Nokogiri::XML(<<eoxml)
769
+ def test_java_integration
770
+ skip("Ruby doesn't have the wrap method") unless Nokogiri.jruby?
771
+ noko_doc = wrap_java_document
772
+ assert_equal 'foo', noko_doc.root.name
773
+
774
+ noko_doc = Nokogiri::XML(<<eoxml)
731
775
  <foo xmlns='hello'>
732
776
  <bar xmlns:foo='world' />
733
777
  </foo>
734
778
  eoxml
735
- dom = noko_doc.to_java
736
- assert dom.kind_of? org.w3c.dom.Document
737
- assert_equal 'foo', dom.getDocumentElement().getTagName()
738
- end
779
+ dom = noko_doc.to_java
780
+ assert dom.kind_of? org.w3c.dom.Document
781
+ assert_equal 'foo', dom.getDocumentElement().getTagName()
782
+ end
783
+
784
+ def test_add_child
785
+ skip("Ruby doesn't have the wrap method") unless Nokogiri.jruby?
786
+ doc = wrap_java_document
787
+ doc.root.add_child "<bar />"
788
+ end
789
+
790
+ def test_can_be_closed
791
+ f = File.open XML_FILE
792
+ Nokogiri::XML f
793
+ f.close
739
794
  end
740
795
  end
741
796
  end
@@ -8,6 +8,13 @@ module Nokogiri
8
8
  @xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE)
9
9
  end
10
10
 
11
+ def test_replace_text_node
12
+ html = "foo"
13
+ doc = Nokogiri::XML::DocumentFragment.parse(html)
14
+ doc.children[0].replace "bar"
15
+ assert_equal 'bar', doc.children[0].content
16
+ end
17
+
11
18
  def test_fragment_is_relative
12
19
  doc = Nokogiri::XML('<root><a xmlns="blah" /></root>')
13
20
  ctx = doc.root.child