nokogiri 1.5.6.rc1-java → 1.5.6.rc2-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (36)
  1. data/CHANGELOG.ja.rdoc +3 -0
  2. data/CHANGELOG.rdoc +3 -0
  3. data/Manifest.txt +8 -4
  4. data/README.ja.rdoc +1 -1
  5. data/README.rdoc +1 -1
  6. data/ROADMAP.md +3 -0
  7. data/Rakefile +26 -7
  8. data/build_all +40 -27
  9. data/ext/java/nokogiri/HtmlDocument.java +26 -0
  10. data/ext/java/nokogiri/XmlDocument.java +17 -4
  11. data/ext/java/nokogiri/XmlDocumentFragment.java +1 -39
  12. data/ext/java/nokogiri/XmlNode.java +3 -2
  13. data/ext/java/nokogiri/XmlSaxPushParser.java +55 -53
  14. data/ext/java/nokogiri/XsltStylesheet.java +4 -2
  15. data/ext/java/nokogiri/internals/ClosedStreamException.java +10 -0
  16. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +2 -2
  17. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +151 -0
  18. data/ext/java/nokogiri/internals/{XmlDomParser.java → NokogiriDomParser.java} +25 -14
  19. data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +109 -0
  20. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +123 -0
  21. data/ext/java/nokogiri/internals/NokogiriHandler.java +12 -10
  22. data/ext/java/nokogiri/internals/NokogiriHelpers.java +12 -2
  23. data/ext/java/nokogiri/internals/XmlDomParserContext.java +1 -1
  24. data/ext/nokogiri/extconf.rb +1 -0
  25. data/ext/nokogiri/xslt_stylesheet.c +19 -2
  26. data/lib/nokogiri/nokogiri.jar +0 -0
  27. data/lib/nokogiri/version.rb +1 -1
  28. data/lib/nokogiri/xml/node.rb +43 -50
  29. data/lib/nokogiri/xml/sax/parser.rb +7 -0
  30. data/lib/nokogiri/xslt.rb +1 -1
  31. data/tasks/cross_compile.rb +3 -3
  32. data/test/html/test_document.rb +23 -0
  33. data/test/test_xslt_transforms.rb +30 -0
  34. data/test/xml/sax/test_parser.rb +5 -0
  35. data/test/xml/test_node.rb +9 -1
  36. metadata +106 -80
@@ -62,12 +62,10 @@ import org.xml.sax.ext.DefaultHandler2;
62
62
  * @author Yoko Harada <yokolet@gmail.com>
63
63
  */
64
64
  public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler {
65
-
66
- boolean inCDATA = false;
67
-
68
- private Ruby ruby;
69
- private RubyClass attrClass;
70
- private IRubyObject object;
65
+ private StringBuffer buffer;
66
+ private final Ruby ruby;
67
+ private final RubyClass attrClass;
68
+ private final IRubyObject object;
71
69
 
72
70
  /**
73
71
  * Stores parse errors with the most-recent error last.
@@ -235,8 +233,11 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler {
235
233
 
236
234
  @Override
237
235
  public void characters(char[] ch, int start, int length) throws SAXException {
238
- String target = inCDATA ? "cdata_block" : "characters";
239
- call(target, ruby.newString(new String(ch, start, length)));
236
+ if (buffer != null) {
237
+ buffer.append(new String(ch, start, length));
238
+ } else {
239
+ call("characters", ruby.newString(new String(ch, start, length)));
240
+ }
240
241
  }
241
242
 
242
243
  @Override
@@ -246,12 +247,13 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler {
246
247
 
247
248
  @Override
248
249
  public void startCDATA() throws SAXException {
249
- inCDATA = true;
250
+ buffer = new StringBuffer();
250
251
  }
251
252
 
252
253
  @Override
253
254
  public void endCDATA() throws SAXException {
254
- inCDATA = false;
255
+ call("cdata_block", ruby.newString(buffer.toString()));
256
+ buffer = null;
255
257
  }
256
258
 
257
259
  @Override
@@ -63,6 +63,7 @@ import nokogiri.XmlNode;
63
63
  import nokogiri.XmlProcessingInstruction;
64
64
  import nokogiri.XmlText;
65
65
 
66
+ import org.jcodings.specific.UTF8Encoding;
66
67
  import org.jruby.Ruby;
67
68
  import org.jruby.RubyArray;
68
69
  import org.jruby.RubyClass;
@@ -179,7 +180,7 @@ public class NokogiriHelpers {
179
180
 
180
181
  public static IRubyObject stringOrNil(Ruby runtime, String s) {
181
182
  if (s == null) return runtime.getNil();
182
- return RubyString.newString(runtime, s);
183
+ return convertJavaStringToRuby(runtime, s);
183
184
  }
184
185
 
185
186
  public static IRubyObject stringOrNil(Ruby runtime, byte[] bytes) {
@@ -189,7 +190,16 @@ public class NokogiriHelpers {
189
190
 
190
191
  public static IRubyObject stringOrBlank(Ruby runtime, String s) {
191
192
  if (s == null) return runtime.newString();
192
- return RubyString.newString(runtime, s);
193
+ return convertJavaStringToRuby(runtime, s);
194
+ }
195
+
196
+ private static IRubyObject convertJavaStringToRuby(Ruby runtime, String str) {
197
+ if (runtime.is1_9()) {
198
+ ByteList bytes = new ByteList(str.getBytes(RubyEncoding.UTF8), UTF8Encoding.INSTANCE);
199
+ return RubyString.newString(runtime, bytes);
200
+ } else {
201
+ return RubyString.newString(runtime, str);
202
+ }
193
203
  }
194
204
 
195
205
  /**
@@ -111,7 +111,7 @@ public class XmlDomParserContext extends ParserContext {
111
111
  "org.apache.xerces.parsers.XIncludeParserConfiguration");
112
112
  }
113
113
 
114
- parser = new XmlDomParser(options);
114
+ parser = new NokogiriDomParser(options);
115
115
  parser.setErrorHandler(errorHandler);
116
116
 
117
117
  // Fix for Issue#586. This limits entity expansion up to 100000 and nodes up to 3000.
@@ -27,6 +27,7 @@ end
27
27
 
28
28
  if RbConfig::MAKEFILE_CONFIG['CC'] =~ /mingw/
29
29
  $CFLAGS << " -DIN_LIBXML"
30
+ $LIBS << " -lz" # TODO why is this necessary?
30
31
  end
31
32
 
32
33
  if RbConfig::MAKEFILE_CONFIG['CC'] =~ /gcc/
@@ -109,6 +109,10 @@ static VALUE serialize(VALUE self, VALUE xmlobj)
109
109
  return rval ;
110
110
  }
111
111
 
112
+ static void swallow_superfluous_xml_errors(void * userdata, xmlErrorPtr error, ...)
113
+ {
114
+ }
115
+
112
116
  /*
113
117
  * call-seq:
114
118
  * transform(document, params = [])
@@ -126,12 +130,13 @@ static VALUE serialize(VALUE self, VALUE xmlobj)
126
130
  */
127
131
  static VALUE transform(int argc, VALUE* argv, VALUE self)
128
132
  {
129
- VALUE xmldoc, paramobj ;
133
+ VALUE xmldoc, paramobj, errstr, exception ;
130
134
  xmlDocPtr xml ;
131
135
  xmlDocPtr result ;
132
136
  nokogiriXsltStylesheetTuple *wrapper;
133
137
  const char** params ;
134
138
  long param_len, j ;
139
+ int parse_error_occurred ;
135
140
 
136
141
  rb_scan_args(argc, argv, "11", &xmldoc, &paramobj);
137
142
  if (NIL_P(paramobj)) { paramobj = rb_ary_new2(0L) ; }
@@ -158,10 +163,22 @@ static VALUE transform(int argc, VALUE* argv, VALUE self)
158
163
  }
159
164
  params[param_len] = 0 ;
160
165
 
166
+ errstr = rb_str_new(0, 0);
167
+ xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
168
+ xmlSetGenericErrorFunc(NULL, (xmlGenericErrorFunc)&swallow_superfluous_xml_errors);
169
+
161
170
  result = xsltApplyStylesheet(wrapper->ss, xml, params);
162
171
  free(params);
163
172
 
164
- if (!result) rb_raise(rb_eRuntimeError, "could not perform xslt transform on document");
173
+ xsltSetGenericErrorFunc(NULL, NULL);
174
+ xmlSetGenericErrorFunc(NULL, NULL);
175
+
176
+ parse_error_occurred = (Qfalse == rb_funcall(errstr, rb_intern("empty?"), 0));
177
+
178
+ if (parse_error_occurred) {
179
+ exception = rb_exc_new3(rb_eRuntimeError, errstr);
180
+ rb_exc_raise(exception);
181
+ }
165
182
 
166
183
  return Nokogiri_wrap_xml_document((VALUE)0, result) ;
167
184
  }
Binary file (data/lib/nokogiri/nokogiri.jar): contents not shown
@@ -1,6 +1,6 @@
1
1
  module Nokogiri
2
2
  # The version of Nokogiri you are using
3
- VERSION = '1.5.6.rc1'
3
+ VERSION = '1.5.6.rc2'
4
4
 
5
5
  class VersionInfo # :nodoc:
6
6
  def jruby?
@@ -299,20 +299,7 @@ module Nokogiri
299
299
  def add_previous_sibling node_or_tags
300
300
  raise ArgumentError.new("A document may not have multiple root nodes.") if parent.is_a?(XML::Document) && !node_or_tags.is_a?(XML::ProcessingInstruction)
301
301
 
302
- node_or_tags = coerce(node_or_tags)
303
- if node_or_tags.is_a?(XML::NodeSet)
304
- if text?
305
- pivot = Nokogiri::XML::Node.new 'dummy', document
306
- add_previous_sibling_node pivot
307
- else
308
- pivot = self
309
- end
310
- node_or_tags.each { |n| pivot.send :add_previous_sibling_node, n }
311
- pivot.unlink if text?
312
- else
313
- add_previous_sibling_node node_or_tags
314
- end
315
- node_or_tags
302
+ add_sibling :previous, node_or_tags
316
303
  end
317
304
 
318
305
  ###
@@ -325,20 +312,7 @@ module Nokogiri
325
312
  def add_next_sibling node_or_tags
326
313
  raise ArgumentError.new("A document may not have multiple root nodes.") if parent.is_a?(XML::Document)
327
314
 
328
- node_or_tags = coerce(node_or_tags)
329
- if node_or_tags.is_a?(XML::NodeSet)
330
- if text?
331
- pivot = Nokogiri::XML::Node.new 'dummy', document
332
- add_next_sibling_node pivot
333
- else
334
- pivot = self
335
- end
336
- node_or_tags.reverse_each { |n| pivot.send :add_next_sibling_node, n }
337
- pivot.unlink if text?
338
- else
339
- add_next_sibling_node node_or_tags
340
- end
341
- node_or_tags
315
+ add_sibling :next, node_or_tags
342
316
  end
343
317
 
344
318
  ####
@@ -761,12 +735,7 @@ module Nokogiri
761
735
  # See Node#write_to for a list of +options+. For formatted output,
762
736
  # use Node#to_xhtml instead.
763
737
  def to_html options = {}
764
- # FIXME: this is a hack around broken libxml versions
765
- return dump_html if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
766
-
767
- options[:save_with] |= SaveOptions::DEFAULT_HTML if options[:save_with]
768
- options[:save_with] = SaveOptions::DEFAULT_HTML unless options[:save_with]
769
- serialize(options)
738
+ to_format SaveOptions::DEFAULT_HTML, options
770
739
  end
771
740
 
772
741
  ###
@@ -787,12 +756,7 @@ module Nokogiri
787
756
  #
788
757
  # See Node#write_to for a list of +options+
789
758
  def to_xhtml options = {}
790
- # FIXME: this is a hack around broken libxml versions
791
- return dump_html if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
792
-
793
- options[:save_with] |= SaveOptions::DEFAULT_XHTML if options[:save_with]
794
- options[:save_with] = SaveOptions::DEFAULT_XHTML unless options[:save_with]
795
- serialize(options)
759
+ to_format SaveOptions::DEFAULT_XHTML, options
796
760
  end
797
761
 
798
762
  ###
@@ -835,11 +799,7 @@ module Nokogiri
835
799
  #
836
800
  # See Node#write_to for a list of +options+
837
801
  def write_html_to io, options = {}
838
- # FIXME: this is a hack around broken libxml versions
839
- return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
840
-
841
- options[:save_with] ||= SaveOptions::DEFAULT_HTML
842
- write_to io, options
802
+ write_format_to SaveOptions::DEFAULT_HTML, io, options
843
803
  end
844
804
 
845
805
  ###
@@ -847,11 +807,7 @@ module Nokogiri
847
807
  #
848
808
  # See Node#write_to for a list of +options+
849
809
  def write_xhtml_to io, options = {}
850
- # FIXME: this is a hack around broken libxml versions
851
- return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
852
-
853
- options[:save_with] ||= SaveOptions::DEFAULT_XHTML
854
- write_to io, options
810
+ write_format_to SaveOptions::DEFAULT_XHTML, io, options
855
811
  end
856
812
 
857
813
  ###
@@ -898,6 +854,43 @@ module Nokogiri
898
854
 
899
855
  private
900
856
 
857
+ def add_sibling next_or_previous, node_or_tags
858
+ impl = (next_or_previous == :next) ? :add_next_sibling_node : :add_previous_sibling_node
859
+ iter = (next_or_previous == :next) ? :reverse_each : :each
860
+
861
+ node_or_tags = coerce node_or_tags
862
+ if node_or_tags.is_a?(XML::NodeSet)
863
+ if text?
864
+ pivot = Nokogiri::XML::Node.new 'dummy', document
865
+ send impl, pivot
866
+ else
867
+ pivot = self
868
+ end
869
+ node_or_tags.send(iter) { |n| pivot.send impl, n }
870
+ pivot.unlink if text?
871
+ else
872
+ send impl, node_or_tags
873
+ end
874
+ node_or_tags
875
+ end
876
+
877
+ def to_format save_option, options
878
+ # FIXME: this is a hack around broken libxml versions
879
+ return dump_html if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
880
+
881
+ options[:save_with] |= save_option if options[:save_with]
882
+ options[:save_with] = save_option unless options[:save_with]
883
+ serialize(options)
884
+ end
885
+
886
+ def write_format_to save_option, io, options
887
+ # FIXME: this is a hack around broken libxml versions
888
+ return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
889
+
890
+ options[:save_with] ||= save_option
891
+ write_to io, options
892
+ end
893
+
901
894
  def extract_params params # :nodoc:
902
895
  # Pop off our custom function handler if it exists
903
896
  handler = params.find { |param|
@@ -68,6 +68,7 @@ module Nokogiri
68
68
 
69
69
  # Create a new Parser with +doc+ and +encoding+
70
70
  def initialize doc = Nokogiri::XML::SAX::Document.new, encoding = 'UTF-8'
71
+ check_encoding(encoding)
71
72
  @encoding = encoding
72
73
  @document = doc
73
74
  @warned = false
@@ -87,6 +88,7 @@ module Nokogiri
87
88
  ###
88
89
  # Parse given +io+
89
90
  def parse_io io, encoding = 'ASCII'
91
+ check_encoding(encoding)
90
92
  @encoding = encoding
91
93
  ctx = ParserContext.io(io, ENCODINGS[encoding])
92
94
  yield ctx if block_given?
@@ -109,6 +111,11 @@ module Nokogiri
109
111
  yield ctx if block_given?
110
112
  ctx.parse_with self
111
113
  end
114
+
115
+ private
116
+ def check_encoding(encoding)
117
+ raise ArgumentError.new("'#{encoding}' is not a valid encoding") unless ENCODINGS[encoding]
118
+ end
112
119
  end
113
120
  end
114
121
  end
@@ -24,7 +24,7 @@ module Nokogiri
24
24
  def parse string, modules = {}
25
25
  modules.each do |url, klass|
26
26
  XSLT.register url, klass
27
- end
27
+ end
28
28
 
29
29
  if Nokogiri.jruby?
30
30
  Stylesheet.parse_stylesheet_doc(XML.parse(string), string)
@@ -4,10 +4,10 @@ HOST = Rake::ExtensionCompiler.mingw_host
4
4
 
5
5
  require 'mini_portile'
6
6
  $recipes = {}
7
- $recipes[:zlib] = MiniPortile.new "zlib", "1.2.5"
7
+ $recipes[:zlib] = MiniPortile.new "zlib", "1.2.7"
8
8
  $recipes[:libiconv] = MiniPortile.new "libiconv", "1.13.1"
9
- $recipes[:libxml2] = MiniPortile.new "libxml2", "2.7.7"
10
- $recipes[:libxslt] = MiniPortile.new "libxslt", "1.1.26"
9
+ $recipes[:libxml2] = MiniPortile.new "libxml2", "2.7.7"
10
+ $recipes[:libxslt] = MiniPortile.new "libxslt", "1.1.26"
11
11
  $recipes.each { |_, recipe| recipe.host = HOST }
12
12
 
13
13
  file "lib/nokogiri/nokogiri.rb" do
@@ -369,6 +369,29 @@ eohtml
369
369
  assert_equal('Hello world!', node.inner_text.strip)
370
370
  end
371
371
 
372
+ def test_doc_type
373
+ html = Nokogiri::HTML(<<-eohtml)
374
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
375
+ <html xmlns="http://www.w3.org/1999/xhtml">
376
+ <body>
377
+ <p>Rainbow Dash</p>
378
+ </body>
379
+ </html>
380
+ eohtml
381
+ assert_equal "html", html.internal_subset.name
382
+ assert_equal "-//W3C//DTD XHTML 1.1//EN", html.internal_subset.external_id
383
+ assert_equal "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", html.internal_subset.system_id
384
+ assert_equal "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">", html.to_s[0,97]
385
+ end
386
+
387
+ def test_content_size
388
+ html = Nokogiri::HTML('<div>
389
+ </div>')
390
+ assert_equal 1, html.content.size
391
+ assert_equal 1, html.content.split("").size
392
+ assert_equal "\n", html.content
393
+ end
394
+
372
395
  def test_find_by_xpath
373
396
  found = @html.xpath('//div/a')
374
397
  assert_equal 3, found.length
@@ -189,8 +189,37 @@ encoding="iso-8859-1" indent="yes"/>
189
189
  Nokogiri::XSLT.quote_params(params.to_a.flatten)))
190
190
  check_params result_doc, params
191
191
  end
192
+
193
+ def test_xslt_paramaters
194
+ xslt_str = <<-EOX
195
+ <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" >
196
+ <xsl:template match="/">
197
+ <xsl:value-of select="$foo" />
198
+ </xsl:template>
199
+ </xsl:stylesheet>
200
+ EOX
201
+
202
+ xslt = Nokogiri::XSLT(xslt_str)
203
+ doc = Nokogiri::XML("<root />")
204
+ assert_match %r{bar}, xslt.transform(doc, Nokogiri::XSLT.quote_params('foo' => 'bar')).to_s
205
+ end
206
+
207
+ def test_xslt_transform_error
208
+ xslt_str = <<-EOX
209
+ <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" >
210
+ <xsl:template match="/">
211
+ <xsl:value-of select="$foo" />
212
+ </xsl:template>
213
+ </xsl:stylesheet>
214
+ EOX
215
+
216
+ xslt = Nokogiri::XSLT(xslt_str)
217
+ doc = Nokogiri::XML("<root />")
218
+ assert_raises(RuntimeError) { xslt.transform(doc) }
219
+ end
192
220
  end
193
221
 
222
+
194
223
  def test_xslt_parse_error
195
224
  xslt_str = <<-EOX
196
225
  <xsl:stylesheet version="1.0"
@@ -210,6 +239,7 @@ encoding="iso-8859-1" indent="yes"/>
210
239
  assert_raises(RuntimeError) { Nokogiri::XSLT.parse(xslt_str) }
211
240
  end
212
241
 
242
+
213
243
  def test_passing_a_non_document_to_transform
214
244
  xsl = Nokogiri::XSLT('<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"></xsl:stylesheet>')
215
245
  assert_raises(ArgumentError) { xsl.transform("<div></div>") }
@@ -250,6 +250,11 @@ module Nokogiri
250
250
  assert_raises(ArgumentError) { @parser.parse_memory(nil) }
251
251
  end
252
252
 
253
+ def test_bad_encoding_args
254
+ assert_raises(ArgumentError) { XML::SAX::Parser.new(Doc.new, 'not an encoding') }
255
+ assert_raises(ArgumentError) { @parser.parse_io(StringIO.new('<root/>'), 'not an encoding')}
256
+ end
257
+
253
258
  def test_ctag
254
259
  @parser.parse_memory(<<-eoxml)
255
260
  <p id="asdfasdf">
@@ -1031,7 +1031,15 @@ EOXML
1031
1031
  subject = Nokogiri::XML::Node.new 'foo', document
1032
1032
  ns = subject.add_namespace nil, 'bar'
1033
1033
  subject.namespace = ns
1034
- assert_match subject.to_xml, /xmlns="bar"/
1034
+ assert_match(/xmlns="bar"/, subject.to_xml)
1035
+ end
1036
+
1037
+ def test_text_node_colon
1038
+ document = Nokogiri::XML::Document.new
1039
+ root = Nokogiri::XML::Node.new 'foo', document
1040
+ document.root = root
1041
+ root << "<a>hello:with_colon</a>"
1042
+ assert_match(/hello:with_colon/, document.to_xml)
1035
1043
  end
1036
1044
  end
1037
1045
  end