nokogiri 1.5.6.rc1-java → 1.5.6.rc2-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (36)
  1. data/CHANGELOG.ja.rdoc +3 -0
  2. data/CHANGELOG.rdoc +3 -0
  3. data/Manifest.txt +8 -4
  4. data/README.ja.rdoc +1 -1
  5. data/README.rdoc +1 -1
  6. data/ROADMAP.md +3 -0
  7. data/Rakefile +26 -7
  8. data/build_all +40 -27
  9. data/ext/java/nokogiri/HtmlDocument.java +26 -0
  10. data/ext/java/nokogiri/XmlDocument.java +17 -4
  11. data/ext/java/nokogiri/XmlDocumentFragment.java +1 -39
  12. data/ext/java/nokogiri/XmlNode.java +3 -2
  13. data/ext/java/nokogiri/XmlSaxPushParser.java +55 -53
  14. data/ext/java/nokogiri/XsltStylesheet.java +4 -2
  15. data/ext/java/nokogiri/internals/ClosedStreamException.java +10 -0
  16. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +2 -2
  17. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +151 -0
  18. data/ext/java/nokogiri/internals/{XmlDomParser.java → NokogiriDomParser.java} +25 -14
  19. data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +109 -0
  20. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +123 -0
  21. data/ext/java/nokogiri/internals/NokogiriHandler.java +12 -10
  22. data/ext/java/nokogiri/internals/NokogiriHelpers.java +12 -2
  23. data/ext/java/nokogiri/internals/XmlDomParserContext.java +1 -1
  24. data/ext/nokogiri/extconf.rb +1 -0
  25. data/ext/nokogiri/xslt_stylesheet.c +19 -2
  26. data/lib/nokogiri/nokogiri.jar +0 -0
  27. data/lib/nokogiri/version.rb +1 -1
  28. data/lib/nokogiri/xml/node.rb +43 -50
  29. data/lib/nokogiri/xml/sax/parser.rb +7 -0
  30. data/lib/nokogiri/xslt.rb +1 -1
  31. data/tasks/cross_compile.rb +3 -3
  32. data/test/html/test_document.rb +23 -0
  33. data/test/test_xslt_transforms.rb +30 -0
  34. data/test/xml/sax/test_parser.rb +5 -0
  35. data/test/xml/test_node.rb +9 -1
  36. metadata +106 -80
@@ -6,6 +6,9 @@
6
6
 
7
7
  * JRuby で '#' で始まる文字列を名前とする EntityReference を作ろうとすると INVALID_CHARACTER_ERR という例外がはっせいする。 #719
8
8
  * JRuby では Nodeのサブクラスのnamespaceを正しく文字列に変換しない。 #715
9
+ * Nokogiri now detects XSLT transform errors. #731 (Thanks, Justin Fitzsimmons!)
10
+ * Raise an ArgumentError if an invalid encoding is passed to the SAX parser. #756 (Thanks, Bradley Schaefer!)
11
+ * JRuby Node#content now renders newlines properly. #737 (Thanks, Piotr Szmielew!)
9
12
 
10
13
 
11
14
  == 1.5.5 / 2012年6月24日
@@ -6,6 +6,9 @@
6
6
 
7
7
  * JRuby raises INVALID_CHARACTER_ERR exception when EntityReference name starts with '#'. #719
8
8
  * JRuby doesn't coerce namespaces out of strings on a direct subclass of Node. #715
9
+ * Nokogiri now detects XSLT transform errors. #731 (Thanks, Justin Fitzsimmons!)
10
+ * Raise an ArgumentError if an invalid encoding is passed to the SAX parser. #756 (Thanks, Bradley Schaefer!)
11
+ * JRuby Node#content now renders newlines properly. #737 (Thanks, Piotr Szmielew!)
9
12
 
10
13
 
11
14
  == 1.5.5 / 2012-06-24
@@ -43,8 +43,13 @@ ext/java/nokogiri/XmlSyntaxError.java
43
43
  ext/java/nokogiri/XmlText.java
44
44
  ext/java/nokogiri/XmlXpathContext.java
45
45
  ext/java/nokogiri/XsltStylesheet.java
46
+ ext/java/nokogiri/internals/ClosedStreamException.java
46
47
  ext/java/nokogiri/internals/HtmlDomParserContext.java
48
+ ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java
47
49
  ext/java/nokogiri/internals/NokogiriDocumentCache.java
50
+ ext/java/nokogiri/internals/NokogiriDomParser.java
51
+ ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java
52
+ ext/java/nokogiri/internals/NokogiriEntityResolver.java
48
53
  ext/java/nokogiri/internals/NokogiriErrorHandler.java
49
54
  ext/java/nokogiri/internals/NokogiriHandler.java
50
55
  ext/java/nokogiri/internals/NokogiriHelpers.java
@@ -62,7 +67,6 @@ ext/java/nokogiri/internals/ReaderNode.java
62
67
  ext/java/nokogiri/internals/SaveContextVisitor.java
63
68
  ext/java/nokogiri/internals/SchemaErrorHandler.java
64
69
  ext/java/nokogiri/internals/XmlDeclHandler.java
65
- ext/java/nokogiri/internals/XmlDomParser.java
66
70
  ext/java/nokogiri/internals/XmlDomParserContext.java
67
71
  ext/java/nokogiri/internals/XmlSaxParser.java
68
72
  ext/java/nokogiri/internals/XsltExtensionFunction.java
@@ -230,13 +234,13 @@ test/files/snuggles.xml
230
234
  test/files/staff.dtd
231
235
  test/files/staff.xml
232
236
  test/files/staff.xslt
237
+ test/files/test_document_url/bar.xml
238
+ test/files/test_document_url/document.dtd
239
+ test/files/test_document_url/document.xml
233
240
  test/files/tlm.html
234
241
  test/files/to_be_xincluded.xml
235
242
  test/files/valid_bar.xml
236
243
  test/files/xinclude.xml
237
- test/files/test_document_url/bar.xml
238
- test/files/test_document_url/document.dtd
239
- test/files/test_document_url/document.xml
240
244
  test/helper.rb
241
245
  test/html/sax/test_parser.rb
242
246
  test/html/sax/test_parser_context.rb
@@ -1,4 +1,4 @@
1
- = Nokogiri (鋸)
1
+ = Nokogiri (鋸) {<img src="https://secure.travis-ci.org/sparklemotion/nokogiri.png?rvm=1.9.3" />}[http://travis-ci.org/sparklemotion/nokogiri] {<img src="https://codeclimate.com/badge.png" />}[https://codeclimate.com/github/sparklemotion/nokogiri]
2
2
 
3
3
  * http://nokogiri.org/
4
4
  * http://github.com/sparklemotion/nokogiri/wikis
@@ -1,4 +1,4 @@
1
- = Nokogiri {<img src="https://secure.travis-ci.org/sparklemotion/nokogiri.png?rvm=1.9.3" />}[http://travis-ci.org/sparklemotion/nokogiri]
1
+ = Nokogiri {<img src="https://secure.travis-ci.org/sparklemotion/nokogiri.png?rvm=1.9.3" />}[http://travis-ci.org/sparklemotion/nokogiri] {<img src="https://codeclimate.com/badge.png" />}[https://codeclimate.com/github/sparklemotion/nokogiri]
2
2
 
3
3
  * http://nokogiri.org
4
4
  * http://github.com/sparklemotion/nokogiri/wikis
data/ROADMAP.md CHANGED
@@ -69,6 +69,9 @@
69
69
  * we should standardize on a hash of options for these and other calls
70
70
  * what should NodeSet#xpath return?
71
71
  * https://github.com/sparklemotion/nokogiri/issues/656
72
+ * also, clean up or unify the implementations of #xpath-and-friends in Node and NodeSet
73
+ * implementations are very similar, but no shared code :(
74
+ * decorate nodes in a consistent manner
72
75
 
73
76
  ## Encoding
74
77
 
data/Rakefile CHANGED
@@ -46,14 +46,10 @@ HOE = Hoe.spec 'nokogiri' do
46
46
  ["mini_portile", ">= 0.2.2"],
47
47
  ["minitest", "~> 2.2.2"],
48
48
  ["rake", ">= 0.9"],
49
- ["rake-compiler", "= 0.8.0"]
49
+ ["rake-compiler", "= 0.8.0"],
50
+ ["racc", ">= 1.4.6"],
51
+ ["rexical", ">= 1.0.5"]
50
52
  ]
51
- if ! java?
52
- self.extra_dev_deps += [
53
- ["racc", ">= 1.4.6"],
54
- ["rexical", ">= 1.0.5"]
55
- ]
56
- end
57
53
 
58
54
  if java?
59
55
  self.spec_extras = { :platform => 'java' }
@@ -118,6 +114,20 @@ desc "Generate css/parser.rb and css/tokenizer.rex"
118
114
  task 'generate' => [GENERATED_PARSER, GENERATED_TOKENIZER]
119
115
  task 'gem:spec' => 'generate' if Rake::Task.task_defined?("gem:spec")
120
116
 
117
+ # This is a big hack to make sure that the racc and rexical
118
+ # dependencies in the Gemfile are constrained to ruby platforms
119
+ # (i.e. MRI and Rubinius). There's no way to do that through hoe,
120
+ # and any solution will require changing hoe and hoe-bundler.
121
+ old_gemfile_task = Rake::Task['bundler:gemfile']
122
+ task 'bundler:gemfile' do
123
+ old_gemfile_task.invoke
124
+
125
+ lines = File.open('Gemfile', 'r') { |f| f.readlines }.map do |line|
126
+ line =~ /racc|rexical/ ? "#{line.strip}, :platform => :ruby" : line
127
+ end
128
+ File.open('Gemfile', 'w') { |f| lines.each { |line| f.puts line } }
129
+ end
130
+
121
131
  file GENERATED_PARSER => "lib/nokogiri/css/parser.y" do |t|
122
132
  racc = RbConfig::CONFIG['target_os'] =~ /mswin32/ ? '' : `which racc`.strip
123
133
  racc = "#{::RbConfig::CONFIG['bindir']}/racc" if racc.empty?
@@ -149,9 +159,18 @@ task :java_debug do
149
159
  ENV['JAVA_OPTS'] = '-Xdebug -Xrunjdwp:transport=dt_socket,address=8000,server=y,suspend=y' if java? && ENV['JAVA_DEBUG']
150
160
  end
151
161
 
162
+ if java?
163
+ task :test_18 => :test
164
+ task :test_19 do
165
+ ENV['JRUBY_OPTS'] = "--1.9"
166
+ Rake::Task["test"].invoke
167
+ end
168
+ end
169
+
152
170
  Rake::Task[:test].prerequisites << :compile
153
171
  Rake::Task[:test].prerequisites << :java_debug
154
172
  Rake::Task[:test].prerequisites << :check_extra_deps unless java?
173
+
155
174
  if Hoe.plugins.include?(:debugging)
156
175
  ['valgrind', 'valgrind:mem', 'valgrind:mem0'].each do |task_name|
157
176
  Rake::Task["test:#{task_name}"].prerequisites << :compile
data/build_all CHANGED
@@ -2,26 +2,32 @@
2
2
  #
3
3
  # script to build gems for all relevant platforms:
4
4
  # - MRI et al (standard gem)
5
- # - windows (x86-mingw32 and x86-msin32-60)
5
+ # - windows (x86-mingw32 and x86-mswin32-60)
6
6
  # - jruby
7
7
  #
8
- # prerequisite is the mingw32 packages.
9
- # on ubuntu, `sudo apt-get install mingw32`
10
- # for others, read up at https://github.com/luislavena/rake-compiler
8
+ # here's what I recommend for building all the gems:
11
9
  #
12
- # on ubuntus 11 and later, you may have issues with building
13
- # rake-compiler's rubies against openssl v2. Just comment the lines
14
- # out from ossl_ssl.c and you'll be fine.
10
+ # 1. set up a vagrant VM guest running ubuntu lucid 32-bit.
11
+ # 2. install rvm, and install 1.8.7, 1.9.3 and jruby.
12
+ # 3. `sudo apt-get install mingw32`
15
13
  #
16
- # you may have issues with Pathname conversion to String in bundler. Add this to the offending bundler file:
14
+ # as you build, you may run into these problems:
17
15
  #
18
- # class Pathname
19
- # def to_str
20
- # to_s
21
- # end
22
- # end
16
+ # - on ubuntus 11 and later, you may have issues with building
17
+ # rake-compiler's rubies against openssl v2. Just comment the lines
18
+ # out from ossl_ssl.c and you'll be fine.
23
19
  #
24
- # you may also have to remove some of the symbol checks in extconf.rb ... crazy.
20
+ # - you may have issues with Pathname conversion to String in
21
+ # bundler. Add this to the offending bundler file:
22
+ #
23
+ # class Pathname
24
+ # def to_str
25
+ # to_s
26
+ # end
27
+ # end
28
+ #
29
+ # - you may also have to hack rubygems.rb to eliminate a reference to
30
+ # RUBY_ENGINE
25
31
  #
26
32
 
27
33
  HOST=
@@ -53,16 +59,23 @@ rm -rf gems
53
59
  mkdir -p gems
54
60
 
55
61
  # windows
56
- # rvm_use 1.8.7
57
- # if [[ ! -a /home/mike/.rake-compiler/ruby/ruby-1.8.7-p330/lib/ruby/1.8.7/x86_64-linux/rbconfig.rb ]] ; then
58
- # bundle exec rake-compiler cross-ruby VERSION=1.8.7-p330
59
- # fi
60
- # if [[ ! -a /home/mike/.rake-compiler/ruby/ruby-1.9.2-p136/lib/ruby/1.9.1/x86_64-linux/rbconfig.rb ]] ; then
61
- # bundle exec rake-compiler cross-ruby VERSION=1.9.2-p136
62
- # fi
63
- # bundle exec rake cross
64
- # bundle exec rake gem:windows
65
- # cp -v pkg/nokogiri*x86-{mingw32,mswin32}*.gem gems
62
+ platform=$(uname -i)
63
+ if [[ $platform =~ "64" ]] ; then
64
+ echo ""
65
+ echo "ERROR: You need to build the windows gem on a 32-bit machine!"
66
+ echo ""
67
+ exit 1
68
+ fi
69
+ rvm_use 1.8.7
70
+ if [[ ! -a ${HOME}/.rake-compiler/ruby/ruby-1.8.7-p330/lib/ruby/1.8.7/x86_64-linux/rbconfig.rb ]] ; then
71
+ bundle exec rake-compiler cross-ruby VERSION=1.8.7-p330
72
+ fi
73
+ if [[ ! -a ${HOME}/.rake-compiler/ruby/ruby-1.9.2-p136/lib/ruby/1.9.1/x86_64-linux/rbconfig.rb ]] ; then
74
+ bundle exec rake-compiler cross-ruby VERSION=1.9.2-p136
75
+ fi
76
+ bundle exec rake cross
77
+ bundle exec rake gem:windows
78
+ cp -v pkg/nokogiri*x86-{mingw32,mswin32}*.gem gems
66
79
 
67
80
  # MRI
68
81
  rvm_use 1.8.7
@@ -70,11 +83,11 @@ bundle exec rake gem
70
83
  cp -v pkg/nokogiri*.gem gems # should only be one at this point in the script
71
84
 
72
85
  # jruby
73
- rvm_use jruby-1.6.5
86
+ rvm_use jruby
74
87
  bundle install --quiet --local || bundle install
75
88
  bundle exec rake clean clobber
76
89
  rvm_use 1.8.7
77
90
  bundle exec rake generate
78
- rvm_use jruby-1.6.5
91
+ rvm_use jruby
79
92
  bundle exec rake gem
80
- cp -v pkg/nokogiri*java.gem gems
93
+ cp -v pkg/nokogiri*java.gem gems
@@ -55,6 +55,10 @@ import org.w3c.dom.NodeList;
55
55
  */
56
56
  @JRubyClass(name="Nokogiri::HTML::Document", parent="Nokogiri::XML::Document")
57
57
  public class HtmlDocument extends XmlDocument {
58
+ private static final String DEFAULT_CONTENT_TYPE = "html";
59
+ private static final String DEFAULT_PUBLIC_ID = "-//W3C//DTD HTML 4.01//EN";
60
+ private static final String DEFAULT_SYTEM_ID = "http://www.w3.org/TR/html4/strict.dtd";
61
+
58
62
  private String parsed_encoding = null;
59
63
 
60
64
  public HtmlDocument(Ruby ruby, RubyClass klazz) {
@@ -82,6 +86,28 @@ public class HtmlDocument extends XmlDocument {
82
86
  return htmlDocument;
83
87
  }
84
88
 
89
+ public IRubyObject getInternalSubset(ThreadContext context) {
90
+ IRubyObject internalSubset = super.getInternalSubset(context);
91
+
92
+ // html documents are expected to have a default internal subset
93
+ // the default values are the same ones used when the following
94
+ // feature is turned on
95
+ // "http://cyberneko.org/html/features/insert-doctype"
96
+ // the reason we don't turn it on, is because it overrides the document's
97
+ // declared doctype declaration.
98
+
99
+ if (internalSubset.isNil()) {
100
+ internalSubset = XmlDtd.newEmpty(context.getRuntime(),
101
+ getDocument(),
102
+ context.getRuntime().newString(DEFAULT_CONTENT_TYPE),
103
+ context.getRuntime().newString(DEFAULT_PUBLIC_ID),
104
+ context.getRuntime().newString(DEFAULT_SYTEM_ID));
105
+ setInternalSubset(internalSubset);
106
+ }
107
+
108
+ return internalSubset;
109
+ }
110
+
85
111
  public static IRubyObject do_parse(ThreadContext context,
86
112
  IRubyObject klass,
87
113
  IRubyObject[] args) {
@@ -64,6 +64,7 @@ import org.jruby.runtime.ThreadContext;
64
64
  import org.jruby.runtime.builtin.IRubyObject;
65
65
  import org.w3c.dom.Attr;
66
66
  import org.w3c.dom.Document;
67
+ import org.w3c.dom.DocumentType;
67
68
  import org.w3c.dom.NamedNodeMap;
68
69
  import org.w3c.dom.Node;
69
70
  import org.w3c.dom.NodeList;
@@ -288,7 +289,7 @@ public class XmlDocument extends XmlNode {
288
289
 
289
290
  @JRubyMethod
290
291
  public IRubyObject encoding(ThreadContext context) {
291
- if (this.encoding == null) {
292
+ if (this.encoding == null || this.encoding.isNil()) {
292
293
  if (getDocument().getXmlEncoding() == null) {
293
294
  this.encoding = context.getRuntime().getNil();
294
295
  } else {
@@ -296,7 +297,7 @@ public class XmlDocument extends XmlNode {
296
297
  }
297
298
  }
298
299
 
299
- return this.encoding;
300
+ return this.encoding.isNil() ? this.encoding : this.encoding.asString().encode(context, context.getRuntime().newString("UTF-8"));
300
301
  }
301
302
 
302
303
  @JRubyMethod(meta = true)
@@ -438,8 +439,20 @@ public class XmlDocument extends XmlNode {
438
439
  IRubyObject dtd = (IRubyObject) node.getUserData(DTD_INTERNAL_SUBSET);
439
440
 
440
441
  if (dtd == null) {
441
- if (getDocument().getDoctype() == null) dtd = context.getRuntime().getNil();
442
- else dtd = XmlDtd.newFromInternalSubset(context.getRuntime(), getDocument());
442
+ Document document = getDocument();
443
+ if (document.getUserData(XmlDocument.DTD_RAW_DOCUMENT) != null) {
444
+ dtd = XmlDtd.newFromInternalSubset(context.getRuntime(), document);
445
+ } else if (document.getDoctype() != null) {
446
+ DocumentType docType = document.getDoctype();
447
+ dtd = XmlDtd.newEmpty(context.getRuntime(),
448
+ document,
449
+ context.getRuntime().newString(docType.getName()),
450
+ context.getRuntime().newString(docType.getPublicId()),
451
+ context.getRuntime().newString(docType.getSystemId()));
452
+ } else {
453
+ dtd = context.getRuntime().getNil();
454
+ }
455
+
443
456
  setInternalSubset(dtd);
444
457
  }
445
458
 
@@ -33,7 +33,6 @@
33
33
  package nokogiri;
34
34
 
35
35
  import static nokogiri.internals.NokogiriHelpers.getLocalNameForNamespace;
36
- import static nokogiri.internals.NokogiriHelpers.getLocalPart;
37
36
  import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
38
37
  import static nokogiri.internals.NokogiriHelpers.getPrefix;
39
38
  import static nokogiri.internals.NokogiriHelpers.isNamespace;
@@ -93,7 +92,6 @@ public class XmlDocumentFragment extends XmlNode {
93
92
  if (args.length > 1 && args[1] instanceof RubyString) {
94
93
  args[1] = trim(context, doc, (RubyString)args[1]);
95
94
  if (XmlDocumentFragment.isTag((RubyString)args[1])) {
96
- args[1] = RubyString.newString(context.getRuntime(), ignoreNamespaceIfNeeded(doc, rubyStringToString(args[1])));
97
95
  args[1] = RubyString.newString(context.getRuntime(), addNamespaceDeclIfNeeded(doc, rubyStringToString(args[1])));
98
96
  }
99
97
  }
@@ -127,46 +125,10 @@ public class XmlDocumentFragment extends XmlNode {
127
125
  if (str.startsWith("<") && str.endsWith(">")) return true;
128
126
  return false;
129
127
  }
130
-
128
+
131
129
  private static Pattern qname_pattern = Pattern.compile("[^</:>\\s]+:[^</:>=\\s]+");
132
130
  private static Pattern starttag_pattern = Pattern.compile("<[^</>]+>");
133
131
 
134
- /**
135
- * Remove the namespace from @param tag and/or the attributes in @param tag if
136
- * the namespace is the default in the current context as defined by @param doc.
137
- *
138
- * @return the normalized tag.
139
- */
140
- private static String ignoreNamespaceIfNeeded(XmlDocument doc, String tags) {
141
- if (doc.getDocument() == null) return tags;
142
- Map<String, String> rewriteTable = new HashMap<String, String>();
143
- // we have to make sure that we don't replace strings in double quotes, e.g.
144
- // 'urn:xmpp:foospec:barfoo' in '<foobar xmlns="urn:xmpp:foospec:barfoo"/>'
145
- // has to remain the same
146
- String[] parts = tags.split("\"");
147
- for (int partidx = 0; partidx < parts.length; partidx++) {
148
- if (partidx % 2 == 1)
149
- continue;
150
- Matcher matcher = qname_pattern.matcher(parts[partidx]);
151
- while(matcher.find()) {
152
- String qName = matcher.group();
153
- if (doc.getDocument().getDocumentElement() != null) {
154
- NamedNodeMap nodeMap = doc.getDocument().getDocumentElement().getAttributes();
155
- if (!isNamespaceDefined(qName, nodeMap)) {
156
- rewriteTable.put(qName, getLocalPart(qName));
157
- }
158
- } else {
159
- rewriteTable.put(qName, getLocalPart(qName));
160
- }
161
- }
162
- }
163
- Set<String> keys = rewriteTable.keySet();
164
- for (String key : keys) {
165
- tags = tags.replace(key, rewriteTable.get(key));
166
- }
167
- return tags;
168
- }
169
-
170
132
  private static boolean isNamespaceDefined(String qName, NamedNodeMap nodeMap) {
171
133
  if (isNamespace(qName.intern())) return true;
172
134
  for (int i=0; i < nodeMap.getLength(); i++) {
@@ -448,6 +448,7 @@ public class XmlNode extends RubyObject {
448
448
  public void relink_namespace(ThreadContext context) {
449
449
  if (node instanceof Element) {
450
450
  Element e = (Element) node;
451
+ e.getOwnerDocument().setStrictErrorChecking(false);
451
452
  e.getOwnerDocument().renameNode(e, e.lookupNamespaceURI(e.getPrefix()), e.getNodeName());
452
453
 
453
454
  if (e.hasAttributes()) {
@@ -550,7 +551,7 @@ public class XmlNode extends RubyObject {
550
551
  str = NokogiriHelpers.getLocalPart(str);
551
552
  }
552
553
  if (str == null) str = "";
553
- name = context.getRuntime().newString(str);
554
+ name = NokogiriHelpers.stringOrBlank(context.getRuntime(), str);
554
555
  return name;
555
556
  }
556
557
 
@@ -815,7 +816,7 @@ public class XmlNode extends RubyObject {
815
816
  if (node == null) {
816
817
  textContent = "";
817
818
  } else {
818
- textContent = ((Document)this.node).getDocumentElement().getTextContent().trim();
819
+ textContent = ((Document)this.node).getDocumentElement().getTextContent();
819
820
  }
820
821
  } else {
821
822
  textContent = this.node.getTextContent();
@@ -35,16 +35,16 @@ package nokogiri;
35
35
  import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
36
36
  import static org.jruby.javasupport.util.RuntimeHelpers.invoke;
37
37
 
38
+ import java.io.ByteArrayInputStream;
38
39
  import java.io.IOException;
39
- import java.io.InputStream;
40
- import java.io.OutputStream;
41
- import java.nio.channels.Channels;
42
- import java.nio.channels.Pipe;
43
40
  import java.util.concurrent.Callable;
44
41
  import java.util.concurrent.ExecutorService;
45
42
  import java.util.concurrent.Executors;
43
+ import java.util.concurrent.Future;
46
44
  import java.util.concurrent.FutureTask;
47
45
 
46
+ import nokogiri.internals.ClosedStreamException;
47
+ import nokogiri.internals.NokogiriBlockingQueueInputStream;
48
48
  import nokogiri.internals.ParserContext;
49
49
 
50
50
  import org.jruby.Ruby;
@@ -69,8 +69,7 @@ public class XmlSaxPushParser extends RubyObject {
69
69
  ParserContext.Options options;
70
70
  IRubyObject optionsRuby;
71
71
  IRubyObject saxParser;
72
- OutputStream ostream = null;
73
- InputStream istream = null;
72
+ NokogiriBlockingQueueInputStream stream;
74
73
  ParserTask parserTask = null;
75
74
  FutureTask<XmlSaxParserContext> futureTask = null;
76
75
  ExecutorService executor = null;
@@ -120,53 +119,43 @@ public class XmlSaxPushParser extends RubyObject {
120
119
  byte[] data = null;
121
120
  if (chunk instanceof RubyString || chunk.respondsTo("to_str")) {
122
121
  data = chunk.convertToString().getBytes();
123
- } else {
124
- try {
125
- terminateTask();
126
- } catch (IOException e) {
127
- throw context.getRuntime().newRuntimeError(e.getMessage());
128
- }
129
- XmlSyntaxError xmlSyntaxError =
122
+ } else {
123
+ terminateTask(context);
124
+ XmlSyntaxError xmlSyntaxError =
130
125
  (XmlSyntaxError) NokogiriService.XML_SYNTAXERROR_ALLOCATOR.allocate(context.getRuntime(), getNokogiriClass(context.getRuntime(), "Nokogiri::XML::SyntaxError"));
131
126
  throw new RaiseException(xmlSyntaxError);
132
127
  }
133
128
 
134
129
  int errorCount0 = parserTask.getErrorCount();;
135
-
136
- try {
137
- if (isLast.isTrue()) {
138
- IRubyObject document = invoke(context, this, "document");
139
- invoke(context, document, "end_document");
140
- terminateTask();
141
- } else {
142
- ostream.write(data);
143
- Thread.currentThread().sleep(10); // gives a reader a chance to work
130
+
131
+
132
+ if (isLast.isTrue()) {
133
+ IRubyObject document = invoke(context, this, "document");
134
+ invoke(context, document, "end_document");
135
+ terminateTask(context);
136
+ } else {
137
+ try {
138
+ Future<Void> task = stream.addChunk(new ByteArrayInputStream(data));
139
+ task.get();
140
+ } catch (ClosedStreamException ex) {
141
+ // this means the stream is closed, ignore this exception
142
+ } catch (Exception e) {
143
+ throw context.getRuntime().newRuntimeError(e.getMessage());
144
144
  }
145
- } catch (IOException e) {
146
- throw context.getRuntime().newRuntimeError(e.getMessage());
147
- } catch (InterruptedException e) {
148
- throw context.getRuntime().newRuntimeError(e.getMessage());
145
+
149
146
  }
150
147
 
151
148
  if (!options.recover && parserTask.getErrorCount() > errorCount0) {
152
- try {
153
- terminateTask();
154
- } catch (IOException e) {
155
- throw context.getRuntime().newRuntimeError(e.getMessage());
156
- }
149
+ terminateTask(context);
157
150
  throw new RaiseException(parserTask.getLastError(), true);
158
151
  }
159
152
 
160
153
  return this;
161
154
  }
162
-
155
+
163
156
  private void initialize_task(ThreadContext context) throws IOException {
164
- if (futureTask == null || ostream == null || istream == null) {
165
- Pipe pipe = Pipe.open();
166
- Pipe.SinkChannel sink = pipe.sink();
167
- ostream = Channels.newOutputStream(sink);
168
- Pipe.SourceChannel source = pipe.source();
169
- istream = Channels.newInputStream(source);
157
+ if (futureTask == null || stream == null) {
158
+ stream = new NokogiriBlockingQueueInputStream();
170
159
 
171
160
  parserTask = new ParserTask(context, saxParser);
172
161
  futureTask = new FutureTask<XmlSaxParserContext>(parserTask);
@@ -174,40 +163,53 @@ public class XmlSaxPushParser extends RubyObject {
174
163
  executor.submit(futureTask);
175
164
  }
176
165
  }
177
-
178
- private synchronized void terminateTask() throws IOException {
166
+
167
+ private synchronized void terminateTask(ThreadContext context) {
168
+ try {
169
+ Future<Void> task = stream.addChunk(NokogiriBlockingQueueInputStream.END);
170
+ task.get();
171
+ } catch (ClosedStreamException ex) {
172
+ // ignore this exception, it means the stream was closed
173
+ } catch (Exception e) {
174
+ throw context.getRuntime().newRuntimeError(e.getMessage());
175
+ }
179
176
  futureTask.cancel(true);
180
177
  executor.shutdown();
181
- ostream.close();
182
- istream.close();
183
- ostream = null;
184
- istream = null;
178
+ executor = null;
179
+ stream = null;
180
+ futureTask = null;
185
181
  }
186
-
182
+
187
183
  private class ParserTask implements Callable<XmlSaxParserContext> {
188
- private ThreadContext context;
189
- private IRubyObject handler;
190
- private XmlSaxParserContext parser;
191
-
184
+ private final ThreadContext context;
185
+ private final IRubyObject handler;
186
+ private final XmlSaxParserContext parser;
187
+
192
188
  private ParserTask(ThreadContext context, IRubyObject handler) {
193
189
  RubyClass klazz = getNokogiriClass(context.getRuntime(), "Nokogiri::XML::SAX::ParserContext");
194
190
  this.context = context;
195
191
  this.handler = handler;
196
- this.parser = (XmlSaxParserContext) XmlSaxParserContext.parse_stream(context, klazz, istream);
192
+ this.parser = (XmlSaxParserContext) XmlSaxParserContext.parse_stream(context, klazz, stream);
197
193
  }
198
194
 
199
195
  @Override
200
196
  public XmlSaxParserContext call() throws Exception {
197
+ try {
201
198
  parser.parse_with(context, handler);
202
- return parser;
199
+ } finally {
200
+ // we have to close the stream before exiting, otherwise someone
201
+ // can add a chunk and block on task.get() forever.
202
+ stream.close();
203
+ }
204
+ return parser;
203
205
  }
204
-
206
+
205
207
  private synchronized int getErrorCount() {
206
208
  // check for null because thread may not have started yet
207
209
  if (parser.getNokogiriHandler() == null) return 0;
208
210
  else return parser.getNokogiriHandler().getErrorCount();
209
211
  }
210
-
212
+
211
213
  private synchronized RubyException getLastError() {
212
214
  return (RubyException) parser.getNokogiriHandler().getLastError();
213
215
  }