nokogiri 1.5.5.rc3-java → 1.5.6-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (67) hide show
  1. data/CHANGELOG.ja.rdoc +42 -1
  2. data/CHANGELOG.rdoc +41 -1
  3. data/Manifest.txt +8 -1
  4. data/README.ja.rdoc +1 -1
  5. data/README.rdoc +5 -8
  6. data/ROADMAP.md +6 -2
  7. data/Rakefile +29 -7
  8. data/bin/nokogiri +19 -4
  9. data/build_all +56 -17
  10. data/ext/java/nokogiri/HtmlDocument.java +26 -0
  11. data/ext/java/nokogiri/NokogiriService.java +7 -1
  12. data/ext/java/nokogiri/XmlDocument.java +24 -6
  13. data/ext/java/nokogiri/XmlDocumentFragment.java +2 -26
  14. data/ext/java/nokogiri/XmlDtd.java +13 -2
  15. data/ext/java/nokogiri/XmlElement.java +3 -12
  16. data/ext/java/nokogiri/XmlEntityReference.java +32 -8
  17. data/ext/java/nokogiri/XmlNamespace.java +2 -1
  18. data/ext/java/nokogiri/XmlNode.java +83 -31
  19. data/ext/java/nokogiri/XmlSaxPushParser.java +55 -53
  20. data/ext/java/nokogiri/XmlText.java +2 -14
  21. data/ext/java/nokogiri/XsltStylesheet.java +4 -2
  22. data/ext/java/nokogiri/internals/ClosedStreamException.java +10 -0
  23. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +2 -2
  24. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +151 -0
  25. data/ext/java/nokogiri/internals/{XmlDomParser.java → NokogiriDomParser.java} +25 -14
  26. data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +109 -0
  27. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +123 -0
  28. data/ext/java/nokogiri/internals/NokogiriHandler.java +22 -14
  29. data/ext/java/nokogiri/internals/NokogiriHelpers.java +25 -7
  30. data/ext/java/nokogiri/internals/ParserContext.java +2 -1
  31. data/ext/java/nokogiri/internals/ReaderNode.java +2 -1
  32. data/ext/java/nokogiri/internals/SaveContextVisitor.java +100 -102
  33. data/ext/java/nokogiri/internals/XmlDomParserContext.java +10 -4
  34. data/ext/nokogiri/extconf.rb +1 -0
  35. data/ext/nokogiri/xml_document.c +2 -2
  36. data/ext/nokogiri/xml_node.c +31 -14
  37. data/ext/nokogiri/xml_sax_parser.c +16 -0
  38. data/ext/nokogiri/xslt_stylesheet.c +19 -2
  39. data/lib/nekodtd.jar +0 -0
  40. data/lib/nokogiri/nokogiri.jar +0 -0
  41. data/lib/nokogiri/version.rb +4 -1
  42. data/lib/nokogiri/xml/document.rb +8 -6
  43. data/lib/nokogiri/xml/document_fragment.rb +10 -1
  44. data/lib/nokogiri/xml/node.rb +58 -61
  45. data/lib/nokogiri/xml/sax/document.rb +7 -0
  46. data/lib/nokogiri/xml/sax/parser.rb +7 -0
  47. data/lib/nokogiri/xml/xpath_context.rb +1 -1
  48. data/lib/nokogiri/xslt.rb +1 -1
  49. data/tasks/cross_compile.rb +5 -8
  50. data/test/files/test_document_url/bar.xml +2 -0
  51. data/test/files/test_document_url/document.dtd +4 -0
  52. data/test/files/test_document_url/document.xml +6 -0
  53. data/test/helper.rb +6 -0
  54. data/test/html/test_document.rb +23 -0
  55. data/test/html/test_document_fragment.rb +5 -0
  56. data/test/test_xslt_transforms.rb +30 -0
  57. data/test/xml/sax/test_parser.rb +20 -1
  58. data/test/xml/test_builder.rb +42 -0
  59. data/test/xml/test_document.rb +64 -9
  60. data/test/xml/test_document_fragment.rb +7 -0
  61. data/test/xml/test_entity_reference.rb +12 -0
  62. data/test/xml/test_namespace.rb +20 -0
  63. data/test/xml/test_node.rb +79 -0
  64. data/test/xml/test_node_attributes.rb +29 -0
  65. data/test/xml/test_unparented_node.rb +9 -0
  66. data/test_all +11 -14
  67. metadata +744 -560
@@ -35,16 +35,16 @@ package nokogiri;
35
35
  import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
36
36
  import static org.jruby.javasupport.util.RuntimeHelpers.invoke;
37
37
 
38
+ import java.io.ByteArrayInputStream;
38
39
  import java.io.IOException;
39
- import java.io.InputStream;
40
- import java.io.OutputStream;
41
- import java.nio.channels.Channels;
42
- import java.nio.channels.Pipe;
43
40
  import java.util.concurrent.Callable;
44
41
  import java.util.concurrent.ExecutorService;
45
42
  import java.util.concurrent.Executors;
43
+ import java.util.concurrent.Future;
46
44
  import java.util.concurrent.FutureTask;
47
45
 
46
+ import nokogiri.internals.ClosedStreamException;
47
+ import nokogiri.internals.NokogiriBlockingQueueInputStream;
48
48
  import nokogiri.internals.ParserContext;
49
49
 
50
50
  import org.jruby.Ruby;
@@ -69,8 +69,7 @@ public class XmlSaxPushParser extends RubyObject {
69
69
  ParserContext.Options options;
70
70
  IRubyObject optionsRuby;
71
71
  IRubyObject saxParser;
72
- OutputStream ostream = null;
73
- InputStream istream = null;
72
+ NokogiriBlockingQueueInputStream stream;
74
73
  ParserTask parserTask = null;
75
74
  FutureTask<XmlSaxParserContext> futureTask = null;
76
75
  ExecutorService executor = null;
@@ -120,53 +119,43 @@ public class XmlSaxPushParser extends RubyObject {
120
119
  byte[] data = null;
121
120
  if (chunk instanceof RubyString || chunk.respondsTo("to_str")) {
122
121
  data = chunk.convertToString().getBytes();
123
- } else {
124
- try {
125
- terminateTask();
126
- } catch (IOException e) {
127
- throw context.getRuntime().newRuntimeError(e.getMessage());
128
- }
129
- XmlSyntaxError xmlSyntaxError =
122
+ } else {
123
+ terminateTask(context);
124
+ XmlSyntaxError xmlSyntaxError =
130
125
  (XmlSyntaxError) NokogiriService.XML_SYNTAXERROR_ALLOCATOR.allocate(context.getRuntime(), getNokogiriClass(context.getRuntime(), "Nokogiri::XML::SyntaxError"));
131
126
  throw new RaiseException(xmlSyntaxError);
132
127
  }
133
128
 
134
129
  int errorCount0 = parserTask.getErrorCount();;
135
-
136
- try {
137
- if (isLast.isTrue()) {
138
- IRubyObject document = invoke(context, this, "document");
139
- invoke(context, document, "end_document");
140
- terminateTask();
141
- } else {
142
- ostream.write(data);
143
- Thread.currentThread().sleep(10); // gives a reader a chance to work
130
+
131
+
132
+ if (isLast.isTrue()) {
133
+ IRubyObject document = invoke(context, this, "document");
134
+ invoke(context, document, "end_document");
135
+ terminateTask(context);
136
+ } else {
137
+ try {
138
+ Future<Void> task = stream.addChunk(new ByteArrayInputStream(data));
139
+ task.get();
140
+ } catch (ClosedStreamException ex) {
141
+ // this means the stream is closed, ignore this exception
142
+ } catch (Exception e) {
143
+ throw context.getRuntime().newRuntimeError(e.getMessage());
144
144
  }
145
- } catch (IOException e) {
146
- throw context.getRuntime().newRuntimeError(e.getMessage());
147
- } catch (InterruptedException e) {
148
- throw context.getRuntime().newRuntimeError(e.getMessage());
145
+
149
146
  }
150
147
 
151
148
  if (!options.recover && parserTask.getErrorCount() > errorCount0) {
152
- try {
153
- terminateTask();
154
- } catch (IOException e) {
155
- throw context.getRuntime().newRuntimeError(e.getMessage());
156
- }
149
+ terminateTask(context);
157
150
  throw new RaiseException(parserTask.getLastError(), true);
158
151
  }
159
152
 
160
153
  return this;
161
154
  }
162
-
155
+
163
156
  private void initialize_task(ThreadContext context) throws IOException {
164
- if (futureTask == null || ostream == null || istream == null) {
165
- Pipe pipe = Pipe.open();
166
- Pipe.SinkChannel sink = pipe.sink();
167
- ostream = Channels.newOutputStream(sink);
168
- Pipe.SourceChannel source = pipe.source();
169
- istream = Channels.newInputStream(source);
157
+ if (futureTask == null || stream == null) {
158
+ stream = new NokogiriBlockingQueueInputStream();
170
159
 
171
160
  parserTask = new ParserTask(context, saxParser);
172
161
  futureTask = new FutureTask<XmlSaxParserContext>(parserTask);
@@ -174,40 +163,53 @@ public class XmlSaxPushParser extends RubyObject {
174
163
  executor.submit(futureTask);
175
164
  }
176
165
  }
177
-
178
- private synchronized void terminateTask() throws IOException {
166
+
167
+ private synchronized void terminateTask(ThreadContext context) {
168
+ try {
169
+ Future<Void> task = stream.addChunk(NokogiriBlockingQueueInputStream.END);
170
+ task.get();
171
+ } catch (ClosedStreamException ex) {
172
+ // ignore this exception, it means the stream was closed
173
+ } catch (Exception e) {
174
+ throw context.getRuntime().newRuntimeError(e.getMessage());
175
+ }
179
176
  futureTask.cancel(true);
180
177
  executor.shutdown();
181
- ostream.close();
182
- istream.close();
183
- ostream = null;
184
- istream = null;
178
+ executor = null;
179
+ stream = null;
180
+ futureTask = null;
185
181
  }
186
-
182
+
187
183
  private class ParserTask implements Callable<XmlSaxParserContext> {
188
- private ThreadContext context;
189
- private IRubyObject handler;
190
- private XmlSaxParserContext parser;
191
-
184
+ private final ThreadContext context;
185
+ private final IRubyObject handler;
186
+ private final XmlSaxParserContext parser;
187
+
192
188
  private ParserTask(ThreadContext context, IRubyObject handler) {
193
189
  RubyClass klazz = getNokogiriClass(context.getRuntime(), "Nokogiri::XML::SAX::ParserContext");
194
190
  this.context = context;
195
191
  this.handler = handler;
196
- this.parser = (XmlSaxParserContext) XmlSaxParserContext.parse_stream(context, klazz, istream);
192
+ this.parser = (XmlSaxParserContext) XmlSaxParserContext.parse_stream(context, klazz, stream);
197
193
  }
198
194
 
199
195
  @Override
200
196
  public XmlSaxParserContext call() throws Exception {
197
+ try {
201
198
  parser.parse_with(context, handler);
202
- return parser;
199
+ } finally {
200
+ // we have to close the stream before exiting, otherwise someone
201
+ // can add a chunk and block on task.get() forever.
202
+ stream.close();
203
+ }
204
+ return parser;
203
205
  }
204
-
206
+
205
207
  private synchronized int getErrorCount() {
206
208
  // check for null because thread may not have started yet
207
209
  if (parser.getNokogiriHandler() == null) return 0;
208
210
  else return parser.getNokogiriHandler().getErrorCount();
209
211
  }
210
-
212
+
211
213
  private synchronized RubyException getLastError() {
212
214
  return (RubyException) parser.getNokogiriHandler().getLastError();
213
215
  }
@@ -34,13 +34,11 @@ package nokogiri;
34
34
 
35
35
  import static nokogiri.internals.NokogiriHelpers.getCachedNodeOrCreate;
36
36
  import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
37
- import static nokogiri.internals.NokogiriHelpers.stringOrNil;
38
37
  import nokogiri.internals.SaveContextVisitor;
39
38
 
40
39
  import org.jruby.Ruby;
41
40
  import org.jruby.RubyClass;
42
41
  import org.jruby.anno.JRubyClass;
43
- import org.jruby.anno.JRubyMethod;
44
42
  import org.jruby.runtime.ThreadContext;
45
43
  import org.jruby.runtime.builtin.IRubyObject;
46
44
  import org.w3c.dom.Document;
@@ -88,17 +86,7 @@ public class XmlText extends XmlNode {
88
86
  if (name == null) name = context.getRuntime().newString("text");
89
87
  return name;
90
88
  }
91
-
92
- @Override
93
- @JRubyMethod(name = {"content", "text", "inner_text"})
94
- public IRubyObject content(ThreadContext context) {
95
- if (content == null || content.isNil()) {
96
- return stringOrNil(context.getRuntime(), node.getTextContent());
97
- } else {
98
- return content;
99
- }
100
- }
101
-
89
+
102
90
  @Override
103
91
  public void accept(ThreadContext context, SaveContextVisitor visitor) {
104
92
  visitor.enter((Text)node);
@@ -114,6 +102,6 @@ public class XmlText extends XmlNode {
114
102
  }
115
103
  child = child.getNextSibling();
116
104
  }
117
- visitor.leave((Text)node);
105
+ visitor.leave(node);
118
106
  }
119
107
  }
@@ -163,7 +163,7 @@ public class XsltStylesheet extends RubyObject {
163
163
  try {
164
164
  xslt.init(args[1], doc);
165
165
  } catch (TransformerConfigurationException ex) {
166
- runtime.newRuntimeError("could not parse xslt stylesheet");
166
+ throw runtime.newRuntimeError("could not parse xslt stylesheet");
167
167
  }
168
168
 
169
169
  return xslt;
@@ -172,6 +172,8 @@ public class XsltStylesheet extends RubyObject {
172
172
  private void init(IRubyObject stylesheet, Document document) throws TransformerConfigurationException {
173
173
  this.stylesheet = stylesheet; // either RubyString or RubyFile
174
174
  if (factory == null) factory = TransformerFactory.newInstance();
175
+ NokogiriXsltErrorListener elistener = new NokogiriXsltErrorListener();
176
+ factory.setErrorListener(elistener);
175
177
  sheet = factory.newTemplates(new DOMSource(document));
176
178
  }
177
179
 
@@ -187,7 +189,7 @@ public class XsltStylesheet extends RubyObject {
187
189
  Ruby runtime = context.getRuntime();
188
190
  RubyArray errors_of_xmlDoc = (RubyArray) xmlDoc.getInstanceVariable("@errors");
189
191
  if (!errors_of_xmlDoc.isEmpty()) {
190
- throw runtime.newRuntimeError(errors_of_xmlDoc.first().asJavaString());
192
+ throw runtime.newRuntimeError(errors_of_xmlDoc.first().asString().asJavaString());
191
193
  }
192
194
  }
193
195
 
@@ -0,0 +1,10 @@
1
+ package nokogiri.internals;
2
+
3
+ @SuppressWarnings("serial")
4
+ public class ClosedStreamException extends Exception {
5
+
6
+ public ClosedStreamException(String message) {
7
+ super(message);
8
+ }
9
+
10
+ }
@@ -91,7 +91,8 @@ public class HtmlDomParserContext extends XmlDomParserContext {
91
91
  XMLDocumentFilter[] filters = { elementValidityCheckFilter};
92
92
 
93
93
  config.setErrorHandler(this.errorHandler);
94
- parser = new DOMParser(config);
94
+
95
+ parser = new NokogiriDomParser(config);
95
96
 
96
97
  // see http://nekohtml.sourceforge.net/settings.html for details
97
98
  setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
@@ -100,7 +101,6 @@ public class HtmlDomParserContext extends XmlDomParserContext {
100
101
  setProperty("http://cyberneko.org/html/properties/filters", filters);
101
102
  setFeature("http://cyberneko.org/html/features/report-errors", true);
102
103
  setFeature("http://xml.org/sax/features/namespaces", false);
103
- setFeature("http://cyberneko.org/html/features/insert-doctype", true);
104
104
  }
105
105
 
106
106
  /**
@@ -0,0 +1,151 @@
1
+ /**
2
+ *
3
+ */
4
+ package nokogiri.internals;
5
+
6
+ import java.io.ByteArrayInputStream;
7
+ import java.io.IOException;
8
+ import java.io.InputStream;
9
+ import java.util.LinkedList;
10
+ import java.util.List;
11
+ import java.util.concurrent.Callable;
12
+ import java.util.concurrent.Future;
13
+ import java.util.concurrent.FutureTask;
14
+ import java.util.concurrent.LinkedBlockingQueue;
15
+
16
+ import nokogiri.XmlSaxPushParser;
17
+
18
+ /**
19
+ * A smart input stream that signals the caller when a chunk of data is consumed
20
+ * from the stream. The main use of this stream is to synchronize the
21
+ * {@link XmlSaxPushParser} and the {@link XmlSaxParser} which runs in a
22
+ * different thread.
23
+ *
24
+ * @author John Shahid <jvshahid@gmail.com>
25
+ */
26
+ public class NokogiriBlockingQueueInputStream extends InputStream {
27
+ private final LinkedBlockingQueue<Task> queue;
28
+ protected Task currentTask;
29
+ protected ByteArrayInputStream currentStream;
30
+ protected int position;
31
+ protected boolean closed = false;
32
+
33
+ public static final ByteArrayInputStream END = new ByteArrayInputStream(new byte[0]);
34
+
35
+ private static class Task extends FutureTask<Void> {
36
+ private final ByteArrayInputStream stream;
37
+
38
+ public Task(ByteArrayInputStream stream) {
39
+ super(new Callable<Void>() {
40
+ @Override
41
+ public Void call() throws Exception {
42
+ // TODO Auto-generated method stub
43
+ return null;
44
+ }
45
+ });
46
+ this.stream = stream;
47
+ }
48
+
49
+ public ByteArrayInputStream getStream() {
50
+ return stream;
51
+ }
52
+
53
+ @Override
54
+ public void run() {
55
+ // don't do anything
56
+ }
57
+
58
+ @Override
59
+ public boolean runAndReset() {
60
+ // don't do anything
61
+ return true;
62
+ }
63
+
64
+ @Override
65
+ public void set(Void v) {
66
+ super.set(v);
67
+ }
68
+ }
69
+
70
+ public NokogiriBlockingQueueInputStream() {
71
+ queue = new LinkedBlockingQueue<Task>();
72
+ }
73
+
74
+ /**
75
+ * This method shouldn't be called unless the parser has finished parsing or
76
+ * threw an exception while doing so, otherwise, there'll be the potential
77
+ * that the read method will block indefinitely.
78
+ */
79
+ @Override
80
+ public synchronized void close() {
81
+ closed = true;
82
+ List<Task> tasks = new LinkedList<Task>();
83
+ queue.drainTo(tasks);
84
+ tasks.add(currentTask);
85
+ for (Task task : tasks) {
86
+ task.set(null);
87
+ }
88
+ }
89
+
90
+ /**
91
+ * Add @param stream to the end of the queue of data that will be returned by
92
+ * {@link #read()} and its variants. The method will @return a future whose
93
+ * {@link Future#get()} will block until the data in @param stream is read.
94
+ *
95
+ * Passing the special stream {@link #END} to this method will cause
96
+ * {@link #read()} to return an eof indicator (i.e. -1) to the caller, after
97
+ * all the data inserted before {@link #END} is processed.
98
+ *
99
+ * @return
100
+ */
101
+ public synchronized Future<Void> addChunk(ByteArrayInputStream stream) throws ClosedStreamException {
102
+ if (closed)
103
+ throw new ClosedStreamException("Cannot add a chunk to a closed stream");
104
+ Task task = new Task(stream);
105
+ queue.add(task);
106
+ return task;
107
+ }
108
+
109
+ /*
110
+ * (non-Javadoc)
111
+ *
112
+ * @see java.io.InputStream#read()
113
+ */
114
+ @Override
115
+ public int read() throws IOException {
116
+ if (currentTask == null || currentStream.available() == 0)
117
+ if (getNextTask() == -1)
118
+ return -1;
119
+ return currentStream.read();
120
+ }
121
+
122
+ /*
123
+ * (non-Javadoc)
124
+ *
125
+ * @see java.io.InputStream#read(byte[], int, int)
126
+ */
127
+ @Override
128
+ public int read(byte[] bytes, int off, int len) {
129
+ if (currentTask == null || currentStream.available() == 0) {
130
+ if (getNextTask() == -1) {
131
+ currentTask.set(null);
132
+ return -1;
133
+ }
134
+ }
135
+ return currentStream.read(bytes, off, len);
136
+ }
137
+
138
+ protected int getNextTask() {
139
+ while (true) {
140
+ try {
141
+ if (currentTask != null)
142
+ currentTask.set(null);
143
+ currentTask = queue.take();
144
+ currentStream = currentTask.getStream();
145
+ return currentStream.available() == 0 ? -1 : currentStream.available();
146
+ } catch (InterruptedException ex) {
147
+ // keep retrying to read
148
+ }
149
+ }
150
+ }
151
+ }
@@ -53,23 +53,34 @@ import org.xml.sax.SAXException;
53
53
  *
54
54
  * @author Patrick Mahoney <pat@polycrystal.org>
55
55
  */
56
- public class XmlDomParser extends DOMParser {
57
- DOMParser dtd;
58
- ParserContext.Options options;
56
+ public class NokogiriDomParser extends DOMParser {
57
+ protected DOMParser dtd;
58
+ protected boolean xInclude;
59
+ protected XMLParserConfiguration config;
59
60
 
60
- public XmlDomParser(ParserContext.Options options) {
61
- super();
62
- this.options = options;
61
+ public NokogiriDomParser(XMLParserConfiguration config) {
62
+ super(config);
63
+ this.config = config;
64
+ initialize();
65
+ }
66
+
67
+ public NokogiriDomParser(ParserContext.Options options) {
68
+ xInclude = options.xInclude;
69
+ initialize();
70
+ }
71
+
72
+ protected void initialize() {
73
+ if (config == null) {
74
+ if (xInclude) {
75
+ config = new XIncludeParserConfiguration();
76
+ } else {
77
+ config = getXMLParserConfiguration();
78
+ }
79
+ }
63
80
 
64
81
  DTDConfiguration dtdConfig = new DTDConfiguration();
65
82
  dtd = new DOMParser(dtdConfig);
66
83
 
67
- XMLParserConfiguration config;
68
- if (options.xInclude) {
69
- config = new XIncludeParserConfiguration();
70
- } else {
71
- config = getXMLParserConfiguration();
72
- }
73
84
  config.setDTDHandler(dtdConfig);
74
85
  config.setDTDContentModelHandler(dtdConfig);
75
86
  }
@@ -77,7 +88,7 @@ public class XmlDomParser extends DOMParser {
77
88
  @Override
78
89
  public void parse(InputSource source) throws SAXException, IOException {
79
90
  dtd.reset();
80
- if (options.xInclude) {
91
+ if (xInclude) {
81
92
  setEntityResolver(new NokogiriXInlcudeEntityResolver(source));
82
93
  }
83
94
  super.parse(source);
@@ -87,7 +98,7 @@ public class XmlDomParser extends DOMParser {
87
98
 
88
99
  doc.setUserData(XmlDocument.DTD_RAW_DOCUMENT, dtd.getDocument(), null);
89
100
  }
90
-
101
+
91
102
  private class NokogiriXInlcudeEntityResolver implements org.xml.sax.EntityResolver {
92
103
  InputSource source;
93
104
  private NokogiriXInlcudeEntityResolver(InputSource source) {