nokogiri 1.5.6.rc1-java → 1.5.6.rc2-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (36) hide show
  1. data/CHANGELOG.ja.rdoc +3 -0
  2. data/CHANGELOG.rdoc +3 -0
  3. data/Manifest.txt +8 -4
  4. data/README.ja.rdoc +1 -1
  5. data/README.rdoc +1 -1
  6. data/ROADMAP.md +3 -0
  7. data/Rakefile +26 -7
  8. data/build_all +40 -27
  9. data/ext/java/nokogiri/HtmlDocument.java +26 -0
  10. data/ext/java/nokogiri/XmlDocument.java +17 -4
  11. data/ext/java/nokogiri/XmlDocumentFragment.java +1 -39
  12. data/ext/java/nokogiri/XmlNode.java +3 -2
  13. data/ext/java/nokogiri/XmlSaxPushParser.java +55 -53
  14. data/ext/java/nokogiri/XsltStylesheet.java +4 -2
  15. data/ext/java/nokogiri/internals/ClosedStreamException.java +10 -0
  16. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +2 -2
  17. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +151 -0
  18. data/ext/java/nokogiri/internals/{XmlDomParser.java → NokogiriDomParser.java} +25 -14
  19. data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +109 -0
  20. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +123 -0
  21. data/ext/java/nokogiri/internals/NokogiriHandler.java +12 -10
  22. data/ext/java/nokogiri/internals/NokogiriHelpers.java +12 -2
  23. data/ext/java/nokogiri/internals/XmlDomParserContext.java +1 -1
  24. data/ext/nokogiri/extconf.rb +1 -0
  25. data/ext/nokogiri/xslt_stylesheet.c +19 -2
  26. data/lib/nokogiri/nokogiri.jar +0 -0
  27. data/lib/nokogiri/version.rb +1 -1
  28. data/lib/nokogiri/xml/node.rb +43 -50
  29. data/lib/nokogiri/xml/sax/parser.rb +7 -0
  30. data/lib/nokogiri/xslt.rb +1 -1
  31. data/tasks/cross_compile.rb +3 -3
  32. data/test/html/test_document.rb +23 -0
  33. data/test/test_xslt_transforms.rb +30 -0
  34. data/test/xml/sax/test_parser.rb +5 -0
  35. data/test/xml/test_node.rb +9 -1
  36. metadata +106 -80
@@ -163,7 +163,7 @@ public class XsltStylesheet extends RubyObject {
163
163
  try {
164
164
  xslt.init(args[1], doc);
165
165
  } catch (TransformerConfigurationException ex) {
166
- runtime.newRuntimeError("could not parse xslt stylesheet");
166
+ throw runtime.newRuntimeError("could not parse xslt stylesheet");
167
167
  }
168
168
 
169
169
  return xslt;
@@ -172,6 +172,8 @@ public class XsltStylesheet extends RubyObject {
172
172
  private void init(IRubyObject stylesheet, Document document) throws TransformerConfigurationException {
173
173
  this.stylesheet = stylesheet; // either RubyString or RubyFile
174
174
  if (factory == null) factory = TransformerFactory.newInstance();
175
+ NokogiriXsltErrorListener elistener = new NokogiriXsltErrorListener();
176
+ factory.setErrorListener(elistener);
175
177
  sheet = factory.newTemplates(new DOMSource(document));
176
178
  }
177
179
 
@@ -187,7 +189,7 @@ public class XsltStylesheet extends RubyObject {
187
189
  Ruby runtime = context.getRuntime();
188
190
  RubyArray errors_of_xmlDoc = (RubyArray) xmlDoc.getInstanceVariable("@errors");
189
191
  if (!errors_of_xmlDoc.isEmpty()) {
190
- throw runtime.newRuntimeError(errors_of_xmlDoc.first().asJavaString());
192
+ throw runtime.newRuntimeError(errors_of_xmlDoc.first().asString().asJavaString());
191
193
  }
192
194
  }
193
195
 
@@ -0,0 +1,10 @@
1
+ package nokogiri.internals;
2
+
3
+ @SuppressWarnings("serial")
4
+ public class ClosedStreamException extends Exception {
5
+
6
+ public ClosedStreamException(String message) {
7
+ super(message);
8
+ }
9
+
10
+ }
@@ -91,7 +91,8 @@ public class HtmlDomParserContext extends XmlDomParserContext {
91
91
  XMLDocumentFilter[] filters = { elementValidityCheckFilter};
92
92
 
93
93
  config.setErrorHandler(this.errorHandler);
94
- parser = new DOMParser(config);
94
+
95
+ parser = new NokogiriDomParser(config);
95
96
 
96
97
  // see http://nekohtml.sourceforge.net/settings.html for details
97
98
  setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
@@ -100,7 +101,6 @@ public class HtmlDomParserContext extends XmlDomParserContext {
100
101
  setProperty("http://cyberneko.org/html/properties/filters", filters);
101
102
  setFeature("http://cyberneko.org/html/features/report-errors", true);
102
103
  setFeature("http://xml.org/sax/features/namespaces", false);
103
- setFeature("http://cyberneko.org/html/features/insert-doctype", true);
104
104
  }
105
105
 
106
106
  /**
@@ -0,0 +1,151 @@
1
+ /**
2
+ *
3
+ */
4
+ package nokogiri.internals;
5
+
6
+ import java.io.ByteArrayInputStream;
7
+ import java.io.IOException;
8
+ import java.io.InputStream;
9
+ import java.util.LinkedList;
10
+ import java.util.List;
11
+ import java.util.concurrent.Callable;
12
+ import java.util.concurrent.Future;
13
+ import java.util.concurrent.FutureTask;
14
+ import java.util.concurrent.LinkedBlockingQueue;
15
+
16
+ import nokogiri.XmlSaxPushParser;
17
+
18
+ /**
19
+ * A smart input stream that signals the caller when a chunk of data is consumed
20
+ * from the stream. The main use of this stream is to synchronize the
21
+ * {@link XmlSaxPushParser} and the {@link XmlSaxParser} which runs in a
22
+ * different thread.
23
+ *
24
+ * @author John Shahid <jvshahid@gmail.com>
25
+ */
26
+ public class NokogiriBlockingQueueInputStream extends InputStream {
27
+ private final LinkedBlockingQueue<Task> queue;
28
+ protected Task currentTask;
29
+ protected ByteArrayInputStream currentStream;
30
+ protected int position;
31
+ protected boolean closed = false;
32
+
33
+ public static final ByteArrayInputStream END = new ByteArrayInputStream(new byte[0]);
34
+
35
+ private static class Task extends FutureTask<Void> {
36
+ private final ByteArrayInputStream stream;
37
+
38
+ public Task(ByteArrayInputStream stream) {
39
+ super(new Callable<Void>() {
40
+ @Override
41
+ public Void call() throws Exception {
42
+ // TODO Auto-generated method stub
43
+ return null;
44
+ }
45
+ });
46
+ this.stream = stream;
47
+ }
48
+
49
+ public ByteArrayInputStream getStream() {
50
+ return stream;
51
+ }
52
+
53
+ @Override
54
+ public void run() {
55
+ // don't do anything
56
+ }
57
+
58
+ @Override
59
+ public boolean runAndReset() {
60
+ // don't do anything
61
+ return true;
62
+ }
63
+
64
+ @Override
65
+ public void set(Void v) {
66
+ super.set(v);
67
+ }
68
+ }
69
+
70
+ public NokogiriBlockingQueueInputStream() {
71
+ queue = new LinkedBlockingQueue<Task>();
72
+ }
73
+
74
+ /**
75
+ * This method shouldn't be called unless the parser has finished parsing or
76
+ * threw an exception while doing so; otherwise, there'll be the potential
77
+ * that the read method will block indefinitely.
78
+ */
79
+ @Override
80
+ public synchronized void close() {
81
+ closed = true;
82
+ List<Task> tasks = new LinkedList<Task>();
83
+ queue.drainTo(tasks);
84
+ tasks.add(currentTask);
85
+ for (Task task : tasks) {
86
+ task.set(null);
87
+ }
88
+ }
89
+
90
+ /**
91
+ * Add {@code stream} to the end of the queue of data that will be returned by
92
+ * {@link #read()} and its variants. The method returns a future whose
93
+ * {@link Future#get()} will block until the data in {@code stream} is read.
94
+ *
95
+ * Passing the special stream {@link #END} to this method, will cause
96
+ * {@link #read()} to return an eof indicator (i.e. -1) to the caller, after
97
+ * all the data inserted before {@link #END} is processed.
98
+ *
99
+ * @return
100
+ */
101
+ public synchronized Future<Void> addChunk(ByteArrayInputStream stream) throws ClosedStreamException {
102
+ if (closed)
103
+ throw new ClosedStreamException("Cannot add a chunk to a closed stream");
104
+ Task task = new Task(stream);
105
+ queue.add(task);
106
+ return task;
107
+ }
108
+
109
+ /*
110
+ * (non-Javadoc)
111
+ *
112
+ * @see java.io.InputStream#read()
113
+ */
114
+ @Override
115
+ public int read() throws IOException {
116
+ if (currentTask == null || currentStream.available() == 0)
117
+ if (getNextTask() == -1)
118
+ return -1;
119
+ return currentStream.read();
120
+ }
121
+
122
+ /*
123
+ * (non-Javadoc)
124
+ *
125
+ * @see java.io.InputStream#read(byte[], int, int)
126
+ */
127
+ @Override
128
+ public int read(byte[] bytes, int off, int len) {
129
+ if (currentTask == null || currentStream.available() == 0) {
130
+ if (getNextTask() == -1) {
131
+ currentTask.set(null);
132
+ return -1;
133
+ }
134
+ }
135
+ return currentStream.read(bytes, off, len);
136
+ }
137
+
138
+ protected int getNextTask() {
139
+ while (true) {
140
+ try {
141
+ if (currentTask != null)
142
+ currentTask.set(null);
143
+ currentTask = queue.take();
144
+ currentStream = currentTask.getStream();
145
+ return currentStream.available() == 0 ? -1 : currentStream.available();
146
+ } catch (InterruptedException ex) {
147
+ // keep retrying to read
148
+ }
149
+ }
150
+ }
151
+ }
@@ -53,23 +53,34 @@ import org.xml.sax.SAXException;
53
53
  *
54
54
  * @author Patrick Mahoney <pat@polycrystal.org>
55
55
  */
56
- public class XmlDomParser extends DOMParser {
57
- DOMParser dtd;
58
- ParserContext.Options options;
56
+ public class NokogiriDomParser extends DOMParser {
57
+ protected DOMParser dtd;
58
+ protected boolean xInclude;
59
+ protected XMLParserConfiguration config;
59
60
 
60
- public XmlDomParser(ParserContext.Options options) {
61
- super();
62
- this.options = options;
61
+ public NokogiriDomParser(XMLParserConfiguration config) {
62
+ super(config);
63
+ this.config = config;
64
+ initialize();
65
+ }
66
+
67
+ public NokogiriDomParser(ParserContext.Options options) {
68
+ xInclude = options.xInclude;
69
+ initialize();
70
+ }
71
+
72
+ protected void initialize() {
73
+ if (config == null) {
74
+ if (xInclude) {
75
+ config = new XIncludeParserConfiguration();
76
+ } else {
77
+ config = getXMLParserConfiguration();
78
+ }
79
+ }
63
80
 
64
81
  DTDConfiguration dtdConfig = new DTDConfiguration();
65
82
  dtd = new DOMParser(dtdConfig);
66
83
 
67
- XMLParserConfiguration config;
68
- if (options.xInclude) {
69
- config = new XIncludeParserConfiguration();
70
- } else {
71
- config = getXMLParserConfiguration();
72
- }
73
84
  config.setDTDHandler(dtdConfig);
74
85
  config.setDTDContentModelHandler(dtdConfig);
75
86
  }
@@ -77,7 +88,7 @@ public class XmlDomParser extends DOMParser {
77
88
  @Override
78
89
  public void parse(InputSource source) throws SAXException, IOException {
79
90
  dtd.reset();
80
- if (options.xInclude) {
91
+ if (xInclude) {
81
92
  setEntityResolver(new NokogiriXInlcudeEntityResolver(source));
82
93
  }
83
94
  super.parse(source);
@@ -87,7 +98,7 @@ public class XmlDomParser extends DOMParser {
87
98
 
88
99
  doc.setUserData(XmlDocument.DTD_RAW_DOCUMENT, dtd.getDocument(), null);
89
100
  }
90
-
101
+
91
102
  private class NokogiriXInlcudeEntityResolver implements org.xml.sax.EntityResolver {
92
103
  InputSource source;
93
104
  private NokogiriXInlcudeEntityResolver(InputSource source) {
@@ -0,0 +1,109 @@
1
+ package nokogiri.internals;
2
+
3
+ import java.io.IOException;
4
+ import java.io.InputStream;
5
+
6
+ import org.jruby.Ruby;
7
+ import org.jruby.RubyObject;
8
+ import org.jruby.RubyProcess.Sys;
9
+ import org.jruby.exceptions.RaiseException;
10
+ import org.jruby.javasupport.util.RuntimeHelpers;
11
+ import org.jruby.runtime.ThreadContext;
12
+ import org.jruby.runtime.builtin.IRubyObject;
13
+ import org.jruby.util.ByteList;
14
+
15
+ /**
16
+ * This class wraps the EncodingReader, which acts like a rewinding input stream;
17
+ * it tries to read the first 1K of data to detect the encoding, but saves
18
+ * this data in a buffer for the subsequent read. Unfortunately, the EncodingReader
19
+ * will behave as expected only if encoding was detected, otherwise, the read data
20
+ * won't be stored and EncodingReader will fall back to reading directly from the io stream.
21
+ * This is kind of lame, since we need to have similar logic in both layers. The alternative
22
+ * is to implement the encoding detection similar to the way C-Nokogiri does it; it starts
23
+ * parsing assuming encoding is unknown and if encoding is detected it will throw an exception
24
+ * causing parsing to stop, in which case we have to intercept the exception and set the encoding.
25
+ * Also in this case we don't have to restart the parsing since html/document.rb does that for us.
26
+ *
27
+ * @author John Shahid <jvshahid@gmail.com>
28
+ *
29
+ */
30
+ public class NokogiriEncodingReaderWrapper extends InputStream {
31
+ private final ThreadContext context;
32
+ private final IRubyObject encodingReader;
33
+ private final Ruby ruby;
34
+ private IRubyObject detectedEncoding;
35
+ private final byte[] firstChunk = new byte[1024];
36
+ private int firstChunkOff = 0;
37
+ private int firstChunkLength = 0;
38
+
39
+ public NokogiriEncodingReaderWrapper(ThreadContext context, RubyObject encodingReader) {
40
+ this.context = context;
41
+ this.encodingReader = encodingReader;
42
+ this.ruby = context.getRuntime();
43
+
44
+ if (!RuntimeHelpers.invoke(context, encodingReader, "respond_to?", ruby.newSymbol("read").to_sym()).isTrue()
45
+ || encodingReader.getInstanceVariable("@io") == null) {
46
+ throw ruby.newArgumentError("Argument doesn't respond to read or doesn't have instance variable @io");
47
+ }
48
+ }
49
+
50
+ public boolean detectEncoding() {
51
+ try {
52
+ firstChunkLength = read(firstChunk);
53
+ } catch (RaiseException e) {
54
+ detectedEncoding = e.getException().getInstanceVariable("@found_encoding");
55
+ return true;
56
+ }
57
+ detectedEncoding = context.nil;
58
+ return false;
59
+ }
60
+
61
+ public IRubyObject getEncoding() {
62
+ return detectedEncoding;
63
+ }
64
+
65
+ @Override
66
+ public int read(byte b[]) {
67
+ return read(b, 0, b.length);
68
+ }
69
+
70
+ @Override
71
+ public int read(byte b[], int off, int len) {
72
+ if (b == null) {
73
+ throw new NullPointerException();
74
+ } else if (off < 0 || len < 0 || len > b.length - off) {
75
+ throw new IndexOutOfBoundsException();
76
+ } else if (len == 0) {
77
+ return 0;
78
+ }
79
+
80
+ int copyLength = Math.min(firstChunkLength - firstChunkOff, len);
81
+ if (copyLength > 0) {
82
+ System.arraycopy(firstChunk, firstChunkOff, b, off, copyLength);
83
+ len -= copyLength;
84
+ firstChunkOff += copyLength;
85
+ }
86
+
87
+ if (len <= 0)
88
+ return copyLength;
89
+
90
+ IRubyObject returnValue = encodingReader.callMethod(context, "read", ruby.newFixnum(len));
91
+ if (returnValue.isNil())
92
+ return -1;
93
+
94
+ ByteList bytes = returnValue.asString().getByteList();
95
+ int length = bytes.length();
96
+ System.arraycopy(bytes.unsafeBytes(), bytes.getBegin(), b, off + copyLength, length);
97
+ return length + copyLength;
98
+ }
99
+
100
+ @Override
101
+ public int read() {
102
+ byte[] bytes = new byte[1];
103
+ int count = read(bytes, 0, 1);
104
+ if (count < 1)
105
+ return count;
106
+ return bytes[0];
107
+ }
108
+
109
+ }
@@ -0,0 +1,123 @@
1
+ package nokogiri.internals;
2
+
3
+ import java.io.ByteArrayInputStream;
4
+ import java.io.File;
5
+ import java.io.IOException;
6
+ import java.net.URI;
7
+
8
+ import nokogiri.internals.ParserContext.Options;
9
+
10
+ import org.jruby.Ruby;
11
+ import org.xml.sax.InputSource;
12
+ import org.xml.sax.SAXException;
13
+ import org.xml.sax.ext.EntityResolver2;
14
+
15
+ /**
16
+ * An entity resolver aware of the fact that the Ruby runtime can
17
+ * change directory but the JVM cannot. Thus any file-based
18
+ * entity resolution that uses relative paths must be translated
19
+ * to be relative to the current directory of the Ruby runtime.
20
+ */
21
+ public class NokogiriEntityResolver implements EntityResolver2 {
22
+ protected Ruby runtime;
23
+ private final NokogiriErrorHandler handler;
24
+ private final Options options;
25
+
26
+ public NokogiriEntityResolver(Ruby runtime, NokogiriErrorHandler handler, Options options) {
27
+ super();
28
+ this.runtime = runtime;
29
+ this.handler = handler;
30
+ this.options = options;
31
+ }
32
+
33
+ @Override
34
+ public InputSource getExternalSubset(String name, String baseURI)
35
+ throws SAXException, IOException {
36
+ return null;
37
+ }
38
+
39
+ @Override
40
+ public InputSource resolveEntity(String publicId, String systemId)
41
+ throws SAXException, IOException {
42
+ return resolveEntity(runtime, null, publicId, null, systemId);
43
+ }
44
+
45
+ @Override
46
+ public InputSource resolveEntity(String name,
47
+ String publicId,
48
+ String baseURI,
49
+ String systemId)
50
+ throws SAXException, IOException {
51
+ return resolveEntity(runtime, name, publicId, baseURI, systemId);
52
+ }
53
+
54
+ private File join(String parent, String child) {
55
+ if (new File(parent).isFile()) {
56
+ parent = new File(parent).getParent();
57
+ }
58
+
59
+ return new File(parent, child);
60
+ }
61
+
62
+ private InputSource emptyInputSource(InputSource source) {
63
+ source.setByteStream(new ByteArrayInputStream(new byte[0]));
64
+ return source;
65
+ }
66
+
67
+ private boolean shouldLoadDtd() {
68
+ return options.dtdLoad || options.dtdValid;
69
+ }
70
+
71
+ private void addError(String errorMessage) {
72
+ if (handler != null)
73
+ handler.errors.add(new Exception(errorMessage));
74
+ }
75
+
76
+ /**
77
+ * Create a file-based input source taking into account the current
78
+ * directory of <code>runtime</code>.
79
+ * @throws SAXException
80
+ */
81
+ protected InputSource resolveEntity(Ruby runtime, String name, String publicId, String baseURI, String systemId)
82
+ throws IOException, SAXException {
83
+ InputSource s = new InputSource();
84
+ if (name.equals("[dtd]") && !shouldLoadDtd()) {
85
+ return emptyInputSource(s);
86
+ } else if (!name.equals("[dtd]") && !options.noEnt) {
87
+ return emptyInputSource(s);
88
+ }
89
+ String adjustedSystemId;
90
+ URI uri = URI.create(systemId);
91
+ if (options.noNet && uri.getHost() != null) {
92
+ addError("Attempt to load network entity " + systemId);
93
+ return emptyInputSource(s);
94
+ }
95
+ // if this is a url or absolute file name then use it
96
+ if (uri.isAbsolute() && !uri.isOpaque()) {
97
+ adjustedSystemId = uri.toURL().toString();
98
+ } else if (new File(uri.getPath()).isAbsolute()) {
99
+ adjustedSystemId = uri.getPath();
100
+ } else if (baseURI != null) {
101
+ URI baseuri = URI.create(baseURI);
102
+ if (options.noNet && baseuri.getHost() != null) {
103
+ addError("Attempt to load network entity " + systemId);
104
+ return emptyInputSource(s);
105
+ }
106
+ if (baseuri.getHost() == null) {
107
+ // this is a local file
108
+ adjustedSystemId = join(baseuri.getPath(), uri.getPath()).getCanonicalPath();
109
+ } else {
110
+ // this is a url, then resolve uri using baseuri
111
+ adjustedSystemId = baseuri.resolve(systemId).toURL().toString();
112
+ }
113
+ } else {
114
+ // baseURI is null, so we have to use the current working directory to resolve the entity
115
+ String pwd = runtime.getCurrentDirectory();
116
+ adjustedSystemId = join(pwd, uri.getPath()).getCanonicalPath();
117
+ }
118
+ s.setSystemId(adjustedSystemId);
119
+ s.setPublicId(publicId);
120
+ return s;
121
+ }
122
+
123
+ }