nokogiri 1.5.5.rc2-java → 1.5.5.rc3-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +1 -0
- data/CHANGELOG.rdoc +1 -0
- data/README.ja.rdoc +5 -5
- data/README.rdoc +9 -9
- data/ROADMAP.md +20 -20
- data/Rakefile +5 -0
- data/Y_U_NO_GEMSPEC.md +3 -3
- data/ext/java/nokogiri/HtmlElementDescription.java +1 -1
- data/ext/java/nokogiri/HtmlEntityLookup.java +1 -1
- data/ext/java/nokogiri/XmlElement.java +0 -40
- data/ext/java/nokogiri/XmlNode.java +31 -2
- data/ext/java/nokogiri/XmlReader.java +48 -15
- data/ext/java/nokogiri/XmlSyntaxError.java +9 -4
- data/ext/java/nokogiri/internals/NokogiriHandler.java +7 -1
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +1 -1
- data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +1 -1
- data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +1 -1
- data/ext/java/nokogiri/internals/ParserContext.java +64 -111
- data/ext/java/nokogiri/internals/XmlDeclHandler.java +1 -1
- data/ext/java/nokogiri/internals/XmlDomParser.java +4 -6
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +15 -12
- data/ext/nokogiri/xml_node.c +2 -2
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml/node/save_options.rb +1 -1
- data/test/html/test_document.rb +2 -2
- data/test/xml/test_document.rb +1 -1
- data/test/xml/test_dtd.rb +3 -7
- data/test/xml/test_entity_reference.rb +214 -0
- data/test/xml/test_node_reparenting.rb +1 -1
- data/test/xml/test_xpath.rb +1 -1
- metadata +2 -2
@@ -75,23 +75,27 @@ public class XmlSyntaxError extends RubyException {
|
|
75
75
|
}
|
76
76
|
|
77
77
|
public static XmlSyntaxError createWarning(Ruby runtime, SAXParseException e) {
|
78
|
-
XmlSyntaxError xmlSyntaxError = (
|
78
|
+
XmlSyntaxError xmlSyntaxError = createNokogiriXmlSyntaxError(runtime);
|
79
79
|
xmlSyntaxError.setException(runtime, e, 1);
|
80
80
|
return xmlSyntaxError;
|
81
81
|
}
|
82
82
|
|
83
83
|
public static XmlSyntaxError createError(Ruby runtime, SAXParseException e) {
|
84
|
-
XmlSyntaxError xmlSyntaxError = (
|
84
|
+
XmlSyntaxError xmlSyntaxError = createNokogiriXmlSyntaxError(runtime);
|
85
85
|
xmlSyntaxError.setException(runtime, e, 2);
|
86
86
|
return xmlSyntaxError;
|
87
87
|
}
|
88
88
|
|
89
89
|
public static XmlSyntaxError createFatalError(Ruby runtime, SAXParseException e) {
|
90
|
-
XmlSyntaxError xmlSyntaxError = (
|
90
|
+
XmlSyntaxError xmlSyntaxError = createNokogiriXmlSyntaxError(runtime);
|
91
91
|
xmlSyntaxError.setException(runtime, e, 3);
|
92
92
|
return xmlSyntaxError;
|
93
93
|
}
|
94
|
-
|
94
|
+
|
95
|
+
public static XmlSyntaxError createNokogiriXmlSyntaxError(Ruby runtime) {
|
96
|
+
return (XmlSyntaxError) NokogiriService.XML_SYNTAXERROR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::SyntaxError"));
|
97
|
+
}
|
98
|
+
|
95
99
|
public void setException(Exception exception) {
|
96
100
|
this.exception = exception;
|
97
101
|
}
|
@@ -112,6 +116,7 @@ public class XmlSyntaxError extends RubyException {
|
|
112
116
|
//@Override
|
113
117
|
//"to_s" method was branched in 1.8 and 1.9 since JRuby 1.6.6
|
114
118
|
// to support older versions of JRuby, the annotation is commented out
|
119
|
+
@Override
|
115
120
|
@JRubyMethod(name = "to_s", compat = CompatVersion.RUBY1_8)
|
116
121
|
public IRubyObject to_s(ThreadContext context) {
|
117
122
|
if (exception != null && exception.getMessage() != null)
|
@@ -92,7 +92,13 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler {
|
|
92
92
|
String objectName = object.getMetaClass().getName();
|
93
93
|
if (htmlParserName.equals(objectName)) needEmptyAttrCheck = true;
|
94
94
|
}
|
95
|
-
|
95
|
+
|
96
|
+
@Override
|
97
|
+
public void skippedEntity(String skippedEntity) {
|
98
|
+
call("error", ruby.newString("Entity '" + skippedEntity + "' not defined\n"));
|
99
|
+
}
|
100
|
+
|
101
|
+
@Override
|
96
102
|
public void setDocumentLocator(Locator locator) {
|
97
103
|
this.locator = locator;
|
98
104
|
}
|
@@ -17,10 +17,10 @@
|
|
17
17
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
18
|
* permit persons to whom the Software is furnished to do so, subject to
|
19
19
|
* the following conditions:
|
20
|
-
*
|
20
|
+
*
|
21
21
|
* The above copyright notice and this permission notice shall be
|
22
22
|
* included in all copies or substantial portions of the Software.
|
23
|
-
*
|
23
|
+
*
|
24
24
|
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
25
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
26
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
@@ -33,7 +33,6 @@
|
|
33
33
|
package nokogiri.internals;
|
34
34
|
|
35
35
|
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
|
36
|
-
import static nokogiri.internals.NokogiriHelpers.adjustSystemIdIfNecessary;
|
37
36
|
import static org.jruby.javasupport.util.RuntimeHelpers.invoke;
|
38
37
|
|
39
38
|
import java.io.ByteArrayInputStream;
|
@@ -59,7 +58,6 @@ import org.jruby.util.ByteList;
|
|
59
58
|
import org.jruby.util.TypeConverter;
|
60
59
|
import org.xml.sax.InputSource;
|
61
60
|
import org.xml.sax.SAXException;
|
62
|
-
import org.xml.sax.ext.EntityResolver2;
|
63
61
|
|
64
62
|
/**
|
65
63
|
* Base class for the various parser contexts. Handles converting
|
@@ -73,22 +71,6 @@ public class ParserContext extends RubyObject {
|
|
73
71
|
protected IRubyObject detected_encoding = null;
|
74
72
|
protected int stringDataSize = -1;
|
75
73
|
|
76
|
-
/**
|
77
|
-
* Create a file base input source taking into account the current
|
78
|
-
* directory of <code>runtime</code>.
|
79
|
-
*/
|
80
|
-
public static InputSource resolveEntity(Ruby runtime, String publicId, String baseURI, String systemId)
|
81
|
-
throws IOException {
|
82
|
-
InputSource s = new InputSource();
|
83
|
-
String adjusted = adjustSystemIdIfNecessary(runtime.getCurrentDirectory(), runtime.getInstanceConfig().getScriptFileName(), baseURI, systemId);
|
84
|
-
if (adjusted == null && publicId == null) {
|
85
|
-
throw runtime.newRuntimeError("SystemId \"" + systemId + "\" is not correct.");
|
86
|
-
}
|
87
|
-
s.setSystemId(adjusted);
|
88
|
-
s.setPublicId(publicId);
|
89
|
-
return s;
|
90
|
-
}
|
91
|
-
|
92
74
|
public ParserContext(Ruby runtime) {
|
93
75
|
// default to class 'Object' because this class isn't exposed to Ruby
|
94
76
|
super(runtime, runtime.getObject());
|
@@ -107,37 +89,17 @@ public class ParserContext extends RubyObject {
|
|
107
89
|
* which may be an IO object, a String, or a StringIO.
|
108
90
|
*/
|
109
91
|
public void setInputSource(ThreadContext context, IRubyObject data, IRubyObject url) {
|
92
|
+
source = new InputSource();
|
93
|
+
|
110
94
|
Ruby ruby = context.getRuntime();
|
111
|
-
String path = (String) url.toJava(String.class);
|
112
|
-
if (data.getType().respondsTo("detect_encoding")) {
|
113
|
-
// data is EncodingReader
|
114
|
-
try {
|
115
|
-
data.callMethod(context, "read", RubyFixnum.newFixnum(context.getRuntime(), 1024));
|
116
|
-
} catch (RaiseException e) {
|
117
|
-
detected_encoding = e.getException().getInstanceVariable("@found_encoding");
|
118
|
-
}
|
119
|
-
}
|
120
95
|
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
// fallback to the old behavior
|
129
|
-
if (isAbsolutePath(path)) {
|
130
|
-
returnWithSystemId(path);
|
131
|
-
return;
|
132
|
-
}
|
133
|
-
String currentDir = context.getRuntime().getCurrentDirectory();
|
134
|
-
String absPath = currentDir + "/" + path;
|
135
|
-
if (isAbsolutePath(absPath)) {
|
136
|
-
returnWithSystemId(absPath);
|
137
|
-
return;
|
138
|
-
}
|
139
|
-
}
|
140
|
-
}
|
96
|
+
ParserContext.setUrl(context, source, url);
|
97
|
+
|
98
|
+
// if setEncoding returned true, then the stream is set
|
99
|
+
// to the EncodingReaderInputStream
|
100
|
+
if (setEncoding(context, data))
|
101
|
+
return;
|
102
|
+
|
141
103
|
RubyString stringData = null;
|
142
104
|
if (invoke(context, data, "respond_to?",
|
143
105
|
ruby.newSymbol("to_io").to_sym()).isTrue()) {
|
@@ -146,17 +108,7 @@ public class ParserContext extends RubyObject {
|
|
146
108
|
(RubyIO) TypeConverter.convertToType(data,
|
147
109
|
ruby.getIO(),
|
148
110
|
"to_io");
|
149
|
-
source
|
150
|
-
} else if (((RubyObject)data).getInstanceVariable("@io") != null) {
|
151
|
-
// in case of EncodingReader is used
|
152
|
-
// since EncodingReader won't respond to :to_io
|
153
|
-
RubyObject dataObject = (RubyObject) ((RubyObject)data).getInstanceVariable("@io");
|
154
|
-
if (dataObject instanceof RubyIO) {
|
155
|
-
RubyIO io = (RubyIO)dataObject;
|
156
|
-
source = new InputSource(io.getInStream());
|
157
|
-
} else if (dataObject instanceof RubyStringIO) {
|
158
|
-
stringData = (RubyString)((RubyStringIO)dataObject).string();
|
159
|
-
}
|
111
|
+
source.setByteStream(io.getInStream());
|
160
112
|
} else {
|
161
113
|
if (invoke(context, data, "respond_to?",
|
162
114
|
ruby.newSymbol("string").to_sym()).isTrue()) {
|
@@ -185,41 +137,64 @@ public class ParserContext extends RubyObject {
|
|
185
137
|
ByteList bytes = stringData.getByteList();
|
186
138
|
if (charset != null) {
|
187
139
|
StringReader reader = new StringReader(new String(bytes.unsafeBytes(), bytes.begin(), bytes.length(), charset));
|
188
|
-
source
|
140
|
+
source.setCharacterStream(reader);
|
189
141
|
source.setEncoding(charset.name());
|
190
142
|
} else {
|
191
143
|
stringDataSize = bytes.length() - bytes.begin();
|
192
|
-
|
144
|
+
ByteArrayInputStream stream = new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length());
|
145
|
+
source.setByteStream(stream);
|
193
146
|
}
|
194
147
|
}
|
195
148
|
}
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
149
|
+
|
150
|
+
public static void setUrl(ThreadContext context, InputSource source, IRubyObject url) {
|
151
|
+
String path = rubyStringToString(url);
|
152
|
+
// Dir.chdir might be called at some point before this.
|
153
|
+
if (path != null) {
|
154
|
+
try {
|
155
|
+
URI uri = URI.create(path);
|
156
|
+
source.setSystemId(uri.toURL().toString());
|
157
|
+
} catch (Exception ex) {
|
158
|
+
// fallback to the old behavior
|
159
|
+
File file = new File(path);
|
160
|
+
if (file.isAbsolute()) {
|
161
|
+
source.setSystemId(path);
|
162
|
+
} else {
|
163
|
+
String pwd = context.getRuntime().getCurrentDirectory();
|
164
|
+
String absolutePath;
|
165
|
+
try {
|
166
|
+
absolutePath = new File(pwd, path).getCanonicalPath();
|
167
|
+
} catch (IOException e) {
|
168
|
+
absolutePath = new File(pwd, path).getAbsolutePath();
|
169
|
+
}
|
170
|
+
source.setSystemId(absolutePath);
|
171
|
+
}
|
172
|
+
}
|
173
|
+
}
|
200
174
|
}
|
201
|
-
|
202
|
-
private
|
203
|
-
|
204
|
-
|
205
|
-
|
175
|
+
|
176
|
+
private boolean setEncoding(ThreadContext context, IRubyObject data) {
|
177
|
+
if (data.getType().respondsTo("detect_encoding")) {
|
178
|
+
// in case of EncodingReader is used
|
179
|
+
// since EncodingReader won't respond to :to_io
|
180
|
+
NokogiriEncodingReaderWrapper reader = new NokogiriEncodingReaderWrapper(context, (RubyObject) data);
|
181
|
+
source.setByteStream(reader);
|
182
|
+
// data is EncodingReader
|
183
|
+
if(reader.detectEncoding()) {
|
184
|
+
detected_encoding = reader.getEncoding();
|
185
|
+
source.setEncoding(detected_encoding.asJavaString());
|
186
|
+
}
|
187
|
+
return true;
|
206
188
|
}
|
207
|
-
|
208
|
-
return;
|
189
|
+
return false;
|
209
190
|
}
|
210
191
|
|
211
192
|
/**
|
212
193
|
* Set the InputSource to read from <code>file</code>, a String filename.
|
213
194
|
*/
|
214
195
|
public void setInputSourceFile(ThreadContext context, IRubyObject file) {
|
215
|
-
|
216
|
-
|
217
|
-
try{
|
218
|
-
source = resolveEntity(context.getRuntime(), null, null, filename);
|
219
|
-
} catch (Exception e) {
|
220
|
-
throw RaiseException.createNativeRaiseException(context.getRuntime(), e);
|
221
|
-
}
|
222
|
-
|
196
|
+
source = new InputSource();
|
197
|
+
ParserContext.setUrl(context, source, file);
|
223
198
|
}
|
224
199
|
|
225
200
|
/**
|
@@ -295,41 +270,19 @@ public class ParserContext extends RubyObject {
|
|
295
270
|
}
|
296
271
|
}
|
297
272
|
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
* to be relative to the current directory of the Ruby runtime.
|
303
|
-
*/
|
304
|
-
public static class ChdirEntityResolver implements EntityResolver2 {
|
305
|
-
protected Ruby runtime;
|
306
|
-
|
307
|
-
public ChdirEntityResolver(Ruby runtime) {
|
308
|
-
super();
|
309
|
-
this.runtime = runtime;
|
310
|
-
}
|
311
|
-
|
312
|
-
@Override
|
313
|
-
public InputSource getExternalSubset(String name, String baseURI)
|
314
|
-
throws SAXException, IOException {
|
315
|
-
return null;
|
273
|
+
public static class NokogiriXInlcudeEntityResolver implements org.xml.sax.EntityResolver {
|
274
|
+
InputSource source;
|
275
|
+
public NokogiriXInlcudeEntityResolver(InputSource source) {
|
276
|
+
this.source = source;
|
316
277
|
}
|
317
278
|
|
318
279
|
@Override
|
319
280
|
public InputSource resolveEntity(String publicId, String systemId)
|
320
|
-
|
321
|
-
|
281
|
+
throws SAXException, IOException {
|
282
|
+
if (systemId != null) source.setSystemId(systemId);
|
283
|
+
if (publicId != null) source.setPublicId(publicId);
|
284
|
+
return source;
|
322
285
|
}
|
323
|
-
|
324
|
-
@Override
|
325
|
-
public InputSource resolveEntity(String name,
|
326
|
-
String publicId,
|
327
|
-
String baseURI,
|
328
|
-
String systemId)
|
329
|
-
throws SAXException, IOException {
|
330
|
-
return ParserContext.resolveEntity(runtime, publicId, baseURI, systemId);
|
331
|
-
}
|
332
|
-
|
333
286
|
}
|
334
287
|
|
335
288
|
}
|
@@ -35,18 +35,16 @@ package nokogiri.internals;
|
|
35
35
|
import java.io.IOException;
|
36
36
|
|
37
37
|
import nokogiri.XmlDocument;
|
38
|
-
import nokogiri.internals.ParserContext.Options;
|
39
38
|
|
40
39
|
import org.apache.xerces.parsers.DOMParser;
|
41
40
|
import org.apache.xerces.parsers.XIncludeParserConfiguration;
|
42
|
-
import org.apache.xerces.xni.XMLResourceIdentifier;
|
43
|
-
import org.apache.xerces.xni.XNIException;
|
44
|
-
import org.apache.xerces.xni.parser.XMLEntityResolver;
|
45
|
-
import org.apache.xerces.xni.parser.XMLInputSource;
|
46
41
|
import org.apache.xerces.xni.parser.XMLParserConfiguration;
|
47
42
|
import org.cyberneko.dtd.DTDConfiguration;
|
48
43
|
import org.w3c.dom.Document;
|
44
|
+
import org.xml.sax.Attributes;
|
45
|
+
import org.xml.sax.ContentHandler;
|
49
46
|
import org.xml.sax.InputSource;
|
47
|
+
import org.xml.sax.Locator;
|
50
48
|
import org.xml.sax.SAXException;
|
51
49
|
|
52
50
|
/**
|
@@ -80,7 +78,7 @@ public class XmlDomParser extends DOMParser {
|
|
80
78
|
public void parse(InputSource source) throws SAXException, IOException {
|
81
79
|
dtd.reset();
|
82
80
|
if (options.xInclude) {
|
83
|
-
|
81
|
+
setEntityResolver(new NokogiriXInlcudeEntityResolver(source));
|
84
82
|
}
|
85
83
|
super.parse(source);
|
86
84
|
Document doc = getDocument();
|
@@ -68,8 +68,12 @@ import org.xml.sax.SAXException;
|
|
68
68
|
public class XmlDomParserContext extends ParserContext {
|
69
69
|
protected static final String FEATURE_LOAD_EXTERNAL_DTD =
|
70
70
|
"http://apache.org/xml/features/nonvalidating/load-external-dtd";
|
71
|
+
protected static final String FEATURE_LOAD_DTD_GRAMMAR =
|
72
|
+
"http://apache.org/xml/features/nonvalidating/load-dtd-grammar";
|
71
73
|
protected static final String FEATURE_INCLUDE_IGNORABLE_WHITESPACE =
|
72
74
|
"http://apache.org/xml/features/dom/include-ignorable-whitespace";
|
75
|
+
protected static final String FEATURE_NOT_EXPAND_ENTITY =
|
76
|
+
"http://apache.org/xml/features/dom/create-entity-ref-nodes";
|
73
77
|
protected static final String FEATURE_VALIDATION = "http://xml.org/sax/features/validation";
|
74
78
|
private static final String XINCLUDE_FEATURE_ID = "http://apache.org/xml/features/xinclude";
|
75
79
|
private static final String SECURITY_MANAGER = "http://apache.org/xml/properties/security-manager";
|
@@ -120,22 +124,18 @@ public class XmlDomParserContext extends ParserContext {
|
|
120
124
|
if (options.dtdValid) {
|
121
125
|
setFeature(FEATURE_VALIDATION, true);
|
122
126
|
}
|
127
|
+
|
128
|
+
if (!options.noEnt) {
|
129
|
+
setFeature(FEATURE_NOT_EXPAND_ENTITY, true);
|
130
|
+
}
|
123
131
|
// If we turn off loading of external DTDs completely, we don't
|
124
132
|
// getthe publicID. Instead of turning off completely, we use
|
125
133
|
// an entity resolver that returns empty documents.
|
126
134
|
if (options.dtdLoad) {
|
127
135
|
setFeature(FEATURE_LOAD_EXTERNAL_DTD, true);
|
128
|
-
|
129
|
-
} else {
|
130
|
-
parser.setEntityResolver(new EntityResolver() {
|
131
|
-
public InputSource resolveEntity(String arg0, String arg1)
|
132
|
-
throws SAXException, IOException {
|
133
|
-
ByteArrayInputStream empty =
|
134
|
-
new ByteArrayInputStream(new byte[0]);
|
135
|
-
return new InputSource(empty);
|
136
|
-
}
|
137
|
-
});
|
136
|
+
setFeature(FEATURE_LOAD_DTD_GRAMMAR, true);
|
138
137
|
}
|
138
|
+
parser.setEntityResolver(new NokogiriEntityResolver(runtime, errorHandler, options));
|
139
139
|
}
|
140
140
|
|
141
141
|
/**
|
@@ -210,8 +210,11 @@ public class XmlDomParserContext extends ParserContext {
|
|
210
210
|
xmlDocument.setEncoding(ruby_encoding);
|
211
211
|
|
212
212
|
if (options.dtdLoad) {
|
213
|
-
|
214
|
-
|
213
|
+
IRubyObject xmlDtdOrNil = XmlDtd.newFromExternalSubset(context.getRuntime(), doc);
|
214
|
+
if (!xmlDtdOrNil.isNil()) {
|
215
|
+
XmlDtd xmlDtd = (XmlDtd) xmlDtdOrNil;
|
216
|
+
doc.setUserData(XmlDocument.DTD_EXTERNAL_SUBSET, xmlDtd, null);
|
217
|
+
}
|
215
218
|
}
|
216
219
|
return xmlDocument;
|
217
220
|
}
|
data/ext/nokogiri/xml_node.c
CHANGED
@@ -1329,8 +1329,8 @@ VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
|
|
1329
1329
|
return DOC_RUBY_OBJECT(node->doc);
|
1330
1330
|
|
1331
1331
|
/* It's OK if the node doesn't have a fully-realized document (as in XML::Reader). */
|
1332
|
-
/* see https://github.com/
|
1333
|
-
/* and https://github.com/
|
1332
|
+
/* see https://github.com/sparklemotion/nokogiri/issues/95 */
|
1333
|
+
/* and https://github.com/sparklemotion/nokogiri/issues/439 */
|
1334
1334
|
doc = node->doc;
|
1335
1335
|
if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc;
|
1336
1336
|
node_has_a_document = DOC_RUBY_OBJECT_TEST(doc);
|
data/lib/nokogiri/nokogiri.jar
CHANGED
Binary file
|