nokogiri 1.16.8-java → 1.17.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +11 -21
- data/README.md +4 -0
- data/dependencies.yml +6 -6
- data/ext/java/nokogiri/Html4Document.java +3 -3
- data/ext/java/nokogiri/Html4SaxParserContext.java +47 -175
- data/ext/java/nokogiri/NokogiriService.java +2 -2
- data/ext/java/nokogiri/XmlCdata.java +3 -0
- data/ext/java/nokogiri/XmlDocument.java +7 -14
- data/ext/java/nokogiri/XmlDocumentFragment.java +4 -92
- data/ext/java/nokogiri/XmlDtd.java +2 -2
- data/ext/java/nokogiri/XmlEntityReference.java +16 -12
- data/ext/java/nokogiri/XmlNode.java +26 -47
- data/ext/java/nokogiri/XmlNodeSet.java +10 -1
- data/ext/java/nokogiri/XmlSaxParserContext.java +73 -36
- data/ext/java/nokogiri/XmlSchema.java +15 -16
- data/ext/java/nokogiri/XsltStylesheet.java +1 -1
- data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +1 -1
- data/ext/java/nokogiri/internals/NokogiriDomParser.java +3 -3
- data/ext/java/nokogiri/internals/NokogiriHandler.java +59 -15
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +1 -1
- data/ext/java/nokogiri/internals/ParserContext.java +51 -21
- data/ext/java/nokogiri/internals/ReaderNode.java +1 -1
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +8 -19
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +1 -1
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +1 -1
- data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +10 -11
- data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +5 -5
- data/ext/java/nokogiri/internals/c14n/{UtfHelpper.java → UtfHelper.java} +2 -2
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +8 -8
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +2 -2
- data/ext/nokogiri/extconf.rb +191 -137
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +25 -24
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +2 -10
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +163 -156
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -6
- data/ext/nokogiri/xml_node.c +130 -104
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +54 -58
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +213 -131
- data/ext/nokogiri/xml_sax_push_parser.c +68 -49
- data/ext/nokogiri/xml_schema.c +50 -85
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +2 -4
- data/ext/nokogiri/xml_xpath_context.c +2 -2
- data/ext/nokogiri/xslt_stylesheet.c +8 -8
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +42 -6
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +44 -23
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -72
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +70 -26
- data/lib/nokogiri/xml/document_fragment.rb +84 -13
- data/lib/nokogiri/xml/node.rb +82 -11
- data/lib/nokogiri/xml/node_set.rb +9 -7
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +46 -13
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +6 -8
- data/lib/nokogiri/xml/syntax_error.rb +22 -0
- data/lib/nokogiri/xml.rb +13 -24
- data/lib/nokogiri/xslt.rb +3 -9
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- metadata +9 -5
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
@@ -287,7 +287,7 @@ public class XmlNode extends RubyObject
|
|
287
287
|
* args but not for an exact number. Any extra args will then be
|
288
288
|
* passed to 'initialize'. The way 'new' and this 'init' function
|
289
289
|
* interact means that subclasses cannot arbitrarily change the
|
290
|
-
* require
|
290
|
+
* require arguments by defining an 'initialize' method. This is
|
291
291
|
* how the C libxml wrapper works also.
|
292
292
|
*
|
293
293
|
* As written it performs initialization for a new Element with
|
@@ -407,7 +407,7 @@ public class XmlNode extends RubyObject
|
|
407
407
|
/**
|
408
408
|
* This method should be called after a node has been adopted in a new
|
409
409
|
* document. This method will ensure that the node is renamed with the
|
410
|
-
*
|
410
|
+
* appropriate NS uri. First the prefix of the node is extracted, then is
|
411
411
|
* used to lookup the namespace uri in the new document starting at the
|
412
412
|
* current node and traversing the ancestors. If the namespace uri wasn't
|
413
413
|
* empty (or null) all children and the node has attributes and/or children
|
@@ -643,12 +643,22 @@ public class XmlNode extends RubyObject
|
|
643
643
|
|
644
644
|
@JRubyMethod(name = {"attribute", "attr"})
|
645
645
|
public IRubyObject
|
646
|
-
attribute(ThreadContext context, IRubyObject
|
646
|
+
attribute(ThreadContext context, IRubyObject rbName)
|
647
647
|
{
|
648
|
-
NamedNodeMap
|
649
|
-
|
650
|
-
|
651
|
-
|
648
|
+
NamedNodeMap attributes = this.node.getAttributes();
|
649
|
+
String name = rubyStringToString(rbName);
|
650
|
+
|
651
|
+
for (int j = 0 ; j < attributes.getLength() ; j++) {
|
652
|
+
Node attribute = attributes.item(j);
|
653
|
+
String localName = attribute.getLocalName();
|
654
|
+
if (localName == null) {
|
655
|
+
continue;
|
656
|
+
}
|
657
|
+
if (localName.equals(name)) {
|
658
|
+
return getCachedNodeOrCreate(context.runtime, attribute);
|
659
|
+
}
|
660
|
+
}
|
661
|
+
return context.nil;
|
652
662
|
}
|
653
663
|
|
654
664
|
@JRubyMethod
|
@@ -966,45 +976,13 @@ public class XmlNode extends RubyObject
|
|
966
976
|
return doc;
|
967
977
|
}
|
968
978
|
|
979
|
+
@JRubyMethod(visibility = Visibility.PROTECTED)
|
969
980
|
public IRubyObject
|
970
|
-
|
971
|
-
{
|
972
|
-
return dup_implementation(getMetaClass().getClassRuntime(), true);
|
973
|
-
}
|
974
|
-
|
975
|
-
@JRubyMethod
|
976
|
-
public IRubyObject
|
977
|
-
dup(ThreadContext context)
|
978
|
-
{
|
979
|
-
return dup_implementation(context, true);
|
980
|
-
}
|
981
|
-
|
982
|
-
@JRubyMethod
|
983
|
-
public IRubyObject
|
984
|
-
dup(ThreadContext context, IRubyObject depth)
|
981
|
+
initialize_copy_with_args(ThreadContext context, IRubyObject other, IRubyObject level, IRubyObject _ignored)
|
985
982
|
{
|
986
|
-
boolean deep =
|
987
|
-
|
988
|
-
|
989
|
-
|
990
|
-
protected final IRubyObject
|
991
|
-
dup_implementation(ThreadContext context, boolean deep)
|
992
|
-
{
|
993
|
-
return dup_implementation(context.runtime, deep);
|
994
|
-
}
|
995
|
-
|
996
|
-
protected IRubyObject
|
997
|
-
dup_implementation(Ruby runtime, boolean deep)
|
998
|
-
{
|
999
|
-
XmlNode clone;
|
1000
|
-
try {
|
1001
|
-
clone = (XmlNode) clone();
|
1002
|
-
} catch (CloneNotSupportedException e) {
|
1003
|
-
throw runtime.newRuntimeError(e.toString());
|
1004
|
-
}
|
1005
|
-
Node newNode = node.cloneNode(deep);
|
1006
|
-
clone.node = newNode;
|
1007
|
-
return clone;
|
983
|
+
boolean deep = level instanceof RubyInteger && RubyFixnum.fix2int(level) != 0;
|
984
|
+
this.node = asXmlNode(context, other).node.cloneNode(deep);
|
985
|
+
return this;
|
1008
986
|
}
|
1009
987
|
|
1010
988
|
public static RubyString
|
@@ -1447,11 +1425,12 @@ public class XmlNode extends RubyObject
|
|
1447
1425
|
}
|
1448
1426
|
}
|
1449
1427
|
|
1450
|
-
if (uri
|
1451
|
-
element.setAttributeNS(uri, key, val);
|
1452
|
-
} else {
|
1428
|
+
if (colonIndex > 0 && uri == null) {
|
1453
1429
|
element.setAttribute(key, val);
|
1430
|
+
} else {
|
1431
|
+
element.setAttributeNS(uri, key, val);
|
1454
1432
|
}
|
1433
|
+
|
1455
1434
|
clearXpathContext(node);
|
1456
1435
|
}
|
1457
1436
|
|
@@ -15,6 +15,7 @@ import org.jruby.RubyRange;
|
|
15
15
|
import org.jruby.anno.JRubyClass;
|
16
16
|
import org.jruby.anno.JRubyMethod;
|
17
17
|
import org.jruby.runtime.ThreadContext;
|
18
|
+
import org.jruby.runtime.Visibility;
|
18
19
|
import org.jruby.runtime.builtin.IRubyObject;
|
19
20
|
import org.w3c.dom.Node;
|
20
21
|
import org.w3c.dom.NodeList;
|
@@ -201,7 +202,6 @@ public class XmlNodeSet extends RubyObject implements NodeList
|
|
201
202
|
return context.nil;
|
202
203
|
}
|
203
204
|
|
204
|
-
@JRubyMethod
|
205
205
|
public IRubyObject
|
206
206
|
dup(ThreadContext context)
|
207
207
|
{
|
@@ -210,6 +210,15 @@ public class XmlNodeSet extends RubyObject implements NodeList
|
|
210
210
|
return dup;
|
211
211
|
}
|
212
212
|
|
213
|
+
@JRubyMethod(visibility = Visibility.PROTECTED)
|
214
|
+
public IRubyObject
|
215
|
+
initialize_copy(ThreadContext context, IRubyObject other)
|
216
|
+
{
|
217
|
+
setNodes(getNodes(context, other));
|
218
|
+
initializeFrom(context, (XmlNodeSet)other);
|
219
|
+
return this;
|
220
|
+
}
|
221
|
+
|
213
222
|
@JRubyMethod(name = "include?")
|
214
223
|
public IRubyObject
|
215
224
|
include_p(ThreadContext context, IRubyObject node_or_namespace)
|
@@ -1,10 +1,14 @@
|
|
1
1
|
package nokogiri;
|
2
2
|
|
3
3
|
import nokogiri.internals.*;
|
4
|
+
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
|
5
|
+
|
4
6
|
import org.apache.xerces.parsers.AbstractSAXParser;
|
5
7
|
import org.jruby.Ruby;
|
6
8
|
import org.jruby.RubyClass;
|
9
|
+
import org.jruby.RubyEncoding;
|
7
10
|
import org.jruby.RubyFixnum;
|
11
|
+
import org.jruby.RubyString;
|
8
12
|
import org.jruby.anno.JRubyClass;
|
9
13
|
import org.jruby.anno.JRubyMethod;
|
10
14
|
import org.jruby.exceptions.RaiseException;
|
@@ -14,6 +18,7 @@ import org.jruby.runtime.builtin.IRubyObject;
|
|
14
18
|
import org.xml.sax.SAXException;
|
15
19
|
import org.xml.sax.SAXParseException;
|
16
20
|
|
21
|
+
import java.io.ByteArrayInputStream;
|
17
22
|
import java.io.IOException;
|
18
23
|
import java.io.InputStream;
|
19
24
|
|
@@ -43,7 +48,7 @@ public class XmlSaxParserContext extends ParserContext
|
|
43
48
|
|
44
49
|
protected NokogiriHandler handler;
|
45
50
|
protected NokogiriErrorHandler errorHandler;
|
46
|
-
private boolean replaceEntities =
|
51
|
+
private boolean replaceEntities = false;
|
47
52
|
private boolean recovery = false;
|
48
53
|
|
49
54
|
public
|
@@ -90,16 +95,26 @@ public class XmlSaxParserContext extends ParserContext
|
|
90
95
|
* Create a new parser context that will parse the string
|
91
96
|
* <code>data</code>.
|
92
97
|
*/
|
93
|
-
@JRubyMethod(name = "
|
98
|
+
@JRubyMethod(name = "native_memory", meta = true)
|
94
99
|
public static IRubyObject
|
95
|
-
parse_memory(ThreadContext context,
|
96
|
-
IRubyObject klazz,
|
97
|
-
IRubyObject data)
|
100
|
+
parse_memory(ThreadContext context, IRubyObject klazz, IRubyObject data, IRubyObject encoding)
|
98
101
|
{
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
102
|
+
String java_encoding = null;
|
103
|
+
if (encoding != context.runtime.getNil()) {
|
104
|
+
if (!(encoding instanceof RubyEncoding)) {
|
105
|
+
throw context.runtime.newTypeError("encoding must be kind_of Encoding");
|
106
|
+
}
|
107
|
+
java_encoding = ((RubyEncoding)encoding).toString();
|
108
|
+
}
|
109
|
+
|
110
|
+
XmlSaxParserContext ctx = newInstance(context.runtime, (RubyClass) klazz);
|
111
|
+
ctx.initialize(context.runtime);
|
112
|
+
ctx.setStringInputSourceNoEnc(context, data, context.runtime.getNil());
|
113
|
+
|
114
|
+
if (java_encoding != null) {
|
115
|
+
ctx.getInputSource().setEncoding(java_encoding);
|
116
|
+
}
|
117
|
+
|
103
118
|
return ctx;
|
104
119
|
}
|
105
120
|
|
@@ -107,16 +122,26 @@ public class XmlSaxParserContext extends ParserContext
|
|
107
122
|
* Create a new parser context that will read from the file
|
108
123
|
* <code>data</code> and parse.
|
109
124
|
*/
|
110
|
-
@JRubyMethod(name = "
|
125
|
+
@JRubyMethod(name = "native_file", meta = true)
|
111
126
|
public static IRubyObject
|
112
|
-
parse_file(ThreadContext context,
|
113
|
-
IRubyObject klazz,
|
114
|
-
IRubyObject data)
|
127
|
+
parse_file(ThreadContext context, IRubyObject klazz, IRubyObject data, IRubyObject encoding)
|
115
128
|
{
|
116
|
-
|
117
|
-
|
118
|
-
|
129
|
+
String java_encoding = null;
|
130
|
+
if (encoding != context.runtime.getNil()) {
|
131
|
+
if (!(encoding instanceof RubyEncoding)) {
|
132
|
+
throw context.runtime.newTypeError("encoding must be kind_of Encoding");
|
133
|
+
}
|
134
|
+
java_encoding = ((RubyEncoding)encoding).toString();
|
135
|
+
}
|
136
|
+
|
137
|
+
XmlSaxParserContext ctx = newInstance(context.runtime, (RubyClass) klazz);
|
138
|
+
ctx.initialize(context.runtime);
|
119
139
|
ctx.setInputSourceFile(context, data);
|
140
|
+
|
141
|
+
if (java_encoding != null) {
|
142
|
+
ctx.getInputSource().setEncoding(java_encoding);
|
143
|
+
}
|
144
|
+
|
120
145
|
return ctx;
|
121
146
|
}
|
122
147
|
|
@@ -126,21 +151,30 @@ public class XmlSaxParserContext extends ParserContext
|
|
126
151
|
*
|
127
152
|
* TODO: Currently ignores encoding <code>enc</code>.
|
128
153
|
*/
|
129
|
-
@JRubyMethod(name = "
|
154
|
+
@JRubyMethod(name = "native_io", meta = true)
|
130
155
|
public static IRubyObject
|
131
|
-
parse_io(ThreadContext context,
|
132
|
-
IRubyObject klazz,
|
133
|
-
IRubyObject data,
|
134
|
-
IRubyObject encoding)
|
156
|
+
parse_io(ThreadContext context, IRubyObject klazz, IRubyObject data, IRubyObject encoding)
|
135
157
|
{
|
136
|
-
|
137
|
-
|
138
|
-
throw context.getRuntime().newTypeError("encoding must be kind_of String");
|
158
|
+
if (!invoke(context, data, "respond_to?", context.runtime.newSymbol("read")).isTrue()) {
|
159
|
+
throw context.runtime.newTypeError("argument expected to respond to :read");
|
139
160
|
}
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
161
|
+
|
162
|
+
String java_encoding = null;
|
163
|
+
if (encoding != context.runtime.getNil()) {
|
164
|
+
if (!(encoding instanceof RubyEncoding)) {
|
165
|
+
throw context.runtime.newTypeError("encoding must be kind_of Encoding");
|
166
|
+
}
|
167
|
+
java_encoding = ((RubyEncoding)encoding).toString();
|
168
|
+
}
|
169
|
+
|
170
|
+
XmlSaxParserContext ctx = newInstance(context.runtime, (RubyClass) klazz);
|
171
|
+
ctx.initialize(context.runtime);
|
172
|
+
ctx.setIOInputSource(context, data, context.runtime.getNil());
|
173
|
+
|
174
|
+
if (java_encoding != null) {
|
175
|
+
ctx.getInputSource().setEncoding(java_encoding);
|
176
|
+
}
|
177
|
+
|
144
178
|
return ctx;
|
145
179
|
}
|
146
180
|
|
@@ -222,9 +256,12 @@ public class XmlSaxParserContext extends ParserContext
|
|
222
256
|
|
223
257
|
/* TODO: how should we pass in parse options? */
|
224
258
|
ParserContext.Options options = defaultParseOptions(context);
|
259
|
+
if (replaceEntities) {
|
260
|
+
options.noEnt = true;
|
261
|
+
}
|
225
262
|
|
226
263
|
errorHandler = new NokogiriStrictErrorHandler(runtime, options.noError, options.noWarning);
|
227
|
-
handler = new NokogiriHandler(runtime, handlerRuby, errorHandler);
|
264
|
+
handler = new NokogiriHandler(runtime, handlerRuby, errorHandler, options.noEnt);
|
228
265
|
|
229
266
|
preParse(runtime, handlerRuby, handler);
|
230
267
|
parser.setContentHandler(handler);
|
@@ -233,6 +270,7 @@ public class XmlSaxParserContext extends ParserContext
|
|
233
270
|
|
234
271
|
try {
|
235
272
|
parser.setProperty("http://xml.org/sax/properties/lexical-handler", handler);
|
273
|
+
parser.setProperty("http://xml.org/sax/properties/declaration-handler", handler);
|
236
274
|
} catch (Exception ex) {
|
237
275
|
throw runtime.newRuntimeError("Problem while creating XML SAX Parser: " + ex.toString());
|
238
276
|
}
|
@@ -241,16 +279,15 @@ public class XmlSaxParserContext extends ParserContext
|
|
241
279
|
try {
|
242
280
|
do_parse();
|
243
281
|
} catch (SAXParseException ex) {
|
244
|
-
//
|
245
|
-
//
|
246
|
-
|
247
|
-
// However, an EMPTY document should raise a RuntimeError.
|
248
|
-
// This is a bit kludgy, but AFAIK SAX doesn't distinguish
|
249
|
-
// between empty and bad whereas Nokogiri does.
|
282
|
+
// An EMPTY document should raise a RuntimeError. This is a bit kludgy, but AFAIK SAX
|
283
|
+
// doesn't distinguish between empty and bad whereas Nokogiri does.
|
250
284
|
String message = ex.getMessage();
|
251
285
|
if (message != null && message.contains("Premature end of file.") && stringDataSize < 1) {
|
252
|
-
throw runtime.newRuntimeError("
|
286
|
+
throw runtime.newRuntimeError("input string cannot be empty");
|
253
287
|
}
|
288
|
+
|
289
|
+
// A bad document (<foo><bar></foo>) should call the
|
290
|
+
// error handler instead of raising a SAX exception.
|
254
291
|
handler.error(ex);
|
255
292
|
}
|
256
293
|
} catch (SAXException ex) {
|
@@ -175,19 +175,6 @@ public class XmlSchema extends RubyObject
|
|
175
175
|
return getSchema(context, (RubyClass)klazz, source, parseOptions);
|
176
176
|
}
|
177
177
|
|
178
|
-
@JRubyMethod(meta = true, required = 1, optional = 1)
|
179
|
-
public static IRubyObject
|
180
|
-
read_memory(ThreadContext context, IRubyObject klazz, IRubyObject[] args)
|
181
|
-
{
|
182
|
-
IRubyObject content = args[0];
|
183
|
-
IRubyObject parseOptions = null;
|
184
|
-
if (args.length > 1) {
|
185
|
-
parseOptions = args[1];
|
186
|
-
}
|
187
|
-
String data = content.convertToString().asJavaString();
|
188
|
-
return getSchema(context, (RubyClass) klazz, new StreamSource(new StringReader(data)), parseOptions);
|
189
|
-
}
|
190
|
-
|
191
178
|
private static IRubyObject
|
192
179
|
getSchema(ThreadContext context, RubyClass klazz, Source source, IRubyObject parseOptions)
|
193
180
|
{
|
@@ -215,14 +202,22 @@ public class XmlSchema extends RubyObject
|
|
215
202
|
|
216
203
|
XmlDomParserContext ctx = new XmlDomParserContext(runtime, RubyFixnum.newFixnum(runtime, 1L));
|
217
204
|
ctx.setInputSourceFile(context, file);
|
218
|
-
|
219
|
-
|
205
|
+
try {
|
206
|
+
XmlDocument xmlDocument = ctx.parse(context, getNokogiriClass(runtime, "Nokogiri::XML::Document"), context.nil);
|
207
|
+
return validate_document_or_file(context, xmlDocument);
|
208
|
+
} catch (Exception ex) {
|
209
|
+
RubyArray errors = (RubyArray)context.runtime.newEmptyArray();
|
210
|
+
XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
|
211
|
+
xmlSyntaxError.setException(ex);
|
212
|
+
errors.append(xmlSyntaxError);
|
213
|
+
return errors;
|
214
|
+
}
|
220
215
|
}
|
221
216
|
|
222
217
|
IRubyObject
|
223
218
|
validate_document_or_file(ThreadContext context, XmlDocument xmlDocument)
|
224
219
|
{
|
225
|
-
RubyArray
|
220
|
+
RubyArray errors = context.runtime.newEmptyArray();
|
226
221
|
ErrorHandler errorHandler = new SchemaErrorHandler(context.runtime, errors);
|
227
222
|
setErrorHandler(errorHandler);
|
228
223
|
|
@@ -248,6 +243,10 @@ public class XmlSchema extends RubyObject
|
|
248
243
|
protected void
|
249
244
|
validate(Document document) throws SAXException, IOException
|
250
245
|
{
|
246
|
+
if (document.getDocumentElement() == null) {
|
247
|
+
throw new SAXException("Document is empty");
|
248
|
+
}
|
249
|
+
|
251
250
|
DOMSource docSource = new DOMSource(document);
|
252
251
|
validator.validate(docSource);
|
253
252
|
}
|
@@ -135,7 +135,7 @@ public class XsltStylesheet extends RubyObject
|
|
135
135
|
XmlDocument xmlDoc = (XmlDocument) args[0];
|
136
136
|
ensureDocumentHasNoError(context, xmlDoc);
|
137
137
|
|
138
|
-
Document doc = ((XmlDocument)
|
138
|
+
Document doc = ((XmlDocument)xmlDoc.callMethod(context, "dup", runtime.newFixnum(1))).getDocument();
|
139
139
|
|
140
140
|
XsltStylesheet xslt =
|
141
141
|
(XsltStylesheet) NokogiriService.XSLT_STYLESHEET_ALLOCATOR.allocate(runtime, (RubyClass)klazz);
|
@@ -87,7 +87,7 @@ public class NokogiriBlockingQueueInputStream extends InputStream
|
|
87
87
|
|
88
88
|
/**
|
89
89
|
* This method shouldn't be called unless the parser has finished parsing or
|
90
|
-
* threw an exception while doing so, otherwise, there'll be the
|
90
|
+
* threw an exception while doing so, otherwise, there'll be the potential
|
91
91
|
* that the read method will block indefinitely.
|
92
92
|
*/
|
93
93
|
@Override
|
@@ -66,7 +66,7 @@ public class NokogiriDomParser extends DOMParser
|
|
66
66
|
{
|
67
67
|
dtd.reset();
|
68
68
|
if (xInclude) {
|
69
|
-
setEntityResolver(new
|
69
|
+
setEntityResolver(new NokogiriXIncludeEntityResolver(source));
|
70
70
|
}
|
71
71
|
super.parse(source);
|
72
72
|
Document doc = getDocument();
|
@@ -77,11 +77,11 @@ public class NokogiriDomParser extends DOMParser
|
|
77
77
|
doc.setUserData(XmlDocument.DTD_RAW_DOCUMENT, dtd.getDocument(), null);
|
78
78
|
}
|
79
79
|
|
80
|
-
private static class
|
80
|
+
private static class NokogiriXIncludeEntityResolver implements org.xml.sax.EntityResolver
|
81
81
|
{
|
82
82
|
InputSource source;
|
83
83
|
private
|
84
|
-
|
84
|
+
NokogiriXIncludeEntityResolver(InputSource source)
|
85
85
|
{
|
86
86
|
this.source = source;
|
87
87
|
}
|
@@ -42,27 +42,26 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
|
|
42
42
|
|
43
43
|
private Locator locator;
|
44
44
|
private boolean needEmptyAttrCheck;
|
45
|
+
private boolean replaceEntities;
|
46
|
+
private Set<String> entities = new HashSet<String>();
|
45
47
|
|
46
48
|
public
|
47
|
-
NokogiriHandler(Ruby runtime,
|
49
|
+
NokogiriHandler(Ruby runtime,
|
50
|
+
IRubyObject object,
|
51
|
+
NokogiriErrorHandler errorHandler,
|
52
|
+
boolean replaceEntities)
|
48
53
|
{
|
49
54
|
assert object != null;
|
50
55
|
this.runtime = runtime;
|
51
56
|
this.attrClass = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::SAX::Parser::Attribute");
|
52
57
|
this.object = object;
|
53
58
|
this.errorHandler = errorHandler;
|
59
|
+
this.replaceEntities = replaceEntities;
|
54
60
|
charactersBuilder = new StringBuilder();
|
55
61
|
String objectName = object.getMetaClass().getName();
|
56
62
|
if ("Nokogiri::HTML4::SAX::Parser".equals(objectName)) { needEmptyAttrCheck = true; }
|
57
63
|
}
|
58
64
|
|
59
|
-
@Override
|
60
|
-
public void
|
61
|
-
skippedEntity(String skippedEntity)
|
62
|
-
{
|
63
|
-
call("error", runtime.newString("Entity '" + skippedEntity + "' not defined\n"));
|
64
|
-
}
|
65
|
-
|
66
65
|
@Override
|
67
66
|
public void
|
68
67
|
setDocumentLocator(Locator locator)
|
@@ -88,7 +87,7 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
|
|
88
87
|
public void
|
89
88
|
endDocument()
|
90
89
|
{
|
91
|
-
|
90
|
+
flushCharacters();
|
92
91
|
call("end_document");
|
93
92
|
}
|
94
93
|
|
@@ -161,12 +160,12 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
|
|
161
160
|
}
|
162
161
|
|
163
162
|
if (localName == null || localName.isEmpty()) { localName = getLocalPart(qName); }
|
164
|
-
|
163
|
+
flushCharacters();
|
165
164
|
call("start_element_namespace",
|
166
165
|
stringOrNil(runtime, localName),
|
167
166
|
rubyAttr,
|
168
167
|
stringOrNil(runtime, getPrefix(qName)),
|
169
|
-
stringOrNil(runtime, uri),
|
168
|
+
uri.length() > 0 ? stringOrNil(runtime, uri) : runtime.getNil(),
|
170
169
|
rubyNSAttr);
|
171
170
|
}
|
172
171
|
|
@@ -204,7 +203,7 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
|
|
204
203
|
public void
|
205
204
|
endElement(String uri, String localName, String qName)
|
206
205
|
{
|
207
|
-
|
206
|
+
flushCharacters();
|
208
207
|
call("end_element_namespace",
|
209
208
|
stringOrNil(runtime, localName),
|
210
209
|
stringOrNil(runtime, getPrefix(qName)),
|
@@ -218,11 +217,56 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
|
|
218
217
|
charactersBuilder.append(ch, start, length);
|
219
218
|
}
|
220
219
|
|
220
|
+
@Override
|
221
|
+
public void
|
222
|
+
externalEntityDecl(java.lang.String name, java.lang.String publicId, java.lang.String systemId)
|
223
|
+
throws SAXException
|
224
|
+
{
|
225
|
+
entities.add(name);
|
226
|
+
}
|
227
|
+
|
228
|
+
@Override
|
229
|
+
public void
|
230
|
+
internalEntityDecl(java.lang.String name, java.lang.String value)
|
231
|
+
throws SAXException
|
232
|
+
{
|
233
|
+
entities.add(name);
|
234
|
+
}
|
235
|
+
|
236
|
+
@Override
|
237
|
+
public void
|
238
|
+
skippedEntity(String name)
|
239
|
+
{
|
240
|
+
call("error", runtime.newString("Entity '" + name + "' not defined\n"));
|
241
|
+
if (!replaceEntities) {
|
242
|
+
call("reference", runtime.newString(name), runtime.getNil());
|
243
|
+
}
|
244
|
+
}
|
245
|
+
|
246
|
+
@Override
|
247
|
+
public void
|
248
|
+
startEntity(String name)
|
249
|
+
{
|
250
|
+
flushCharacters();
|
251
|
+
}
|
252
|
+
|
253
|
+
@Override
|
254
|
+
public void
|
255
|
+
endEntity(String name)
|
256
|
+
{
|
257
|
+
IRubyObject content = charactersBuilder.length() > 0 ? runtime.newString(charactersBuilder.toString()) :
|
258
|
+
runtime.getNil();
|
259
|
+
if (!replaceEntities && entities.contains(name)) {
|
260
|
+
call("reference", runtime.newString(name), content);
|
261
|
+
}
|
262
|
+
flushCharacters();
|
263
|
+
}
|
264
|
+
|
221
265
|
@Override
|
222
266
|
public void
|
223
267
|
comment(char[] ch, int start, int length)
|
224
268
|
{
|
225
|
-
|
269
|
+
flushCharacters();
|
226
270
|
call("comment", runtime.newString(new String(ch, start, length)));
|
227
271
|
}
|
228
272
|
|
@@ -230,7 +274,7 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
|
|
230
274
|
public void
|
231
275
|
startCDATA()
|
232
276
|
{
|
233
|
-
|
277
|
+
flushCharacters();
|
234
278
|
}
|
235
279
|
|
236
280
|
@Override
|
@@ -329,7 +373,7 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
|
|
329
373
|
}
|
330
374
|
|
331
375
|
protected void
|
332
|
-
|
376
|
+
flushCharacters()
|
333
377
|
{
|
334
378
|
if (charactersBuilder.length() > 0) {
|
335
379
|
call("characters", runtime.newString(charactersBuilder.toString()));
|
@@ -724,7 +724,7 @@ public class NokogiriHelpers
|
|
724
724
|
convertEncodingByNKFIfNecessary(ThreadContext context, XmlDocument doc, CharSequence str)
|
725
725
|
{
|
726
726
|
if (!(doc instanceof Html4Document)) { return str; }
|
727
|
-
String parsed_encoding = ((Html4Document)doc).
|
727
|
+
String parsed_encoding = ((Html4Document)doc).getParsedEncoding();
|
728
728
|
if (parsed_encoding == null) { return str; }
|
729
729
|
String ruby_encoding = rubyStringToString(doc.getEncoding());
|
730
730
|
if (ruby_encoding == null) { return str; }
|