nokogiri 1.16.8-java → 1.17.0-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +11 -21
- data/README.md +4 -0
- data/dependencies.yml +6 -6
- data/ext/java/nokogiri/Html4Document.java +3 -3
- data/ext/java/nokogiri/Html4SaxParserContext.java +47 -175
- data/ext/java/nokogiri/NokogiriService.java +2 -2
- data/ext/java/nokogiri/XmlCdata.java +3 -0
- data/ext/java/nokogiri/XmlDocument.java +7 -14
- data/ext/java/nokogiri/XmlDocumentFragment.java +4 -92
- data/ext/java/nokogiri/XmlDtd.java +2 -2
- data/ext/java/nokogiri/XmlEntityReference.java +16 -12
- data/ext/java/nokogiri/XmlNode.java +26 -47
- data/ext/java/nokogiri/XmlNodeSet.java +10 -1
- data/ext/java/nokogiri/XmlSaxParserContext.java +73 -36
- data/ext/java/nokogiri/XmlSchema.java +15 -16
- data/ext/java/nokogiri/XsltStylesheet.java +1 -1
- data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +1 -1
- data/ext/java/nokogiri/internals/NokogiriDomParser.java +3 -3
- data/ext/java/nokogiri/internals/NokogiriHandler.java +59 -15
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +1 -1
- data/ext/java/nokogiri/internals/ParserContext.java +51 -21
- data/ext/java/nokogiri/internals/ReaderNode.java +1 -1
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +8 -19
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +1 -1
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +1 -1
- data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +10 -11
- data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +5 -5
- data/ext/java/nokogiri/internals/c14n/{UtfHelpper.java → UtfHelper.java} +2 -2
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +8 -8
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +2 -2
- data/ext/nokogiri/extconf.rb +191 -137
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +25 -24
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +2 -10
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +163 -156
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -6
- data/ext/nokogiri/xml_node.c +130 -104
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +54 -58
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +213 -131
- data/ext/nokogiri/xml_sax_push_parser.c +68 -49
- data/ext/nokogiri/xml_schema.c +50 -85
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +2 -4
- data/ext/nokogiri/xml_xpath_context.c +2 -2
- data/ext/nokogiri/xslt_stylesheet.c +8 -8
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +42 -6
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +44 -23
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -72
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +70 -26
- data/lib/nokogiri/xml/document_fragment.rb +84 -13
- data/lib/nokogiri/xml/node.rb +82 -11
- data/lib/nokogiri/xml/node_set.rb +9 -7
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +46 -13
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +6 -8
- data/lib/nokogiri/xml/syntax_error.rb +22 -0
- data/lib/nokogiri/xml.rb +13 -24
- data/lib/nokogiri/xslt.rb +3 -9
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- metadata +9 -5
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
@@ -287,7 +287,7 @@ public class XmlNode extends RubyObject
|
|
287
287
|
* args but not for an exact number. Any extra args will then be
|
288
288
|
* passed to 'initialize'. The way 'new' and this 'init' function
|
289
289
|
* interact means that subclasses cannot arbitrarily change the
|
290
|
-
* require
|
290
|
+
* require arguments by defining an 'initialize' method. This is
|
291
291
|
* how the C libxml wrapper works also.
|
292
292
|
*
|
293
293
|
* As written it performs initialization for a new Element with
|
@@ -407,7 +407,7 @@ public class XmlNode extends RubyObject
|
|
407
407
|
/**
|
408
408
|
* This method should be called after a node has been adopted in a new
|
409
409
|
* document. This method will ensure that the node is renamed with the
|
410
|
-
*
|
410
|
+
* appropriate NS uri. First the prefix of the node is extracted, then is
|
411
411
|
* used to lookup the namespace uri in the new document starting at the
|
412
412
|
* current node and traversing the ancestors. If the namespace uri wasn't
|
413
413
|
* empty (or null) all children and the node has attributes and/or children
|
@@ -643,12 +643,22 @@ public class XmlNode extends RubyObject
|
|
643
643
|
|
644
644
|
@JRubyMethod(name = {"attribute", "attr"})
|
645
645
|
public IRubyObject
|
646
|
-
attribute(ThreadContext context, IRubyObject
|
646
|
+
attribute(ThreadContext context, IRubyObject rbName)
|
647
647
|
{
|
648
|
-
NamedNodeMap
|
649
|
-
|
650
|
-
|
651
|
-
|
648
|
+
NamedNodeMap attributes = this.node.getAttributes();
|
649
|
+
String name = rubyStringToString(rbName);
|
650
|
+
|
651
|
+
for (int j = 0 ; j < attributes.getLength() ; j++) {
|
652
|
+
Node attribute = attributes.item(j);
|
653
|
+
String localName = attribute.getLocalName();
|
654
|
+
if (localName == null) {
|
655
|
+
continue;
|
656
|
+
}
|
657
|
+
if (localName.equals(name)) {
|
658
|
+
return getCachedNodeOrCreate(context.runtime, attribute);
|
659
|
+
}
|
660
|
+
}
|
661
|
+
return context.nil;
|
652
662
|
}
|
653
663
|
|
654
664
|
@JRubyMethod
|
@@ -966,45 +976,13 @@ public class XmlNode extends RubyObject
|
|
966
976
|
return doc;
|
967
977
|
}
|
968
978
|
|
979
|
+
@JRubyMethod(visibility = Visibility.PROTECTED)
|
969
980
|
public IRubyObject
|
970
|
-
|
971
|
-
{
|
972
|
-
return dup_implementation(getMetaClass().getClassRuntime(), true);
|
973
|
-
}
|
974
|
-
|
975
|
-
@JRubyMethod
|
976
|
-
public IRubyObject
|
977
|
-
dup(ThreadContext context)
|
978
|
-
{
|
979
|
-
return dup_implementation(context, true);
|
980
|
-
}
|
981
|
-
|
982
|
-
@JRubyMethod
|
983
|
-
public IRubyObject
|
984
|
-
dup(ThreadContext context, IRubyObject depth)
|
981
|
+
initialize_copy_with_args(ThreadContext context, IRubyObject other, IRubyObject level, IRubyObject _ignored)
|
985
982
|
{
|
986
|
-
boolean deep =
|
987
|
-
|
988
|
-
|
989
|
-
|
990
|
-
protected final IRubyObject
|
991
|
-
dup_implementation(ThreadContext context, boolean deep)
|
992
|
-
{
|
993
|
-
return dup_implementation(context.runtime, deep);
|
994
|
-
}
|
995
|
-
|
996
|
-
protected IRubyObject
|
997
|
-
dup_implementation(Ruby runtime, boolean deep)
|
998
|
-
{
|
999
|
-
XmlNode clone;
|
1000
|
-
try {
|
1001
|
-
clone = (XmlNode) clone();
|
1002
|
-
} catch (CloneNotSupportedException e) {
|
1003
|
-
throw runtime.newRuntimeError(e.toString());
|
1004
|
-
}
|
1005
|
-
Node newNode = node.cloneNode(deep);
|
1006
|
-
clone.node = newNode;
|
1007
|
-
return clone;
|
983
|
+
boolean deep = level instanceof RubyInteger && RubyFixnum.fix2int(level) != 0;
|
984
|
+
this.node = asXmlNode(context, other).node.cloneNode(deep);
|
985
|
+
return this;
|
1008
986
|
}
|
1009
987
|
|
1010
988
|
public static RubyString
|
@@ -1447,11 +1425,12 @@ public class XmlNode extends RubyObject
|
|
1447
1425
|
}
|
1448
1426
|
}
|
1449
1427
|
|
1450
|
-
if (uri
|
1451
|
-
element.setAttributeNS(uri, key, val);
|
1452
|
-
} else {
|
1428
|
+
if (colonIndex > 0 && uri == null) {
|
1453
1429
|
element.setAttribute(key, val);
|
1430
|
+
} else {
|
1431
|
+
element.setAttributeNS(uri, key, val);
|
1454
1432
|
}
|
1433
|
+
|
1455
1434
|
clearXpathContext(node);
|
1456
1435
|
}
|
1457
1436
|
|
@@ -15,6 +15,7 @@ import org.jruby.RubyRange;
|
|
15
15
|
import org.jruby.anno.JRubyClass;
|
16
16
|
import org.jruby.anno.JRubyMethod;
|
17
17
|
import org.jruby.runtime.ThreadContext;
|
18
|
+
import org.jruby.runtime.Visibility;
|
18
19
|
import org.jruby.runtime.builtin.IRubyObject;
|
19
20
|
import org.w3c.dom.Node;
|
20
21
|
import org.w3c.dom.NodeList;
|
@@ -201,7 +202,6 @@ public class XmlNodeSet extends RubyObject implements NodeList
|
|
201
202
|
return context.nil;
|
202
203
|
}
|
203
204
|
|
204
|
-
@JRubyMethod
|
205
205
|
public IRubyObject
|
206
206
|
dup(ThreadContext context)
|
207
207
|
{
|
@@ -210,6 +210,15 @@ public class XmlNodeSet extends RubyObject implements NodeList
|
|
210
210
|
return dup;
|
211
211
|
}
|
212
212
|
|
213
|
+
@JRubyMethod(visibility = Visibility.PROTECTED)
|
214
|
+
public IRubyObject
|
215
|
+
initialize_copy(ThreadContext context, IRubyObject other)
|
216
|
+
{
|
217
|
+
setNodes(getNodes(context, other));
|
218
|
+
initializeFrom(context, (XmlNodeSet)other);
|
219
|
+
return this;
|
220
|
+
}
|
221
|
+
|
213
222
|
@JRubyMethod(name = "include?")
|
214
223
|
public IRubyObject
|
215
224
|
include_p(ThreadContext context, IRubyObject node_or_namespace)
|
@@ -1,10 +1,14 @@
|
|
1
1
|
package nokogiri;
|
2
2
|
|
3
3
|
import nokogiri.internals.*;
|
4
|
+
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
|
5
|
+
|
4
6
|
import org.apache.xerces.parsers.AbstractSAXParser;
|
5
7
|
import org.jruby.Ruby;
|
6
8
|
import org.jruby.RubyClass;
|
9
|
+
import org.jruby.RubyEncoding;
|
7
10
|
import org.jruby.RubyFixnum;
|
11
|
+
import org.jruby.RubyString;
|
8
12
|
import org.jruby.anno.JRubyClass;
|
9
13
|
import org.jruby.anno.JRubyMethod;
|
10
14
|
import org.jruby.exceptions.RaiseException;
|
@@ -14,6 +18,7 @@ import org.jruby.runtime.builtin.IRubyObject;
|
|
14
18
|
import org.xml.sax.SAXException;
|
15
19
|
import org.xml.sax.SAXParseException;
|
16
20
|
|
21
|
+
import java.io.ByteArrayInputStream;
|
17
22
|
import java.io.IOException;
|
18
23
|
import java.io.InputStream;
|
19
24
|
|
@@ -43,7 +48,7 @@ public class XmlSaxParserContext extends ParserContext
|
|
43
48
|
|
44
49
|
protected NokogiriHandler handler;
|
45
50
|
protected NokogiriErrorHandler errorHandler;
|
46
|
-
private boolean replaceEntities =
|
51
|
+
private boolean replaceEntities = false;
|
47
52
|
private boolean recovery = false;
|
48
53
|
|
49
54
|
public
|
@@ -90,16 +95,26 @@ public class XmlSaxParserContext extends ParserContext
|
|
90
95
|
* Create a new parser context that will parse the string
|
91
96
|
* <code>data</code>.
|
92
97
|
*/
|
93
|
-
@JRubyMethod(name = "
|
98
|
+
@JRubyMethod(name = "native_memory", meta = true)
|
94
99
|
public static IRubyObject
|
95
|
-
parse_memory(ThreadContext context,
|
96
|
-
IRubyObject klazz,
|
97
|
-
IRubyObject data)
|
100
|
+
parse_memory(ThreadContext context, IRubyObject klazz, IRubyObject data, IRubyObject encoding)
|
98
101
|
{
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
102
|
+
String java_encoding = null;
|
103
|
+
if (encoding != context.runtime.getNil()) {
|
104
|
+
if (!(encoding instanceof RubyEncoding)) {
|
105
|
+
throw context.runtime.newTypeError("encoding must be kind_of Encoding");
|
106
|
+
}
|
107
|
+
java_encoding = ((RubyEncoding)encoding).toString();
|
108
|
+
}
|
109
|
+
|
110
|
+
XmlSaxParserContext ctx = newInstance(context.runtime, (RubyClass) klazz);
|
111
|
+
ctx.initialize(context.runtime);
|
112
|
+
ctx.setStringInputSourceNoEnc(context, data, context.runtime.getNil());
|
113
|
+
|
114
|
+
if (java_encoding != null) {
|
115
|
+
ctx.getInputSource().setEncoding(java_encoding);
|
116
|
+
}
|
117
|
+
|
103
118
|
return ctx;
|
104
119
|
}
|
105
120
|
|
@@ -107,16 +122,26 @@ public class XmlSaxParserContext extends ParserContext
|
|
107
122
|
* Create a new parser context that will read from the file
|
108
123
|
* <code>data</code> and parse.
|
109
124
|
*/
|
110
|
-
@JRubyMethod(name = "
|
125
|
+
@JRubyMethod(name = "native_file", meta = true)
|
111
126
|
public static IRubyObject
|
112
|
-
parse_file(ThreadContext context,
|
113
|
-
IRubyObject klazz,
|
114
|
-
IRubyObject data)
|
127
|
+
parse_file(ThreadContext context, IRubyObject klazz, IRubyObject data, IRubyObject encoding)
|
115
128
|
{
|
116
|
-
|
117
|
-
|
118
|
-
|
129
|
+
String java_encoding = null;
|
130
|
+
if (encoding != context.runtime.getNil()) {
|
131
|
+
if (!(encoding instanceof RubyEncoding)) {
|
132
|
+
throw context.runtime.newTypeError("encoding must be kind_of Encoding");
|
133
|
+
}
|
134
|
+
java_encoding = ((RubyEncoding)encoding).toString();
|
135
|
+
}
|
136
|
+
|
137
|
+
XmlSaxParserContext ctx = newInstance(context.runtime, (RubyClass) klazz);
|
138
|
+
ctx.initialize(context.runtime);
|
119
139
|
ctx.setInputSourceFile(context, data);
|
140
|
+
|
141
|
+
if (java_encoding != null) {
|
142
|
+
ctx.getInputSource().setEncoding(java_encoding);
|
143
|
+
}
|
144
|
+
|
120
145
|
return ctx;
|
121
146
|
}
|
122
147
|
|
@@ -126,21 +151,30 @@ public class XmlSaxParserContext extends ParserContext
|
|
126
151
|
*
|
127
152
|
* TODO: Currently ignores encoding <code>enc</code>.
|
128
153
|
*/
|
129
|
-
@JRubyMethod(name = "
|
154
|
+
@JRubyMethod(name = "native_io", meta = true)
|
130
155
|
public static IRubyObject
|
131
|
-
parse_io(ThreadContext context,
|
132
|
-
IRubyObject klazz,
|
133
|
-
IRubyObject data,
|
134
|
-
IRubyObject encoding)
|
156
|
+
parse_io(ThreadContext context, IRubyObject klazz, IRubyObject data, IRubyObject encoding)
|
135
157
|
{
|
136
|
-
|
137
|
-
|
138
|
-
throw context.getRuntime().newTypeError("encoding must be kind_of String");
|
158
|
+
if (!invoke(context, data, "respond_to?", context.runtime.newSymbol("read")).isTrue()) {
|
159
|
+
throw context.runtime.newTypeError("argument expected to respond to :read");
|
139
160
|
}
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
161
|
+
|
162
|
+
String java_encoding = null;
|
163
|
+
if (encoding != context.runtime.getNil()) {
|
164
|
+
if (!(encoding instanceof RubyEncoding)) {
|
165
|
+
throw context.runtime.newTypeError("encoding must be kind_of Encoding");
|
166
|
+
}
|
167
|
+
java_encoding = ((RubyEncoding)encoding).toString();
|
168
|
+
}
|
169
|
+
|
170
|
+
XmlSaxParserContext ctx = newInstance(context.runtime, (RubyClass) klazz);
|
171
|
+
ctx.initialize(context.runtime);
|
172
|
+
ctx.setIOInputSource(context, data, context.runtime.getNil());
|
173
|
+
|
174
|
+
if (java_encoding != null) {
|
175
|
+
ctx.getInputSource().setEncoding(java_encoding);
|
176
|
+
}
|
177
|
+
|
144
178
|
return ctx;
|
145
179
|
}
|
146
180
|
|
@@ -222,9 +256,12 @@ public class XmlSaxParserContext extends ParserContext
|
|
222
256
|
|
223
257
|
/* TODO: how should we pass in parse options? */
|
224
258
|
ParserContext.Options options = defaultParseOptions(context);
|
259
|
+
if (replaceEntities) {
|
260
|
+
options.noEnt = true;
|
261
|
+
}
|
225
262
|
|
226
263
|
errorHandler = new NokogiriStrictErrorHandler(runtime, options.noError, options.noWarning);
|
227
|
-
handler = new NokogiriHandler(runtime, handlerRuby, errorHandler);
|
264
|
+
handler = new NokogiriHandler(runtime, handlerRuby, errorHandler, options.noEnt);
|
228
265
|
|
229
266
|
preParse(runtime, handlerRuby, handler);
|
230
267
|
parser.setContentHandler(handler);
|
@@ -233,6 +270,7 @@ public class XmlSaxParserContext extends ParserContext
|
|
233
270
|
|
234
271
|
try {
|
235
272
|
parser.setProperty("http://xml.org/sax/properties/lexical-handler", handler);
|
273
|
+
parser.setProperty("http://xml.org/sax/properties/declaration-handler", handler);
|
236
274
|
} catch (Exception ex) {
|
237
275
|
throw runtime.newRuntimeError("Problem while creating XML SAX Parser: " + ex.toString());
|
238
276
|
}
|
@@ -241,16 +279,15 @@ public class XmlSaxParserContext extends ParserContext
|
|
241
279
|
try {
|
242
280
|
do_parse();
|
243
281
|
} catch (SAXParseException ex) {
|
244
|
-
//
|
245
|
-
//
|
246
|
-
|
247
|
-
// However, an EMPTY document should raise a RuntimeError.
|
248
|
-
// This is a bit kludgy, but AFAIK SAX doesn't distinguish
|
249
|
-
// between empty and bad whereas Nokogiri does.
|
282
|
+
// An EMPTY document should raise a RuntimeError. This is a bit kludgy, but AFAIK SAX
|
283
|
+
// doesn't distinguish between empty and bad whereas Nokogiri does.
|
250
284
|
String message = ex.getMessage();
|
251
285
|
if (message != null && message.contains("Premature end of file.") && stringDataSize < 1) {
|
252
|
-
throw runtime.newRuntimeError("
|
286
|
+
throw runtime.newRuntimeError("input string cannot be empty");
|
253
287
|
}
|
288
|
+
|
289
|
+
// A bad document (<foo><bar></foo>) should call the
|
290
|
+
// error handler instead of raising a SAX exception.
|
254
291
|
handler.error(ex);
|
255
292
|
}
|
256
293
|
} catch (SAXException ex) {
|
@@ -175,19 +175,6 @@ public class XmlSchema extends RubyObject
|
|
175
175
|
return getSchema(context, (RubyClass)klazz, source, parseOptions);
|
176
176
|
}
|
177
177
|
|
178
|
-
@JRubyMethod(meta = true, required = 1, optional = 1)
|
179
|
-
public static IRubyObject
|
180
|
-
read_memory(ThreadContext context, IRubyObject klazz, IRubyObject[] args)
|
181
|
-
{
|
182
|
-
IRubyObject content = args[0];
|
183
|
-
IRubyObject parseOptions = null;
|
184
|
-
if (args.length > 1) {
|
185
|
-
parseOptions = args[1];
|
186
|
-
}
|
187
|
-
String data = content.convertToString().asJavaString();
|
188
|
-
return getSchema(context, (RubyClass) klazz, new StreamSource(new StringReader(data)), parseOptions);
|
189
|
-
}
|
190
|
-
|
191
178
|
private static IRubyObject
|
192
179
|
getSchema(ThreadContext context, RubyClass klazz, Source source, IRubyObject parseOptions)
|
193
180
|
{
|
@@ -215,14 +202,22 @@ public class XmlSchema extends RubyObject
|
|
215
202
|
|
216
203
|
XmlDomParserContext ctx = new XmlDomParserContext(runtime, RubyFixnum.newFixnum(runtime, 1L));
|
217
204
|
ctx.setInputSourceFile(context, file);
|
218
|
-
|
219
|
-
|
205
|
+
try {
|
206
|
+
XmlDocument xmlDocument = ctx.parse(context, getNokogiriClass(runtime, "Nokogiri::XML::Document"), context.nil);
|
207
|
+
return validate_document_or_file(context, xmlDocument);
|
208
|
+
} catch (Exception ex) {
|
209
|
+
RubyArray errors = (RubyArray)context.runtime.newEmptyArray();
|
210
|
+
XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
|
211
|
+
xmlSyntaxError.setException(ex);
|
212
|
+
errors.append(xmlSyntaxError);
|
213
|
+
return errors;
|
214
|
+
}
|
220
215
|
}
|
221
216
|
|
222
217
|
IRubyObject
|
223
218
|
validate_document_or_file(ThreadContext context, XmlDocument xmlDocument)
|
224
219
|
{
|
225
|
-
RubyArray
|
220
|
+
RubyArray errors = context.runtime.newEmptyArray();
|
226
221
|
ErrorHandler errorHandler = new SchemaErrorHandler(context.runtime, errors);
|
227
222
|
setErrorHandler(errorHandler);
|
228
223
|
|
@@ -248,6 +243,10 @@ public class XmlSchema extends RubyObject
|
|
248
243
|
protected void
|
249
244
|
validate(Document document) throws SAXException, IOException
|
250
245
|
{
|
246
|
+
if (document.getDocumentElement() == null) {
|
247
|
+
throw new SAXException("Document is empty");
|
248
|
+
}
|
249
|
+
|
251
250
|
DOMSource docSource = new DOMSource(document);
|
252
251
|
validator.validate(docSource);
|
253
252
|
}
|
@@ -135,7 +135,7 @@ public class XsltStylesheet extends RubyObject
|
|
135
135
|
XmlDocument xmlDoc = (XmlDocument) args[0];
|
136
136
|
ensureDocumentHasNoError(context, xmlDoc);
|
137
137
|
|
138
|
-
Document doc = ((XmlDocument)
|
138
|
+
Document doc = ((XmlDocument)xmlDoc.callMethod(context, "dup", runtime.newFixnum(1))).getDocument();
|
139
139
|
|
140
140
|
XsltStylesheet xslt =
|
141
141
|
(XsltStylesheet) NokogiriService.XSLT_STYLESHEET_ALLOCATOR.allocate(runtime, (RubyClass)klazz);
|
@@ -87,7 +87,7 @@ public class NokogiriBlockingQueueInputStream extends InputStream
|
|
87
87
|
|
88
88
|
/**
|
89
89
|
* This method shouldn't be called unless the parser has finished parsing or
|
90
|
-
* threw an exception while doing so, otherwise, there'll be the
|
90
|
+
* threw an exception while doing so, otherwise, there'll be the potential
|
91
91
|
* that the read method will block indefinitely.
|
92
92
|
*/
|
93
93
|
@Override
|
@@ -66,7 +66,7 @@ public class NokogiriDomParser extends DOMParser
|
|
66
66
|
{
|
67
67
|
dtd.reset();
|
68
68
|
if (xInclude) {
|
69
|
-
setEntityResolver(new
|
69
|
+
setEntityResolver(new NokogiriXIncludeEntityResolver(source));
|
70
70
|
}
|
71
71
|
super.parse(source);
|
72
72
|
Document doc = getDocument();
|
@@ -77,11 +77,11 @@ public class NokogiriDomParser extends DOMParser
|
|
77
77
|
doc.setUserData(XmlDocument.DTD_RAW_DOCUMENT, dtd.getDocument(), null);
|
78
78
|
}
|
79
79
|
|
80
|
-
private static class
|
80
|
+
private static class NokogiriXIncludeEntityResolver implements org.xml.sax.EntityResolver
|
81
81
|
{
|
82
82
|
InputSource source;
|
83
83
|
private
|
84
|
-
|
84
|
+
NokogiriXIncludeEntityResolver(InputSource source)
|
85
85
|
{
|
86
86
|
this.source = source;
|
87
87
|
}
|
@@ -42,27 +42,26 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
|
|
42
42
|
|
43
43
|
private Locator locator;
|
44
44
|
private boolean needEmptyAttrCheck;
|
45
|
+
private boolean replaceEntities;
|
46
|
+
private Set<String> entities = new HashSet<String>();
|
45
47
|
|
46
48
|
public
|
47
|
-
NokogiriHandler(Ruby runtime,
|
49
|
+
NokogiriHandler(Ruby runtime,
|
50
|
+
IRubyObject object,
|
51
|
+
NokogiriErrorHandler errorHandler,
|
52
|
+
boolean replaceEntities)
|
48
53
|
{
|
49
54
|
assert object != null;
|
50
55
|
this.runtime = runtime;
|
51
56
|
this.attrClass = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::SAX::Parser::Attribute");
|
52
57
|
this.object = object;
|
53
58
|
this.errorHandler = errorHandler;
|
59
|
+
this.replaceEntities = replaceEntities;
|
54
60
|
charactersBuilder = new StringBuilder();
|
55
61
|
String objectName = object.getMetaClass().getName();
|
56
62
|
if ("Nokogiri::HTML4::SAX::Parser".equals(objectName)) { needEmptyAttrCheck = true; }
|
57
63
|
}
|
58
64
|
|
59
|
-
@Override
|
60
|
-
public void
|
61
|
-
skippedEntity(String skippedEntity)
|
62
|
-
{
|
63
|
-
call("error", runtime.newString("Entity '" + skippedEntity + "' not defined\n"));
|
64
|
-
}
|
65
|
-
|
66
65
|
@Override
|
67
66
|
public void
|
68
67
|
setDocumentLocator(Locator locator)
|
@@ -88,7 +87,7 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
|
|
88
87
|
public void
|
89
88
|
endDocument()
|
90
89
|
{
|
91
|
-
|
90
|
+
flushCharacters();
|
92
91
|
call("end_document");
|
93
92
|
}
|
94
93
|
|
@@ -161,12 +160,12 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
|
|
161
160
|
}
|
162
161
|
|
163
162
|
if (localName == null || localName.isEmpty()) { localName = getLocalPart(qName); }
|
164
|
-
|
163
|
+
flushCharacters();
|
165
164
|
call("start_element_namespace",
|
166
165
|
stringOrNil(runtime, localName),
|
167
166
|
rubyAttr,
|
168
167
|
stringOrNil(runtime, getPrefix(qName)),
|
169
|
-
stringOrNil(runtime, uri),
|
168
|
+
uri.length() > 0 ? stringOrNil(runtime, uri) : runtime.getNil(),
|
170
169
|
rubyNSAttr);
|
171
170
|
}
|
172
171
|
|
@@ -204,7 +203,7 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
|
|
204
203
|
public void
|
205
204
|
endElement(String uri, String localName, String qName)
|
206
205
|
{
|
207
|
-
|
206
|
+
flushCharacters();
|
208
207
|
call("end_element_namespace",
|
209
208
|
stringOrNil(runtime, localName),
|
210
209
|
stringOrNil(runtime, getPrefix(qName)),
|
@@ -218,11 +217,56 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
|
|
218
217
|
charactersBuilder.append(ch, start, length);
|
219
218
|
}
|
220
219
|
|
220
|
+
@Override
|
221
|
+
public void
|
222
|
+
externalEntityDecl(java.lang.String name, java.lang.String publicId, java.lang.String systemId)
|
223
|
+
throws SAXException
|
224
|
+
{
|
225
|
+
entities.add(name);
|
226
|
+
}
|
227
|
+
|
228
|
+
@Override
|
229
|
+
public void
|
230
|
+
internalEntityDecl(java.lang.String name, java.lang.String value)
|
231
|
+
throws SAXException
|
232
|
+
{
|
233
|
+
entities.add(name);
|
234
|
+
}
|
235
|
+
|
236
|
+
@Override
|
237
|
+
public void
|
238
|
+
skippedEntity(String name)
|
239
|
+
{
|
240
|
+
call("error", runtime.newString("Entity '" + name + "' not defined\n"));
|
241
|
+
if (!replaceEntities) {
|
242
|
+
call("reference", runtime.newString(name), runtime.getNil());
|
243
|
+
}
|
244
|
+
}
|
245
|
+
|
246
|
+
@Override
|
247
|
+
public void
|
248
|
+
startEntity(String name)
|
249
|
+
{
|
250
|
+
flushCharacters();
|
251
|
+
}
|
252
|
+
|
253
|
+
@Override
|
254
|
+
public void
|
255
|
+
endEntity(String name)
|
256
|
+
{
|
257
|
+
IRubyObject content = charactersBuilder.length() > 0 ? runtime.newString(charactersBuilder.toString()) :
|
258
|
+
runtime.getNil();
|
259
|
+
if (!replaceEntities && entities.contains(name)) {
|
260
|
+
call("reference", runtime.newString(name), content);
|
261
|
+
}
|
262
|
+
flushCharacters();
|
263
|
+
}
|
264
|
+
|
221
265
|
@Override
|
222
266
|
public void
|
223
267
|
comment(char[] ch, int start, int length)
|
224
268
|
{
|
225
|
-
|
269
|
+
flushCharacters();
|
226
270
|
call("comment", runtime.newString(new String(ch, start, length)));
|
227
271
|
}
|
228
272
|
|
@@ -230,7 +274,7 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
|
|
230
274
|
public void
|
231
275
|
startCDATA()
|
232
276
|
{
|
233
|
-
|
277
|
+
flushCharacters();
|
234
278
|
}
|
235
279
|
|
236
280
|
@Override
|
@@ -329,7 +373,7 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
|
|
329
373
|
}
|
330
374
|
|
331
375
|
protected void
|
332
|
-
|
376
|
+
flushCharacters()
|
333
377
|
{
|
334
378
|
if (charactersBuilder.length() > 0) {
|
335
379
|
call("characters", runtime.newString(charactersBuilder.toString()));
|
@@ -724,7 +724,7 @@ public class NokogiriHelpers
|
|
724
724
|
convertEncodingByNKFIfNecessary(ThreadContext context, XmlDocument doc, CharSequence str)
|
725
725
|
{
|
726
726
|
if (!(doc instanceof Html4Document)) { return str; }
|
727
|
-
String parsed_encoding = ((Html4Document)doc).
|
727
|
+
String parsed_encoding = ((Html4Document)doc).getParsedEncoding();
|
728
728
|
if (parsed_encoding == null) { return str; }
|
729
729
|
String ruby_encoding = rubyStringToString(doc.getEncoding());
|
730
730
|
if (ruby_encoding == null) { return str; }
|