nokogiri 1.7.2-java → 1.8.0-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/.cross_rubies +4 -4
- data/.travis.yml +43 -24
- data/CHANGELOG.md +54 -6
- data/Gemfile +8 -7
- data/Gemfile-libxml-ruby +3 -0
- data/LICENSE-DEPENDENCIES.md +1612 -0
- data/{LICENSE.txt → LICENSE.md} +1 -1
- data/Manifest.txt +5 -8
- data/README.md +8 -5
- data/Rakefile +15 -31
- data/appveyor.yml +2 -0
- data/dependencies.yml +12 -7
- data/ext/java/nokogiri/HtmlDocument.java +2 -2
- data/ext/java/nokogiri/HtmlSaxParserContext.java +20 -21
- data/ext/java/nokogiri/HtmlSaxPushParser.java +6 -10
- data/ext/java/nokogiri/NokogiriService.java +10 -31
- data/ext/java/nokogiri/XmlAttr.java +1 -26
- data/ext/java/nokogiri/XmlCdata.java +0 -1
- data/ext/java/nokogiri/XmlComment.java +1 -1
- data/ext/java/nokogiri/XmlDocument.java +4 -5
- data/ext/java/nokogiri/XmlDocumentFragment.java +29 -21
- data/ext/java/nokogiri/XmlDtd.java +1 -1
- data/ext/java/nokogiri/XmlElement.java +9 -10
- data/ext/java/nokogiri/XmlEntityDecl.java +4 -5
- data/ext/java/nokogiri/XmlNode.java +105 -103
- data/ext/java/nokogiri/XmlNodeSet.java +64 -76
- data/ext/java/nokogiri/XmlReader.java +48 -48
- data/ext/java/nokogiri/XmlRelaxng.java +1 -1
- data/ext/java/nokogiri/XmlSaxPushParser.java +37 -17
- data/ext/java/nokogiri/XmlSchema.java +7 -5
- data/ext/java/nokogiri/XmlSyntaxError.java +47 -35
- data/ext/java/nokogiri/XmlXpathContext.java +160 -132
- data/ext/java/nokogiri/XsltStylesheet.java +15 -24
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +19 -23
- data/ext/java/nokogiri/internals/NokogiriDomParser.java +1 -1
- data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +1 -1
- data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +11 -13
- data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +5 -21
- data/ext/java/nokogiri/internals/NokogiriHandler.java +1 -1
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +105 -142
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +16 -26
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +32 -50
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +10 -13
- data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +3 -10
- data/ext/java/nokogiri/internals/ParserContext.java +4 -8
- data/ext/java/nokogiri/internals/ReaderNode.java +53 -93
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +77 -89
- data/ext/java/nokogiri/internals/SchemaErrorHandler.java +6 -9
- data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +167 -0
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +17 -6
- data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +1 -1
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +28 -28
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +3 -4
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +2 -2
- data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +10 -10
- data/ext/java/nokogiri/internals/c14n/ElementProxy.java +5 -5
- data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +2 -2
- data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +1 -1
- data/ext/java/nokogiri/internals/c14n/XMLUtils.java +2 -2
- data/ext/java/org/apache/xml/dtm/ref/dom2dtm/DOM2DTMExt.java +1749 -0
- data/ext/nokogiri/extconf.rb +12 -17
- data/ext/nokogiri/nokogiri.h +0 -10
- data/ext/nokogiri/xml_attr.c +12 -8
- data/ext/nokogiri/xml_node.c +17 -14
- data/ext/nokogiri/xml_sax_push_parser.c +56 -12
- data/lib/nokogiri/html/sax/parser.rb +10 -0
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +5 -4
- data/lib/nokogiri/xml/document.rb +9 -9
- data/lib/nokogiri/xml/node.rb +7 -7
- data/lib/nokogiri/xml/node_set.rb +12 -7
- data/lib/nokogiri/xml/sax/parser.rb +6 -7
- data/lib/nokogiri/xml/searchable.rb +34 -25
- data/lib/nokogiri/xml/syntax_error.rb +24 -1
- data/test/decorators/test_slop.rb +4 -1
- data/test/helper.rb +10 -0
- data/test/html/sax/test_parser.rb +27 -0
- data/test/html/test_document.rb +12 -1
- data/test/html/test_document_encoding.rb +1 -3
- data/test/html/test_document_fragment.rb +3 -0
- data/test/xml/sax/test_push_parser.rb +48 -0
- data/test/xml/test_attr.rb +7 -0
- data/test/xml/test_document.rb +1 -1
- data/test/xml/test_document_fragment.rb +27 -0
- data/test/xml/test_entity_reference.rb +2 -2
- data/test/xml/test_node.rb +12 -15
- data/test/xml/test_node_reparenting.rb +14 -0
- data/test/xml/test_node_set.rb +8 -6
- data/test/xml/test_reader.rb +19 -0
- data/test/xml/test_syntax_error.rb +21 -15
- data/test/xml/test_unparented_node.rb +54 -11
- data/test/xml/test_xpath.rb +23 -6
- metadata +32 -20
- data/ext/java/nokogiri/internals/NokogiriDocumentCache.java +0 -73
- data/ext/java/nokogiri/internals/XsltExtensionFunction.java +0 -72
- data/suppressions/nokogiri_ree-1.8.7.358.supp +0 -61
- data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
- data/suppressions/nokogiri_ruby-1.9.2.320.supp +0 -28
- data/suppressions/nokogiri_ruby-1.9.3.327.supp +0 -28
- data/test_all +0 -105
@@ -32,21 +32,14 @@
|
|
32
32
|
|
33
33
|
package nokogiri.internals;
|
34
34
|
|
35
|
-
import static nokogiri.internals.NokogiriHelpers.
|
35
|
+
import static nokogiri.internals.NokogiriHelpers.canonicalizeWhitespace;
|
36
36
|
import static nokogiri.internals.NokogiriHelpers.encodeJavaString;
|
37
37
|
import static nokogiri.internals.NokogiriHelpers.isNamespace;
|
38
38
|
import static nokogiri.internals.NokogiriHelpers.isWhitespaceText;
|
39
39
|
|
40
40
|
import java.nio.charset.Charset;
|
41
41
|
import java.nio.charset.CharsetEncoder;
|
42
|
-
import java.util
|
43
|
-
import java.util.ArrayList;
|
44
|
-
import java.util.Arrays;
|
45
|
-
import java.util.Comparator;
|
46
|
-
import java.util.Deque;
|
47
|
-
import java.util.Iterator;
|
48
|
-
import java.util.List;
|
49
|
-
import java.util.Stack;
|
42
|
+
import java.util.*;
|
50
43
|
import java.util.regex.Matcher;
|
51
44
|
import java.util.regex.Pattern;
|
52
45
|
|
@@ -74,10 +67,10 @@ import org.w3c.dom.Text;
|
|
74
67
|
*/
|
75
68
|
public class SaveContextVisitor {
|
76
69
|
|
77
|
-
private final
|
70
|
+
private final StringBuilder buffer;
|
78
71
|
private final Stack<String> indentation;
|
79
72
|
private String encoding;
|
80
|
-
private final
|
73
|
+
private final CharSequence indentString;
|
81
74
|
private boolean format;
|
82
75
|
private final boolean noDecl;
|
83
76
|
private final boolean noEmpty;
|
@@ -94,7 +87,8 @@ public class SaveContextVisitor {
|
|
94
87
|
private final List<Node> c14nNodeList;
|
95
88
|
private final Deque<Attr[]> c14nNamespaceStack;
|
96
89
|
private final Deque<Attr[]> c14nAttrStack;
|
97
|
-
private List<String> c14nExclusiveInclusivePrefixes = null;
|
90
|
+
//private List<String> c14nExclusiveInclusivePrefixes = null;
|
91
|
+
|
98
92
|
/*
|
99
93
|
* U can't touch this.
|
100
94
|
* http://www.youtube.com/watch?v=WJ2ZFVx6A4Q
|
@@ -117,8 +111,8 @@ public class SaveContextVisitor {
|
|
117
111
|
public static final int SUBSETS = 8;
|
118
112
|
public static final int EXCLUSIVE = 16;
|
119
113
|
|
120
|
-
public SaveContextVisitor(int options,
|
121
|
-
buffer = new
|
114
|
+
public SaveContextVisitor(int options, CharSequence indent, String encoding, boolean htmlDoc, boolean fragment, int canonicalOpts) {
|
115
|
+
buffer = new StringBuilder();
|
122
116
|
this.encoding = encoding;
|
123
117
|
indentation = new Stack<String>(); indentation.push("");
|
124
118
|
this.htmlDoc = htmlDoc;
|
@@ -150,9 +144,11 @@ public class SaveContextVisitor {
|
|
150
144
|
|
151
145
|
@Override
|
152
146
|
public String toString() {
|
153
|
-
return (
|
147
|
+
return buffer.toString();
|
154
148
|
}
|
155
149
|
|
150
|
+
public StringBuilder getInternalBuffer() { return buffer; }
|
151
|
+
|
156
152
|
public void setHtmlDoc(boolean htmlDoc) {
|
157
153
|
this.htmlDoc = htmlDoc;
|
158
154
|
}
|
@@ -161,14 +157,6 @@ public class SaveContextVisitor {
|
|
161
157
|
this.encoding = encoding;
|
162
158
|
}
|
163
159
|
|
164
|
-
public List<Node> getC14nNodeList() {
|
165
|
-
return c14nNodeList;
|
166
|
-
}
|
167
|
-
|
168
|
-
public void setC14nExclusiveInclusivePrefixes(List<String> prefixes) {
|
169
|
-
c14nExclusiveInclusivePrefixes = prefixes;
|
170
|
-
}
|
171
|
-
|
172
160
|
public boolean enter(Node node) {
|
173
161
|
if (node instanceof Document) {
|
174
162
|
return enter((Document)node);
|
@@ -265,16 +253,16 @@ public class SaveContextVisitor {
|
|
265
253
|
String name = attr.getName();
|
266
254
|
buffer.append(name);
|
267
255
|
if (!asHtml || !isHtmlBooleanAttr(name)) {
|
268
|
-
buffer.append(
|
269
|
-
buffer.append("
|
256
|
+
buffer.append('=');
|
257
|
+
buffer.append('"');
|
270
258
|
String value = replaceCharsetIfNecessary(attr);
|
271
259
|
buffer.append(serializeAttrTextContent(value, htmlDoc));
|
272
|
-
buffer.append("
|
260
|
+
buffer.append('"');
|
273
261
|
}
|
274
262
|
return true;
|
275
263
|
}
|
276
264
|
|
277
|
-
private static Pattern
|
265
|
+
private static final Pattern CHARSET =
|
278
266
|
Pattern.compile("charset(()|\\s+)=(()|\\s+)(\\w|\\_|\\.|\\-)+", Pattern.CASE_INSENSITIVE);
|
279
267
|
|
280
268
|
private String replaceCharsetIfNecessary(Attr attr) {
|
@@ -282,47 +270,47 @@ public class SaveContextVisitor {
|
|
282
270
|
if (encoding == null) return value; // unable to replace in any case
|
283
271
|
if (!"content".equals(attr.getName().toLowerCase())) return value; // must be content attr
|
284
272
|
if (!"meta".equals(attr.getOwnerElement().getNodeName().toLowerCase())) return value;
|
285
|
-
Matcher m =
|
273
|
+
Matcher m = CHARSET.matcher(value);
|
286
274
|
if (!m.find()) return value;
|
287
275
|
if (value.contains(encoding)) return value; // no need to replace
|
288
276
|
return value.replace(m.group(), "charset=" + encoding);
|
289
277
|
}
|
290
278
|
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
279
|
+
static final Set<String> HTML_BOOLEAN_ATTRS;
|
280
|
+
static {
|
281
|
+
final String[] _HTML_BOOLEAN_ATTRS = {
|
282
|
+
"checked", "compact", "declare", "defer", "disabled", "ismap",
|
283
|
+
"multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
|
284
|
+
"selected"
|
285
|
+
};
|
286
|
+
HTML_BOOLEAN_ATTRS = new HashSet<String>(Arrays.asList(_HTML_BOOLEAN_ATTRS));
|
287
|
+
}
|
296
288
|
|
297
|
-
private boolean isHtmlBooleanAttr(String name) {
|
298
|
-
|
299
|
-
if (s.equals(name)) return true;
|
300
|
-
}
|
301
|
-
return false;
|
289
|
+
private static boolean isHtmlBooleanAttr(String name) {
|
290
|
+
return HTML_BOOLEAN_ATTRS.contains(name);
|
302
291
|
}
|
303
292
|
|
304
|
-
private
|
305
|
-
if (
|
293
|
+
private static CharSequence serializeAttrTextContent(String str, boolean htmlDoc) {
|
294
|
+
if (str == null || str.length() == 0) return "";
|
306
295
|
|
307
|
-
|
308
|
-
StringBuffer buffer = new StringBuffer(c.length);
|
296
|
+
StringBuilder buffer = new StringBuilder(str.length() + 16);
|
309
297
|
|
310
|
-
for(int i = 0; i <
|
311
|
-
switch(c
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
298
|
+
for (int i = 0; i < str.length(); i++) {
|
299
|
+
char c; switch (c = str.charAt(i)) {
|
300
|
+
case '\n': buffer.append("&#10;"); break;
|
301
|
+
case '\r': buffer.append("&#13;"); break;
|
302
|
+
case '\t': buffer.append("&#9;"); break;
|
303
|
+
case '"': if (htmlDoc) buffer.append("%22");
|
316
304
|
else buffer.append("&quot;");
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
305
|
+
break;
|
306
|
+
case '<': buffer.append("&lt;"); break;
|
307
|
+
case '>': buffer.append("&gt;"); break;
|
308
|
+
case '&': buffer.append("&amp;"); break;
|
309
|
+
default: buffer.append(c);
|
322
310
|
}
|
323
311
|
}
|
324
312
|
|
325
|
-
return buffer
|
313
|
+
return buffer;
|
326
314
|
}
|
327
315
|
|
328
316
|
public void leave(Attr attr) {
|
@@ -385,19 +373,19 @@ public class SaveContextVisitor {
|
|
385
373
|
String sysId = docType.getSystemId();
|
386
374
|
String internalSubset = docType.getInternalSubset();
|
387
375
|
if (docType.getPreviousSibling() != null) {
|
388
|
-
buffer.append(
|
376
|
+
buffer.append('\n');
|
389
377
|
}
|
390
|
-
buffer.append("<!DOCTYPE "
|
378
|
+
buffer.append("<!DOCTYPE ").append(name).append(' ');
|
391
379
|
if (pubId != null) {
|
392
|
-
buffer.append("PUBLIC \""
|
393
|
-
if (sysId != null) buffer.append(" \""
|
380
|
+
buffer.append("PUBLIC \"").append(pubId).append('"');
|
381
|
+
if (sysId != null) buffer.append(" \"").append(sysId).append('"');
|
394
382
|
} else if (sysId != null) {
|
395
|
-
buffer.append("SYSTEM \""
|
383
|
+
buffer.append("SYSTEM \"").append(sysId).append('"');
|
396
384
|
}
|
397
385
|
if (internalSubset != null) {
|
398
|
-
buffer.append(
|
386
|
+
buffer.append(' ').append('[');
|
399
387
|
buffer.append(internalSubset);
|
400
|
-
buffer.append(
|
388
|
+
buffer.append(']');
|
401
389
|
}
|
402
390
|
buffer.append(">\n");
|
403
391
|
return true;
|
@@ -420,36 +408,36 @@ public class SaveContextVisitor {
|
|
420
408
|
indentation.push(current + indentString);
|
421
409
|
}
|
422
410
|
String name = element.getTagName();
|
423
|
-
buffer.append(
|
411
|
+
buffer.append('<').append(name);
|
424
412
|
Attr[] attrs = getAttrsAndNamespaces(element);
|
425
413
|
for (Attr attr : attrs) {
|
426
414
|
if (attr.getSpecified()) {
|
427
|
-
buffer.append(
|
415
|
+
buffer.append(' ');
|
428
416
|
enter(attr);
|
429
417
|
leave(attr);
|
430
418
|
}
|
431
419
|
}
|
432
420
|
if (element.hasChildNodes()) {
|
433
|
-
buffer.append(
|
434
|
-
if (needBreakInOpening(element)) buffer.append(
|
421
|
+
buffer.append('>');
|
422
|
+
if (needBreakInOpening(element)) buffer.append('\n');
|
435
423
|
return true;
|
436
424
|
}
|
437
425
|
// no child
|
438
426
|
if (asHtml) {
|
439
|
-
buffer.append(
|
427
|
+
buffer.append('>');
|
440
428
|
} else if (asXml && noEmpty) {
|
441
|
-
buffer.append(
|
429
|
+
buffer.append('>');
|
442
430
|
} else if (asXhtml) {
|
443
431
|
if (isEmpty(name)) {
|
444
432
|
buffer.append(" />"); // see http://www.w3.org/TR/xhtml1/#C_2
|
445
433
|
} else {
|
446
|
-
buffer.append(
|
434
|
+
buffer.append('>');
|
447
435
|
}
|
448
436
|
} else {
|
449
437
|
buffer.append("/>");
|
450
438
|
}
|
451
439
|
if (needBreakInOpening(element)) {
|
452
|
-
buffer.append(
|
440
|
+
buffer.append('\n');
|
453
441
|
}
|
454
442
|
return true;
|
455
443
|
}
|
@@ -619,21 +607,21 @@ public class SaveContextVisitor {
|
|
619
607
|
} else if (asBuilder) {
|
620
608
|
if (!containsText(element)) indentation.pop();
|
621
609
|
}
|
622
|
-
buffer.append("</"
|
610
|
+
buffer.append("</").append(name).append('>');
|
623
611
|
if (needBreakInClosing(element)) {
|
624
|
-
buffer.append(
|
612
|
+
buffer.append('\n');
|
625
613
|
}
|
626
614
|
return;
|
627
615
|
}
|
628
616
|
// no child, but HTML might need a closing tag.
|
629
617
|
if (asHtml || noEmpty) {
|
630
618
|
if (!isEmpty(name) && noEmpty) {
|
631
|
-
buffer.append("</"
|
619
|
+
buffer.append("</").append(name).append('>');
|
632
620
|
}
|
633
621
|
}
|
634
622
|
if (needBreakInClosing(element)) {
|
635
623
|
if (!containsText(element)) indentation.pop();
|
636
|
-
buffer.append(
|
624
|
+
buffer.append('\n');
|
637
625
|
}
|
638
626
|
}
|
639
627
|
|
@@ -686,7 +674,7 @@ public class SaveContextVisitor {
|
|
686
674
|
}
|
687
675
|
|
688
676
|
public boolean enter(EntityReference entityRef) {
|
689
|
-
buffer.append(
|
677
|
+
buffer.append('&').append(entityRef.getNodeName()).append(';');
|
690
678
|
return true;
|
691
679
|
}
|
692
680
|
public void leave(EntityReference entityRef) {
|
@@ -738,20 +726,19 @@ public class SaveContextVisitor {
|
|
738
726
|
}
|
739
727
|
|
740
728
|
private boolean isHtmlScript(Text text) {
|
741
|
-
|
729
|
+
return htmlDoc && text.getParentNode().getNodeName().equals("script");
|
742
730
|
}
|
743
731
|
|
744
732
|
private boolean isHtmlStyle(Text text) {
|
745
|
-
|
733
|
+
return htmlDoc && text.getParentNode().getNodeName().equals("style");
|
746
734
|
}
|
747
735
|
|
748
|
-
private static char lineSeparator = '\n'; // System.getProperty("line.separator"); ?
|
749
736
|
public boolean enter(Text text) {
|
750
|
-
|
737
|
+
CharSequence textContent = text.getNodeValue();
|
751
738
|
if (canonical) {
|
752
739
|
c14nNodeList.add(text);
|
753
740
|
if (isWhitespaceText(textContent)) {
|
754
|
-
buffer.append(
|
741
|
+
buffer.append(canonicalizeWhitespace(textContent));
|
755
742
|
return true;
|
756
743
|
}
|
757
744
|
}
|
@@ -765,25 +752,26 @@ public class SaveContextVisitor {
|
|
765
752
|
return true;
|
766
753
|
}
|
767
754
|
|
768
|
-
private
|
769
|
-
if (encoding == null)
|
770
|
-
|
755
|
+
private CharSequence encodeStringToHtmlEntity(CharSequence text) {
|
756
|
+
if (encoding == null) return text;
|
757
|
+
|
771
758
|
CharsetEncoder encoder = Charset.forName(encoding).newEncoder();
|
772
|
-
|
759
|
+
StringBuilder sb = new StringBuilder(text.length() + 16);
|
773
760
|
// make sure we can handle code points that are higher than 2 bytes
|
774
|
-
for (int i = 0; i < text.length();) {
|
775
|
-
int code =
|
761
|
+
for ( int i = 0; i < text.length(); ) {
|
762
|
+
int code = Character.codePointAt(text, i);
|
776
763
|
// TODO not sure about bigger offset then 2 ?!
|
777
764
|
int offset = code > 65535 ? 2 : 1;
|
778
|
-
|
765
|
+
CharSequence substr = text.subSequence(i, i + offset);
|
766
|
+
boolean canEncode = encoder.canEncode(substr);
|
779
767
|
if (canEncode) {
|
780
|
-
sb.append(
|
768
|
+
sb.append(substr);
|
781
769
|
}
|
782
770
|
else {
|
783
|
-
sb.append("&#x"
|
771
|
+
sb.append("&#x").append(Integer.toHexString(code)).append(';');
|
784
772
|
}
|
785
773
|
i += offset;
|
786
774
|
}
|
787
|
-
return
|
775
|
+
return sb;
|
788
776
|
}
|
789
777
|
}
|
@@ -48,9 +48,10 @@ import org.xml.sax.SAXParseException;
|
|
48
48
|
* @author sergio
|
49
49
|
* @author Yoko Harada <yokolet@gmail.com>
|
50
50
|
*/
|
51
|
-
public class SchemaErrorHandler implements ErrorHandler{
|
52
|
-
|
53
|
-
private Ruby runtime;
|
51
|
+
public class SchemaErrorHandler implements ErrorHandler {
|
52
|
+
|
53
|
+
private final Ruby runtime;
|
54
|
+
final RubyArray errors;
|
54
55
|
|
55
56
|
public SchemaErrorHandler(Ruby ruby, RubyArray array) {
|
56
57
|
this.runtime = ruby;
|
@@ -58,15 +59,11 @@ public class SchemaErrorHandler implements ErrorHandler{
|
|
58
59
|
}
|
59
60
|
|
60
61
|
public void warning(SAXParseException ex) throws SAXException {
|
61
|
-
|
62
|
-
xmlSyntaxError.setException(ex);
|
63
|
-
this.errors.append(xmlSyntaxError);
|
62
|
+
errors.append( XmlSyntaxError.createWarning(runtime, ex) );
|
64
63
|
}
|
65
64
|
|
66
65
|
public void error(SAXParseException ex) throws SAXException {
|
67
|
-
|
68
|
-
xmlSyntaxError.setException(ex);
|
69
|
-
this.errors.append(xmlSyntaxError);
|
66
|
+
errors.append( XmlSyntaxError.createError(runtime, ex) );
|
70
67
|
}
|
71
68
|
|
72
69
|
public void fatalError(SAXParseException ex) throws SAXException {
|
@@ -0,0 +1,167 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2017 [Karol Bucek](http://kares.org/)
|
3
|
+
*
|
4
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
5
|
+
* or more contributor license agreements. See the NOTICE file
|
6
|
+
* distributed with this work for additional information
|
7
|
+
* regarding copyright ownership. The ASF licenses this file
|
8
|
+
* to you under the Apache License, Version 2.0 (the "License");
|
9
|
+
* you may not use this file except in compliance with the License.
|
10
|
+
* You may obtain a copy of the License at
|
11
|
+
*
|
12
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
13
|
+
*
|
14
|
+
* Unless required by applicable law or agreed to in writing, software
|
15
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
16
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
17
|
+
* See the License for the specific language governing permissions and
|
18
|
+
* limitations under the License.
|
19
|
+
*/
|
20
|
+
package nokogiri.internals;
|
21
|
+
|
22
|
+
import org.apache.xml.dtm.DTM;
|
23
|
+
import org.apache.xml.dtm.DTMWSFilter;
|
24
|
+
import org.apache.xml.dtm.ref.dom2dtm.DOM2DTMExt;
|
25
|
+
import org.apache.xml.res.XMLErrorResources;
|
26
|
+
import org.apache.xml.res.XMLMessages;
|
27
|
+
import org.w3c.dom.Node;
|
28
|
+
|
29
|
+
import javax.xml.transform.dom.DOMSource;
|
30
|
+
|
31
|
+
/**
|
32
|
+
* @author kares
|
33
|
+
*/
|
34
|
+
public final class XalanDTMManagerPatch extends org.apache.xml.dtm.ref.DTMManagerDefault {
|
35
|
+
|
36
|
+
/**
|
37
|
+
* Given a W3C DOM node, try and return a DTM handle.
|
38
|
+
* Note: calling this may be non-optimal, and there is no guarantee that
|
39
|
+
* the node will be found in any particular DTM.
|
40
|
+
*
|
41
|
+
* @param node Non-null reference to a DOM node.
|
42
|
+
*
|
43
|
+
* @return a valid DTM handle.
|
44
|
+
*/
|
45
|
+
@Override
|
46
|
+
public /* synchronized */ int getDTMHandleFromNode(org.w3c.dom.Node node) {
|
47
|
+
//if (node == null) // "node must be non-null for getDTMHandleFromNode!");
|
48
|
+
// throw new IllegalArgumentException(XMLMessages.createXMLMessage(XMLErrorResources.ER_NODE_NON_NULL, null));
|
49
|
+
assert node != null;
|
50
|
+
|
51
|
+
if (node instanceof org.apache.xml.dtm.ref.DTMNodeProxy) {
|
52
|
+
return ((org.apache.xml.dtm.ref.DTMNodeProxy) node).getDTMNodeNumber();
|
53
|
+
}
|
54
|
+
|
55
|
+
// Find the DOM2DTMs wrapped around this Document (if any)
|
56
|
+
// and check whether they contain the Node in question.
|
57
|
+
//
|
58
|
+
// NOTE that since a DOM2DTM may represent a subtree rather
|
59
|
+
// than a full document, we have to be prepared to check more
|
60
|
+
// than one -- and there is no guarantee that we will find
|
61
|
+
// one that contains ancestors or siblings of the node we're
|
62
|
+
// seeking.
|
63
|
+
//
|
64
|
+
// %REVIEW% We could search for the one which contains this
|
65
|
+
// node at the deepest level, and thus covers the widest
|
66
|
+
// subtree, but that's going to entail additional work
|
67
|
+
// checking more DTMs... and getHandleOfNode is not a
|
68
|
+
// cheap operation in most implementations.
|
69
|
+
//
|
70
|
+
// TODO: %REVIEW% If overflow addressing, we may recheck a DTM
|
71
|
+
// already examined. Ouch. But with the increased number of DTMs,
|
72
|
+
// scanning back to check this is painful.
|
73
|
+
// POSSIBLE SOLUTIONS:
|
74
|
+
// Generate a list of _unique_ DTM objects?
|
75
|
+
// Have each DTM cache last DOM node search?
|
76
|
+
for(int i = 0; i < m_dtms.length; i++) {
|
77
|
+
DTM thisDTM = m_dtms[i];
|
78
|
+
if (thisDTM instanceof org.apache.xml.dtm.ref.dom2dtm.DOM2DTM) {
|
79
|
+
int handle = ((org.apache.xml.dtm.ref.dom2dtm.DOM2DTM) thisDTM).getHandleOfNode(node);
|
80
|
+
if (handle != DTM.NULL) {
|
81
|
+
return handle;
|
82
|
+
}
|
83
|
+
}
|
84
|
+
}
|
85
|
+
|
86
|
+
// Not found; generate a new DTM.
|
87
|
+
//
|
88
|
+
// %REVIEW% Is this really desirable, or should we return null
|
89
|
+
// and make folks explicitly instantiate from a DOMSource? The
|
90
|
+
// latter is more work but gives the caller the opportunity to
|
91
|
+
// explicitly add the DTM to a DTMManager... and thus to know when
|
92
|
+
// it can be discarded again, which is something we need to pay much
|
93
|
+
// more attention to. (Especially since only DTMs which are assigned
|
94
|
+
// to a manager can use the overflow addressing scheme.)
|
95
|
+
//
|
96
|
+
// %BUG% If the source node was a DOM2DTM$defaultNamespaceDeclarationNode
|
97
|
+
// and the DTM wasn't registered with this DTMManager, we will create
|
98
|
+
// a new DTM and _still_ not be able to find the node (since it will
|
99
|
+
// be resynthesized). Another reason to push hard on making all DTMs
|
100
|
+
// be managed DTMs.
|
101
|
+
|
102
|
+
// Since the real root of our tree may be a DocumentFragment, we need to
|
103
|
+
// use getParent to find the root, instead of getOwnerDocument. Otherwise
|
104
|
+
// DOM2DTM#getHandleOfNode will be very unhappy.
|
105
|
+
Node root = node; int rootType = root.getNodeType();
|
106
|
+
Node p = (rootType == Node.ATTRIBUTE_NODE) ? ((org.w3c.dom.Attr) root).getOwnerElement() : root.getParentNode();
|
107
|
+
for (; p != null; p = p.getParentNode()) root = p;
|
108
|
+
|
109
|
+
// DOM2DTM dtm = (DOM2DTM) getDTM(new DOMSource(root), false, null);
|
110
|
+
DOM2DTMExt dtm = getDTMExt(new DOMSource(root), false, null/*, true, true*/);
|
111
|
+
|
112
|
+
int handle;
|
113
|
+
|
114
|
+
if (node instanceof org.apache.xml.dtm.ref.dom2dtm.DOM2DTMdefaultNamespaceDeclarationNode) {
|
115
|
+
// Can't return the same node since it's unique to a specific DTM,
|
116
|
+
// but can return the equivalent node -- find the corresponding
|
117
|
+
// Document Element, then ask it for the xml: namespace decl.
|
118
|
+
handle = dtm.getHandleOfNode(((org.w3c.dom.Attr) node).getOwnerElement());
|
119
|
+
handle = dtm.getAttributeNode(handle, node.getNamespaceURI(), node.getLocalName());
|
120
|
+
}
|
121
|
+
else {
|
122
|
+
handle = dtm.getHandleOfNode(node);
|
123
|
+
|
124
|
+
rootType = root.getNodeType();
|
125
|
+
// Is Node actually within the same document? If not, don't search!
|
126
|
+
// This would be easier if m_root was always the Document node, but
|
127
|
+
// we decided to allow wrapping a DTM around a subtree.
|
128
|
+
if((root==node) ||
|
129
|
+
(rootType==Node.DOCUMENT_NODE && root==node.getOwnerDocument()) ||
|
130
|
+
(rootType!=Node.DOCUMENT_NODE && root.getOwnerDocument()==node.getOwnerDocument())
|
131
|
+
)
|
132
|
+
{
|
133
|
+
// If node _is_ in m_root's tree, find its handle
|
134
|
+
//
|
135
|
+
// %OPT% This check may be improved significantly when DOM
|
136
|
+
// Level 3 nodeKey and relative-order tests become
|
137
|
+
// available!
|
138
|
+
for (Node cursor = node; cursor != null;
|
139
|
+
cursor = (cursor.getNodeType()!=Node.ATTRIBUTE_NODE)
|
140
|
+
? cursor.getParentNode()
|
141
|
+
: ((org.w3c.dom.Attr)cursor).getOwnerElement()) {
|
142
|
+
if (cursor==root) {
|
143
|
+
// We know this node; find its handle.
|
144
|
+
return (dtm).getHandleFromNode(node);
|
145
|
+
}
|
146
|
+
} // for ancestors of node
|
147
|
+
} // if node and m_root in same Document
|
148
|
+
}
|
149
|
+
|
150
|
+
if (DTM.NULL == handle)
|
151
|
+
throw new RuntimeException(XMLMessages.createXMLMessage(XMLErrorResources.ER_COULD_NOT_RESOLVE_NODE, null)); //"Could not resolve the node to a handle!");
|
152
|
+
|
153
|
+
return handle;
|
154
|
+
}
|
155
|
+
|
156
|
+
private DOM2DTMExt getDTMExt(DOMSource source, boolean unique, DTMWSFilter whiteSpaceFilter/*, boolean incremental, boolean doIndexing*/) {
|
157
|
+
int dtmPos = getFirstFreeDTMID();
|
158
|
+
int documentID = dtmPos << IDENT_DTM_NODE_BITS;
|
159
|
+
|
160
|
+
//DOM2DTM dtm = new DOM2DTM(this, source, documentID, whiteSpaceFilter, m_xsf, true);
|
161
|
+
DOM2DTMExt dtm = new DOM2DTMExt(this, source, documentID, whiteSpaceFilter, m_xsf, true);
|
162
|
+
|
163
|
+
addDTM(dtm, dtmPos, 0);
|
164
|
+
return dtm;
|
165
|
+
}
|
166
|
+
|
167
|
+
}
|