nokogiri 1.7.2-java → 1.8.0-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (101)
  1. checksums.yaml +4 -4
  2. data/.cross_rubies +4 -4
  3. data/.travis.yml +43 -24
  4. data/CHANGELOG.md +54 -6
  5. data/Gemfile +8 -7
  6. data/Gemfile-libxml-ruby +3 -0
  7. data/LICENSE-DEPENDENCIES.md +1612 -0
  8. data/{LICENSE.txt → LICENSE.md} +1 -1
  9. data/Manifest.txt +5 -8
  10. data/README.md +8 -5
  11. data/Rakefile +15 -31
  12. data/appveyor.yml +2 -0
  13. data/dependencies.yml +12 -7
  14. data/ext/java/nokogiri/HtmlDocument.java +2 -2
  15. data/ext/java/nokogiri/HtmlSaxParserContext.java +20 -21
  16. data/ext/java/nokogiri/HtmlSaxPushParser.java +6 -10
  17. data/ext/java/nokogiri/NokogiriService.java +10 -31
  18. data/ext/java/nokogiri/XmlAttr.java +1 -26
  19. data/ext/java/nokogiri/XmlCdata.java +0 -1
  20. data/ext/java/nokogiri/XmlComment.java +1 -1
  21. data/ext/java/nokogiri/XmlDocument.java +4 -5
  22. data/ext/java/nokogiri/XmlDocumentFragment.java +29 -21
  23. data/ext/java/nokogiri/XmlDtd.java +1 -1
  24. data/ext/java/nokogiri/XmlElement.java +9 -10
  25. data/ext/java/nokogiri/XmlEntityDecl.java +4 -5
  26. data/ext/java/nokogiri/XmlNode.java +105 -103
  27. data/ext/java/nokogiri/XmlNodeSet.java +64 -76
  28. data/ext/java/nokogiri/XmlReader.java +48 -48
  29. data/ext/java/nokogiri/XmlRelaxng.java +1 -1
  30. data/ext/java/nokogiri/XmlSaxPushParser.java +37 -17
  31. data/ext/java/nokogiri/XmlSchema.java +7 -5
  32. data/ext/java/nokogiri/XmlSyntaxError.java +47 -35
  33. data/ext/java/nokogiri/XmlXpathContext.java +160 -132
  34. data/ext/java/nokogiri/XsltStylesheet.java +15 -24
  35. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +19 -23
  36. data/ext/java/nokogiri/internals/NokogiriDomParser.java +1 -1
  37. data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +1 -1
  38. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +11 -13
  39. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +5 -21
  40. data/ext/java/nokogiri/internals/NokogiriHandler.java +1 -1
  41. data/ext/java/nokogiri/internals/NokogiriHelpers.java +105 -142
  42. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +16 -26
  43. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +32 -50
  44. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +10 -13
  45. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +3 -10
  46. data/ext/java/nokogiri/internals/ParserContext.java +4 -8
  47. data/ext/java/nokogiri/internals/ReaderNode.java +53 -93
  48. data/ext/java/nokogiri/internals/SaveContextVisitor.java +77 -89
  49. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +6 -9
  50. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +167 -0
  51. data/ext/java/nokogiri/internals/XmlDomParserContext.java +17 -6
  52. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +1 -1
  53. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +28 -28
  54. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +3 -4
  55. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +2 -2
  56. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +10 -10
  57. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +5 -5
  58. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +2 -2
  59. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +1 -1
  60. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +2 -2
  61. data/ext/java/org/apache/xml/dtm/ref/dom2dtm/DOM2DTMExt.java +1749 -0
  62. data/ext/nokogiri/extconf.rb +12 -17
  63. data/ext/nokogiri/nokogiri.h +0 -10
  64. data/ext/nokogiri/xml_attr.c +12 -8
  65. data/ext/nokogiri/xml_node.c +17 -14
  66. data/ext/nokogiri/xml_sax_push_parser.c +56 -12
  67. data/lib/nokogiri/html/sax/parser.rb +10 -0
  68. data/lib/nokogiri/nokogiri.jar +0 -0
  69. data/lib/nokogiri/version.rb +5 -4
  70. data/lib/nokogiri/xml/document.rb +9 -9
  71. data/lib/nokogiri/xml/node.rb +7 -7
  72. data/lib/nokogiri/xml/node_set.rb +12 -7
  73. data/lib/nokogiri/xml/sax/parser.rb +6 -7
  74. data/lib/nokogiri/xml/searchable.rb +34 -25
  75. data/lib/nokogiri/xml/syntax_error.rb +24 -1
  76. data/test/decorators/test_slop.rb +4 -1
  77. data/test/helper.rb +10 -0
  78. data/test/html/sax/test_parser.rb +27 -0
  79. data/test/html/test_document.rb +12 -1
  80. data/test/html/test_document_encoding.rb +1 -3
  81. data/test/html/test_document_fragment.rb +3 -0
  82. data/test/xml/sax/test_push_parser.rb +48 -0
  83. data/test/xml/test_attr.rb +7 -0
  84. data/test/xml/test_document.rb +1 -1
  85. data/test/xml/test_document_fragment.rb +27 -0
  86. data/test/xml/test_entity_reference.rb +2 -2
  87. data/test/xml/test_node.rb +12 -15
  88. data/test/xml/test_node_reparenting.rb +14 -0
  89. data/test/xml/test_node_set.rb +8 -6
  90. data/test/xml/test_reader.rb +19 -0
  91. data/test/xml/test_syntax_error.rb +21 -15
  92. data/test/xml/test_unparented_node.rb +54 -11
  93. data/test/xml/test_xpath.rb +23 -6
  94. metadata +32 -20
  95. data/ext/java/nokogiri/internals/NokogiriDocumentCache.java +0 -73
  96. data/ext/java/nokogiri/internals/XsltExtensionFunction.java +0 -72
  97. data/suppressions/nokogiri_ree-1.8.7.358.supp +0 -61
  98. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  99. data/suppressions/nokogiri_ruby-1.9.2.320.supp +0 -28
  100. data/suppressions/nokogiri_ruby-1.9.3.327.supp +0 -28
  101. data/test_all +0 -105
@@ -32,21 +32,14 @@
32
32
 
33
33
  package nokogiri.internals;
34
34
 
35
- import static nokogiri.internals.NokogiriHelpers.canonicalizeWhitespce;
35
+ import static nokogiri.internals.NokogiriHelpers.canonicalizeWhitespace;
36
36
  import static nokogiri.internals.NokogiriHelpers.encodeJavaString;
37
37
  import static nokogiri.internals.NokogiriHelpers.isNamespace;
38
38
  import static nokogiri.internals.NokogiriHelpers.isWhitespaceText;
39
39
 
40
40
  import java.nio.charset.Charset;
41
41
  import java.nio.charset.CharsetEncoder;
42
- import java.util.ArrayDeque;
43
- import java.util.ArrayList;
44
- import java.util.Arrays;
45
- import java.util.Comparator;
46
- import java.util.Deque;
47
- import java.util.Iterator;
48
- import java.util.List;
49
- import java.util.Stack;
42
+ import java.util.*;
50
43
  import java.util.regex.Matcher;
51
44
  import java.util.regex.Pattern;
52
45
 
@@ -74,10 +67,10 @@ import org.w3c.dom.Text;
74
67
  */
75
68
  public class SaveContextVisitor {
76
69
 
77
- private final StringBuffer buffer;
70
+ private final StringBuilder buffer;
78
71
  private final Stack<String> indentation;
79
72
  private String encoding;
80
- private final String indentString;
73
+ private final CharSequence indentString;
81
74
  private boolean format;
82
75
  private final boolean noDecl;
83
76
  private final boolean noEmpty;
@@ -94,7 +87,8 @@ public class SaveContextVisitor {
94
87
  private final List<Node> c14nNodeList;
95
88
  private final Deque<Attr[]> c14nNamespaceStack;
96
89
  private final Deque<Attr[]> c14nAttrStack;
97
- private List<String> c14nExclusiveInclusivePrefixes = null;
90
+ //private List<String> c14nExclusiveInclusivePrefixes = null;
91
+
98
92
  /*
99
93
  * U can't touch this.
100
94
  * http://www.youtube.com/watch?v=WJ2ZFVx6A4Q
@@ -117,8 +111,8 @@ public class SaveContextVisitor {
117
111
  public static final int SUBSETS = 8;
118
112
  public static final int EXCLUSIVE = 16;
119
113
 
120
- public SaveContextVisitor(int options, String indent, String encoding, boolean htmlDoc, boolean fragment, int canonicalOpts) {
121
- buffer = new StringBuffer();
114
+ public SaveContextVisitor(int options, CharSequence indent, String encoding, boolean htmlDoc, boolean fragment, int canonicalOpts) {
115
+ buffer = new StringBuilder();
122
116
  this.encoding = encoding;
123
117
  indentation = new Stack<String>(); indentation.push("");
124
118
  this.htmlDoc = htmlDoc;
@@ -150,9 +144,11 @@ public class SaveContextVisitor {
150
144
 
151
145
  @Override
152
146
  public String toString() {
153
- return (new String(buffer));
147
+ return buffer.toString();
154
148
  }
155
149
 
150
+ public StringBuilder getInternalBuffer() { return buffer; }
151
+
156
152
  public void setHtmlDoc(boolean htmlDoc) {
157
153
  this.htmlDoc = htmlDoc;
158
154
  }
@@ -161,14 +157,6 @@ public class SaveContextVisitor {
161
157
  this.encoding = encoding;
162
158
  }
163
159
 
164
- public List<Node> getC14nNodeList() {
165
- return c14nNodeList;
166
- }
167
-
168
- public void setC14nExclusiveInclusivePrefixes(List<String> prefixes) {
169
- c14nExclusiveInclusivePrefixes = prefixes;
170
- }
171
-
172
160
  public boolean enter(Node node) {
173
161
  if (node instanceof Document) {
174
162
  return enter((Document)node);
@@ -265,16 +253,16 @@ public class SaveContextVisitor {
265
253
  String name = attr.getName();
266
254
  buffer.append(name);
267
255
  if (!asHtml || !isHtmlBooleanAttr(name)) {
268
- buffer.append("=");
269
- buffer.append("\"");
256
+ buffer.append('=');
257
+ buffer.append('"');
270
258
  String value = replaceCharsetIfNecessary(attr);
271
259
  buffer.append(serializeAttrTextContent(value, htmlDoc));
272
- buffer.append("\"");
260
+ buffer.append('"');
273
261
  }
274
262
  return true;
275
263
  }
276
264
 
277
- private static Pattern p =
265
+ private static final Pattern CHARSET =
278
266
  Pattern.compile("charset(()|\\s+)=(()|\\s+)(\\w|\\_|\\.|\\-)+", Pattern.CASE_INSENSITIVE);
279
267
 
280
268
  private String replaceCharsetIfNecessary(Attr attr) {
@@ -282,47 +270,47 @@ public class SaveContextVisitor {
282
270
  if (encoding == null) return value; // unable to replace in any case
283
271
  if (!"content".equals(attr.getName().toLowerCase())) return value; // must be content attr
284
272
  if (!"meta".equals(attr.getOwnerElement().getNodeName().toLowerCase())) return value;
285
- Matcher m = p.matcher(value);
273
+ Matcher m = CHARSET.matcher(value);
286
274
  if (!m.find()) return value;
287
275
  if (value.contains(encoding)) return value; // no need to replace
288
276
  return value.replace(m.group(), "charset=" + encoding);
289
277
  }
290
278
 
291
- public static final String[] HTML_BOOLEAN_ATTRS = {
292
- "checked", "compact", "declare", "defer", "disabled", "ismap",
293
- "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
294
- "selected"
295
- };
279
+ static final Set<String> HTML_BOOLEAN_ATTRS;
280
+ static {
281
+ final String[] _HTML_BOOLEAN_ATTRS = {
282
+ "checked", "compact", "declare", "defer", "disabled", "ismap",
283
+ "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
284
+ "selected"
285
+ };
286
+ HTML_BOOLEAN_ATTRS = new HashSet<String>(Arrays.asList(_HTML_BOOLEAN_ATTRS));
287
+ }
296
288
 
297
- private boolean isHtmlBooleanAttr(String name) {
298
- for (String s : HTML_BOOLEAN_ATTRS) {
299
- if (s.equals(name)) return true;
300
- }
301
- return false;
289
+ private static boolean isHtmlBooleanAttr(String name) {
290
+ return HTML_BOOLEAN_ATTRS.contains(name);
302
291
  }
303
292
 
304
- private String serializeAttrTextContent(String s, boolean htmlDoc) {
305
- if (s == null) return "";
293
+ private static CharSequence serializeAttrTextContent(String str, boolean htmlDoc) {
294
+ if (str == null || str.length() == 0) return "";
306
295
 
307
- char[] c = s.toCharArray();
308
- StringBuffer buffer = new StringBuffer(c.length);
296
+ StringBuilder buffer = new StringBuilder(str.length() + 16);
309
297
 
310
- for(int i = 0; i < c.length; i++) {
311
- switch(c[i]){
312
- case '\n': buffer.append("&#10;"); break;
313
- case '\r': buffer.append("&#13;"); break;
314
- case '\t': buffer.append("&#9;"); break;
315
- case '"': if (htmlDoc) buffer.append("%22");
298
+ for (int i = 0; i < str.length(); i++) {
299
+ char c; switch (c = str.charAt(i)) {
300
+ case '\n': buffer.append("&#10;"); break;
301
+ case '\r': buffer.append("&#13;"); break;
302
+ case '\t': buffer.append("&#9;"); break;
303
+ case '"': if (htmlDoc) buffer.append("%22");
316
304
  else buffer.append("&quot;");
317
- break;
318
- case '<': buffer.append("&lt;"); break;
319
- case '>': buffer.append("&gt;"); break;
320
- case '&': buffer.append("&amp;"); break;
321
- default: buffer.append(c[i]);
305
+ break;
306
+ case '<': buffer.append("&lt;"); break;
307
+ case '>': buffer.append("&gt;"); break;
308
+ case '&': buffer.append("&amp;"); break;
309
+ default: buffer.append(c);
322
310
  }
323
311
  }
324
312
 
325
- return buffer.toString();
313
+ return buffer;
326
314
  }
327
315
 
328
316
  public void leave(Attr attr) {
@@ -385,19 +373,19 @@ public class SaveContextVisitor {
385
373
  String sysId = docType.getSystemId();
386
374
  String internalSubset = docType.getInternalSubset();
387
375
  if (docType.getPreviousSibling() != null) {
388
- buffer.append("\n");
376
+ buffer.append('\n');
389
377
  }
390
- buffer.append("<!DOCTYPE " + name + " ");
378
+ buffer.append("<!DOCTYPE ").append(name).append(' ');
391
379
  if (pubId != null) {
392
- buffer.append("PUBLIC \"" + pubId + "\"");
393
- if (sysId != null) buffer.append(" \"" + sysId + "\"");
380
+ buffer.append("PUBLIC \"").append(pubId).append('"');
381
+ if (sysId != null) buffer.append(" \"").append(sysId).append('"');
394
382
  } else if (sysId != null) {
395
- buffer.append("SYSTEM \"" + sysId + "\"");
383
+ buffer.append("SYSTEM \"").append(sysId).append('"');
396
384
  }
397
385
  if (internalSubset != null) {
398
- buffer.append(" [");
386
+ buffer.append(' ').append('[');
399
387
  buffer.append(internalSubset);
400
- buffer.append("]");
388
+ buffer.append(']');
401
389
  }
402
390
  buffer.append(">\n");
403
391
  return true;
@@ -420,36 +408,36 @@ public class SaveContextVisitor {
420
408
  indentation.push(current + indentString);
421
409
  }
422
410
  String name = element.getTagName();
423
- buffer.append("<" + name);
411
+ buffer.append('<').append(name);
424
412
  Attr[] attrs = getAttrsAndNamespaces(element);
425
413
  for (Attr attr : attrs) {
426
414
  if (attr.getSpecified()) {
427
- buffer.append(" ");
415
+ buffer.append(' ');
428
416
  enter(attr);
429
417
  leave(attr);
430
418
  }
431
419
  }
432
420
  if (element.hasChildNodes()) {
433
- buffer.append(">");
434
- if (needBreakInOpening(element)) buffer.append("\n");
421
+ buffer.append('>');
422
+ if (needBreakInOpening(element)) buffer.append('\n');
435
423
  return true;
436
424
  }
437
425
  // no child
438
426
  if (asHtml) {
439
- buffer.append(">");
427
+ buffer.append('>');
440
428
  } else if (asXml && noEmpty) {
441
- buffer.append(">");
429
+ buffer.append('>');
442
430
  } else if (asXhtml) {
443
431
  if (isEmpty(name)) {
444
432
  buffer.append(" />"); // see http://www.w3.org/TR/xhtml1/#C_2
445
433
  } else {
446
- buffer.append(">");
434
+ buffer.append('>');
447
435
  }
448
436
  } else {
449
437
  buffer.append("/>");
450
438
  }
451
439
  if (needBreakInOpening(element)) {
452
- buffer.append("\n");
440
+ buffer.append('\n');
453
441
  }
454
442
  return true;
455
443
  }
@@ -619,21 +607,21 @@ public class SaveContextVisitor {
619
607
  } else if (asBuilder) {
620
608
  if (!containsText(element)) indentation.pop();
621
609
  }
622
- buffer.append("</" + name + ">");
610
+ buffer.append("</").append(name).append('>');
623
611
  if (needBreakInClosing(element)) {
624
- buffer.append("\n");
612
+ buffer.append('\n');
625
613
  }
626
614
  return;
627
615
  }
628
616
  // no child, but HTML might need a closing tag.
629
617
  if (asHtml || noEmpty) {
630
618
  if (!isEmpty(name) && noEmpty) {
631
- buffer.append("</" + name + ">");
619
+ buffer.append("</").append(name).append('>');
632
620
  }
633
621
  }
634
622
  if (needBreakInClosing(element)) {
635
623
  if (!containsText(element)) indentation.pop();
636
- buffer.append("\n");
624
+ buffer.append('\n');
637
625
  }
638
626
  }
639
627
 
@@ -686,7 +674,7 @@ public class SaveContextVisitor {
686
674
  }
687
675
 
688
676
  public boolean enter(EntityReference entityRef) {
689
- buffer.append("&" + entityRef.getNodeName() + ";");
677
+ buffer.append('&').append(entityRef.getNodeName()).append(';');
690
678
  return true;
691
679
  }
692
680
  public void leave(EntityReference entityRef) {
@@ -738,20 +726,19 @@ public class SaveContextVisitor {
738
726
  }
739
727
 
740
728
  private boolean isHtmlScript(Text text) {
741
- return htmlDoc && text.getParentNode().getNodeName().equals("script");
729
+ return htmlDoc && text.getParentNode().getNodeName().equals("script");
742
730
  }
743
731
 
744
732
  private boolean isHtmlStyle(Text text) {
745
- return htmlDoc && text.getParentNode().getNodeName().equals("style");
733
+ return htmlDoc && text.getParentNode().getNodeName().equals("style");
746
734
  }
747
735
 
748
- private static char lineSeparator = '\n'; // System.getProperty("line.separator"); ?
749
736
  public boolean enter(Text text) {
750
- String textContent = text.getNodeValue();
737
+ CharSequence textContent = text.getNodeValue();
751
738
  if (canonical) {
752
739
  c14nNodeList.add(text);
753
740
  if (isWhitespaceText(textContent)) {
754
- buffer.append(canonicalizeWhitespce(textContent));
741
+ buffer.append(canonicalizeWhitespace(textContent));
755
742
  return true;
756
743
  }
757
744
  }
@@ -765,25 +752,26 @@ public class SaveContextVisitor {
765
752
  return true;
766
753
  }
767
754
 
768
- private String encodeStringToHtmlEntity(String text) {
769
- if (encoding == null)
770
- return text;
755
+ private CharSequence encodeStringToHtmlEntity(CharSequence text) {
756
+ if (encoding == null) return text;
757
+
771
758
  CharsetEncoder encoder = Charset.forName(encoding).newEncoder();
772
- StringBuffer sb = new StringBuffer();
759
+ StringBuilder sb = new StringBuilder(text.length() + 16);
773
760
  // make sure we can handle code points that are higher than 2 bytes
774
- for (int i = 0; i < text.length();) {
775
- int code = text.codePointAt(i);
761
+ for ( int i = 0; i < text.length(); ) {
762
+ int code = Character.codePointAt(text, i);
776
763
  // TODO not sure about bigger offset then 2 ?!
777
764
  int offset = code > 65535 ? 2 : 1;
778
- boolean canEncode = encoder.canEncode(text.substring(i, i + offset));
765
+ CharSequence substr = text.subSequence(i, i + offset);
766
+ boolean canEncode = encoder.canEncode(substr);
779
767
  if (canEncode) {
780
- sb.append(text.substring(i, i + offset));
768
+ sb.append(substr);
781
769
  }
782
770
  else {
783
- sb.append("&#x" + Integer.toHexString(code) + ";");
771
+ sb.append("&#x").append(Integer.toHexString(code)).append(';');
784
772
  }
785
773
  i += offset;
786
774
  }
787
- return new String(sb);
775
+ return sb;
788
776
  }
789
777
  }
@@ -48,9 +48,10 @@ import org.xml.sax.SAXParseException;
48
48
  * @author sergio
49
49
  * @author Yoko Harada <yokolet@gmail.com>
50
50
  */
51
- public class SchemaErrorHandler implements ErrorHandler{
52
- private RubyArray errors;
53
- private Ruby runtime;
51
+ public class SchemaErrorHandler implements ErrorHandler {
52
+
53
+ private final Ruby runtime;
54
+ final RubyArray errors;
54
55
 
55
56
  public SchemaErrorHandler(Ruby ruby, RubyArray array) {
56
57
  this.runtime = ruby;
@@ -58,15 +59,11 @@ public class SchemaErrorHandler implements ErrorHandler{
58
59
  }
59
60
 
60
61
  public void warning(SAXParseException ex) throws SAXException {
61
- XmlSyntaxError xmlSyntaxError = (XmlSyntaxError) NokogiriService.XML_SYNTAXERROR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::SyntaxError"));
62
- xmlSyntaxError.setException(ex);
63
- this.errors.append(xmlSyntaxError);
62
+ errors.append( XmlSyntaxError.createWarning(runtime, ex) );
64
63
  }
65
64
 
66
65
  public void error(SAXParseException ex) throws SAXException {
67
- XmlSyntaxError xmlSyntaxError = (XmlSyntaxError) NokogiriService.XML_SYNTAXERROR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::SyntaxError"));
68
- xmlSyntaxError.setException(ex);
69
- this.errors.append(xmlSyntaxError);
66
+ errors.append( XmlSyntaxError.createError(runtime, ex) );
70
67
  }
71
68
 
72
69
  public void fatalError(SAXParseException ex) throws SAXException {
@@ -0,0 +1,167 @@
1
+ /*
2
+ * Copyright (c) 2017 [Karol Bucek](http://kares.org/)
3
+ *
4
+ * Licensed to the Apache Software Foundation (ASF) under one
5
+ * or more contributor license agreements. See the NOTICE file
6
+ * distributed with this work for additional information
7
+ * regarding copyright ownership. The ASF licenses this file
8
+ * to you under the Apache License, Version 2.0 (the "License");
9
+ * you may not use this file except in compliance with the License.
10
+ * You may obtain a copy of the License at
11
+ *
12
+ * http://www.apache.org/licenses/LICENSE-2.0
13
+ *
14
+ * Unless required by applicable law or agreed to in writing, software
15
+ * distributed under the License is distributed on an "AS IS" BASIS,
16
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
+ * See the License for the specific language governing permissions and
18
+ * limitations under the License.
19
+ */
20
+ package nokogiri.internals;
21
+
22
+ import org.apache.xml.dtm.DTM;
23
+ import org.apache.xml.dtm.DTMWSFilter;
24
+ import org.apache.xml.dtm.ref.dom2dtm.DOM2DTMExt;
25
+ import org.apache.xml.res.XMLErrorResources;
26
+ import org.apache.xml.res.XMLMessages;
27
+ import org.w3c.dom.Node;
28
+
29
+ import javax.xml.transform.dom.DOMSource;
30
+
31
+ /**
32
+ * @author kares
33
+ */
34
+ public final class XalanDTMManagerPatch extends org.apache.xml.dtm.ref.DTMManagerDefault {
35
+
36
+ /**
37
+ * Given a W3C DOM node, try and return a DTM handle.
38
+ * Note: calling this may be non-optimal, and there is no guarantee that
39
+ * the node will be found in any particular DTM.
40
+ *
41
+ * @param node Non-null reference to a DOM node.
42
+ *
43
+ * @return a valid DTM handle.
44
+ */
45
+ @Override
46
+ public /* synchronized */ int getDTMHandleFromNode(org.w3c.dom.Node node) {
47
+ //if (node == null) // "node must be non-null for getDTMHandleFromNode!");
48
+ // throw new IllegalArgumentException(XMLMessages.createXMLMessage(XMLErrorResources.ER_NODE_NON_NULL, null));
49
+ assert node != null;
50
+
51
+ if (node instanceof org.apache.xml.dtm.ref.DTMNodeProxy) {
52
+ return ((org.apache.xml.dtm.ref.DTMNodeProxy) node).getDTMNodeNumber();
53
+ }
54
+
55
+ // Find the DOM2DTMs wrapped around this Document (if any)
56
+ // and check whether they contain the Node in question.
57
+ //
58
+ // NOTE that since a DOM2DTM may represent a subtree rather
59
+ // than a full document, we have to be prepared to check more
60
+ // than one -- and there is no guarantee that we will find
61
+ // one that contains ancestors or siblings of the node we're
62
+ // seeking.
63
+ //
64
+ // %REVIEW% We could search for the one which contains this
65
+ // node at the deepest level, and thus covers the widest
66
+ // subtree, but that's going to entail additional work
67
+ // checking more DTMs... and getHandleOfNode is not a
68
+ // cheap operation in most implementations.
69
+ //
70
+ // TODO: %REVIEW% If overflow addressing, we may recheck a DTM
71
+ // already examined. Ouch. But with the increased number of DTMs,
72
+ // scanning back to check this is painful.
73
+ // POSSIBLE SOLUTIONS:
74
+ // Generate a list of _unique_ DTM objects?
75
+ // Have each DTM cache last DOM node search?
76
+ for(int i = 0; i < m_dtms.length; i++) {
77
+ DTM thisDTM = m_dtms[i];
78
+ if (thisDTM instanceof org.apache.xml.dtm.ref.dom2dtm.DOM2DTM) {
79
+ int handle = ((org.apache.xml.dtm.ref.dom2dtm.DOM2DTM) thisDTM).getHandleOfNode(node);
80
+ if (handle != DTM.NULL) {
81
+ return handle;
82
+ }
83
+ }
84
+ }
85
+
86
+ // Not found; generate a new DTM.
87
+ //
88
+ // %REVIEW% Is this really desirable, or should we return null
89
+ // and make folks explicitly instantiate from a DOMSource? The
90
+ // latter is more work but gives the caller the opportunity to
91
+ // explicitly add the DTM to a DTMManager... and thus to know when
92
+ // it can be discarded again, which is something we need to pay much
93
+ // more attention to. (Especially since only DTMs which are assigned
94
+ // to a manager can use the overflow addressing scheme.)
95
+ //
96
+ // %BUG% If the source node was a DOM2DTM$defaultNamespaceDeclarationNode
97
+ // and the DTM wasn't registered with this DTMManager, we will create
98
+ // a new DTM and _still_ not be able to find the node (since it will
99
+ // be resynthesized). Another reason to push hard on making all DTMs
100
+ // be managed DTMs.
101
+
102
+ // Since the real root of our tree may be a DocumentFragment, we need to
103
+ // use getParent to find the root, instead of getOwnerDocument. Otherwise
104
+ // DOM2DTM#getHandleOfNode will be very unhappy.
105
+ Node root = node; int rootType = root.getNodeType();
106
+ Node p = (rootType == Node.ATTRIBUTE_NODE) ? ((org.w3c.dom.Attr) root).getOwnerElement() : root.getParentNode();
107
+ for (; p != null; p = p.getParentNode()) root = p;
108
+
109
+ // DOM2DTM dtm = (DOM2DTM) getDTM(new DOMSource(root), false, null);
110
+ DOM2DTMExt dtm = getDTMExt(new DOMSource(root), false, null/*, true, true*/);
111
+
112
+ int handle;
113
+
114
+ if (node instanceof org.apache.xml.dtm.ref.dom2dtm.DOM2DTMdefaultNamespaceDeclarationNode) {
115
+ // Can't return the same node since it's unique to a specific DTM,
116
+ // but can return the equivalent node -- find the corresponding
117
+ // Document Element, then ask it for the xml: namespace decl.
118
+ handle = dtm.getHandleOfNode(((org.w3c.dom.Attr) node).getOwnerElement());
119
+ handle = dtm.getAttributeNode(handle, node.getNamespaceURI(), node.getLocalName());
120
+ }
121
+ else {
122
+ handle = dtm.getHandleOfNode(node);
123
+
124
+ rootType = root.getNodeType();
125
+ // Is Node actually within the same document? If not, don't search!
126
+ // This would be easier if m_root was always the Document node, but
127
+ // we decided to allow wrapping a DTM around a subtree.
128
+ if((root==node) ||
129
+ (rootType==Node.DOCUMENT_NODE && root==node.getOwnerDocument()) ||
130
+ (rootType!=Node.DOCUMENT_NODE && root.getOwnerDocument()==node.getOwnerDocument())
131
+ )
132
+ {
133
+ // If node _is_ in m_root's tree, find its handle
134
+ //
135
+ // %OPT% This check may be improved significantly when DOM
136
+ // Level 3 nodeKey and relative-order tests become
137
+ // available!
138
+ for (Node cursor = node; cursor != null;
139
+ cursor = (cursor.getNodeType()!=Node.ATTRIBUTE_NODE)
140
+ ? cursor.getParentNode()
141
+ : ((org.w3c.dom.Attr)cursor).getOwnerElement()) {
142
+ if (cursor==root) {
143
+ // We know this node; find its handle.
144
+ return (dtm).getHandleFromNode(node);
145
+ }
146
+ } // for ancestors of node
147
+ } // if node and m_root in same Document
148
+ }
149
+
150
+ if (DTM.NULL == handle)
151
+ throw new RuntimeException(XMLMessages.createXMLMessage(XMLErrorResources.ER_COULD_NOT_RESOLVE_NODE, null)); //"Could not resolve the node to a handle!");
152
+
153
+ return handle;
154
+ }
155
+
156
+ private DOM2DTMExt getDTMExt(DOMSource source, boolean unique, DTMWSFilter whiteSpaceFilter/*, boolean incremental, boolean doIndexing*/) {
157
+ int dtmPos = getFirstFreeDTMID();
158
+ int documentID = dtmPos << IDENT_DTM_NODE_BITS;
159
+
160
+ //DOM2DTM dtm = new DOM2DTM(this, source, documentID, whiteSpaceFilter, m_xsf, true);
161
+ DOM2DTMExt dtm = new DOM2DTMExt(this, source, documentID, whiteSpaceFilter, m_xsf, true);
162
+
163
+ addDTM(dtm, dtmPos, 0);
164
+ return dtm;
165
+ }
166
+
167
+ }