nokogiri 1.11.0.rc3-java → 1.11.4-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (187) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/LICENSE.md +1 -1
  5. data/README.md +168 -91
  6. data/dependencies.yml +12 -12
  7. data/ext/java/nokogiri/EncodingHandler.java +76 -89
  8. data/ext/java/nokogiri/HtmlDocument.java +135 -144
  9. data/ext/java/nokogiri/HtmlElementDescription.java +102 -117
  10. data/ext/java/nokogiri/HtmlEntityLookup.java +33 -60
  11. data/ext/java/nokogiri/HtmlSaxParserContext.java +218 -222
  12. data/ext/java/nokogiri/HtmlSaxPushParser.java +162 -169
  13. data/ext/java/nokogiri/NokogiriService.java +595 -556
  14. data/ext/java/nokogiri/XmlAttr.java +118 -126
  15. data/ext/java/nokogiri/XmlAttributeDecl.java +95 -106
  16. data/ext/java/nokogiri/XmlCdata.java +35 -58
  17. data/ext/java/nokogiri/XmlComment.java +46 -67
  18. data/ext/java/nokogiri/XmlDocument.java +645 -572
  19. data/ext/java/nokogiri/XmlDocumentFragment.java +125 -137
  20. data/ext/java/nokogiri/XmlDtd.java +448 -414
  21. data/ext/java/nokogiri/XmlElement.java +23 -48
  22. data/ext/java/nokogiri/XmlElementContent.java +343 -316
  23. data/ext/java/nokogiri/XmlElementDecl.java +124 -125
  24. data/ext/java/nokogiri/XmlEntityDecl.java +119 -127
  25. data/ext/java/nokogiri/XmlEntityReference.java +49 -72
  26. data/ext/java/nokogiri/XmlNamespace.java +175 -175
  27. data/ext/java/nokogiri/XmlNode.java +1843 -1622
  28. data/ext/java/nokogiri/XmlNodeSet.java +361 -331
  29. data/ext/java/nokogiri/XmlProcessingInstruction.java +47 -69
  30. data/ext/java/nokogiri/XmlReader.java +513 -450
  31. data/ext/java/nokogiri/XmlRelaxng.java +89 -101
  32. data/ext/java/nokogiri/XmlSaxParserContext.java +328 -310
  33. data/ext/java/nokogiri/XmlSaxPushParser.java +227 -220
  34. data/ext/java/nokogiri/XmlSchema.java +335 -242
  35. data/ext/java/nokogiri/XmlSyntaxError.java +113 -119
  36. data/ext/java/nokogiri/XmlText.java +55 -76
  37. data/ext/java/nokogiri/XmlXpathContext.java +242 -210
  38. data/ext/java/nokogiri/XsltStylesheet.java +280 -269
  39. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  40. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +201 -190
  41. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  42. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  43. data/ext/java/nokogiri/internals/NokogiriDomParser.java +63 -80
  44. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  45. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +27 -52
  46. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -286
  47. data/ext/java/nokogiri/internals/NokogiriHelpers.java +736 -652
  48. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +184 -173
  49. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +79 -89
  50. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +64 -79
  51. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +84 -99
  52. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +48 -65
  53. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +119 -78
  54. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -54
  55. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +23 -46
  56. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +55 -72
  57. data/ext/java/nokogiri/internals/ParserContext.java +206 -211
  58. data/ext/java/nokogiri/internals/ReaderNode.java +478 -403
  59. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -739
  60. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +31 -54
  61. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  62. data/ext/java/nokogiri/internals/XmlDeclHandler.java +3 -34
  63. data/ext/java/nokogiri/internals/XmlDomParserContext.java +206 -207
  64. data/ext/java/nokogiri/internals/XmlSaxParser.java +22 -47
  65. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  66. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  67. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  68. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  78. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  81. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  82. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  83. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  84. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  85. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  86. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  87. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  88. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  89. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  90. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  91. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  92. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  93. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -574
  94. data/ext/nokogiri/depend +37 -358
  95. data/ext/nokogiri/extconf.rb +581 -374
  96. data/ext/nokogiri/html_document.c +78 -82
  97. data/ext/nokogiri/html_element_description.c +84 -71
  98. data/ext/nokogiri/html_entity_lookup.c +21 -16
  99. data/ext/nokogiri/html_sax_parser_context.c +69 -66
  100. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  101. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  102. data/ext/nokogiri/nokogiri.c +192 -93
  103. data/ext/nokogiri/test_global_handlers.c +40 -0
  104. data/ext/nokogiri/xml_attr.c +15 -15
  105. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  106. data/ext/nokogiri/xml_cdata.c +13 -18
  107. data/ext/nokogiri/xml_comment.c +19 -26
  108. data/ext/nokogiri/xml_document.c +246 -188
  109. data/ext/nokogiri/xml_document_fragment.c +13 -15
  110. data/ext/nokogiri/xml_dtd.c +54 -48
  111. data/ext/nokogiri/xml_element_content.c +30 -27
  112. data/ext/nokogiri/xml_element_decl.c +22 -22
  113. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  114. data/ext/nokogiri/xml_entity_decl.c +32 -30
  115. data/ext/nokogiri/xml_entity_reference.c +16 -18
  116. data/ext/nokogiri/xml_namespace.c +56 -49
  117. data/ext/nokogiri/xml_node.c +371 -320
  118. data/ext/nokogiri/xml_node_set.c +168 -156
  119. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  120. data/ext/nokogiri/xml_reader.c +191 -157
  121. data/ext/nokogiri/xml_relax_ng.c +52 -28
  122. data/ext/nokogiri/xml_sax_parser.c +118 -118
  123. data/ext/nokogiri/xml_sax_parser_context.c +103 -86
  124. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  125. data/ext/nokogiri/xml_schema.c +95 -47
  126. data/ext/nokogiri/xml_syntax_error.c +42 -21
  127. data/ext/nokogiri/xml_text.c +13 -17
  128. data/ext/nokogiri/xml_xpath_context.c +206 -123
  129. data/ext/nokogiri/xslt_stylesheet.c +158 -161
  130. data/lib/nokogiri.rb +3 -7
  131. data/lib/nokogiri/css/parser.rb +3 -3
  132. data/lib/nokogiri/css/parser.y +2 -2
  133. data/lib/nokogiri/css/xpath_visitor.rb +70 -42
  134. data/lib/nokogiri/extension.rb +26 -0
  135. data/lib/nokogiri/html/document.rb +12 -26
  136. data/lib/nokogiri/html/document_fragment.rb +15 -15
  137. data/lib/nokogiri/nokogiri.jar +0 -0
  138. data/lib/nokogiri/version.rb +2 -149
  139. data/lib/nokogiri/version/constant.rb +5 -0
  140. data/lib/nokogiri/version/info.rb +205 -0
  141. data/lib/nokogiri/xml/document.rb +91 -35
  142. data/lib/nokogiri/xml/document_fragment.rb +4 -6
  143. data/lib/nokogiri/xml/node.rb +89 -69
  144. data/lib/nokogiri/xml/parse_options.rb +6 -0
  145. data/lib/nokogiri/xml/reader.rb +2 -9
  146. data/lib/nokogiri/xml/relax_ng.rb +6 -2
  147. data/lib/nokogiri/xml/schema.rb +12 -4
  148. data/lib/nokogiri/xml/searchable.rb +3 -1
  149. data/lib/nokogiri/xml/xpath.rb +1 -3
  150. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  151. metadata +86 -177
  152. data/ext/nokogiri/html_document.h +0 -10
  153. data/ext/nokogiri/html_element_description.h +0 -10
  154. data/ext/nokogiri/html_entity_lookup.h +0 -8
  155. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  156. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  157. data/ext/nokogiri/nokogiri.h +0 -134
  158. data/ext/nokogiri/xml_attr.h +0 -9
  159. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  160. data/ext/nokogiri/xml_cdata.h +0 -9
  161. data/ext/nokogiri/xml_comment.h +0 -9
  162. data/ext/nokogiri/xml_document.h +0 -23
  163. data/ext/nokogiri/xml_document_fragment.h +0 -10
  164. data/ext/nokogiri/xml_dtd.h +0 -10
  165. data/ext/nokogiri/xml_element_content.h +0 -10
  166. data/ext/nokogiri/xml_element_decl.h +0 -9
  167. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  168. data/ext/nokogiri/xml_entity_decl.h +0 -10
  169. data/ext/nokogiri/xml_entity_reference.h +0 -9
  170. data/ext/nokogiri/xml_io.c +0 -63
  171. data/ext/nokogiri/xml_io.h +0 -11
  172. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  173. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  174. data/ext/nokogiri/xml_namespace.h +0 -14
  175. data/ext/nokogiri/xml_node.h +0 -13
  176. data/ext/nokogiri/xml_node_set.h +0 -12
  177. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  178. data/ext/nokogiri/xml_reader.h +0 -10
  179. data/ext/nokogiri/xml_relax_ng.h +0 -9
  180. data/ext/nokogiri/xml_sax_parser.h +0 -39
  181. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  182. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  183. data/ext/nokogiri/xml_schema.h +0 -9
  184. data/ext/nokogiri/xml_syntax_error.h +0 -13
  185. data/ext/nokogiri/xml_text.h +0 -9
  186. data/ext/nokogiri/xml_xpath_context.h +0 -10
  187. data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -1,35 +1,3 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2011:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
1
  package nokogiri.internals;
34
2
 
35
3
  import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
@@ -44,30 +12,39 @@ import org.xml.sax.SAXParseException;
44
12
 
45
13
  /**
46
14
  * Error handler for Relax and W3C XML Schema.
47
- *
15
+ *
48
16
  * @author sergio
49
17
  * @author Yoko Harada <yokolet@gmail.com>
50
18
  */
51
- public class SchemaErrorHandler implements ErrorHandler {
52
-
53
- private final Ruby runtime;
54
- final RubyArray errors;
55
-
56
- public SchemaErrorHandler(Ruby ruby, RubyArray array) {
57
- this.runtime = ruby;
58
- this.errors = array;
59
- }
60
-
61
- public void warning(SAXParseException ex) throws SAXException {
62
- errors.append( XmlSyntaxError.createWarning(runtime, ex) );
63
- }
64
-
65
- public void error(SAXParseException ex) throws SAXException {
66
- errors.append( XmlSyntaxError.createError(runtime, ex) );
67
- }
68
-
69
- public void fatalError(SAXParseException ex) throws SAXException {
70
- throw ex;
71
- }
19
+ public class SchemaErrorHandler implements ErrorHandler
20
+ {
21
+
22
+ private final Ruby runtime;
23
+ final RubyArray errors;
24
+
25
+ public
26
+ SchemaErrorHandler(Ruby ruby, RubyArray array)
27
+ {
28
+ this.runtime = ruby;
29
+ this.errors = array;
30
+ }
31
+
32
+ public void
33
+ warning(SAXParseException ex) throws SAXException
34
+ {
35
+ errors.append(XmlSyntaxError.createWarning(runtime, ex));
36
+ }
37
+
38
+ public void
39
+ error(SAXParseException ex) throws SAXException
40
+ {
41
+ errors.append(XmlSyntaxError.createError(runtime, ex));
42
+ }
43
+
44
+ public void
45
+ fatalError(SAXParseException ex) throws SAXException
46
+ {
47
+ throw ex;
48
+ }
72
49
 
73
50
  }
@@ -32,137 +32,143 @@ import org.w3c.dom.Node;
32
32
  /**
33
33
  * @author kares
34
34
  */
35
- public final class XalanDTMManagerPatch extends org.apache.xml.dtm.ref.DTMManagerDefault {
36
-
37
- /**
38
- * Given a W3C DOM node, try and return a DTM handle.
39
- * Note: calling this may be non-optimal, and there is no guarantee that
40
- * the node will be found in any particular DTM.
41
- *
42
- * @param node Non-null reference to a DOM node.
43
- *
44
- * @return a valid DTM handle.
45
- */
46
- @Override
47
- public /* synchronized */ int getDTMHandleFromNode(org.w3c.dom.Node node) {
48
- //if (node == null) // "node must be non-null for getDTMHandleFromNode!");
49
- // throw new IllegalArgumentException(XMLMessages.createXMLMessage(XMLErrorResources.ER_NODE_NON_NULL, null));
50
- assert node != null;
51
-
52
- if (node instanceof org.apache.xml.dtm.ref.DTMNodeProxy) {
53
- return ((org.apache.xml.dtm.ref.DTMNodeProxy) node).getDTMNodeNumber();
54
- }
35
+ public final class XalanDTMManagerPatch extends org.apache.xml.dtm.ref.DTMManagerDefault
36
+ {
37
+
38
+ /**
39
+ * Given a W3C DOM node, try and return a DTM handle.
40
+ * Note: calling this may be non-optimal, and there is no guarantee that
41
+ * the node will be found in any particular DTM.
42
+ *
43
+ * @param node Non-null reference to a DOM node.
44
+ *
45
+ * @return a valid DTM handle.
46
+ */
47
+ @Override
48
+ public /* synchronized */ int
49
+ getDTMHandleFromNode(org.w3c.dom.Node node)
50
+ {
51
+ //if (node == null) // "node must be non-null for getDTMHandleFromNode!");
52
+ // throw new IllegalArgumentException(XMLMessages.createXMLMessage(XMLErrorResources.ER_NODE_NON_NULL, null));
53
+ assert node != null;
54
+
55
+ if (node instanceof org.apache.xml.dtm.ref.DTMNodeProxy) {
56
+ return ((org.apache.xml.dtm.ref.DTMNodeProxy) node).getDTMNodeNumber();
57
+ }
55
58
 
56
- // Find the DOM2DTMs wrapped around this Document (if any)
57
- // and check whether they contain the Node in question.
58
- //
59
- // NOTE that since a DOM2DTM may represent a subtree rather
60
- // than a full document, we have to be prepared to check more
61
- // than one -- and there is no guarantee that we will find
62
- // one that contains ancestors or siblings of the node we're
63
- // seeking.
64
- //
65
- // %REVIEW% We could search for the one which contains this
66
- // node at the deepest level, and thus covers the widest
67
- // subtree, but that's going to entail additional work
68
- // checking more DTMs... and getHandleOfNode is not a
69
- // cheap operation in most implementations.
70
- //
71
- // TODO: %REVIEW% If overflow addressing, we may recheck a DTM
72
- // already examined. Ouch. But with the increased number of DTMs,
73
- // scanning back to check this is painful.
74
- // POSSIBLE SOLUTIONS:
75
- // Generate a list of _unique_ DTM objects?
76
- // Have each DTM cache last DOM node search?
77
- for(int i = 0; i < m_dtms.length; i++) {
78
- DTM thisDTM = m_dtms[i];
79
- if (thisDTM instanceof DOM2DTM) {
80
- int handle = ((DOM2DTM) thisDTM).getHandleOfNode(node);
81
- if (handle != DTM.NULL) {
82
- return handle;
83
- }
84
- }
59
+ // Find the DOM2DTMs wrapped around this Document (if any)
60
+ // and check whether they contain the Node in question.
61
+ //
62
+ // NOTE that since a DOM2DTM may represent a subtree rather
63
+ // than a full document, we have to be prepared to check more
64
+ // than one -- and there is no guarantee that we will find
65
+ // one that contains ancestors or siblings of the node we're
66
+ // seeking.
67
+ //
68
+ // %REVIEW% We could search for the one which contains this
69
+ // node at the deepest level, and thus covers the widest
70
+ // subtree, but that's going to entail additional work
71
+ // checking more DTMs... and getHandleOfNode is not a
72
+ // cheap operation in most implementations.
73
+ //
74
+ // TODO: %REVIEW% If overflow addressing, we may recheck a DTM
75
+ // already examined. Ouch. But with the increased number of DTMs,
76
+ // scanning back to check this is painful.
77
+ // POSSIBLE SOLUTIONS:
78
+ // Generate a list of _unique_ DTM objects?
79
+ // Have each DTM cache last DOM node search?
80
+ for (int i = 0; i < m_dtms.length; i++) {
81
+ DTM thisDTM = m_dtms[i];
82
+ if (thisDTM instanceof DOM2DTM) {
83
+ int handle = ((DOM2DTM) thisDTM).getHandleOfNode(node);
84
+ if (handle != DTM.NULL) {
85
+ return handle;
85
86
  }
87
+ }
88
+ }
86
89
 
87
- // Not found; generate a new DTM.
88
- //
89
- // %REVIEW% Is this really desirable, or should we return null
90
- // and make folks explicitly instantiate from a DOMSource? The
91
- // latter is more work but gives the caller the opportunity to
92
- // explicitly add the DTM to a DTMManager... and thus to know when
93
- // it can be discarded again, which is something we need to pay much
94
- // more attention to. (Especially since only DTMs which are assigned
95
- // to a manager can use the overflow addressing scheme.)
90
+ // Not found; generate a new DTM.
91
+ //
92
+ // %REVIEW% Is this really desirable, or should we return null
93
+ // and make folks explicitly instantiate from a DOMSource? The
94
+ // latter is more work but gives the caller the opportunity to
95
+ // explicitly add the DTM to a DTMManager... and thus to know when
96
+ // it can be discarded again, which is something we need to pay much
97
+ // more attention to. (Especially since only DTMs which are assigned
98
+ // to a manager can use the overflow addressing scheme.)
99
+ //
100
+ // %BUG% If the source node was a DOM2DTM$defaultNamespaceDeclarationNode
101
+ // and the DTM wasn't registered with this DTMManager, we will create
102
+ // a new DTM and _still_ not be able to find the node (since it will
103
+ // be resynthesized). Another reason to push hard on making all DTMs
104
+ // be managed DTMs.
105
+
106
+ // Since the real root of our tree may be a DocumentFragment, we need to
107
+ // use getParent to find the root, instead of getOwnerDocument. Otherwise
108
+ // DOM2DTM#getHandleOfNode will be very unhappy.
109
+ Node root = node;
110
+ int rootType = root.getNodeType();
111
+ Node p = (rootType == Node.ATTRIBUTE_NODE) ? ((org.w3c.dom.Attr) root).getOwnerElement() : root.getParentNode();
112
+ for (; p != null; p = p.getParentNode()) { root = p; }
113
+
114
+ // DOM2DTM dtm = (DOM2DTM) getDTM(new DOMSource(root), false, null);
115
+ DOM2DTM dtm = getDTM(new DOMSource(root), false, null/*, true, true*/);
116
+
117
+ int handle;
118
+
119
+ if (node instanceof org.apache.xml.dtm.ref.dom2dtm.DOM2DTMdefaultNamespaceDeclarationNode
120
+ || node instanceof DOM2DTMdefaultNamespaceDeclarationNode) {
121
+ // Can't return the same node since it's unique to a specific DTM,
122
+ // but can return the equivalent node -- find the corresponding
123
+ // Document Element, then ask it for the xml: namespace decl.
124
+ handle = dtm.getHandleOfNode(((org.w3c.dom.Attr) node).getOwnerElement());
125
+ handle = dtm.getAttributeNode(handle, node.getNamespaceURI(), node.getLocalName());
126
+ } else {
127
+ handle = dtm.getHandleOfNode(node);
128
+
129
+ rootType = root.getNodeType();
130
+ // Is Node actually within the same document? If not, don't search!
131
+ // This would be easier if m_root was always the Document node, but
132
+ // we decided to allow wrapping a DTM around a subtree.
133
+ if ((root == node) ||
134
+ (rootType == Node.DOCUMENT_NODE && root == node.getOwnerDocument()) ||
135
+ (rootType != Node.DOCUMENT_NODE && root.getOwnerDocument() == node.getOwnerDocument())
136
+ ) {
137
+ // If node _is_ in m_root's tree, find its handle
96
138
  //
97
- // %BUG% If the source node was a DOM2DTM$defaultNamespaceDeclarationNode
98
- // and the DTM wasn't registered with this DTMManager, we will create
99
- // a new DTM and _still_ not be able to find the node (since it will
100
- // be resynthesized). Another reason to push hard on making all DTMs
101
- // be managed DTMs.
102
-
103
- // Since the real root of our tree may be a DocumentFragment, we need to
104
- // use getParent to find the root, instead of getOwnerDocument. Otherwise
105
- // DOM2DTM#getHandleOfNode will be very unhappy.
106
- Node root = node; int rootType = root.getNodeType();
107
- Node p = (rootType == Node.ATTRIBUTE_NODE) ? ((org.w3c.dom.Attr) root).getOwnerElement() : root.getParentNode();
108
- for (; p != null; p = p.getParentNode()) root = p;
109
-
110
- // DOM2DTM dtm = (DOM2DTM) getDTM(new DOMSource(root), false, null);
111
- DOM2DTM dtm = getDTM(new DOMSource(root), false, null/*, true, true*/);
112
-
113
- int handle;
114
-
115
- if (node instanceof org.apache.xml.dtm.ref.dom2dtm.DOM2DTMdefaultNamespaceDeclarationNode
116
- || node instanceof DOM2DTMdefaultNamespaceDeclarationNode) {
117
- // Can't return the same node since it's unique to a specific DTM,
118
- // but can return the equivalent node -- find the corresponding
119
- // Document Element, then ask it for the xml: namespace decl.
120
- handle = dtm.getHandleOfNode(((org.w3c.dom.Attr) node).getOwnerElement());
121
- handle = dtm.getAttributeNode(handle, node.getNamespaceURI(), node.getLocalName());
122
- }
123
- else {
124
- handle = dtm.getHandleOfNode(node);
125
-
126
- rootType = root.getNodeType();
127
- // Is Node actually within the same document? If not, don't search!
128
- // This would be easier if m_root was always the Document node, but
129
- // we decided to allow wrapping a DTM around a subtree.
130
- if((root==node) ||
131
- (rootType==Node.DOCUMENT_NODE && root==node.getOwnerDocument()) ||
132
- (rootType!=Node.DOCUMENT_NODE && root.getOwnerDocument()==node.getOwnerDocument())
133
- )
134
- {
135
- // If node _is_ in m_root's tree, find its handle
136
- //
137
- // %OPT% This check may be improved significantly when DOM
138
- // Level 3 nodeKey and relative-order tests become
139
- // available!
140
- for (Node cursor = node; cursor != null;
141
- cursor = (cursor.getNodeType()!=Node.ATTRIBUTE_NODE)
142
- ? cursor.getParentNode()
143
- : ((org.w3c.dom.Attr)cursor).getOwnerElement()) {
144
- if (cursor==root) {
145
- // We know this node; find its handle.
146
- return (dtm).getHandleFromNode(node);
147
- }
148
- } // for ancestors of node
149
- } // if node and m_root in same Document
150
- }
151
-
152
- if (DTM.NULL == handle)
153
- throw new RuntimeException(XMLMessages.createXMLMessage(XMLErrorResources.ER_COULD_NOT_RESOLVE_NODE, null)); //"Could not resolve the node to a handle!");
139
+ // %OPT% This check may be improved significantly when DOM
140
+ // Level 3 nodeKey and relative-order tests become
141
+ // available!
142
+ for (Node cursor = node; cursor != null;
143
+ cursor = (cursor.getNodeType() != Node.ATTRIBUTE_NODE)
144
+ ? cursor.getParentNode()
145
+ : ((org.w3c.dom.Attr)cursor).getOwnerElement()) {
146
+ if (cursor == root) {
147
+ // We know this node; find its handle.
148
+ return (dtm).getHandleFromNode(node);
149
+ }
150
+ } // for ancestors of node
151
+ } // if node and m_root in same Document
152
+ }
154
153
 
155
- return handle;
154
+ if (DTM.NULL == handle) {
155
+ throw new RuntimeException(XMLMessages.createXMLMessage(XMLErrorResources.ER_COULD_NOT_RESOLVE_NODE,
156
+ null)); //"Could not resolve the node to a handle!");
156
157
  }
157
158
 
158
- private DOM2DTM getDTM(DOMSource source, boolean unique, DTMWSFilter whiteSpaceFilter/*, boolean incremental, boolean doIndexing*/) {
159
- int dtmPos = getFirstFreeDTMID();
160
- int documentID = dtmPos << IDENT_DTM_NODE_BITS;
159
+ return handle;
160
+ }
161
161
 
162
- DOM2DTM dtm = new DOM2DTM(this, source, documentID, whiteSpaceFilter, m_xsf, true);
162
+ private DOM2DTM
163
+ getDTM(DOMSource source, boolean unique, DTMWSFilter whiteSpaceFilter/*, boolean incremental, boolean doIndexing*/)
164
+ {
165
+ int dtmPos = getFirstFreeDTMID();
166
+ int documentID = dtmPos << IDENT_DTM_NODE_BITS;
163
167
 
164
- addDTM(dtm, dtmPos, 0);
165
- return dtm;
166
- }
168
+ DOM2DTM dtm = new DOM2DTM(this, source, documentID, whiteSpaceFilter, m_xsf, true);
169
+
170
+ addDTM(dtm, dtmPos, 0);
171
+ return dtm;
172
+ }
167
173
 
168
174
  }
@@ -1,35 +1,3 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2012:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
1
  package nokogiri.internals;
34
2
 
35
3
  /**
@@ -37,6 +5,7 @@ package nokogiri.internals;
37
5
  *
38
6
  * @author Patrick Mahoney <pat@polycrystal.org>
39
7
  */
40
- public interface XmlDeclHandler {
41
- public void xmlDecl(String version, String encoding, String standalone);
8
+ public interface XmlDeclHandler
9
+ {
10
+ public void xmlDecl(String version, String encoding, String standalone);
42
11
  }
@@ -1,35 +1,3 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2012:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
1
  package nokogiri.internals;
34
2
 
35
3
  import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
@@ -46,6 +14,7 @@ import org.jruby.RubyClass;
46
14
  import org.jruby.RubyFixnum;
47
15
  import org.jruby.exceptions.RaiseException;
48
16
  import org.jruby.runtime.ThreadContext;
17
+ import org.jruby.runtime.Helpers;
49
18
  import org.jruby.runtime.builtin.IRubyObject;
50
19
  import org.w3c.dom.Document;
51
20
  import org.w3c.dom.Node;
@@ -61,214 +30,244 @@ import nokogiri.XmlSyntaxError;
61
30
  * Parser class for XML DOM processing. This class parses the XML document
62
31
  * and creates the DOM tree on the Java side. The Ruby-side DOM tree is not built
63
32
  * at that point, because object creation is delayed for performance.
64
- *
33
+ *
65
34
  * @author sergio
66
35
  * @author Yoko Harada <yokolet@gmail.com>
67
36
  */
68
- public class XmlDomParserContext extends ParserContext {
37
+ public class XmlDomParserContext extends ParserContext
38
+ {
69
39
 
70
- protected static final String FEATURE_LOAD_EXTERNAL_DTD =
71
- "http://apache.org/xml/features/nonvalidating/load-external-dtd";
72
- protected static final String FEATURE_LOAD_DTD_GRAMMAR =
73
- "http://apache.org/xml/features/nonvalidating/load-dtd-grammar";
74
- protected static final String FEATURE_INCLUDE_IGNORABLE_WHITESPACE =
75
- "http://apache.org/xml/features/dom/include-ignorable-whitespace";
76
- protected static final String CONTINUE_AFTER_FATAL_ERROR =
77
- "http://apache.org/xml/features/continue-after-fatal-error";
78
- protected static final String FEATURE_NOT_EXPAND_ENTITY =
79
- "http://apache.org/xml/features/dom/create-entity-ref-nodes";
80
- protected static final String FEATURE_VALIDATION = "http://xml.org/sax/features/validation";
81
- private static final String XINCLUDE_FEATURE_ID = "http://apache.org/xml/features/xinclude";
82
- private static final String SECURITY_MANAGER = "http://apache.org/xml/properties/security-manager";
40
+ protected static final String FEATURE_LOAD_EXTERNAL_DTD =
41
+ "http://apache.org/xml/features/nonvalidating/load-external-dtd";
42
+ protected static final String FEATURE_LOAD_DTD_GRAMMAR =
43
+ "http://apache.org/xml/features/nonvalidating/load-dtd-grammar";
44
+ protected static final String FEATURE_INCLUDE_IGNORABLE_WHITESPACE =
45
+ "http://apache.org/xml/features/dom/include-ignorable-whitespace";
46
+ protected static final String CONTINUE_AFTER_FATAL_ERROR =
47
+ "http://apache.org/xml/features/continue-after-fatal-error";
48
+ protected static final String FEATURE_NOT_EXPAND_ENTITY =
49
+ "http://apache.org/xml/features/dom/create-entity-ref-nodes";
50
+ protected static final String FEATURE_VALIDATION = "http://xml.org/sax/features/validation";
51
+ private static final String XINCLUDE_FEATURE_ID = "http://apache.org/xml/features/xinclude";
52
+ private static final String SECURITY_MANAGER = "http://apache.org/xml/properties/security-manager";
83
53
 
84
- protected ParserContext.Options options;
85
- protected DOMParser parser;
86
- protected NokogiriErrorHandler errorHandler;
87
- protected IRubyObject ruby_encoding;
54
+ protected ParserContext.Options options;
55
+ protected DOMParser parser;
56
+ protected NokogiriErrorHandler errorHandler;
57
+ protected IRubyObject ruby_encoding;
88
58
 
89
- public XmlDomParserContext(Ruby runtime, IRubyObject options) {
90
- this(runtime, runtime.getNil(), options);
91
- }
92
-
93
- public XmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options) {
94
- super(runtime);
95
- this.options = new ParserContext.Options(RubyFixnum.fix2long(options));
96
- java_encoding = NokogiriHelpers.getValidEncoding(encoding);
97
- ruby_encoding = encoding;
98
- initErrorHandler();
99
- initParser(runtime);
100
- }
59
+ public
60
+ XmlDomParserContext(Ruby runtime, IRubyObject options)
61
+ {
62
+ this(runtime, runtime.getNil(), options);
63
+ }
101
64
 
102
- protected void initErrorHandler() {
103
- if (options.recover) {
104
- errorHandler = new NokogiriNonStrictErrorHandler(options.noError, options.noWarning);
105
- } else {
106
- errorHandler = new NokogiriStrictErrorHandler(options.noError, options.noWarning);
107
- }
108
- }
65
+ public
66
+ XmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options)
67
+ {
68
+ super(runtime);
69
+ this.options = new ParserContext.Options(RubyFixnum.fix2long(options));
70
+ java_encoding = NokogiriHelpers.getValidEncodingOrNull(encoding);
71
+ ruby_encoding = encoding;
72
+ initErrorHandler();
73
+ initParser(runtime);
74
+ }
109
75
 
110
- protected void initParser(Ruby runtime) {
111
- if (options.xInclude) {
112
- System.setProperty("org.apache.xerces.xni.parser.XMLParserConfiguration",
113
- "org.apache.xerces.parsers.XIncludeParserConfiguration");
114
- }
76
+ protected void
77
+ initErrorHandler()
78
+ {
79
+ if (options.recover) {
80
+ errorHandler = new NokogiriNonStrictErrorHandler(options.noError, options.noWarning);
81
+ } else {
82
+ errorHandler = new NokogiriStrictErrorHandler(options.noError, options.noWarning);
83
+ }
84
+ }
115
85
 
116
- parser = new NokogiriDomParser(options);
117
- parser.setErrorHandler(errorHandler);
86
+ protected void
87
+ initParser(Ruby runtime)
88
+ {
89
+ if (options.xInclude) {
90
+ System.setProperty("org.apache.xerces.xni.parser.XMLParserConfiguration",
91
+ "org.apache.xerces.parsers.XIncludeParserConfiguration");
92
+ }
118
93
 
119
- // Fix for Issue#586. This limits entity expansion up to 100000 and nodes up to 3000.
120
- setProperty(SECURITY_MANAGER, new org.apache.xerces.util.SecurityManager());
94
+ parser = new NokogiriDomParser(options);
95
+ parser.setErrorHandler(errorHandler);
121
96
 
122
- if (options.noBlanks) {
123
- setFeature(FEATURE_INCLUDE_IGNORABLE_WHITESPACE, false);
124
- }
97
+ // Fix for Issue#586. This limits entity expansion up to 100000 and nodes up to 3000.
98
+ setProperty(SECURITY_MANAGER, new org.apache.xerces.util.SecurityManager());
125
99
 
126
- if (options.recover) {
127
- setFeature(CONTINUE_AFTER_FATAL_ERROR, true);
128
- }
100
+ if (options.noBlanks) {
101
+ setFeature(FEATURE_INCLUDE_IGNORABLE_WHITESPACE, false);
102
+ }
129
103
 
130
- if (options.dtdValid) {
131
- setFeature(FEATURE_VALIDATION, true);
132
- }
104
+ if (options.recover) {
105
+ setFeature(CONTINUE_AFTER_FATAL_ERROR, true);
106
+ }
133
107
 
134
- if (!options.noEnt) {
135
- setFeature(FEATURE_NOT_EXPAND_ENTITY, true);
136
- }
137
- // If we turn off loading of external DTDs completely, we don't
138
- // get the publicID. Instead of turning off completely, we use
139
- // an entity resolver that returns empty documents.
140
- if (options.dtdLoad) {
141
- setFeature(FEATURE_LOAD_EXTERNAL_DTD, true);
142
- setFeature(FEATURE_LOAD_DTD_GRAMMAR, true);
143
- }
144
- parser.setEntityResolver(new NokogiriEntityResolver(runtime, errorHandler, options));
108
+ if (options.dtdValid) {
109
+ setFeature(FEATURE_VALIDATION, true);
145
110
  }
146
111
 
147
- /**
148
- * Convenience method that catches and ignores SAXException
149
- * (unrecognized and unsupported exceptions).
150
- */
151
- protected void setFeature(String feature, boolean value) {
152
- try {
153
- parser.setFeature(feature, value);
154
- } catch (SAXException e) {
155
- // ignore
156
- }
112
+ if (!options.noEnt) {
113
+ setFeature(FEATURE_NOT_EXPAND_ENTITY, true);
157
114
  }
115
+ // If we turn off loading of external DTDs completely, we don't
116
+ // get the publicID. Instead of turning off completely, we use
117
+ // an entity resolver that returns empty documents.
118
+ if (options.dtdLoad) {
119
+ setFeature(FEATURE_LOAD_EXTERNAL_DTD, true);
120
+ setFeature(FEATURE_LOAD_DTD_GRAMMAR, true);
121
+ }
122
+ parser.setEntityResolver(new NokogiriEntityResolver(runtime, errorHandler, options));
123
+ }
158
124
 
159
- /**
160
- * Convenience method that catches and ignores SAXException
161
- * (unrecognized and unsupported exceptions).
162
- */
163
- protected void setProperty(String property, Object value) {
164
- try {
165
- parser.setProperty(property, value);
166
- } catch (SAXException e) {
167
- // ignore
168
- }
125
+ /**
126
+ * Convenience method that catches and ignores SAXException
127
+ * (unrecognized and unsupported exceptions).
128
+ */
129
+ protected void
130
+ setFeature(String feature, boolean value)
131
+ {
132
+ try {
133
+ parser.setFeature(feature, value);
134
+ } catch (SAXException e) {
135
+ // ignore
169
136
  }
137
+ }
170
138
 
171
- public void addErrorsIfNecessary(ThreadContext context, XmlDocument doc) {
172
- doc.setInstanceVariable("@errors", mapErrors(context, errorHandler));
139
+ /**
140
+ * Convenience method that catches and ignores SAXException
141
+ * (unrecognized and unsupported exceptions).
142
+ */
143
+ protected void
144
+ setProperty(String property, Object value)
145
+ {
146
+ try {
147
+ parser.setProperty(property, value);
148
+ } catch (SAXException e) {
149
+ // ignore
173
150
  }
151
+ }
174
152
 
153
+ public void
154
+ addErrorsIfNecessary(ThreadContext context, XmlDocument doc)
155
+ {
156
+ doc.setInstanceVariable("@errors", mapErrors(context, errorHandler));
157
+ }
175
158
 
176
- public static RubyArray mapErrors(ThreadContext context, NokogiriErrorHandler errorHandler) {
177
- final Ruby runtime = context.runtime;
178
- final List<Exception> errors = errorHandler.getErrors();
179
- final IRubyObject[] errorsAry = new IRubyObject[errors.size()];
180
- for (int i = 0; i < errors.size(); i++) {
181
- XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(runtime);
182
- xmlSyntaxError.setException(errors.get(i));
183
- errorsAry[i] = xmlSyntaxError;
184
- }
185
- return runtime.newArrayNoCopy(errorsAry);
186
- }
187
159
 
188
- public XmlDocument getDocumentWithErrorsOrRaiseException(ThreadContext context, RubyClass klazz, Exception ex) {
189
- if (options.recover) {
190
- XmlDocument xmlDocument = getInterruptedOrNewXmlDocument(context, klazz);
191
- this.addErrorsIfNecessary(context, xmlDocument);
192
- XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
193
- xmlSyntaxError.setException(ex);
194
- ((RubyArray) xmlDocument.getInstanceVariable("@errors")).append(xmlSyntaxError);
195
- return xmlDocument;
196
- } else {
197
- XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
198
- xmlSyntaxError.setException(ex);
199
- throw new RaiseException(xmlSyntaxError);
200
- }
160
+ public static RubyArray
161
+ mapErrors(ThreadContext context, NokogiriErrorHandler errorHandler)
162
+ {
163
+ final Ruby runtime = context.runtime;
164
+ final List<Exception> errors = errorHandler.getErrors();
165
+ final IRubyObject[] errorsAry = new IRubyObject[errors.size()];
166
+ for (int i = 0; i < errors.size(); i++) {
167
+ XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(runtime);
168
+ xmlSyntaxError.setException(errors.get(i));
169
+ errorsAry[i] = xmlSyntaxError;
201
170
  }
202
-
203
- private XmlDocument getInterruptedOrNewXmlDocument(ThreadContext context, RubyClass klass) {
204
- Document document = parser.getDocument();
205
- XmlDocument xmlDocument = new XmlDocument(context.runtime, klass, document);
206
- xmlDocument.setEncoding(ruby_encoding);
207
- return xmlDocument;
171
+ return runtime.newArrayNoCopy(errorsAry);
172
+ }
173
+
174
+ public XmlDocument
175
+ getDocumentWithErrorsOrRaiseException(ThreadContext context, RubyClass klazz, Exception ex)
176
+ {
177
+ if (options.recover) {
178
+ XmlDocument xmlDocument = getInterruptedOrNewXmlDocument(context, klazz);
179
+ this.addErrorsIfNecessary(context, xmlDocument);
180
+ XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
181
+ xmlSyntaxError.setException(ex);
182
+ ((RubyArray) xmlDocument.getInstanceVariable("@errors")).append(xmlSyntaxError);
183
+ return xmlDocument;
184
+ } else {
185
+ XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
186
+ xmlSyntaxError.setException(ex);
187
+ throw xmlSyntaxError.toThrowable();
208
188
  }
189
+ }
209
190
 
210
- /**
211
- * This method is broken out so that HtmlDomParserContext can
212
- * override it.
213
- */
214
- protected XmlDocument wrapDocument(ThreadContext context, RubyClass klass, Document doc) {
215
- XmlDocument xmlDocument = new XmlDocument(context.runtime, klass, doc);
216
- xmlDocument.setEncoding(ruby_encoding);
191
+ private XmlDocument
192
+ getInterruptedOrNewXmlDocument(ThreadContext context, RubyClass klass)
193
+ {
194
+ Document document = parser.getDocument();
195
+ XmlDocument xmlDocument = new XmlDocument(context.runtime, klass, document);
196
+ xmlDocument.setEncoding(ruby_encoding);
197
+ return xmlDocument;
198
+ }
217
199
 
218
- if (options.dtdLoad) {
219
- IRubyObject dtd = XmlDtd.newFromExternalSubset(context.runtime, doc);
220
- if (!dtd.isNil()) {
221
- doc.setUserData(XmlDocument.DTD_EXTERNAL_SUBSET, (XmlDtd) dtd, null);
222
- }
223
- }
224
- return xmlDocument;
200
+ /**
201
+ * This method is broken out so that HtmlDomParserContext can
202
+ * override it.
203
+ */
204
+ protected XmlDocument
205
+ wrapDocument(ThreadContext context, RubyClass klass, Document doc)
206
+ {
207
+ XmlDocument xmlDocument = new XmlDocument(context.runtime, klass, doc);
208
+ Helpers.invoke(context, xmlDocument, "initialize");
209
+ xmlDocument.setEncoding(ruby_encoding);
210
+
211
+ if (options.dtdLoad) {
212
+ IRubyObject dtd = XmlDtd.newFromExternalSubset(context.runtime, doc);
213
+ if (!dtd.isNil()) {
214
+ doc.setUserData(XmlDocument.DTD_EXTERNAL_SUBSET, (XmlDtd) dtd, null);
215
+ }
225
216
  }
217
+ return xmlDocument;
218
+ }
226
219
 
227
- /**
228
- * Must call setInputSource() before this method.
229
- */
230
- public XmlDocument parse(ThreadContext context, RubyClass klass, IRubyObject url) {
231
- XmlDocument xmlDoc;
232
- try {
233
- Document doc = do_parse();
234
- xmlDoc = wrapDocument(context, klass, doc);
235
- xmlDoc.setUrl(url);
236
- addErrorsIfNecessary(context, xmlDoc);
237
- return xmlDoc;
238
- } catch (SAXException e) {
239
- return getDocumentWithErrorsOrRaiseException(context, klass, e);
240
- } catch (IOException e) {
241
- return getDocumentWithErrorsOrRaiseException(context, klass, e);
242
- }
220
+ /**
221
+ * Must call setInputSource() before this method.
222
+ */
223
+ public XmlDocument
224
+ parse(ThreadContext context, RubyClass klass, IRubyObject url)
225
+ {
226
+ XmlDocument xmlDoc;
227
+ try {
228
+ Document doc = do_parse();
229
+ xmlDoc = wrapDocument(context, klass, doc);
230
+ xmlDoc.setUrl(url);
231
+ addErrorsIfNecessary(context, xmlDoc);
232
+ return xmlDoc;
233
+ } catch (SAXException e) {
234
+ return getDocumentWithErrorsOrRaiseException(context, klass, e);
235
+ } catch (IOException e) {
236
+ return getDocumentWithErrorsOrRaiseException(context, klass, e);
243
237
  }
238
+ }
244
239
 
245
- protected Document do_parse() throws SAXException, IOException {
246
- try {
247
- parser.parse(getInputSource());
248
- } catch (NullPointerException ex) {
249
- // FIXME: this is really a hack to fix #838. Xerces will throw a NullPointerException
250
- // if we tried to parse '<? ?>'. We should submit a patch to Xerces.
251
- }
252
- if (options.noBlanks) {
253
- List<Node> emptyNodes = new ArrayList<Node>();
254
- findEmptyTexts(parser.getDocument(), emptyNodes);
255
- if (emptyNodes.size() > 0) {
256
- for (Node node : emptyNodes) {
257
- node.getParentNode().removeChild(node);
258
- }
259
- }
260
- }
261
- return parser.getDocument();
240
+ protected Document
241
+ do_parse() throws SAXException, IOException
242
+ {
243
+ try {
244
+ parser.parse(getInputSource());
245
+ } catch (NullPointerException ex) {
246
+ // FIXME: this is really a hack to fix #838. Xerces will throw a NullPointerException
247
+ // if we tried to parse '<? ?>'. We should submit a patch to Xerces.
262
248
  }
263
-
264
- private static void findEmptyTexts(Node node, List<Node> emptyNodes) {
265
- if (node.getNodeType() == Node.TEXT_NODE && isBlank(node.getTextContent())) {
266
- emptyNodes.add(node);
267
- } else {
268
- NodeList children = node.getChildNodes();
269
- for (int i=0; i < children.getLength(); i++) {
270
- findEmptyTexts(children.item(i), emptyNodes);
271
- }
249
+ if (options.noBlanks) {
250
+ List<Node> emptyNodes = new ArrayList<Node>();
251
+ findEmptyTexts(parser.getDocument(), emptyNodes);
252
+ if (emptyNodes.size() > 0) {
253
+ for (Node node : emptyNodes) {
254
+ node.getParentNode().removeChild(node);
272
255
  }
256
+ }
257
+ }
258
+ return parser.getDocument();
259
+ }
260
+
261
+ private static void
262
+ findEmptyTexts(Node node, List<Node> emptyNodes)
263
+ {
264
+ if (node.getNodeType() == Node.TEXT_NODE && isBlank(node.getTextContent())) {
265
+ emptyNodes.add(node);
266
+ } else {
267
+ NodeList children = node.getChildNodes();
268
+ for (int i = 0; i < children.getLength(); i++) {
269
+ findEmptyTexts(children.item(i), emptyNodes);
270
+ }
273
271
  }
272
+ }
274
273
  }