nokogiri 1.11.0.rc1-java → 1.11.2-java

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (188)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/LICENSE.md +1 -1
  5. data/README.md +171 -94
  6. data/ext/java/nokogiri/EncodingHandler.java +78 -59
  7. data/ext/java/nokogiri/HtmlDocument.java +137 -114
  8. data/ext/java/nokogiri/HtmlElementDescription.java +104 -87
  9. data/ext/java/nokogiri/HtmlEntityLookup.java +31 -26
  10. data/ext/java/nokogiri/HtmlSaxParserContext.java +220 -192
  11. data/ext/java/nokogiri/HtmlSaxPushParser.java +164 -139
  12. data/ext/java/nokogiri/NokogiriService.java +597 -526
  13. data/ext/java/nokogiri/XmlAttr.java +120 -96
  14. data/ext/java/nokogiri/XmlAttributeDecl.java +97 -76
  15. data/ext/java/nokogiri/XmlCdata.java +35 -26
  16. data/ext/java/nokogiri/XmlComment.java +48 -37
  17. data/ext/java/nokogiri/XmlDocument.java +642 -540
  18. data/ext/java/nokogiri/XmlDocumentFragment.java +127 -107
  19. data/ext/java/nokogiri/XmlDtd.java +450 -384
  20. data/ext/java/nokogiri/XmlElement.java +25 -18
  21. data/ext/java/nokogiri/XmlElementContent.java +345 -286
  22. data/ext/java/nokogiri/XmlElementDecl.java +126 -95
  23. data/ext/java/nokogiri/XmlEntityDecl.java +121 -97
  24. data/ext/java/nokogiri/XmlEntityReference.java +51 -42
  25. data/ext/java/nokogiri/XmlNamespace.java +177 -145
  26. data/ext/java/nokogiri/XmlNode.java +1843 -1590
  27. data/ext/java/nokogiri/XmlNodeSet.java +361 -299
  28. data/ext/java/nokogiri/XmlProcessingInstruction.java +49 -39
  29. data/ext/java/nokogiri/XmlReader.java +513 -418
  30. data/ext/java/nokogiri/XmlRelaxng.java +92 -72
  31. data/ext/java/nokogiri/XmlSaxParserContext.java +330 -280
  32. data/ext/java/nokogiri/XmlSaxPushParser.java +229 -190
  33. data/ext/java/nokogiri/XmlSchema.java +335 -210
  34. data/ext/java/nokogiri/XmlSyntaxError.java +113 -87
  35. data/ext/java/nokogiri/XmlText.java +57 -46
  36. data/ext/java/nokogiri/XmlXpathContext.java +242 -178
  37. data/ext/java/nokogiri/XsltStylesheet.java +282 -239
  38. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  39. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +203 -160
  40. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  41. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  42. data/ext/java/nokogiri/internals/NokogiriDomParser.java +65 -50
  43. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  44. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +25 -18
  45. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -254
  46. data/ext/java/nokogiri/internals/NokogiriHelpers.java +738 -622
  47. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +186 -143
  48. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +81 -59
  49. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +66 -49
  50. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +86 -69
  51. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +44 -29
  52. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +121 -48
  53. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -22
  54. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +25 -17
  55. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +57 -42
  56. data/ext/java/nokogiri/internals/ParserContext.java +206 -179
  57. data/ext/java/nokogiri/internals/ReaderNode.java +478 -371
  58. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -707
  59. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +28 -19
  60. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  61. data/ext/java/nokogiri/internals/XmlDeclHandler.java +5 -4
  62. data/ext/java/nokogiri/internals/XmlDomParserContext.java +208 -177
  63. data/ext/java/nokogiri/internals/XmlSaxParser.java +24 -17
  64. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  65. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  66. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  67. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  68. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  78. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  81. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  82. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  83. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  84. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  85. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  86. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  87. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  88. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  89. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  90. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  91. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  92. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -570
  93. data/ext/nokogiri/depend +37 -358
  94. data/ext/nokogiri/extconf.rb +585 -374
  95. data/ext/nokogiri/html_document.c +78 -82
  96. data/ext/nokogiri/html_element_description.c +84 -71
  97. data/ext/nokogiri/html_entity_lookup.c +21 -16
  98. data/ext/nokogiri/html_sax_parser_context.c +69 -66
  99. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  100. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  101. data/ext/nokogiri/nokogiri.c +192 -93
  102. data/ext/nokogiri/test_global_handlers.c +40 -0
  103. data/ext/nokogiri/xml_attr.c +15 -15
  104. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  105. data/ext/nokogiri/xml_cdata.c +13 -18
  106. data/ext/nokogiri/xml_comment.c +19 -26
  107. data/ext/nokogiri/xml_document.c +225 -163
  108. data/ext/nokogiri/xml_document_fragment.c +13 -15
  109. data/ext/nokogiri/xml_dtd.c +54 -48
  110. data/ext/nokogiri/xml_element_content.c +30 -27
  111. data/ext/nokogiri/xml_element_decl.c +22 -22
  112. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  113. data/ext/nokogiri/xml_entity_decl.c +32 -30
  114. data/ext/nokogiri/xml_entity_reference.c +16 -18
  115. data/ext/nokogiri/xml_namespace.c +56 -49
  116. data/ext/nokogiri/xml_node.c +338 -286
  117. data/ext/nokogiri/xml_node_set.c +168 -156
  118. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  119. data/ext/nokogiri/xml_reader.c +195 -172
  120. data/ext/nokogiri/xml_relax_ng.c +52 -28
  121. data/ext/nokogiri/xml_sax_parser.c +118 -118
  122. data/ext/nokogiri/xml_sax_parser_context.c +103 -86
  123. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  124. data/ext/nokogiri/xml_schema.c +111 -34
  125. data/ext/nokogiri/xml_syntax_error.c +42 -21
  126. data/ext/nokogiri/xml_text.c +13 -17
  127. data/ext/nokogiri/xml_xpath_context.c +206 -123
  128. data/ext/nokogiri/xslt_stylesheet.c +158 -161
  129. data/lib/nokogiri.rb +4 -8
  130. data/lib/nokogiri/css/parser.rb +62 -62
  131. data/lib/nokogiri/css/parser.y +2 -2
  132. data/lib/nokogiri/css/parser_extras.rb +38 -36
  133. data/lib/nokogiri/css/xpath_visitor.rb +70 -42
  134. data/lib/nokogiri/extension.rb +26 -0
  135. data/lib/nokogiri/html/document.rb +12 -26
  136. data/lib/nokogiri/html/document_fragment.rb +15 -15
  137. data/lib/nokogiri/nokogiri.jar +0 -0
  138. data/lib/nokogiri/version.rb +2 -148
  139. data/lib/nokogiri/version/constant.rb +5 -0
  140. data/lib/nokogiri/version/info.rb +205 -0
  141. data/lib/nokogiri/xml/builder.rb +2 -2
  142. data/lib/nokogiri/xml/document.rb +48 -18
  143. data/lib/nokogiri/xml/document_fragment.rb +4 -6
  144. data/lib/nokogiri/xml/node.rb +599 -279
  145. data/lib/nokogiri/xml/parse_options.rb +6 -0
  146. data/lib/nokogiri/xml/reader.rb +2 -9
  147. data/lib/nokogiri/xml/relax_ng.rb +6 -2
  148. data/lib/nokogiri/xml/schema.rb +12 -4
  149. data/lib/nokogiri/xml/searchable.rb +24 -16
  150. data/lib/nokogiri/xml/xpath.rb +1 -3
  151. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  152. metadata +87 -158
  153. data/ext/nokogiri/html_document.h +0 -10
  154. data/ext/nokogiri/html_element_description.h +0 -10
  155. data/ext/nokogiri/html_entity_lookup.h +0 -8
  156. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  157. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  158. data/ext/nokogiri/nokogiri.h +0 -122
  159. data/ext/nokogiri/xml_attr.h +0 -9
  160. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  161. data/ext/nokogiri/xml_cdata.h +0 -9
  162. data/ext/nokogiri/xml_comment.h +0 -9
  163. data/ext/nokogiri/xml_document.h +0 -23
  164. data/ext/nokogiri/xml_document_fragment.h +0 -10
  165. data/ext/nokogiri/xml_dtd.h +0 -10
  166. data/ext/nokogiri/xml_element_content.h +0 -10
  167. data/ext/nokogiri/xml_element_decl.h +0 -9
  168. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  169. data/ext/nokogiri/xml_entity_decl.h +0 -10
  170. data/ext/nokogiri/xml_entity_reference.h +0 -9
  171. data/ext/nokogiri/xml_io.c +0 -61
  172. data/ext/nokogiri/xml_io.h +0 -11
  173. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  174. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  175. data/ext/nokogiri/xml_namespace.h +0 -14
  176. data/ext/nokogiri/xml_node.h +0 -13
  177. data/ext/nokogiri/xml_node_set.h +0 -12
  178. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  179. data/ext/nokogiri/xml_reader.h +0 -10
  180. data/ext/nokogiri/xml_relax_ng.h +0 -9
  181. data/ext/nokogiri/xml_sax_parser.h +0 -39
  182. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  183. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  184. data/ext/nokogiri/xml_schema.h +0 -9
  185. data/ext/nokogiri/xml_syntax_error.h +0 -13
  186. data/ext/nokogiri/xml_text.h +0 -9
  187. data/ext/nokogiri/xml_xpath_context.h +0 -10
  188. data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -17,10 +17,10 @@
17
17
  * distribute, sublicense, and/or sell copies of the Software, and to
18
18
  * permit persons to whom the Software is furnished to do so, subject to
19
19
  * the following conditions:
20
- *
20
+ *
21
21
  * The above copyright notice and this permission notice shall be
22
22
  * included in all copies or substantial portions of the Software.
23
- *
23
+ *
24
24
  * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
25
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
26
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -44,30 +44,39 @@ import org.xml.sax.SAXParseException;
44
44
 
45
45
  /**
46
46
  * Error handler for Relax and W3C XML Schema.
47
- *
47
+ *
48
48
  * @author sergio
49
49
  * @author Yoko Harada <yokolet@gmail.com>
50
50
  */
51
- public class SchemaErrorHandler implements ErrorHandler {
51
+ public class SchemaErrorHandler implements ErrorHandler
52
+ {
52
53
 
53
- private final Ruby runtime;
54
- final RubyArray errors;
54
+ private final Ruby runtime;
55
+ final RubyArray errors;
55
56
 
56
- public SchemaErrorHandler(Ruby ruby, RubyArray array) {
57
- this.runtime = ruby;
58
- this.errors = array;
59
- }
57
+ public
58
+ SchemaErrorHandler(Ruby ruby, RubyArray array)
59
+ {
60
+ this.runtime = ruby;
61
+ this.errors = array;
62
+ }
60
63
 
61
- public void warning(SAXParseException ex) throws SAXException {
62
- errors.append( XmlSyntaxError.createWarning(runtime, ex) );
63
- }
64
+ public void
65
+ warning(SAXParseException ex) throws SAXException
66
+ {
67
+ errors.append(XmlSyntaxError.createWarning(runtime, ex));
68
+ }
64
69
 
65
- public void error(SAXParseException ex) throws SAXException {
66
- errors.append( XmlSyntaxError.createError(runtime, ex) );
67
- }
70
+ public void
71
+ error(SAXParseException ex) throws SAXException
72
+ {
73
+ errors.append(XmlSyntaxError.createError(runtime, ex));
74
+ }
68
75
 
69
- public void fatalError(SAXParseException ex) throws SAXException {
70
- throw ex;
71
- }
76
+ public void
77
+ fatalError(SAXParseException ex) throws SAXException
78
+ {
79
+ throw ex;
80
+ }
72
81
 
73
82
  }
@@ -32,137 +32,143 @@ import org.w3c.dom.Node;
32
32
  /**
33
33
  * @author kares
34
34
  */
35
- public final class XalanDTMManagerPatch extends org.apache.xml.dtm.ref.DTMManagerDefault {
36
-
37
- /**
38
- * Given a W3C DOM node, try and return a DTM handle.
39
- * Note: calling this may be non-optimal, and there is no guarantee that
40
- * the node will be found in any particular DTM.
41
- *
42
- * @param node Non-null reference to a DOM node.
43
- *
44
- * @return a valid DTM handle.
45
- */
46
- @Override
47
- public /* synchronized */ int getDTMHandleFromNode(org.w3c.dom.Node node) {
48
- //if (node == null) // "node must be non-null for getDTMHandleFromNode!");
49
- // throw new IllegalArgumentException(XMLMessages.createXMLMessage(XMLErrorResources.ER_NODE_NON_NULL, null));
50
- assert node != null;
51
-
52
- if (node instanceof org.apache.xml.dtm.ref.DTMNodeProxy) {
53
- return ((org.apache.xml.dtm.ref.DTMNodeProxy) node).getDTMNodeNumber();
54
- }
35
+ public final class XalanDTMManagerPatch extends org.apache.xml.dtm.ref.DTMManagerDefault
36
+ {
37
+
38
+ /**
39
+ * Given a W3C DOM node, try and return a DTM handle.
40
+ * Note: calling this may be non-optimal, and there is no guarantee that
41
+ * the node will be found in any particular DTM.
42
+ *
43
+ * @param node Non-null reference to a DOM node.
44
+ *
45
+ * @return a valid DTM handle.
46
+ */
47
+ @Override
48
+ public /* synchronized */ int
49
+ getDTMHandleFromNode(org.w3c.dom.Node node)
50
+ {
51
+ //if (node == null) // "node must be non-null for getDTMHandleFromNode!");
52
+ // throw new IllegalArgumentException(XMLMessages.createXMLMessage(XMLErrorResources.ER_NODE_NON_NULL, null));
53
+ assert node != null;
54
+
55
+ if (node instanceof org.apache.xml.dtm.ref.DTMNodeProxy) {
56
+ return ((org.apache.xml.dtm.ref.DTMNodeProxy) node).getDTMNodeNumber();
57
+ }
55
58
 
56
- // Find the DOM2DTMs wrapped around this Document (if any)
57
- // and check whether they contain the Node in question.
58
- //
59
- // NOTE that since a DOM2DTM may represent a subtree rather
60
- // than a full document, we have to be prepared to check more
61
- // than one -- and there is no guarantee that we will find
62
- // one that contains ancestors or siblings of the node we're
63
- // seeking.
64
- //
65
- // %REVIEW% We could search for the one which contains this
66
- // node at the deepest level, and thus covers the widest
67
- // subtree, but that's going to entail additional work
68
- // checking more DTMs... and getHandleOfNode is not a
69
- // cheap operation in most implementations.
70
- //
71
- // TODO: %REVIEW% If overflow addressing, we may recheck a DTM
72
- // already examined. Ouch. But with the increased number of DTMs,
73
- // scanning back to check this is painful.
74
- // POSSIBLE SOLUTIONS:
75
- // Generate a list of _unique_ DTM objects?
76
- // Have each DTM cache last DOM node search?
77
- for(int i = 0; i < m_dtms.length; i++) {
78
- DTM thisDTM = m_dtms[i];
79
- if (thisDTM instanceof DOM2DTM) {
80
- int handle = ((DOM2DTM) thisDTM).getHandleOfNode(node);
81
- if (handle != DTM.NULL) {
82
- return handle;
83
- }
84
- }
59
+ // Find the DOM2DTMs wrapped around this Document (if any)
60
+ // and check whether they contain the Node in question.
61
+ //
62
+ // NOTE that since a DOM2DTM may represent a subtree rather
63
+ // than a full document, we have to be prepared to check more
64
+ // than one -- and there is no guarantee that we will find
65
+ // one that contains ancestors or siblings of the node we're
66
+ // seeking.
67
+ //
68
+ // %REVIEW% We could search for the one which contains this
69
+ // node at the deepest level, and thus covers the widest
70
+ // subtree, but that's going to entail additional work
71
+ // checking more DTMs... and getHandleOfNode is not a
72
+ // cheap operation in most implementations.
73
+ //
74
+ // TODO: %REVIEW% If overflow addressing, we may recheck a DTM
75
+ // already examined. Ouch. But with the increased number of DTMs,
76
+ // scanning back to check this is painful.
77
+ // POSSIBLE SOLUTIONS:
78
+ // Generate a list of _unique_ DTM objects?
79
+ // Have each DTM cache last DOM node search?
80
+ for (int i = 0; i < m_dtms.length; i++) {
81
+ DTM thisDTM = m_dtms[i];
82
+ if (thisDTM instanceof DOM2DTM) {
83
+ int handle = ((DOM2DTM) thisDTM).getHandleOfNode(node);
84
+ if (handle != DTM.NULL) {
85
+ return handle;
85
86
  }
87
+ }
88
+ }
86
89
 
87
- // Not found; generate a new DTM.
88
- //
89
- // %REVIEW% Is this really desirable, or should we return null
90
- // and make folks explicitly instantiate from a DOMSource? The
91
- // latter is more work but gives the caller the opportunity to
92
- // explicitly add the DTM to a DTMManager... and thus to know when
93
- // it can be discarded again, which is something we need to pay much
94
- // more attention to. (Especially since only DTMs which are assigned
95
- // to a manager can use the overflow addressing scheme.)
90
+ // Not found; generate a new DTM.
91
+ //
92
+ // %REVIEW% Is this really desirable, or should we return null
93
+ // and make folks explicitly instantiate from a DOMSource? The
94
+ // latter is more work but gives the caller the opportunity to
95
+ // explicitly add the DTM to a DTMManager... and thus to know when
96
+ // it can be discarded again, which is something we need to pay much
97
+ // more attention to. (Especially since only DTMs which are assigned
98
+ // to a manager can use the overflow addressing scheme.)
99
+ //
100
+ // %BUG% If the source node was a DOM2DTM$defaultNamespaceDeclarationNode
101
+ // and the DTM wasn't registered with this DTMManager, we will create
102
+ // a new DTM and _still_ not be able to find the node (since it will
103
+ // be resynthesized). Another reason to push hard on making all DTMs
104
+ // be managed DTMs.
105
+
106
+ // Since the real root of our tree may be a DocumentFragment, we need to
107
+ // use getParent to find the root, instead of getOwnerDocument. Otherwise
108
+ // DOM2DTM#getHandleOfNode will be very unhappy.
109
+ Node root = node;
110
+ int rootType = root.getNodeType();
111
+ Node p = (rootType == Node.ATTRIBUTE_NODE) ? ((org.w3c.dom.Attr) root).getOwnerElement() : root.getParentNode();
112
+ for (; p != null; p = p.getParentNode()) { root = p; }
113
+
114
+ // DOM2DTM dtm = (DOM2DTM) getDTM(new DOMSource(root), false, null);
115
+ DOM2DTM dtm = getDTM(new DOMSource(root), false, null/*, true, true*/);
116
+
117
+ int handle;
118
+
119
+ if (node instanceof org.apache.xml.dtm.ref.dom2dtm.DOM2DTMdefaultNamespaceDeclarationNode
120
+ || node instanceof DOM2DTMdefaultNamespaceDeclarationNode) {
121
+ // Can't return the same node since it's unique to a specific DTM,
122
+ // but can return the equivalent node -- find the corresponding
123
+ // Document Element, then ask it for the xml: namespace decl.
124
+ handle = dtm.getHandleOfNode(((org.w3c.dom.Attr) node).getOwnerElement());
125
+ handle = dtm.getAttributeNode(handle, node.getNamespaceURI(), node.getLocalName());
126
+ } else {
127
+ handle = dtm.getHandleOfNode(node);
128
+
129
+ rootType = root.getNodeType();
130
+ // Is Node actually within the same document? If not, don't search!
131
+ // This would be easier if m_root was always the Document node, but
132
+ // we decided to allow wrapping a DTM around a subtree.
133
+ if ((root == node) ||
134
+ (rootType == Node.DOCUMENT_NODE && root == node.getOwnerDocument()) ||
135
+ (rootType != Node.DOCUMENT_NODE && root.getOwnerDocument() == node.getOwnerDocument())
136
+ ) {
137
+ // If node _is_ in m_root's tree, find its handle
96
138
  //
97
- // %BUG% If the source node was a DOM2DTM$defaultNamespaceDeclarationNode
98
- // and the DTM wasn't registered with this DTMManager, we will create
99
- // a new DTM and _still_ not be able to find the node (since it will
100
- // be resynthesized). Another reason to push hard on making all DTMs
101
- // be managed DTMs.
102
-
103
- // Since the real root of our tree may be a DocumentFragment, we need to
104
- // use getParent to find the root, instead of getOwnerDocument. Otherwise
105
- // DOM2DTM#getHandleOfNode will be very unhappy.
106
- Node root = node; int rootType = root.getNodeType();
107
- Node p = (rootType == Node.ATTRIBUTE_NODE) ? ((org.w3c.dom.Attr) root).getOwnerElement() : root.getParentNode();
108
- for (; p != null; p = p.getParentNode()) root = p;
109
-
110
- // DOM2DTM dtm = (DOM2DTM) getDTM(new DOMSource(root), false, null);
111
- DOM2DTM dtm = getDTM(new DOMSource(root), false, null/*, true, true*/);
112
-
113
- int handle;
114
-
115
- if (node instanceof org.apache.xml.dtm.ref.dom2dtm.DOM2DTMdefaultNamespaceDeclarationNode
116
- || node instanceof DOM2DTMdefaultNamespaceDeclarationNode) {
117
- // Can't return the same node since it's unique to a specific DTM,
118
- // but can return the equivalent node -- find the corresponding
119
- // Document Element, then ask it for the xml: namespace decl.
120
- handle = dtm.getHandleOfNode(((org.w3c.dom.Attr) node).getOwnerElement());
121
- handle = dtm.getAttributeNode(handle, node.getNamespaceURI(), node.getLocalName());
122
- }
123
- else {
124
- handle = dtm.getHandleOfNode(node);
125
-
126
- rootType = root.getNodeType();
127
- // Is Node actually within the same document? If not, don't search!
128
- // This would be easier if m_root was always the Document node, but
129
- // we decided to allow wrapping a DTM around a subtree.
130
- if((root==node) ||
131
- (rootType==Node.DOCUMENT_NODE && root==node.getOwnerDocument()) ||
132
- (rootType!=Node.DOCUMENT_NODE && root.getOwnerDocument()==node.getOwnerDocument())
133
- )
134
- {
135
- // If node _is_ in m_root's tree, find its handle
136
- //
137
- // %OPT% This check may be improved significantly when DOM
138
- // Level 3 nodeKey and relative-order tests become
139
- // available!
140
- for (Node cursor = node; cursor != null;
141
- cursor = (cursor.getNodeType()!=Node.ATTRIBUTE_NODE)
142
- ? cursor.getParentNode()
143
- : ((org.w3c.dom.Attr)cursor).getOwnerElement()) {
144
- if (cursor==root) {
145
- // We know this node; find its handle.
146
- return (dtm).getHandleFromNode(node);
147
- }
148
- } // for ancestors of node
149
- } // if node and m_root in same Document
150
- }
151
-
152
- if (DTM.NULL == handle)
153
- throw new RuntimeException(XMLMessages.createXMLMessage(XMLErrorResources.ER_COULD_NOT_RESOLVE_NODE, null)); //"Could not resolve the node to a handle!");
139
+ // %OPT% This check may be improved significantly when DOM
140
+ // Level 3 nodeKey and relative-order tests become
141
+ // available!
142
+ for (Node cursor = node; cursor != null;
143
+ cursor = (cursor.getNodeType() != Node.ATTRIBUTE_NODE)
144
+ ? cursor.getParentNode()
145
+ : ((org.w3c.dom.Attr)cursor).getOwnerElement()) {
146
+ if (cursor == root) {
147
+ // We know this node; find its handle.
148
+ return (dtm).getHandleFromNode(node);
149
+ }
150
+ } // for ancestors of node
151
+ } // if node and m_root in same Document
152
+ }
154
153
 
155
- return handle;
154
+ if (DTM.NULL == handle) {
155
+ throw new RuntimeException(XMLMessages.createXMLMessage(XMLErrorResources.ER_COULD_NOT_RESOLVE_NODE,
156
+ null)); //"Could not resolve the node to a handle!");
156
157
  }
157
158
 
158
- private DOM2DTM getDTM(DOMSource source, boolean unique, DTMWSFilter whiteSpaceFilter/*, boolean incremental, boolean doIndexing*/) {
159
- int dtmPos = getFirstFreeDTMID();
160
- int documentID = dtmPos << IDENT_DTM_NODE_BITS;
159
+ return handle;
160
+ }
161
161
 
162
- DOM2DTM dtm = new DOM2DTM(this, source, documentID, whiteSpaceFilter, m_xsf, true);
162
+ private DOM2DTM
163
+ getDTM(DOMSource source, boolean unique, DTMWSFilter whiteSpaceFilter/*, boolean incremental, boolean doIndexing*/)
164
+ {
165
+ int dtmPos = getFirstFreeDTMID();
166
+ int documentID = dtmPos << IDENT_DTM_NODE_BITS;
163
167
 
164
- addDTM(dtm, dtmPos, 0);
165
- return dtm;
166
- }
168
+ DOM2DTM dtm = new DOM2DTM(this, source, documentID, whiteSpaceFilter, m_xsf, true);
169
+
170
+ addDTM(dtm, dtmPos, 0);
171
+ return dtm;
172
+ }
167
173
 
168
174
  }
@@ -17,10 +17,10 @@
17
17
  * distribute, sublicense, and/or sell copies of the Software, and to
18
18
  * permit persons to whom the Software is furnished to do so, subject to
19
19
  * the following conditions:
20
- *
20
+ *
21
21
  * The above copyright notice and this permission notice shall be
22
22
  * included in all copies or substantial portions of the Software.
23
- *
23
+ *
24
24
  * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
25
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
26
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -37,6 +37,7 @@ package nokogiri.internals;
37
37
  *
38
38
  * @author Patrick Mahoney <pat@polycrystal.org>
39
39
  */
40
- public interface XmlDeclHandler {
41
- public void xmlDecl(String version, String encoding, String standalone);
40
+ public interface XmlDeclHandler
41
+ {
42
+ public void xmlDecl(String version, String encoding, String standalone);
42
43
  }
@@ -17,10 +17,10 @@
17
17
  * distribute, sublicense, and/or sell copies of the Software, and to
18
18
  * permit persons to whom the Software is furnished to do so, subject to
19
19
  * the following conditions:
20
- *
20
+ *
21
21
  * The above copyright notice and this permission notice shall be
22
22
  * included in all copies or substantial portions of the Software.
23
- *
23
+ *
24
24
  * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
25
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
26
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -46,6 +46,7 @@ import org.jruby.RubyClass;
46
46
  import org.jruby.RubyFixnum;
47
47
  import org.jruby.exceptions.RaiseException;
48
48
  import org.jruby.runtime.ThreadContext;
49
+ import org.jruby.runtime.Helpers;
49
50
  import org.jruby.runtime.builtin.IRubyObject;
50
51
  import org.w3c.dom.Document;
51
52
  import org.w3c.dom.Node;
@@ -61,214 +62,244 @@ import nokogiri.XmlSyntaxError;
61
62
  * Parser class for XML DOM processing. This class actually parses XML document
62
63
  * and creates DOM tree in Java side. However, DOM tree in Ruby side is not since
63
64
  * we delay creating objects for performance.
64
- *
65
+ *
65
66
  * @author sergio
66
67
  * @author Yoko Harada <yokolet@gmail.com>
67
68
  */
68
- public class XmlDomParserContext extends ParserContext {
69
+ public class XmlDomParserContext extends ParserContext
70
+ {
69
71
 
70
- protected static final String FEATURE_LOAD_EXTERNAL_DTD =
71
- "http://apache.org/xml/features/nonvalidating/load-external-dtd";
72
- protected static final String FEATURE_LOAD_DTD_GRAMMAR =
73
- "http://apache.org/xml/features/nonvalidating/load-dtd-grammar";
74
- protected static final String FEATURE_INCLUDE_IGNORABLE_WHITESPACE =
75
- "http://apache.org/xml/features/dom/include-ignorable-whitespace";
76
- protected static final String CONTINUE_AFTER_FATAL_ERROR =
77
- "http://apache.org/xml/features/continue-after-fatal-error";
78
- protected static final String FEATURE_NOT_EXPAND_ENTITY =
79
- "http://apache.org/xml/features/dom/create-entity-ref-nodes";
80
- protected static final String FEATURE_VALIDATION = "http://xml.org/sax/features/validation";
81
- private static final String XINCLUDE_FEATURE_ID = "http://apache.org/xml/features/xinclude";
82
- private static final String SECURITY_MANAGER = "http://apache.org/xml/properties/security-manager";
72
+ protected static final String FEATURE_LOAD_EXTERNAL_DTD =
73
+ "http://apache.org/xml/features/nonvalidating/load-external-dtd";
74
+ protected static final String FEATURE_LOAD_DTD_GRAMMAR =
75
+ "http://apache.org/xml/features/nonvalidating/load-dtd-grammar";
76
+ protected static final String FEATURE_INCLUDE_IGNORABLE_WHITESPACE =
77
+ "http://apache.org/xml/features/dom/include-ignorable-whitespace";
78
+ protected static final String CONTINUE_AFTER_FATAL_ERROR =
79
+ "http://apache.org/xml/features/continue-after-fatal-error";
80
+ protected static final String FEATURE_NOT_EXPAND_ENTITY =
81
+ "http://apache.org/xml/features/dom/create-entity-ref-nodes";
82
+ protected static final String FEATURE_VALIDATION = "http://xml.org/sax/features/validation";
83
+ private static final String XINCLUDE_FEATURE_ID = "http://apache.org/xml/features/xinclude";
84
+ private static final String SECURITY_MANAGER = "http://apache.org/xml/properties/security-manager";
83
85
 
84
- protected ParserContext.Options options;
85
- protected DOMParser parser;
86
- protected NokogiriErrorHandler errorHandler;
87
- protected IRubyObject ruby_encoding;
86
+ protected ParserContext.Options options;
87
+ protected DOMParser parser;
88
+ protected NokogiriErrorHandler errorHandler;
89
+ protected IRubyObject ruby_encoding;
88
90
 
89
- public XmlDomParserContext(Ruby runtime, IRubyObject options) {
90
- this(runtime, runtime.getNil(), options);
91
- }
92
-
93
- public XmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options) {
94
- super(runtime);
95
- this.options = new ParserContext.Options(RubyFixnum.fix2long(options));
96
- java_encoding = NokogiriHelpers.getValidEncoding(encoding);
97
- ruby_encoding = encoding;
98
- initErrorHandler();
99
- initParser(runtime);
100
- }
91
+ public
92
+ XmlDomParserContext(Ruby runtime, IRubyObject options)
93
+ {
94
+ this(runtime, runtime.getNil(), options);
95
+ }
101
96
 
102
- protected void initErrorHandler() {
103
- if (options.recover) {
104
- errorHandler = new NokogiriNonStrictErrorHandler(options.noError, options.noWarning);
105
- } else {
106
- errorHandler = new NokogiriStrictErrorHandler(options.noError, options.noWarning);
107
- }
108
- }
97
+ public
98
+ XmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options)
99
+ {
100
+ super(runtime);
101
+ this.options = new ParserContext.Options(RubyFixnum.fix2long(options));
102
+ java_encoding = NokogiriHelpers.getValidEncodingOrNull(encoding);
103
+ ruby_encoding = encoding;
104
+ initErrorHandler();
105
+ initParser(runtime);
106
+ }
109
107
 
110
- protected void initParser(Ruby runtime) {
111
- if (options.xInclude) {
112
- System.setProperty("org.apache.xerces.xni.parser.XMLParserConfiguration",
113
- "org.apache.xerces.parsers.XIncludeParserConfiguration");
114
- }
108
+ protected void
109
+ initErrorHandler()
110
+ {
111
+ if (options.recover) {
112
+ errorHandler = new NokogiriNonStrictErrorHandler(options.noError, options.noWarning);
113
+ } else {
114
+ errorHandler = new NokogiriStrictErrorHandler(options.noError, options.noWarning);
115
+ }
116
+ }
115
117
 
116
- parser = new NokogiriDomParser(options);
117
- parser.setErrorHandler(errorHandler);
118
+ protected void
119
+ initParser(Ruby runtime)
120
+ {
121
+ if (options.xInclude) {
122
+ System.setProperty("org.apache.xerces.xni.parser.XMLParserConfiguration",
123
+ "org.apache.xerces.parsers.XIncludeParserConfiguration");
124
+ }
118
125
 
119
- // Fix for Issue#586. This limits entity expansion up to 100000 and nodes up to 3000.
120
- setProperty(SECURITY_MANAGER, new org.apache.xerces.util.SecurityManager());
126
+ parser = new NokogiriDomParser(options);
127
+ parser.setErrorHandler(errorHandler);
121
128
 
122
- if (options.noBlanks) {
123
- setFeature(FEATURE_INCLUDE_IGNORABLE_WHITESPACE, false);
124
- }
129
+ // Fix for Issue#586. This limits entity expansion up to 100000 and nodes up to 3000.
130
+ setProperty(SECURITY_MANAGER, new org.apache.xerces.util.SecurityManager());
125
131
 
126
- if (options.recover) {
127
- setFeature(CONTINUE_AFTER_FATAL_ERROR, true);
128
- }
132
+ if (options.noBlanks) {
133
+ setFeature(FEATURE_INCLUDE_IGNORABLE_WHITESPACE, false);
134
+ }
129
135
 
130
- if (options.dtdValid) {
131
- setFeature(FEATURE_VALIDATION, true);
132
- }
136
+ if (options.recover) {
137
+ setFeature(CONTINUE_AFTER_FATAL_ERROR, true);
138
+ }
133
139
 
134
- if (!options.noEnt) {
135
- setFeature(FEATURE_NOT_EXPAND_ENTITY, true);
136
- }
137
- // If we turn off loading of external DTDs complete, we don't
138
- // getthe publicID. Instead of turning off completely, we use
139
- // an entity resolver that returns empty documents.
140
- if (options.dtdLoad) {
141
- setFeature(FEATURE_LOAD_EXTERNAL_DTD, true);
142
- setFeature(FEATURE_LOAD_DTD_GRAMMAR, true);
143
- }
144
- parser.setEntityResolver(new NokogiriEntityResolver(runtime, errorHandler, options));
140
+ if (options.dtdValid) {
141
+ setFeature(FEATURE_VALIDATION, true);
145
142
  }
146
143
 
147
- /**
148
- * Convenience method that catches and ignores SAXException
149
- * (unrecognized and unsupported exceptions).
150
- */
151
- protected void setFeature(String feature, boolean value) {
152
- try {
153
- parser.setFeature(feature, value);
154
- } catch (SAXException e) {
155
- // ignore
156
- }
144
+ if (!options.noEnt) {
145
+ setFeature(FEATURE_NOT_EXPAND_ENTITY, true);
146
+ }
147
+ // If we turn off loading of external DTDs complete, we don't
148
+ // getthe publicID. Instead of turning off completely, we use
149
+ // an entity resolver that returns empty documents.
150
+ if (options.dtdLoad) {
151
+ setFeature(FEATURE_LOAD_EXTERNAL_DTD, true);
152
+ setFeature(FEATURE_LOAD_DTD_GRAMMAR, true);
157
153
  }
154
+ parser.setEntityResolver(new NokogiriEntityResolver(runtime, errorHandler, options));
155
+ }
158
156
 
159
- /**
160
- * Convenience method that catches and ignores SAXException
161
- * (unrecognized and unsupported exceptions).
162
- */
163
- protected void setProperty(String property, Object value) {
164
- try {
165
- parser.setProperty(property, value);
166
- } catch (SAXException e) {
167
- // ignore
168
- }
157
+ /**
158
+ * Convenience method that catches and ignores SAXException
159
+ * (unrecognized and unsupported exceptions).
160
+ */
161
+ protected void
162
+ setFeature(String feature, boolean value)
163
+ {
164
+ try {
165
+ parser.setFeature(feature, value);
166
+ } catch (SAXException e) {
167
+ // ignore
169
168
  }
169
+ }
170
170
 
171
- public void addErrorsIfNecessary(ThreadContext context, XmlDocument doc) {
172
- doc.setInstanceVariable("@errors", mapErrors(context, errorHandler));
171
+ /**
172
+ * Convenience method that catches and ignores SAXException
173
+ * (unrecognized and unsupported exceptions).
174
+ */
175
+ protected void
176
+ setProperty(String property, Object value)
177
+ {
178
+ try {
179
+ parser.setProperty(property, value);
180
+ } catch (SAXException e) {
181
+ // ignore
173
182
  }
183
+ }
174
184
 
185
+ public void
186
+ addErrorsIfNecessary(ThreadContext context, XmlDocument doc)
187
+ {
188
+ doc.setInstanceVariable("@errors", mapErrors(context, errorHandler));
189
+ }
175
190
 
176
- public static RubyArray mapErrors(ThreadContext context, NokogiriErrorHandler errorHandler) {
177
- final Ruby runtime = context.runtime;
178
- final List<Exception> errors = errorHandler.getErrors();
179
- final IRubyObject[] errorsAry = new IRubyObject[errors.size()];
180
- for (int i = 0; i < errors.size(); i++) {
181
- XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(runtime);
182
- xmlSyntaxError.setException(errors.get(i));
183
- errorsAry[i] = xmlSyntaxError;
184
- }
185
- return runtime.newArrayNoCopy(errorsAry);
186
- }
187
191
 
188
- public XmlDocument getDocumentWithErrorsOrRaiseException(ThreadContext context, RubyClass klazz, Exception ex) {
189
- if (options.recover) {
190
- XmlDocument xmlDocument = getInterruptedOrNewXmlDocument(context, klazz);
191
- this.addErrorsIfNecessary(context, xmlDocument);
192
- XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
193
- xmlSyntaxError.setException(ex);
194
- ((RubyArray) xmlDocument.getInstanceVariable("@errors")).append(xmlSyntaxError);
195
- return xmlDocument;
196
- } else {
197
- XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
198
- xmlSyntaxError.setException(ex);
199
- throw new RaiseException(xmlSyntaxError);
200
- }
192
+ public static RubyArray
193
+ mapErrors(ThreadContext context, NokogiriErrorHandler errorHandler)
194
+ {
195
+ final Ruby runtime = context.runtime;
196
+ final List<Exception> errors = errorHandler.getErrors();
197
+ final IRubyObject[] errorsAry = new IRubyObject[errors.size()];
198
+ for (int i = 0; i < errors.size(); i++) {
199
+ XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(runtime);
200
+ xmlSyntaxError.setException(errors.get(i));
201
+ errorsAry[i] = xmlSyntaxError;
201
202
  }
202
-
203
- private XmlDocument getInterruptedOrNewXmlDocument(ThreadContext context, RubyClass klass) {
204
- Document document = parser.getDocument();
205
- XmlDocument xmlDocument = new XmlDocument(context.runtime, klass, document);
206
- xmlDocument.setEncoding(ruby_encoding);
207
- return xmlDocument;
203
+ return runtime.newArrayNoCopy(errorsAry);
204
+ }
205
+
206
+ public XmlDocument
207
+ getDocumentWithErrorsOrRaiseException(ThreadContext context, RubyClass klazz, Exception ex)
208
+ {
209
+ if (options.recover) {
210
+ XmlDocument xmlDocument = getInterruptedOrNewXmlDocument(context, klazz);
211
+ this.addErrorsIfNecessary(context, xmlDocument);
212
+ XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
213
+ xmlSyntaxError.setException(ex);
214
+ ((RubyArray) xmlDocument.getInstanceVariable("@errors")).append(xmlSyntaxError);
215
+ return xmlDocument;
216
+ } else {
217
+ XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
218
+ xmlSyntaxError.setException(ex);
219
+ throw xmlSyntaxError.toThrowable();
208
220
  }
221
+ }
209
222
 
210
- /**
211
- * This method is broken out so that HtmlDomParserContext can
212
- * override it.
213
- */
214
- protected XmlDocument wrapDocument(ThreadContext context, RubyClass klass, Document doc) {
215
- XmlDocument xmlDocument = new XmlDocument(context.runtime, klass, doc);
216
- xmlDocument.setEncoding(ruby_encoding);
223
+ private XmlDocument
224
+ getInterruptedOrNewXmlDocument(ThreadContext context, RubyClass klass)
225
+ {
226
+ Document document = parser.getDocument();
227
+ XmlDocument xmlDocument = new XmlDocument(context.runtime, klass, document);
228
+ xmlDocument.setEncoding(ruby_encoding);
229
+ return xmlDocument;
230
+ }
217
231
 
218
- if (options.dtdLoad) {
219
- IRubyObject dtd = XmlDtd.newFromExternalSubset(context.runtime, doc);
220
- if (!dtd.isNil()) {
221
- doc.setUserData(XmlDocument.DTD_EXTERNAL_SUBSET, (XmlDtd) dtd, null);
222
- }
223
- }
224
- return xmlDocument;
232
+ /**
233
+ * This method is broken out so that HtmlDomParserContext can
234
+ * override it.
235
+ */
236
+ protected XmlDocument
237
+ wrapDocument(ThreadContext context, RubyClass klass, Document doc)
238
+ {
239
+ XmlDocument xmlDocument = new XmlDocument(context.runtime, klass, doc);
240
+ Helpers.invoke(context, xmlDocument, "initialize");
241
+ xmlDocument.setEncoding(ruby_encoding);
242
+
243
+ if (options.dtdLoad) {
244
+ IRubyObject dtd = XmlDtd.newFromExternalSubset(context.runtime, doc);
245
+ if (!dtd.isNil()) {
246
+ doc.setUserData(XmlDocument.DTD_EXTERNAL_SUBSET, (XmlDtd) dtd, null);
247
+ }
225
248
  }
249
+ return xmlDocument;
250
+ }
226
251
 
227
- /**
228
- * Must call setInputSource() before this method.
229
- */
230
- public XmlDocument parse(ThreadContext context, RubyClass klass, IRubyObject url) {
231
- XmlDocument xmlDoc;
232
- try {
233
- Document doc = do_parse();
234
- xmlDoc = wrapDocument(context, klass, doc);
235
- xmlDoc.setUrl(url);
236
- addErrorsIfNecessary(context, xmlDoc);
237
- return xmlDoc;
238
- } catch (SAXException e) {
239
- return getDocumentWithErrorsOrRaiseException(context, klass, e);
240
- } catch (IOException e) {
241
- return getDocumentWithErrorsOrRaiseException(context, klass, e);
242
- }
252
+ /**
253
+ * Must call setInputSource() before this method.
254
+ */
255
+ public XmlDocument
256
+ parse(ThreadContext context, RubyClass klass, IRubyObject url)
257
+ {
258
+ XmlDocument xmlDoc;
259
+ try {
260
+ Document doc = do_parse();
261
+ xmlDoc = wrapDocument(context, klass, doc);
262
+ xmlDoc.setUrl(url);
263
+ addErrorsIfNecessary(context, xmlDoc);
264
+ return xmlDoc;
265
+ } catch (SAXException e) {
266
+ return getDocumentWithErrorsOrRaiseException(context, klass, e);
267
+ } catch (IOException e) {
268
+ return getDocumentWithErrorsOrRaiseException(context, klass, e);
243
269
  }
270
+ }
244
271
 
245
- protected Document do_parse() throws SAXException, IOException {
246
- try {
247
- parser.parse(getInputSource());
248
- } catch (NullPointerException ex) {
249
- // FIXME: this is really a hack to fix #838. Xerces will throw a NullPointerException
250
- // if we tried to parse '<? ?>'. We should submit a patch to Xerces.
251
- }
252
- if (options.noBlanks) {
253
- List<Node> emptyNodes = new ArrayList<Node>();
254
- findEmptyTexts(parser.getDocument(), emptyNodes);
255
- if (emptyNodes.size() > 0) {
256
- for (Node node : emptyNodes) {
257
- node.getParentNode().removeChild(node);
258
- }
259
- }
260
- }
261
- return parser.getDocument();
272
+ protected Document
273
+ do_parse() throws SAXException, IOException
274
+ {
275
+ try {
276
+ parser.parse(getInputSource());
277
+ } catch (NullPointerException ex) {
278
+ // FIXME: this is really a hack to fix #838. Xerces will throw a NullPointerException
279
+ // if we tried to parse '<? ?>'. We should submit a patch to Xerces.
262
280
  }
263
-
264
- private static void findEmptyTexts(Node node, List<Node> emptyNodes) {
265
- if (node.getNodeType() == Node.TEXT_NODE && isBlank(node.getTextContent())) {
266
- emptyNodes.add(node);
267
- } else {
268
- NodeList children = node.getChildNodes();
269
- for (int i=0; i < children.getLength(); i++) {
270
- findEmptyTexts(children.item(i), emptyNodes);
271
- }
281
+ if (options.noBlanks) {
282
+ List<Node> emptyNodes = new ArrayList<Node>();
283
+ findEmptyTexts(parser.getDocument(), emptyNodes);
284
+ if (emptyNodes.size() > 0) {
285
+ for (Node node : emptyNodes) {
286
+ node.getParentNode().removeChild(node);
272
287
  }
288
+ }
289
+ }
290
+ return parser.getDocument();
291
+ }
292
+
293
+ private static void
294
+ findEmptyTexts(Node node, List<Node> emptyNodes)
295
+ {
296
+ if (node.getNodeType() == Node.TEXT_NODE && isBlank(node.getTextContent())) {
297
+ emptyNodes.add(node);
298
+ } else {
299
+ NodeList children = node.getChildNodes();
300
+ for (int i = 0; i < children.getLength(); i++) {
301
+ findEmptyTexts(children.item(i), emptyNodes);
302
+ }
273
303
  }
304
+ }
274
305
  }