Nokogiri_precompiled_aarch64_dedshit 1.14.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (263) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +44 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +287 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +41 -0
  8. data/ext/java/nokogiri/Html4Document.java +157 -0
  9. data/ext/java/nokogiri/Html4ElementDescription.java +133 -0
  10. data/ext/java/nokogiri/Html4EntityLookup.java +63 -0
  11. data/ext/java/nokogiri/Html4SaxParserContext.java +289 -0
  12. data/ext/java/nokogiri/Html4SaxPushParser.java +213 -0
  13. data/ext/java/nokogiri/NokogiriService.java +613 -0
  14. data/ext/java/nokogiri/XmlAttr.java +154 -0
  15. data/ext/java/nokogiri/XmlAttributeDecl.java +119 -0
  16. data/ext/java/nokogiri/XmlCdata.java +60 -0
  17. data/ext/java/nokogiri/XmlComment.java +77 -0
  18. data/ext/java/nokogiri/XmlDocument.java +705 -0
  19. data/ext/java/nokogiri/XmlDocumentFragment.java +163 -0
  20. data/ext/java/nokogiri/XmlDtd.java +516 -0
  21. data/ext/java/nokogiri/XmlElement.java +44 -0
  22. data/ext/java/nokogiri/XmlElementContent.java +412 -0
  23. data/ext/java/nokogiri/XmlElementDecl.java +148 -0
  24. data/ext/java/nokogiri/XmlEntityDecl.java +151 -0
  25. data/ext/java/nokogiri/XmlEntityReference.java +79 -0
  26. data/ext/java/nokogiri/XmlNamespace.java +193 -0
  27. data/ext/java/nokogiri/XmlNode.java +1938 -0
  28. data/ext/java/nokogiri/XmlNodeSet.java +463 -0
  29. data/ext/java/nokogiri/XmlProcessingInstruction.java +79 -0
  30. data/ext/java/nokogiri/XmlReader.java +615 -0
  31. data/ext/java/nokogiri/XmlRelaxng.java +133 -0
  32. data/ext/java/nokogiri/XmlSaxParserContext.java +329 -0
  33. data/ext/java/nokogiri/XmlSaxPushParser.java +288 -0
  34. data/ext/java/nokogiri/XmlSchema.java +423 -0
  35. data/ext/java/nokogiri/XmlSyntaxError.java +137 -0
  36. data/ext/java/nokogiri/XmlText.java +90 -0
  37. data/ext/java/nokogiri/XmlXpathContext.java +305 -0
  38. data/ext/java/nokogiri/XsltStylesheet.java +368 -0
  39. data/ext/java/nokogiri/internals/ClosedStreamException.java +13 -0
  40. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +252 -0
  41. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +27 -0
  42. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +178 -0
  43. data/ext/java/nokogiri/internals/NokogiriDomParser.java +99 -0
  44. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +140 -0
  45. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +65 -0
  46. data/ext/java/nokogiri/internals/NokogiriHandler.java +339 -0
  47. data/ext/java/nokogiri/internals/NokogiriHelpers.java +817 -0
  48. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +228 -0
  49. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +110 -0
  50. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +86 -0
  51. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +107 -0
  52. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +62 -0
  53. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +165 -0
  54. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +50 -0
  55. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +37 -0
  56. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +70 -0
  57. data/ext/java/nokogiri/internals/ParserContext.java +262 -0
  58. data/ext/java/nokogiri/internals/ReaderNode.java +564 -0
  59. data/ext/java/nokogiri/internals/SaveContextVisitor.java +865 -0
  60. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +50 -0
  61. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +174 -0
  62. data/ext/java/nokogiri/internals/XmlDeclHandler.java +11 -0
  63. data/ext/java/nokogiri/internals/XmlDomParserContext.java +265 -0
  64. data/ext/java/nokogiri/internals/XmlSaxParser.java +40 -0
  65. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +122 -0
  66. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +178 -0
  67. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +43 -0
  68. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +106 -0
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +278 -0
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +664 -0
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +45 -0
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +45 -0
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +388 -0
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +308 -0
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +47 -0
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +51 -0
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +51 -0
  78. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +50 -0
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +660 -0
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +194 -0
  81. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +77 -0
  82. data/ext/java/nokogiri/internals/c14n/Constants.java +45 -0
  83. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +325 -0
  84. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +106 -0
  85. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +86 -0
  86. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +181 -0
  87. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +87 -0
  88. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +452 -0
  89. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +52 -0
  90. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +190 -0
  91. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +540 -0
  92. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1712 -0
  93. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +737 -0
  94. data/ext/nokogiri/depend +38 -0
  95. data/ext/nokogiri/extconf.rb +1086 -0
  96. data/ext/nokogiri/gumbo.c +594 -0
  97. data/ext/nokogiri/html4_document.c +167 -0
  98. data/ext/nokogiri/html4_element_description.c +294 -0
  99. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  100. data/ext/nokogiri/html4_sax_parser_context.c +116 -0
  101. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  102. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  103. data/ext/nokogiri/nokogiri.c +265 -0
  104. data/ext/nokogiri/nokogiri.h +235 -0
  105. data/ext/nokogiri/test_global_handlers.c +42 -0
  106. data/ext/nokogiri/xml_attr.c +103 -0
  107. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  108. data/ext/nokogiri/xml_cdata.c +57 -0
  109. data/ext/nokogiri/xml_comment.c +62 -0
  110. data/ext/nokogiri/xml_document.c +689 -0
  111. data/ext/nokogiri/xml_document_fragment.c +44 -0
  112. data/ext/nokogiri/xml_dtd.c +210 -0
  113. data/ext/nokogiri/xml_element_content.c +128 -0
  114. data/ext/nokogiri/xml_element_decl.c +69 -0
  115. data/ext/nokogiri/xml_encoding_handler.c +104 -0
  116. data/ext/nokogiri/xml_entity_decl.c +112 -0
  117. data/ext/nokogiri/xml_entity_reference.c +50 -0
  118. data/ext/nokogiri/xml_namespace.c +186 -0
  119. data/ext/nokogiri/xml_node.c +2426 -0
  120. data/ext/nokogiri/xml_node_set.c +496 -0
  121. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  122. data/ext/nokogiri/xml_reader.c +794 -0
  123. data/ext/nokogiri/xml_relax_ng.c +164 -0
  124. data/ext/nokogiri/xml_sax_parser.c +316 -0
  125. data/ext/nokogiri/xml_sax_parser_context.c +283 -0
  126. data/ext/nokogiri/xml_sax_push_parser.c +166 -0
  127. data/ext/nokogiri/xml_schema.c +260 -0
  128. data/ext/nokogiri/xml_syntax_error.c +85 -0
  129. data/ext/nokogiri/xml_text.c +48 -0
  130. data/ext/nokogiri/xml_xpath_context.c +415 -0
  131. data/ext/nokogiri/xslt_stylesheet.c +363 -0
  132. data/gumbo-parser/CHANGES.md +63 -0
  133. data/gumbo-parser/Makefile +111 -0
  134. data/gumbo-parser/THANKS +27 -0
  135. data/gumbo-parser/src/Makefile +34 -0
  136. data/gumbo-parser/src/README.md +41 -0
  137. data/gumbo-parser/src/ascii.c +75 -0
  138. data/gumbo-parser/src/ascii.h +115 -0
  139. data/gumbo-parser/src/attribute.c +42 -0
  140. data/gumbo-parser/src/attribute.h +17 -0
  141. data/gumbo-parser/src/char_ref.c +22225 -0
  142. data/gumbo-parser/src/char_ref.h +29 -0
  143. data/gumbo-parser/src/char_ref.rl +2154 -0
  144. data/gumbo-parser/src/error.c +626 -0
  145. data/gumbo-parser/src/error.h +148 -0
  146. data/gumbo-parser/src/foreign_attrs.c +104 -0
  147. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  148. data/gumbo-parser/src/insertion_mode.h +33 -0
  149. data/gumbo-parser/src/macros.h +91 -0
  150. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  151. data/gumbo-parser/src/parser.c +4878 -0
  152. data/gumbo-parser/src/parser.h +41 -0
  153. data/gumbo-parser/src/replacement.h +33 -0
  154. data/gumbo-parser/src/string_buffer.c +103 -0
  155. data/gumbo-parser/src/string_buffer.h +68 -0
  156. data/gumbo-parser/src/string_piece.c +48 -0
  157. data/gumbo-parser/src/svg_attrs.c +174 -0
  158. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  159. data/gumbo-parser/src/svg_tags.c +137 -0
  160. data/gumbo-parser/src/svg_tags.gperf +55 -0
  161. data/gumbo-parser/src/tag.c +223 -0
  162. data/gumbo-parser/src/tag_lookup.c +382 -0
  163. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  164. data/gumbo-parser/src/tag_lookup.h +13 -0
  165. data/gumbo-parser/src/token_buffer.c +79 -0
  166. data/gumbo-parser/src/token_buffer.h +71 -0
  167. data/gumbo-parser/src/token_type.h +17 -0
  168. data/gumbo-parser/src/tokenizer.c +3463 -0
  169. data/gumbo-parser/src/tokenizer.h +112 -0
  170. data/gumbo-parser/src/tokenizer_states.h +339 -0
  171. data/gumbo-parser/src/utf8.c +245 -0
  172. data/gumbo-parser/src/utf8.h +164 -0
  173. data/gumbo-parser/src/util.c +66 -0
  174. data/gumbo-parser/src/util.h +34 -0
  175. data/gumbo-parser/src/vector.c +111 -0
  176. data/gumbo-parser/src/vector.h +45 -0
  177. data/lib/nokogiri/class_resolver.rb +67 -0
  178. data/lib/nokogiri/css/node.rb +54 -0
  179. data/lib/nokogiri/css/parser.rb +770 -0
  180. data/lib/nokogiri/css/parser.y +277 -0
  181. data/lib/nokogiri/css/parser_extras.rb +96 -0
  182. data/lib/nokogiri/css/syntax_error.rb +9 -0
  183. data/lib/nokogiri/css/tokenizer.rb +155 -0
  184. data/lib/nokogiri/css/tokenizer.rex +56 -0
  185. data/lib/nokogiri/css/xpath_visitor.rb +359 -0
  186. data/lib/nokogiri/css.rb +66 -0
  187. data/lib/nokogiri/decorators/slop.rb +44 -0
  188. data/lib/nokogiri/encoding_handler.rb +57 -0
  189. data/lib/nokogiri/extension.rb +32 -0
  190. data/lib/nokogiri/gumbo.rb +15 -0
  191. data/lib/nokogiri/html.rb +48 -0
  192. data/lib/nokogiri/html4/builder.rb +37 -0
  193. data/lib/nokogiri/html4/document.rb +214 -0
  194. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  195. data/lib/nokogiri/html4/element_description.rb +25 -0
  196. data/lib/nokogiri/html4/element_description_defaults.rb +572 -0
  197. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  198. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  199. data/lib/nokogiri/html4/sax/parser.rb +63 -0
  200. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  201. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  202. data/lib/nokogiri/html4.rb +47 -0
  203. data/lib/nokogiri/html5/document.rb +168 -0
  204. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  205. data/lib/nokogiri/html5/node.rb +98 -0
  206. data/lib/nokogiri/html5.rb +389 -0
  207. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  208. data/lib/nokogiri/jruby/isorelax/isorelax/20030108/isorelax-20030108.jar +0 -0
  209. data/lib/nokogiri/jruby/net/sf/saxon/Saxon-HE/9.6.0-4/Saxon-HE-9.6.0-4.jar +0 -0
  210. data/lib/nokogiri/jruby/net/sourceforge/htmlunit/neko-htmlunit/2.63.0/neko-htmlunit-2.63.0.jar +0 -0
  211. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  212. data/lib/nokogiri/jruby/nu/validator/jing/20200702VNU/jing-20200702VNU.jar +0 -0
  213. data/lib/nokogiri/jruby/org/nokogiri/nekodtd/0.1.11.noko2/nekodtd-0.1.11.noko2.jar +0 -0
  214. data/lib/nokogiri/jruby/xalan/serializer/2.7.3/serializer-2.7.3.jar +0 -0
  215. data/lib/nokogiri/jruby/xalan/xalan/2.7.3/xalan-2.7.3.jar +0 -0
  216. data/lib/nokogiri/jruby/xerces/xercesImpl/2.12.2/xercesImpl-2.12.2.jar +0 -0
  217. data/lib/nokogiri/jruby/xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar +0 -0
  218. data/lib/nokogiri/syntax_error.rb +6 -0
  219. data/lib/nokogiri/version/constant.rb +6 -0
  220. data/lib/nokogiri/version/info.rb +223 -0
  221. data/lib/nokogiri/version.rb +4 -0
  222. data/lib/nokogiri/xml/attr.rb +66 -0
  223. data/lib/nokogiri/xml/attribute_decl.rb +20 -0
  224. data/lib/nokogiri/xml/builder.rb +487 -0
  225. data/lib/nokogiri/xml/cdata.rb +13 -0
  226. data/lib/nokogiri/xml/character_data.rb +9 -0
  227. data/lib/nokogiri/xml/document.rb +471 -0
  228. data/lib/nokogiri/xml/document_fragment.rb +205 -0
  229. data/lib/nokogiri/xml/dtd.rb +34 -0
  230. data/lib/nokogiri/xml/element_content.rb +38 -0
  231. data/lib/nokogiri/xml/element_decl.rb +15 -0
  232. data/lib/nokogiri/xml/entity_decl.rb +21 -0
  233. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  234. data/lib/nokogiri/xml/namespace.rb +58 -0
  235. data/lib/nokogiri/xml/node/save_options.rb +68 -0
  236. data/lib/nokogiri/xml/node.rb +1563 -0
  237. data/lib/nokogiri/xml/node_set.rb +447 -0
  238. data/lib/nokogiri/xml/notation.rb +19 -0
  239. data/lib/nokogiri/xml/parse_options.rb +213 -0
  240. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  241. data/lib/nokogiri/xml/pp/node.rb +57 -0
  242. data/lib/nokogiri/xml/pp.rb +4 -0
  243. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  244. data/lib/nokogiri/xml/reader.rb +105 -0
  245. data/lib/nokogiri/xml/relax_ng.rb +38 -0
  246. data/lib/nokogiri/xml/sax/document.rb +167 -0
  247. data/lib/nokogiri/xml/sax/parser.rb +125 -0
  248. data/lib/nokogiri/xml/sax/parser_context.rb +21 -0
  249. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  250. data/lib/nokogiri/xml/sax.rb +6 -0
  251. data/lib/nokogiri/xml/schema.rb +73 -0
  252. data/lib/nokogiri/xml/searchable.rb +270 -0
  253. data/lib/nokogiri/xml/syntax_error.rb +72 -0
  254. data/lib/nokogiri/xml/text.rb +11 -0
  255. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  256. data/lib/nokogiri/xml/xpath.rb +21 -0
  257. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  258. data/lib/nokogiri/xml.rb +76 -0
  259. data/lib/nokogiri/xslt/stylesheet.rb +27 -0
  260. data/lib/nokogiri/xslt.rb +65 -0
  261. data/lib/nokogiri.rb +120 -0
  262. data/lib/xsd/xmlparser/nokogiri.rb +106 -0
  263. metadata +391 -0
@@ -0,0 +1,1712 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+ package nokogiri.internals.dom2dtm;
19
+
20
+ import javax.xml.transform.SourceLocator;
21
+ import javax.xml.transform.dom.DOMSource;
22
+
23
+ import org.apache.xml.dtm.DTM;
24
+ import org.apache.xml.dtm.DTMManager;
25
+ import org.apache.xml.dtm.DTMWSFilter;
26
+ import org.apache.xml.dtm.ref.DTMDefaultBaseIterators;
27
+ import org.apache.xml.dtm.ref.DTMManagerDefault;
28
+ import org.apache.xml.dtm.ref.ExpandedNameTable;
29
+ import org.apache.xml.dtm.ref.IncrementalSAXSource;
30
+ import org.apache.xml.res.XMLErrorResources;
31
+ import org.apache.xml.res.XMLMessages;
32
+ import org.apache.xml.utils.FastStringBuffer;
33
+ import org.apache.xml.utils.QName;
34
+ import org.apache.xml.utils.StringBufferPool;
35
+ import org.apache.xml.utils.TreeWalker;
36
+ import org.apache.xml.utils.XMLCharacterRecognizer;
37
+ import org.apache.xml.utils.XMLString;
38
+ import org.apache.xml.utils.XMLStringFactory;
39
+ import org.w3c.dom.Attr;
40
+ import org.w3c.dom.Document;
41
+ import org.w3c.dom.DocumentType;
42
+ import org.w3c.dom.Element;
43
+ import org.w3c.dom.Entity;
44
+ import org.w3c.dom.NamedNodeMap;
45
+ import org.w3c.dom.Node;
46
+ import org.xml.sax.ContentHandler;
47
+
48
+ /** The <code>DOM2DTM</code> class serves up a DOM's contents via the
49
+ * DTM API.
50
+ *
51
+ * Note that it doesn't necessarily represent a full Document
52
+ * tree. You can wrap a DOM2DTM around a specific node and its subtree
53
+ * and the right things should happen. (I don't _think_ we currently
54
+ * support DocumentFragment nodes as roots, though that might be worth
55
+ * considering.)
56
+ *
57
+ * Note too that we do not currently attempt to track document
58
+ * mutation. If you alter the DOM after wrapping DOM2DTM around it,
59
+ * all bets are off.
60
+ * */
61
+ public class DOM2DTM extends DTMDefaultBaseIterators
62
+ {
63
+ // static final boolean JJK_DEBUG=false;
64
+ // static final boolean JJK_NEWCODE=true;
65
+
66
+ /** Manifest constant
67
+ */
68
+ static final String NAMESPACE_DECL_NS = "http://www.w3.org/XML/1998/namespace";
69
+
70
+ /** The current position in the DOM tree. Last node examined for
71
+ * possible copying to DTM. */
72
+ transient private Node m_pos;
73
+ /** The current position in the DTM tree. Who children get appended to. */
74
+ private int m_last_parent = 0;
75
+ /** The current position in the DTM tree. Who children reference as their
76
+ * previous sib. */
77
+ private int m_last_kid = NULL;
78
+
79
+ /** The top of the subtree.
80
+ * %REVIEW%: 'may not be the same as m_context if "//foo" pattern.'
81
+ * */
82
+ transient private Node m_root;
83
+
84
+ /** True iff the first element has been processed. This is used to control
85
+ synthesis of the implied xml: namespace declaration node. */
86
+ boolean m_processedFirstElement = false;
87
+
88
+ /** true if ALL the nodes in the m_root subtree have been processed;
89
+ * false if our incremental build has not yet finished scanning the
90
+ * DOM tree. */
91
+ transient private boolean m_nodesAreProcessed;
92
+
93
+ /** The node objects. The instance part of the handle indexes
94
+ * directly into this vector. Each DTM node may actually be
95
+ * composed of several DOM nodes (for example, if logically-adjacent
96
+ * Text/CDATASection nodes in the DOM have been coalesced into a
97
+ * single DTM Text node); this table points only to the first in
98
+ * that sequence. */
99
+ protected final java.util.List<Node> m_nodes = new java.util.ArrayList<Node>(12);
100
+
101
+ /**
102
+ * Construct a DOM2DTM object from a DOM node.
103
+ *
104
+ * @param mgr The DTMManager who owns this DTM.
105
+ * @param domSource the DOM source that this DTM will wrap.
106
+ * @param dtmIdentity The DTM identity ID for this DTM.
107
+ * @param whiteSpaceFilter The white space filter for this DTM, which may
108
+ * be null.
109
+ * @param xstringfactory XMLString factory for creating character content.
110
+ * @param doIndexing true if the caller considers it worth it to use
111
+ * indexing schemes.
112
+ */
113
+ public
114
+ DOM2DTM(DTMManager mgr, DOMSource domSource,
115
+ int dtmIdentity, DTMWSFilter whiteSpaceFilter,
116
+ XMLStringFactory xstringfactory,
117
+ boolean doIndexing)
118
+ {
119
+ super(mgr, domSource, dtmIdentity, whiteSpaceFilter,
120
+ xstringfactory, doIndexing);
121
+
122
+ // Initialize DOM navigation
123
+ m_pos = m_root = domSource.getNode();
124
+ // Initialize DTM navigation
125
+ m_last_parent = m_last_kid = NULL;
126
+ m_last_kid = addNode(m_root, m_last_parent, m_last_kid, NULL);
127
+
128
+ // Apparently the domSource root may not actually be the
129
+ // Document node. If it's an Element node, we need to immediately
130
+ // add its attributes. Adapted from nextNode().
131
+ // %REVIEW% Move this logic into addNode and recurse? Cleaner!
132
+ //
133
+ // (If it's an EntityReference node, we're probably in
134
+ // seriously bad trouble. For now
135
+ // I'm just hoping nobody is ever quite that foolish... %REVIEW%)
136
+ //
137
+ // %ISSUE% What about inherited namespaces in this case?
138
+ // Do we need to special-case initialize them into the DTM model?
139
+ if (ELEMENT_NODE == m_root.getNodeType()) {
140
+ NamedNodeMap attrs = m_root.getAttributes();
141
+ int attrsize = (attrs == null) ? 0 : attrs.getLength();
142
+ if (attrsize > 0) {
143
+ int attrIndex = NULL; // start with no previous sib
144
+ for (int i = 0; i < attrsize; ++i) {
145
+ // No need to force nodetype in this case;
146
+ // addNode() will take care of switching it from
147
+ // Attr to Namespace if necessary.
148
+ attrIndex = addNode(attrs.item(i), 0, attrIndex, NULL);
149
+ m_firstch.setElementAt(DTM.NULL, attrIndex);
150
+ }
151
+ // Terminate list of attrs, and make sure they aren't
152
+ // considered children of the element
153
+ m_nextsib.setElementAt(DTM.NULL, attrIndex);
154
+
155
+ // IMPORTANT: This does NOT change m_last_parent or m_last_kid!
156
+ } // if attrs exist
157
+ } //if(ELEMENT_NODE)
158
+
159
+ // Initialize DTM-completed status
160
+ m_nodesAreProcessed = false;
161
+ }
162
+
163
+ /**
164
+ * Construct the node map from the node.
165
+ *
166
+ * @param node The node that is to be added to the DTM.
167
+ * @param parentIndex The current parent index.
168
+ * @param previousSibling The previous sibling index.
169
+ * @param forceNodeType If not DTM.NULL, overrides the DOM node type.
170
+ * Used to force nodes to Text rather than CDATASection when their
171
+ * coalesced value includes ordinary Text nodes (current DTM behavior).
172
+ *
173
+ * @return The index identity of the node that was added.
174
+ */
175
+ protected int
176
+ addNode(Node node, int parentIndex,
177
+ int previousSibling, int forceNodeType)
178
+ {
179
+ int nodeIndex = m_nodes.size();
180
+
181
+ // Have we overflowed a DTM Identity's addressing range?
182
+ if (m_dtmIdent.size() == (nodeIndex >>> DTMManager.IDENT_DTM_NODE_BITS)) {
183
+ try {
184
+ if (m_mgr == null) {
185
+ throw new ClassCastException();
186
+ }
187
+
188
+ // Handle as Extended Addressing
189
+ DTMManagerDefault mgrD = (DTMManagerDefault)m_mgr;
190
+ int id = mgrD.getFirstFreeDTMID();
191
+ mgrD.addDTM(this, id, nodeIndex);
192
+ m_dtmIdent.addElement(id << DTMManager.IDENT_DTM_NODE_BITS);
193
+ } catch (ClassCastException e) {
194
+ // %REVIEW% Wrong error message, but I've been told we're trying
195
+ // not to add messages right now for I18N reasons.
196
+ // %REVIEW% Should this be a Fatal Error?
197
+ error(XMLMessages.createXMLMessage(XMLErrorResources.ER_NO_DTMIDS_AVAIL, null));//"No more DTM IDs are available";
198
+ }
199
+ }
200
+
201
+ m_size++;
202
+ // ensureSize(nodeIndex);
203
+
204
+ int type;
205
+ if (NULL == forceNodeType) {
206
+ type = node.getNodeType();
207
+ } else {
208
+ type = forceNodeType;
209
+ }
210
+
211
+ // %REVIEW% The Namespace Spec currently says that Namespaces are
212
+ // processed in a non-namespace-aware manner, by matching the
213
+ // QName, even though there is in fact a namespace assigned to
214
+ // these nodes in the DOM. If and when that changes, we will have
215
+ // to consider whether we check the namespace-for-namespaces
216
+ // rather than the node name.
217
+ //
218
+ // %TBD% Note that the DOM does not necessarily explicitly declare
219
+ // all the namespaces it uses. DOM Level 3 will introduce a
220
+ // namespace-normalization operation which reconciles that, and we
221
+ // can request that users invoke it or otherwise ensure that the
222
+ // tree is namespace-well-formed before passing the DOM to Xalan.
223
+ // But if they don't, what should we do about it? We probably
224
+ // don't want to alter the source DOM (and may not be able to do
225
+ // so if it's read-only). The best available answer might be to
226
+ // synthesize additional DTM Namespace Nodes that don't correspond
227
+ // to DOM Attr Nodes.
228
+ if (Node.ATTRIBUTE_NODE == type) {
229
+ String name = node.getNodeName();
230
+
231
+ if (name.startsWith("xmlns:") || name.equals("xmlns")) {
232
+ type = DTM.NAMESPACE_NODE;
233
+ }
234
+ }
235
+
236
+ m_nodes.add(node);
237
+
238
+ m_firstch.setElementAt(NOTPROCESSED, nodeIndex);
239
+ m_nextsib.setElementAt(NOTPROCESSED, nodeIndex);
240
+ m_prevsib.setElementAt(previousSibling, nodeIndex);
241
+ m_parent.setElementAt(parentIndex, nodeIndex);
242
+
243
+ if (DTM.NULL != parentIndex &&
244
+ type != DTM.ATTRIBUTE_NODE &&
245
+ type != DTM.NAMESPACE_NODE) {
246
+ // If the DTM parent had no children, this becomes its first child.
247
+ if (NOTPROCESSED == m_firstch.elementAt(parentIndex)) {
248
+ m_firstch.setElementAt(nodeIndex, parentIndex);
249
+ }
250
+ }
251
+
252
+ String nsURI = node.getNamespaceURI();
253
+
254
+ // Deal with the difference between Namespace spec and XSLT
255
+ // definitions of local name. (The former says PIs don't have
256
+ // localnames; the latter says they do.)
257
+ String localName = (type == Node.PROCESSING_INSTRUCTION_NODE) ?
258
+ node.getNodeName() :
259
+ node.getLocalName();
260
+
261
+ // Hack to make DOM1 sort of work...
262
+ if (((type == Node.ELEMENT_NODE) || (type == Node.ATTRIBUTE_NODE))
263
+ && null == localName) {
264
+ localName = node.getNodeName(); // -sb
265
+ }
266
+
267
+ ExpandedNameTable exnt = m_expandedNameTable;
268
+
269
+ // %TBD% Nodes created with the old non-namespace-aware DOM
270
+ // calls createElement() and createAttribute() will never have a
271
+ // localname. That will cause their expandedNameID to be just the
272
+ // nodeType... which will keep them from being matched
273
+ // successfully by name. Since the DOM makes no promise that
274
+ // those will participate in namespace processing, this is
275
+ // officially accepted as Not Our Fault. But it might be nice to
276
+ // issue a diagnostic message!
277
+ if (node.getLocalName() == null &&
278
+ (type == Node.ELEMENT_NODE || type == Node.ATTRIBUTE_NODE)) {
279
+ // warning("DOM 'level 1' node "+node.getNodeName()+" won't be mapped properly in DOM2DTM.");
280
+ }
281
+
282
+ int expandedNameID = (null != localName)
283
+ ? exnt.getExpandedTypeID(nsURI, localName, type) :
284
+ exnt.getExpandedTypeID(type);
285
+
286
+ m_exptype.setElementAt(expandedNameID, nodeIndex);
287
+
288
+ indexNode(expandedNameID, nodeIndex);
289
+
290
+ if (DTM.NULL != previousSibling) {
291
+ m_nextsib.setElementAt(nodeIndex, previousSibling);
292
+ }
293
+
294
+ // This should be done after m_exptype has been set, and probably should
295
+ // always be the last thing we do
296
+ if (type == DTM.NAMESPACE_NODE) {
297
+ declareNamespaceInContext(parentIndex, nodeIndex);
298
+ }
299
+
300
+ return nodeIndex;
301
+ }
302
+
303
+ /**
304
+ * Get the number of nodes that have been added.
305
+ */
306
+ public int
307
+ getNumberOfNodes()
308
+ {
309
+ return m_nodes.size();
310
+ }
311
+
312
+ /**
313
+ * This method iterates to the next node that will be added to the table.
314
+ * Each call to this method adds a new node to the table, unless the end
315
+ * is reached, in which case it returns false.
316
+ *
317
+ * @return true if a next node is found, or false if
318
+ * there are no more nodes.
319
+ */
320
+ protected boolean
321
+ nextNode()
322
+ {
323
+ // Non-recursive one-fetch-at-a-time depth-first traversal with
324
+ // attribute/namespace nodes and white-space stripping.
325
+ // Navigating the DOM is simple, navigating the DTM is simple;
326
+ // keeping track of both at once is a trifle baroque but at least
327
+ // we've avoided most of the special cases.
328
+ if (m_nodesAreProcessed) {
329
+ return false;
330
+ }
331
+
332
+ // %REVIEW% Is this local copy Really Useful from a performance
333
+ // point of view? Or is this a false microoptimization?
334
+ Node pos = m_pos;
335
+ Node next = null;
336
+ int nexttype = NULL;
337
+
338
+ // Navigate DOM tree
339
+ do {
340
+ // Look down to first child.
341
+ if (pos.hasChildNodes()) {
342
+ next = pos.getFirstChild();
343
+
344
+ // %REVIEW% There's probably a more elegant way to skip
345
+ // the doctype. (Just let it go and Suppress it?)
346
+ if (next != null && DOCUMENT_TYPE_NODE == next.getNodeType()) {
347
+ next = next.getNextSibling();
348
+ }
349
+
350
+ // Push DTM context -- except for children of Entity References,
351
+ // which have no DTM equivalent and cause no DTM navigation.
352
+ if (ENTITY_REFERENCE_NODE != pos.getNodeType()) {
353
+ m_last_parent = m_last_kid;
354
+ m_last_kid = NULL;
355
+ // Whitespace-handler context stacking
356
+ if (null != m_wsfilter) {
357
+ short wsv =
358
+ m_wsfilter.getShouldStripSpace(makeNodeHandle(m_last_parent), this);
359
+ boolean shouldStrip = (DTMWSFilter.INHERIT == wsv)
360
+ ? getShouldStripWhitespace()
361
+ : (DTMWSFilter.STRIP == wsv);
362
+ pushShouldStripWhitespace(shouldStrip);
363
+ } // if(m_wsfilter)
364
+ }
365
+ }
366
+
367
+ // If that fails, look up and right (but not past root!)
368
+ else {
369
+ if (m_last_kid != NULL) {
370
+ // Last node posted at this level had no more children
371
+ // If it has _no_ children, we need to record that.
372
+ if (m_firstch.elementAt(m_last_kid) == NOTPROCESSED) {
373
+ m_firstch.setElementAt(NULL, m_last_kid);
374
+ }
375
+ }
376
+
377
+ while (m_last_parent != NULL) {
378
+ // %REVIEW% There's probably a more elegant way to
379
+ // skip the doctype. (Just let it go and Suppress it?)
380
+ next = pos.getNextSibling();
381
+ if (next != null && DOCUMENT_TYPE_NODE == next.getNodeType()) {
382
+ next = next.getNextSibling();
383
+ }
384
+
385
+ if (next != null) {
386
+ break; // Found it!
387
+ }
388
+
389
+ // No next-sibling found. Pop the DOM.
390
+ pos = pos.getParentNode();
391
+ // if(pos==null)
392
+ // {
393
+ // // %TBD% Should never arise, but I want to be sure of that...
394
+ // if(JJK_DEBUG)
395
+ // {
396
+ // System.out.println("***** DOM2DTM Pop Control Flow problem");
397
+ // for(;;); // Freeze right here!
398
+ // }
399
+ // }
400
+
401
+ // The only parents in the DTM are Elements. However,
402
+ // the DOM could contain EntityReferences. If we
403
+ // encounter one, pop it _without_ popping DTM.
404
+ if (pos != null && ENTITY_REFERENCE_NODE == pos.getNodeType()) {
405
+ // // Nothing needs doing
406
+ // if(JJK_DEBUG) System.out.println("***** DOM2DTM popping EntRef");
407
+ } else {
408
+ popShouldStripWhitespace();
409
+ // Fix and pop DTM
410
+ if (m_last_kid == NULL) {
411
+ m_firstch.setElementAt(NULL, m_last_parent); // Popping from an element
412
+ } else {
413
+ m_nextsib.setElementAt(NULL, m_last_kid); // Popping from anything else
414
+ }
415
+ m_last_parent = m_parent.elementAt(m_last_kid = m_last_parent);
416
+ }
417
+ }
418
+ if (m_last_parent == NULL) { next = null; }
419
+ }
420
+
421
+ if (next != null) { nexttype = next.getNodeType(); }
422
+
423
+ // If it's an entity ref, advance past it.
424
+ //
425
+ // %REVIEW% Should we let this out the door and just suppress it?
426
+ // More work, but simpler code, more likely to be correct, and
427
+ // it doesn't happen very often. We'd get rid of the loop too.
428
+ if (ENTITY_REFERENCE_NODE == nexttype) { pos = next; }
429
+ } while (ENTITY_REFERENCE_NODE == nexttype);
430
+
431
+ // Did we run out of the tree?
432
+ if (next == null) {
433
+ m_nextsib.setElementAt(NULL, 0);
434
+ m_nodesAreProcessed = true;
435
+ m_pos = null;
436
+
437
+ // if(JJK_DEBUG)
438
+ // {
439
+ // System.out.println("***** DOM2DTM Crosscheck:");
440
+ // for(int i=0;i<m_nodes.size();++i)
441
+ // System.out.println(i+":\t"+m_firstch.elementAt(i)+"\t"+m_nextsib.elementAt(i));
442
+ // }
443
+
444
+ return false;
445
+ }
446
+
447
+ // Text needs some special handling:
448
+ //
449
+ // DTM may skip whitespace. This is handled by the suppressNode flag, which
450
+ // when true will keep the DTM node from being created.
451
+ //
452
+ // DTM only directly records the first DOM node of any logically-contiguous
453
+ // sequence. The lastTextNode value will be set to the last node in the
454
+ // contiguous sequence, and -- AFTER the DTM addNode -- can be used to
455
+ // advance next over this whole block. Should be simpler than special-casing
456
+ // the above loop for "Was the logically-preceeding sibling a text node".
457
+ //
458
+ // Finally, a DTM node should be considered a CDATASection only if all the
459
+ // contiguous text it covers is CDATASections. The first Text should
460
+ // force DTM to Text.
461
+
462
+ boolean suppressNode = false;
463
+ Node lastTextNode = null;
464
+
465
+ nexttype = next.getNodeType();
466
+
467
+ // nexttype=pos.getNodeType();
468
+ if (TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype) {
469
+ // If filtering, initially assume we're going to suppress the node
470
+ suppressNode = ((null != m_wsfilter) && getShouldStripWhitespace());
471
+
472
+ // Scan logically contiguous text (siblings, plus "flattening"
473
+ // of entity reference boundaries).
474
+ if (suppressNode) {
475
+ Node n = next;
476
+ while (n != null) {
477
+ lastTextNode = n;
478
+ // Any Text node means DTM considers it all Text
479
+ if (TEXT_NODE == n.getNodeType()) { nexttype = TEXT_NODE; }
480
+ // Any non-whitespace in this sequence blocks whitespace
481
+ // suppression
482
+ suppressNode &= XMLCharacterRecognizer.isWhiteSpace(n.getNodeValue());
483
+
484
+ n = logicalNextDOMTextNode(n);
485
+ }
486
+ }
487
+ }
488
+
489
+ // Special handling for PIs: Some DOMs represent the XML
490
+ // Declaration as a PI. This is officially incorrect, per the DOM
491
+ // spec, but is considered a "wrong but tolerable" temporary
492
+ // workaround pending proper handling of these fields in DOM Level
493
+ // 3. We want to recognize and reject that case.
494
+ else if (PROCESSING_INSTRUCTION_NODE == nexttype) {
495
+ suppressNode = (pos.getNodeName().toLowerCase().equals("xml"));
496
+ }
497
+
498
+
499
+ if (!suppressNode) {
500
+ // Inserting next. NOTE that we force the node type; for
501
+ // coalesced Text, this records CDATASections adjacent to
502
+ // ordinary Text as Text.
503
+ int nextindex = addNode(next, m_last_parent, m_last_kid, nexttype);
504
+
505
+ m_last_kid = nextindex;
506
+
507
+ if (ELEMENT_NODE == nexttype) {
508
+ int attrIndex = NULL; // start with no previous sib
509
+ // Process attributes _now_, rather than waiting.
510
+ // Simpler control flow, makes NS cache available immediately.
511
+ NamedNodeMap attrs = next.getAttributes();
512
+ int attrsize = (attrs == null) ? 0 : attrs.getLength();
513
+ if (attrsize > 0) {
514
+ for (int i = 0; i < attrsize; ++i) {
515
+ // No need to force nodetype in this case;
516
+ // addNode() will take care of switching it from
517
+ // Attr to Namespace if necessary.
518
+ attrIndex = addNode(attrs.item(i), nextindex, attrIndex, NULL);
519
+ m_firstch.setElementAt(DTM.NULL, attrIndex);
520
+
521
+ // If the xml: prefix is explicitly declared
522
+ // we don't need to synthesize one.
523
+ //
524
+ // NOTE that XML Namespaces were not originally
525
+ // defined as being namespace-aware (grrr), and
526
+ // while the W3C is planning to fix this it's
527
+ // safer for now to test the QName and trust the
528
+ // parsers to prevent anyone from redefining the
529
+ // reserved xmlns: prefix
530
+ if (!m_processedFirstElement
531
+ && "xmlns:xml".equals(attrs.item(i).getNodeName())) {
532
+ m_processedFirstElement = true;
533
+ }
534
+ }
535
+ // Terminate list of attrs, and make sure they aren't
536
+ // considered children of the element
537
+ } // if attrs exist
538
+ if (!m_processedFirstElement) {
539
+ // The DOM might not have an explicit declaration for the
540
+ // implicit "xml:" prefix, but the XPath data model
541
+ // requires that this appear as a Namespace Node so we
542
+ // have to synthesize one. You can think of this as
543
+ // being a default attribute defined by the XML
544
+ // Namespaces spec rather than by the DTD.
545
+ attrIndex = addNode(new DOM2DTMdefaultNamespaceDeclarationNode(
546
+ (Element)next, "xml", NAMESPACE_DECL_NS,
547
+ makeNodeHandle(((attrIndex == NULL) ? nextindex : attrIndex) + 1)
548
+ ),
549
+ nextindex, attrIndex, NULL);
550
+ m_firstch.setElementAt(DTM.NULL, attrIndex);
551
+ m_processedFirstElement = true;
552
+ }
553
+ if (attrIndex != NULL) { m_nextsib.setElementAt(DTM.NULL, attrIndex); }
554
+ } //if(ELEMENT_NODE)
555
+ } // (if !suppressNode)
556
+
557
+ // Text postprocessing: Act on values stored above
558
+ //if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
559
+ //{
560
+ // %TBD% If nexttype was forced to TEXT, patch the DTM node
561
+ if (lastTextNode != null) { next = lastTextNode; } // Advance the DOM cursor over contiguous text
562
+ //}
563
+
564
+ // Remember where we left off.
565
+ m_pos = next;
566
+ return true;
567
+ }
568
+
569
+
570
+ /**
571
+ * Return an DOM node for the given node.
572
+ *
573
+ * @param nodeHandle The node ID.
574
+ *
575
+ * @return A node representation of the DTM node.
576
+ */
577
+ public Node
578
+ getNode(int nodeHandle)
579
+ {
580
+
581
+ int identity = makeNodeIdentity(nodeHandle);
582
+
583
+ return m_nodes.get(identity);
584
+ }
585
+
586
+ /**
587
+ * Get a Node from an identity index.
588
+ *
589
+ * NEEDSDOC @param nodeIdentity
590
+ *
591
+ * NEEDSDOC ($objectName$) @return
592
+ */
593
+ protected Node
594
+ lookupNode(int nodeIdentity)
595
+ {
596
+ return m_nodes.get(nodeIdentity);
597
+ }
598
+
599
+ /**
600
+ * Get the next node identity value in the list, and call the iterator
601
+ * if it hasn't been added yet.
602
+ *
603
+ * @param identity The node identity (index).
604
+ * @return identity+1, or DTM.NULL.
605
+ */
606
+ protected int
607
+ getNextNodeIdentity(int identity)
608
+ {
609
+
610
+ identity += 1;
611
+
612
+ if (identity >= m_nodes.size()) {
613
+ if (!nextNode()) {
614
+ identity = DTM.NULL;
615
+ }
616
+ }
617
+
618
+ return identity;
619
+ }
620
+
621
+ /**
622
+ * Get the handle from a Node.
623
+ * <p>%OPT% This will be pretty slow.</p>
624
+ *
625
+ * <p>%OPT% An XPath-like search (walk up DOM to root, tracking path;
626
+ * walk down DTM reconstructing path) might be considerably faster
627
+ * on later nodes in large documents. That might also imply improving
628
+ * this call to handle nodes which would be in this DTM but
629
+ * have not yet been built, which might or might not be a Good Thing.</p>
630
+ *
631
+ * %REVIEW% This relies on being able to test node-identity via
632
+ * object-identity. DTM2DOM proxying is a great example of a case where
633
+ * that doesn't work. DOM Level 3 will provide the isSameNode() method
634
+ * to fix that, but until then this is going to be flaky.
635
+ *
636
+ * @param node A node, which may be null.
637
+ *
638
+ * @return The node handle or <code>DTM.NULL</code>.
639
+ */
640
+ public int
641
+ getHandleFromNode(Node node)
642
+ {
643
+ if (null != node) {
644
+ int len = m_nodes.size();
645
+ boolean isMore;
646
+ int i = 0;
647
+ do {
648
+ for (; i < len; i++) {
649
+ if (m_nodes.get(i) == node) {
650
+ return makeNodeHandle(i);
651
+ }
652
+ }
653
+
654
+ isMore = nextNode();
655
+
656
+ len = m_nodes.size();
657
+
658
+ } while (isMore || i < len);
659
+ }
660
+
661
+ return DTM.NULL;
662
+ }
663
+
664
+ /** Get the handle from a Node. This is a more robust version of
665
+ * getHandleFromNode, intended to be usable by the public.
666
+ *
667
+ * <p>%OPT% This will be pretty slow.</p>
668
+ *
669
+ * %REVIEW% This relies on being able to test node-identity via
670
+ * object-identity. DTM2DOM proxying is a great example of a case where
671
+ * that doesn't work. DOM Level 3 will provide the isSameNode() method
672
+ * to fix that, but until then this is going to be flaky.
673
+ *
674
+ * @param node A node, which may be null.
675
+ *
676
+ * @return The node handle or <code>DTM.NULL</code>. */
677
+ public int
678
+ getHandleOfNode(Node node)
679
+ {
680
+ if (null != node) {
681
+ // Is Node actually within the same document? If not, don't search!
682
+ // This would be easier if m_root was always the Document node, but
683
+ // we decided to allow wrapping a DTM around a subtree.
684
+ if ((m_root == node) ||
685
+ (m_root.getNodeType() == DOCUMENT_NODE &&
686
+ m_root == node.getOwnerDocument()) ||
687
+ (m_root.getNodeType() != DOCUMENT_NODE &&
688
+ m_root.getOwnerDocument() == node.getOwnerDocument())
689
+ ) {
690
+ // If node _is_ in m_root's tree, find its handle
691
+ //
692
+ // %OPT% This check may be improved significantly when DOM
693
+ // Level 3 nodeKey and relative-order tests become
694
+ // available!
695
+ for (Node cursor = node;
696
+ cursor != null;
697
+ cursor =
698
+ (cursor.getNodeType() != ATTRIBUTE_NODE)
699
+ ? cursor.getParentNode()
700
+ : ((org.w3c.dom.Attr)cursor).getOwnerElement()) {
701
+ if (cursor == m_root)
702
+ // We know this node; find its handle.
703
+ {
704
+ return getHandleFromNode(node);
705
+ }
706
+ } // for ancestors of node
707
+ } // if node and m_root in same Document
708
+ } // if node!=null
709
+
710
+ return DTM.NULL;
711
+ }
712
+
713
+ /**
714
+ * Retrieves an attribute node by by qualified name and namespace URI.
715
+ *
716
+ * @param nodeHandle int Handle of the node upon which to look up this attribute..
717
+ * @param namespaceURI The namespace URI of the attribute to
718
+ * retrieve, or null.
719
+ * @param name The local name of the attribute to
720
+ * retrieve.
721
+ * @return The attribute node handle with the specified name (
722
+ * <code>nodeName</code>) or <code>DTM.NULL</code> if there is no such
723
+ * attribute.
724
+ */
725
+ public int
726
+ getAttributeNode(int nodeHandle, String namespaceURI,
727
+ String name)
728
+ {
729
+
730
+ // %OPT% This is probably slower than it needs to be.
731
+ if (null == namespaceURI) {
732
+ namespaceURI = "";
733
+ }
734
+
735
+ int type = getNodeType(nodeHandle);
736
+
737
+ if (DTM.ELEMENT_NODE == type) {
738
+
739
+ // Assume that attributes immediately follow the element.
740
+ int identity = makeNodeIdentity(nodeHandle);
741
+
742
+ while (DTM.NULL != (identity = getNextNodeIdentity(identity))) {
743
+ // Assume this can not be null.
744
+ type = _type(identity);
745
+
746
+ // %REVIEW%
747
+ // Should namespace nodes be retrievable DOM-style as attrs?
748
+ // If not we need a separate function... which may be desirable
749
+ // architecturally, but which is ugly from a code point of view.
750
+ // (If we REALLY insist on it, this code should become a subroutine
751
+ // of both -- retrieve the node, then test if the type matches
752
+ // what you're looking for.)
753
+ if (type == DTM.ATTRIBUTE_NODE || type == DTM.NAMESPACE_NODE) {
754
+ Node node = lookupNode(identity);
755
+ String nodeuri = node.getNamespaceURI();
756
+
757
+ if (null == nodeuri) {
758
+ nodeuri = "";
759
+ }
760
+
761
+ String nodelocalname = node.getLocalName();
762
+
763
+ if (nodeuri.equals(namespaceURI) && name.equals(nodelocalname)) {
764
+ return makeNodeHandle(identity);
765
+ }
766
+ }
767
+
768
+ else { // if (DTM.NAMESPACE_NODE != type)
769
+ break;
770
+ }
771
+ }
772
+ }
773
+
774
+ return DTM.NULL;
775
+ }
776
+
777
+ /**
778
+ * Get the string-value of a node as a String object
779
+ * (see http://www.w3.org/TR/xpath#data-model
780
+ * for the definition of a node's string-value).
781
+ *
782
+ * @param nodeHandle The node ID.
783
+ *
784
+ * @return A string object that represents the string-value of the given node.
785
+ */
786
+ public XMLString
787
+ getStringValue(int nodeHandle)
788
+ {
789
+
790
+ int type = getNodeType(nodeHandle);
791
+ Node node = getNode(nodeHandle);
792
+ // %TBD% If an element only has one text node, we should just use it
793
+ // directly.
794
+ if (DTM.ELEMENT_NODE == type || DTM.DOCUMENT_NODE == type
795
+ || DTM.DOCUMENT_FRAGMENT_NODE == type) {
796
+ FastStringBuffer buf = StringBufferPool.get();
797
+ String s;
798
+
799
+ try {
800
+ getNodeData(node, buf);
801
+
802
+ s = (buf.length() > 0) ? buf.toString() : "";
803
+ } finally {
804
+ StringBufferPool.free(buf);
805
+ }
806
+
807
+ return m_xstrf.newstr(s);
808
+ } else if (TEXT_NODE == type || CDATA_SECTION_NODE == type) {
809
+ // If this is a DTM text node, it may be made of multiple DOM text
810
+ // nodes -- including navigating into Entity References. DOM2DTM
811
+ // records the first node in the sequence and requires that we
812
+ // pick up the others when we retrieve the DTM node's value.
813
+ //
814
+ // %REVIEW% DOM Level 3 is expected to add a "whole text"
815
+ // retrieval method which performs this function for us.
816
+ FastStringBuffer buf = StringBufferPool.get();
817
+ while (node != null) {
818
+ buf.append(node.getNodeValue());
819
+ node = logicalNextDOMTextNode(node);
820
+ }
821
+ String s = (buf.length() > 0) ? buf.toString() : "";
822
+ StringBufferPool.free(buf);
823
+ return m_xstrf.newstr(s);
824
+ } else {
825
+ return m_xstrf.newstr(node.getNodeValue());
826
+ }
827
+ }
828
+
829
+ /**
830
+ * Determine if the string-value of a node is whitespace
831
+ *
832
+ * @param nodeHandle The node Handle.
833
+ *
834
+ * @return Return true if the given node is whitespace.
835
+ */
836
+ public boolean
837
+ isWhitespace(int nodeHandle)
838
+ {
839
+ int type = getNodeType(nodeHandle);
840
+ Node node = getNode(nodeHandle);
841
+ if (TEXT_NODE == type || CDATA_SECTION_NODE == type) {
842
+ // If this is a DTM text node, it may be made of multiple DOM text
843
+ // nodes -- including navigating into Entity References. DOM2DTM
844
+ // records the first node in the sequence and requires that we
845
+ // pick up the others when we retrieve the DTM node's value.
846
+ //
847
+ // %REVIEW% DOM Level 3 is expected to add a "whole text"
848
+ // retrieval method which performs this function for us.
849
+ FastStringBuffer buf = StringBufferPool.get();
850
+ while (node != null) {
851
+ buf.append(node.getNodeValue());
852
+ node = logicalNextDOMTextNode(node);
853
+ }
854
+ boolean b = buf.isWhitespace(0, buf.length());
855
+ StringBufferPool.free(buf);
856
+ return b;
857
+ }
858
+ return false;
859
+ }
860
+
861
+ /**
862
+ * Retrieve the text content of a DOM subtree, appending it into a
863
+ * user-supplied FastStringBuffer object. Note that attributes are
864
+ * not considered part of the content of an element.
865
+ * <p>
866
+ * There are open questions regarding whitespace stripping.
867
+ * Currently we make no special effort in that regard, since the standard
868
+ * DOM doesn't yet provide DTD-based information to distinguish
869
+ * whitespace-in-element-context from genuine #PCDATA. Note that we
870
+ * should probably also consider xml:space if/when we address this.
871
+ * DOM Level 3 may solve the problem for us.
872
+ * <p>
873
+ * %REVIEW% Actually, since this method operates on the DOM side of the
874
+ * fence rather than the DTM side, it SHOULDN'T do
875
+ * any special handling. The DOM does what the DOM does; if you want
876
+ * DTM-level abstractions, use DTM-level methods.
877
+ *
878
+ * @param node Node whose subtree is to be walked, gathering the
879
+ * contents of all Text or CDATASection nodes.
880
+ * @param buf FastStringBuffer into which the contents of the text
881
+ * nodes are to be concatenated.
882
+ */
883
+ protected static void
884
+ getNodeData(Node node, FastStringBuffer buf)
885
+ {
886
+
887
+ switch (node.getNodeType()) {
888
+ case Node.DOCUMENT_FRAGMENT_NODE :
889
+ case Node.DOCUMENT_NODE :
890
+ case Node.ELEMENT_NODE : {
891
+ for (Node child = node.getFirstChild(); null != child;
892
+ child = child.getNextSibling()) {
893
+ getNodeData(child, buf);
894
+ }
895
+ }
896
+ break;
897
+ case Node.TEXT_NODE :
898
+ case Node.CDATA_SECTION_NODE :
899
+ case Node.ATTRIBUTE_NODE : // Never a child but might be our starting node
900
+ buf.append(node.getNodeValue());
901
+ break;
902
+ case Node.PROCESSING_INSTRUCTION_NODE :
903
+ // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
904
+ break;
905
+ default :
906
+ // ignore
907
+ break;
908
+ }
909
+ }
910
+
911
+ /**
912
+ * Given a node handle, return its DOM-style node name. This will
913
+ * include names such as #text or #document.
914
+ *
915
+ * @param nodeHandle the id of the node.
916
+ * @return String Name of this node, which may be an empty string.
917
+ * %REVIEW% Document when empty string is possible...
918
+ * %REVIEW-COMMENT% It should never be empty, should it?
919
+ */
920
+ public String
921
+ getNodeName(int nodeHandle)
922
+ {
923
+
924
+ Node node = getNode(nodeHandle);
925
+
926
+ // Assume non-null.
927
+ return node.getNodeName();
928
+ }
929
+
930
+ /**
931
+ * Given a node handle, return the XPath node name. This should be
932
+ * the name as described by the XPath data model, NOT the DOM-style
933
+ * name.
934
+ *
935
+ * @param nodeHandle the id of the node.
936
+ * @return String Name of this node, which may be an empty string.
937
+ */
938
+ public String
939
+ getNodeNameX(int nodeHandle)
940
+ {
941
+
942
+ String name;
943
+ short type = getNodeType(nodeHandle);
944
+
945
+ switch (type) {
946
+ case DTM.NAMESPACE_NODE : {
947
+ Node node = getNode(nodeHandle);
948
+
949
+ // assume not null.
950
+ name = node.getNodeName();
951
+ if (name.startsWith("xmlns:")) {
952
+ name = QName.getLocalPart(name);
953
+ } else if (name.equals("xmlns")) {
954
+ name = "";
955
+ }
956
+ }
957
+ break;
958
+ case DTM.ATTRIBUTE_NODE :
959
+ case DTM.ELEMENT_NODE :
960
+ case DTM.ENTITY_REFERENCE_NODE :
961
+ case DTM.PROCESSING_INSTRUCTION_NODE : {
962
+ Node node = getNode(nodeHandle);
963
+
964
+ // assume not null.
965
+ name = node.getNodeName();
966
+ }
967
+ break;
968
+ default :
969
+ name = "";
970
+ }
971
+
972
+ return name;
973
+ }
974
+
975
+ /**
976
+ * Given a node handle, return its XPath-style localname.
977
+ * (As defined in Namespaces, this is the portion of the name after any
978
+ * colon character).
979
+ *
980
+ * @param nodeHandle the id of the node.
981
+ * @return String Local name of this node.
982
+ */
983
+ public String
984
+ getLocalName(int nodeHandle)
985
+ {
986
+ // if(JJK_NEWCODE)
987
+ // {
988
+ int id = makeNodeIdentity(nodeHandle);
989
+ if (NULL == id) { return null; }
990
+ Node newnode = m_nodes.get(id);
991
+ String newname = newnode.getLocalName();
992
+ if (null == newname) {
993
+ // XSLT treats PIs, and possibly other things, as having QNames.
994
+ String qname = newnode.getNodeName();
995
+ if ('#' == qname.charAt(0)) {
996
+ // Match old default for this function
997
+ // This conversion may or may not be necessary
998
+ newname = "";
999
+ } else {
1000
+ int index = qname.indexOf(':');
1001
+ newname = (index < 0) ? qname : qname.substring(index + 1);
1002
+ }
1003
+ }
1004
+ return newname;
1005
+ // }
1006
+ // else
1007
+ // {
1008
+ // String name;
1009
+ // short type = getNodeType(nodeHandle);
1010
+ // switch (type)
1011
+ // {
1012
+ // case DTM.ATTRIBUTE_NODE :
1013
+ // case DTM.ELEMENT_NODE :
1014
+ // case DTM.ENTITY_REFERENCE_NODE :
1015
+ // case DTM.NAMESPACE_NODE :
1016
+ // case DTM.PROCESSING_INSTRUCTION_NODE :
1017
+ // {
1018
+ // Node node = getNode(nodeHandle);
1019
+ //
1020
+ // // assume not null.
1021
+ // name = node.getLocalName();
1022
+ //
1023
+ // if (null == name)
1024
+ // {
1025
+ // String qname = node.getNodeName();
1026
+ // int index = qname.indexOf(':');
1027
+ //
1028
+ // name = (index < 0) ? qname : qname.substring(index + 1);
1029
+ // }
1030
+ // }
1031
+ // break;
1032
+ // default :
1033
+ // name = "";
1034
+ // }
1035
+ // return name;
1036
+ // }
1037
+ }
1038
+
1039
+ /**
1040
+ * Given a namespace handle, return the prefix that the namespace decl is
1041
+ * mapping.
1042
+ * Given a node handle, return the prefix used to map to the namespace.
1043
+ *
1044
+ * <p> %REVIEW% Are you sure you want "" for no prefix? </p>
1045
+ * <p> %REVIEW-COMMENT% I think so... not totally sure. -sb </p>
1046
+ *
1047
+ * @param nodeHandle the id of the node.
1048
+ * @return String prefix of this node's name, or "" if no explicit
1049
+ * namespace prefix was given.
1050
+ */
1051
+ public String
1052
+ getPrefix(int nodeHandle)
1053
+ {
1054
+
1055
+ String prefix;
1056
+ short type = getNodeType(nodeHandle);
1057
+
1058
+ switch (type) {
1059
+ case DTM.NAMESPACE_NODE : {
1060
+ Node node = getNode(nodeHandle);
1061
+
1062
+ // assume not null.
1063
+ String qname = node.getNodeName();
1064
+ int index = qname.indexOf(':');
1065
+
1066
+ prefix = (index < 0) ? "" : qname.substring(index + 1);
1067
+ }
1068
+ break;
1069
+ case DTM.ATTRIBUTE_NODE :
1070
+ case DTM.ELEMENT_NODE : {
1071
+ Node node = getNode(nodeHandle);
1072
+
1073
+ // assume not null.
1074
+ String qname = node.getNodeName();
1075
+ int index = qname.indexOf(':');
1076
+
1077
+ prefix = (index < 0) ? "" : qname.substring(0, index);
1078
+ }
1079
+ break;
1080
+ default :
1081
+ prefix = "";
1082
+ }
1083
+
1084
+ return prefix;
1085
+ }
1086
+
1087
+ /**
1088
+ * Given a node handle, return its DOM-style namespace URI
1089
+ * (As defined in Namespaces, this is the declared URI which this node's
1090
+ * prefix -- or default in lieu thereof -- was mapped to.)
1091
+ *
1092
+ * <p>%REVIEW% Null or ""? -sb</p>
1093
+ *
1094
+ * @param nodeHandle the id of the node.
1095
+ * @return String URI value of this node's namespace, or null if no
1096
+ * namespace was resolved.
1097
+ */
1098
+ public String
1099
+ getNamespaceURI(int nodeHandle)
1100
+ {
1101
+ // if(JJK_NEWCODE)
1102
+ // {
1103
+ int id = makeNodeIdentity(nodeHandle);
1104
+ if (id == NULL) { return null; }
1105
+ Node node = m_nodes.get(id);
1106
+ return node.getNamespaceURI();
1107
+ // }
1108
+ // else
1109
+ // {
1110
+ // String nsuri;
1111
+ // short type = getNodeType(nodeHandle);
1112
+ //
1113
+ // switch (type)
1114
+ // {
1115
+ // case DTM.ATTRIBUTE_NODE :
1116
+ // case DTM.ELEMENT_NODE :
1117
+ // case DTM.ENTITY_REFERENCE_NODE :
1118
+ // case DTM.NAMESPACE_NODE :
1119
+ // case DTM.PROCESSING_INSTRUCTION_NODE :
1120
+ // {
1121
+ // Node node = getNode(nodeHandle);
1122
+ //
1123
+ // // assume not null.
1124
+ // nsuri = node.getNamespaceURI();
1125
+ //
1126
+ // // %TBD% Handle DOM1?
1127
+ // }
1128
+ // break;
1129
+ // default :
1130
+ // nsuri = null;
1131
+ // }
1132
+ //
1133
+ // return nsuri;
1134
+ // }
1135
+ }
1136
+
1137
+ /** Utility function: Given a DOM Text node, determine whether it is
1138
+ * logically followed by another Text or CDATASection node. This may
1139
+ * involve traversing into Entity References.
1140
+ *
1141
+ * %REVIEW% DOM Level 3 is expected to add functionality which may
1142
+ * allow us to retire this.
1143
+ */
1144
+ private Node
1145
+ logicalNextDOMTextNode(Node n)
1146
+ {
1147
+ Node p = n.getNextSibling();
1148
+ if (p == null) {
1149
+ // Walk out of any EntityReferenceNodes that ended with text
1150
+ for (n = n.getParentNode();
1151
+ n != null && ENTITY_REFERENCE_NODE == n.getNodeType();
1152
+ n = n.getParentNode()) {
1153
+ p = n.getNextSibling();
1154
+ if (p != null) {
1155
+ break;
1156
+ }
1157
+ }
1158
+ }
1159
+ n = p;
1160
+ while (n != null && ENTITY_REFERENCE_NODE == n.getNodeType()) {
1161
+ // Walk into any EntityReferenceNodes that start with text
1162
+ if (n.hasChildNodes()) {
1163
+ n = n.getFirstChild();
1164
+ } else {
1165
+ n = n.getNextSibling();
1166
+ }
1167
+ }
1168
+ if (n != null) {
1169
+ // Found a logical next sibling. Is it text?
1170
+ int ntype = n.getNodeType();
1171
+ if (TEXT_NODE != ntype && CDATA_SECTION_NODE != ntype) {
1172
+ n = null;
1173
+ }
1174
+ }
1175
+ return n;
1176
+ }
1177
+
1178
+ /**
1179
+ * Given a node handle, return its node value. This is mostly
1180
+ * as defined by the DOM, but may ignore some conveniences.
1181
+ * <p>
1182
+ *
1183
+ * @param nodeHandle The node id.
1184
+ * @return String Value of this node, or null if not
1185
+ * meaningful for this node type.
1186
+ */
1187
+ public String
1188
+ getNodeValue(int nodeHandle)
1189
+ {
1190
+ // The _type(nodeHandle) call was taking the lion's share of our
1191
+ // time, and was wrong anyway since it wasn't coverting handle to
1192
+ // identity. Inlined it.
1193
+ int type = _exptype(makeNodeIdentity(nodeHandle));
1194
+ type = (NULL != type) ? getNodeType(nodeHandle) : NULL;
1195
+
1196
+ if (TEXT_NODE != type && CDATA_SECTION_NODE != type) {
1197
+ return getNode(nodeHandle).getNodeValue();
1198
+ }
1199
+
1200
+ // If this is a DTM text node, it may be made of multiple DOM text
1201
+ // nodes -- including navigating into Entity References. DOM2DTM
1202
+ // records the first node in the sequence and requires that we
1203
+ // pick up the others when we retrieve the DTM node's value.
1204
+ //
1205
+ // %REVIEW% DOM Level 3 is expected to add a "whole text"
1206
+ // retrieval method which performs this function for us.
1207
+ Node node = getNode(nodeHandle);
1208
+ Node n = logicalNextDOMTextNode(node);
1209
+ if (n == null) {
1210
+ return node.getNodeValue();
1211
+ }
1212
+
1213
+ FastStringBuffer buf = StringBufferPool.get();
1214
+ buf.append(node.getNodeValue());
1215
+ while (n != null) {
1216
+ buf.append(n.getNodeValue());
1217
+ n = logicalNextDOMTextNode(n);
1218
+ }
1219
+ String s = (buf.length() > 0) ? buf.toString() : "";
1220
+ StringBufferPool.free(buf);
1221
+ return s;
1222
+ }
1223
+
1224
+ /**
1225
+ * A document type declaration information item has the following properties:
1226
+ *
1227
+ * 1. [system identifier] The system identifier of the external subset, if
1228
+ * it exists. Otherwise this property has no value.
1229
+ *
1230
+ * @return the system identifier String object, or null if there is none.
1231
+ */
1232
+ public String
1233
+ getDocumentTypeDeclarationSystemIdentifier()
1234
+ {
1235
+
1236
+ Document doc;
1237
+
1238
+ if (m_root.getNodeType() == Node.DOCUMENT_NODE) {
1239
+ doc = (Document) m_root;
1240
+ } else {
1241
+ doc = m_root.getOwnerDocument();
1242
+ }
1243
+
1244
+ if (null != doc) {
1245
+ DocumentType dtd = doc.getDoctype();
1246
+
1247
+ if (null != dtd) {
1248
+ return dtd.getSystemId();
1249
+ }
1250
+ }
1251
+
1252
+ return null;
1253
+ }
1254
+
1255
+ /**
1256
+ * Return the public identifier of the external subset,
1257
+ * normalized as described in 4.2.2 External Entities [XML]. If there is
1258
+ * no external subset or if it has no public identifier, this property
1259
+ * has no value.
1260
+ *
1261
+ * @return the public identifier String object, or null if there is none.
1262
+ */
1263
+ public String
1264
+ getDocumentTypeDeclarationPublicIdentifier()
1265
+ {
1266
+
1267
+ Document doc;
1268
+
1269
+ if (m_root.getNodeType() == Node.DOCUMENT_NODE) {
1270
+ doc = (Document) m_root;
1271
+ } else {
1272
+ doc = m_root.getOwnerDocument();
1273
+ }
1274
+
1275
+ if (null != doc) {
1276
+ DocumentType dtd = doc.getDoctype();
1277
+
1278
+ if (null != dtd) {
1279
+ return dtd.getPublicId();
1280
+ }
1281
+ }
1282
+
1283
+ return null;
1284
+ }
1285
+
1286
+ /**
1287
+ * Returns the <code>Element</code> whose <code>ID</code> is given by
1288
+ * <code>elementId</code>. If no such element exists, returns
1289
+ * <code>DTM.NULL</code>. Behavior is not defined if more than one element
1290
+ * has this <code>ID</code>. Attributes (including those
1291
+ * with the name "ID") are not of type ID unless so defined by DTD/Schema
1292
+ * information available to the DTM implementation.
1293
+ * Implementations that do not know whether attributes are of type ID or
1294
+ * not are expected to return <code>DTM.NULL</code>.
1295
+ *
1296
+ * <p>%REVIEW% Presumably IDs are still scoped to a single document,
1297
+ * and this operation searches only within a single document, right?
1298
+ * Wouldn't want collisions between DTMs in the same process.</p>
1299
+ *
1300
+ * @param elementId The unique <code>id</code> value for an element.
1301
+ * @return The handle of the matching element.
1302
+ */
1303
+ public int
1304
+ getElementById(String elementId)
1305
+ {
1306
+
1307
+ Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE)
1308
+ ? (Document) m_root : m_root.getOwnerDocument();
1309
+
1310
+ if (null != doc) {
1311
+ Node elem = doc.getElementById(elementId);
1312
+ if (null != elem) {
1313
+ int elemHandle = getHandleFromNode(elem);
1314
+
1315
+ if (DTM.NULL == elemHandle) {
1316
+ int identity = m_nodes.size() - 1;
1317
+ while (DTM.NULL != (identity = getNextNodeIdentity(identity))) {
1318
+ Node node = getNode(identity);
1319
+ if (node == elem) {
1320
+ elemHandle = getHandleFromNode(elem);
1321
+ break;
1322
+ }
1323
+ }
1324
+ }
1325
+
1326
+ return elemHandle;
1327
+ }
1328
+
1329
+ }
1330
+ return DTM.NULL;
1331
+ }
1332
+
1333
+ /**
1334
+ * The getUnparsedEntityURI function returns the URI of the unparsed
1335
+ * entity with the specified name in the same document as the context
1336
+ * node (see [3.3 Unparsed Entities]). It returns the empty string if
1337
+ * there is no such entity.
1338
+ * <p>
1339
+ * XML processors may choose to use the System Identifier (if one
1340
+ * is provided) to resolve the entity, rather than the URI in the
1341
+ * Public Identifier. The details are dependent on the processor, and
1342
+ * we would have to support some form of plug-in resolver to handle
1343
+ * this properly. Currently, we simply return the System Identifier if
1344
+ * present, and hope that it a usable URI or that our caller can
1345
+ * map it to one.
1346
+ * TODO: Resolve Public Identifiers... or consider changing function name.
1347
+ * <p>
1348
+ * If we find a relative URI
1349
+ * reference, XML expects it to be resolved in terms of the base URI
1350
+ * of the document. The DOM doesn't do that for us, and it isn't
1351
+ * entirely clear whether that should be done here; currently that's
1352
+ * pushed up to a higher level of our application. (Note that DOM Level
1353
+ * 1 didn't store the document's base URI.)
1354
+ * TODO: Consider resolving Relative URIs.
1355
+ * <p>
1356
+ * (The DOM's statement that "An XML processor may choose to
1357
+ * completely expand entities before the structure model is passed
1358
+ * to the DOM" refers only to parsed entities, not unparsed, and hence
1359
+ * doesn't affect this function.)
1360
+ *
1361
+ * @param name A string containing the Entity Name of the unparsed
1362
+ * entity.
1363
+ *
1364
+ * @return String containing the URI of the Unparsed Entity, or an
1365
+ * empty string if no such entity exists.
1366
+ */
1367
+ public String
1368
+ getUnparsedEntityURI(String name)
1369
+ {
1370
+
1371
+ String url = "";
1372
+ Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE)
1373
+ ? (Document) m_root : m_root.getOwnerDocument();
1374
+
1375
+ if (null != doc) {
1376
+ DocumentType doctype = doc.getDoctype();
1377
+
1378
+ if (null != doctype) {
1379
+ NamedNodeMap entities = doctype.getEntities();
1380
+ if (null == entities) {
1381
+ return url;
1382
+ }
1383
+ Entity entity = (Entity) entities.getNamedItem(name);
1384
+ if (null == entity) {
1385
+ return url;
1386
+ }
1387
+
1388
+ String notationName = entity.getNotationName();
1389
+
1390
+ if (null != notationName) { // then it's unparsed
1391
+ // The draft says: "The XSLT processor may use the public
1392
+ // identifier to generate a URI for the entity instead of the URI
1393
+ // specified in the system identifier. If the XSLT processor does
1394
+ // not use the public identifier to generate the URI, it must use
1395
+ // the system identifier; if the system identifier is a relative
1396
+ // URI, it must be resolved into an absolute URI using the URI of
1397
+ // the resource containing the entity declaration as the base
1398
+ // URI [RFC2396]."
1399
+ // So I'm falling a bit short here.
1400
+ url = entity.getSystemId();
1401
+
1402
+ if (null == url) {
1403
+ url = entity.getPublicId();
1404
+ } else {
1405
+ // This should be resolved to an absolute URL, but that's hard
1406
+ // to do from here.
1407
+ }
1408
+ }
1409
+ }
1410
+ }
1411
+
1412
+ return url;
1413
+ }
1414
+
1415
+ /**
1416
+ * 5. [specified] A flag indicating whether this attribute was actually
1417
+ * specified in the start-tag of its element, or was defaulted from the
1418
+ * DTD.
1419
+ *
1420
+ * @param attributeHandle the attribute handle
1421
+ * @return <code>true</code> if the attribute was specified;
1422
+ * <code>false</code> if it was defaulted.
1423
+ */
1424
+ public boolean
1425
+ isAttributeSpecified(int attributeHandle)
1426
+ {
1427
+ int type = getNodeType(attributeHandle);
1428
+
1429
+ if (DTM.ATTRIBUTE_NODE == type) {
1430
+ Attr attr = (Attr)getNode(attributeHandle);
1431
+ return attr.getSpecified();
1432
+ }
1433
+ return false;
1434
+ }
1435
+
1436
+ /** Bind an IncrementalSAXSource to this DTM. NOT RELEVANT for DOM2DTM, since
1437
+ * we're wrapped around an existing DOM.
1438
+ *
1439
+ * @param source The IncrementalSAXSource that we want to recieve events from
1440
+ * on demand.
1441
+ */
1442
+ public void
1443
+ setIncrementalSAXSource(IncrementalSAXSource source)
1444
+ {
1445
+ }
1446
+
1447
+ /** getContentHandler returns "our SAX builder" -- the thing that
1448
+ * someone else should send SAX events to in order to extend this
1449
+ * DTM model.
1450
+ *
1451
+ * @return null if this model doesn't respond to SAX events,
1452
+ * "this" if the DTM object has a built-in SAX ContentHandler,
1453
+ * the IncrmentalSAXSource if we're bound to one and should receive
1454
+ * the SAX stream via it for incremental build purposes...
1455
+ * */
1456
+ public org.xml.sax.ContentHandler
1457
+ getContentHandler()
1458
+ {
1459
+ return null;
1460
+ }
1461
+
1462
+ /**
1463
+ * Return this DTM's lexical handler.
1464
+ *
1465
+ * %REVIEW% Should this return null if constrution already done/begun?
1466
+ *
1467
+ * @return null if this model doesn't respond to lexical SAX events,
1468
+ * "this" if the DTM object has a built-in SAX ContentHandler,
1469
+ * the IncrementalSAXSource if we're bound to one and should receive
1470
+ * the SAX stream via it for incremental build purposes...
1471
+ */
1472
+ public org.xml.sax.ext.LexicalHandler
1473
+ getLexicalHandler()
1474
+ {
1475
+
1476
+ return null;
1477
+ }
1478
+
1479
+
1480
+ /**
1481
+ * Return this DTM's EntityResolver.
1482
+ *
1483
+ * @return null if this model doesn't respond to SAX entity ref events.
1484
+ */
1485
+ public org.xml.sax.EntityResolver
1486
+ getEntityResolver()
1487
+ {
1488
+
1489
+ return null;
1490
+ }
1491
+
1492
+ /**
1493
+ * Return this DTM's DTDHandler.
1494
+ *
1495
+ * @return null if this model doesn't respond to SAX dtd events.
1496
+ */
1497
+ public org.xml.sax.DTDHandler
1498
+ getDTDHandler()
1499
+ {
1500
+
1501
+ return null;
1502
+ }
1503
+
1504
+ /**
1505
+ * Return this DTM's ErrorHandler.
1506
+ *
1507
+ * @return null if this model doesn't respond to SAX error events.
1508
+ */
1509
+ public org.xml.sax.ErrorHandler
1510
+ getErrorHandler()
1511
+ {
1512
+
1513
+ return null;
1514
+ }
1515
+
1516
+ /**
1517
+ * Return this DTM's DeclHandler.
1518
+ *
1519
+ * @return null if this model doesn't respond to SAX Decl events.
1520
+ */
1521
+ public org.xml.sax.ext.DeclHandler
1522
+ getDeclHandler()
1523
+ {
1524
+
1525
+ return null;
1526
+ }
1527
+
1528
+ /** @return true iff we're building this model incrementally (eg
1529
+ * we're partnered with a IncrementalSAXSource) and thus require that the
1530
+ * transformation and the parse run simultaneously. Guidance to the
1531
+ * DTMManager.
1532
+ * */
1533
+ public boolean
1534
+ needsTwoThreads()
1535
+ {
1536
+ return false;
1537
+ }
1538
+
1539
+ // ========== Direct SAX Dispatch, for optimization purposes ========
1540
+
1541
+ /**
1542
+ * Returns whether the specified <var>ch</var> conforms to the XML 1.0 definition
1543
+ * of whitespace. Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S">
1544
+ * the definition of <CODE>S</CODE></A> for details.
1545
+ * @param ch Character to check as XML whitespace.
1546
+ * @return =true if <var>ch</var> is XML whitespace; otherwise =false.
1547
+ */
1548
+ private static boolean
1549
+ isSpace(char ch)
1550
+ {
1551
+ return XMLCharacterRecognizer.isWhiteSpace(ch); // Take the easy way out for now.
1552
+ }
1553
+
1554
+ /**
1555
+ * Directly call the
1556
+ * characters method on the passed ContentHandler for the
1557
+ * string-value of the given node (see http://www.w3.org/TR/xpath#data-model
1558
+ * for the definition of a node's string-value). Multiple calls to the
1559
+ * ContentHandler's characters methods may well occur for a single call to
1560
+ * this method.
1561
+ *
1562
+ * @param nodeHandle The node ID.
1563
+ * @param ch A non-null reference to a ContentHandler.
1564
+ *
1565
+ * @throws org.xml.sax.SAXException
1566
+ */
1567
+ public void
1568
+ dispatchCharactersEvents(
1569
+ int nodeHandle, org.xml.sax.ContentHandler ch,
1570
+ boolean normalize)
1571
+ throws org.xml.sax.SAXException
1572
+ {
1573
+ if (normalize) {
1574
+ XMLString str = getStringValue(nodeHandle);
1575
+ str = str.fixWhiteSpace(true, true, false);
1576
+ str.dispatchCharactersEvents(ch);
1577
+ } else {
1578
+ int type = getNodeType(nodeHandle);
1579
+ Node node = getNode(nodeHandle);
1580
+ dispatchNodeData(node, ch, 0);
1581
+ // Text coalition -- a DTM text node may represent multiple
1582
+ // DOM nodes.
1583
+ if (TEXT_NODE == type || CDATA_SECTION_NODE == type) {
1584
+ while (null != (node = logicalNextDOMTextNode(node))) {
1585
+ dispatchNodeData(node, ch, 0);
1586
+ }
1587
+ }
1588
+ }
1589
+ }
1590
+
1591
+ /**
1592
+ * Retrieve the text content of a DOM subtree, appending it into a
1593
+ * user-supplied FastStringBuffer object. Note that attributes are
1594
+ * not considered part of the content of an element.
1595
+ * <p>
1596
+ * There are open questions regarding whitespace stripping.
1597
+ * Currently we make no special effort in that regard, since the standard
1598
+ * DOM doesn't yet provide DTD-based information to distinguish
1599
+ * whitespace-in-element-context from genuine #PCDATA. Note that we
1600
+ * should probably also consider xml:space if/when we address this.
1601
+ * DOM Level 3 may solve the problem for us.
1602
+ * <p>
1603
+ * %REVIEW% Note that as a DOM-level operation, it can be argued that this
1604
+ * routine _shouldn't_ perform any processing beyond what the DOM already
1605
+ * does, and that whitespace stripping and so on belong at the DTM level.
1606
+ * If you want a stripped DOM view, wrap DTM2DOM around DOM2DTM.
1607
+ *
1608
+ * @param node Node whose subtree is to be walked, gathering the
1609
+ * contents of all Text or CDATASection nodes.
1610
+ */
1611
+ @SuppressWarnings("fallthrough")
1612
+ protected static void
1613
+ dispatchNodeData(Node node,
1614
+ org.xml.sax.ContentHandler ch,
1615
+ int depth)
1616
+ throws org.xml.sax.SAXException
1617
+ {
1618
+
1619
+ switch (node.getNodeType()) {
1620
+ case Node.DOCUMENT_FRAGMENT_NODE :
1621
+ case Node.DOCUMENT_NODE :
1622
+ case Node.ELEMENT_NODE : {
1623
+ for (Node child = node.getFirstChild(); null != child;
1624
+ child = child.getNextSibling()) {
1625
+ dispatchNodeData(child, ch, depth + 1);
1626
+ }
1627
+ }
1628
+ break;
1629
+ case Node.PROCESSING_INSTRUCTION_NODE : // %REVIEW%
1630
+ case Node.COMMENT_NODE :
1631
+ if (0 != depth) {
1632
+ break;
1633
+ }
1634
+ // NOTE: Because this operation works in the DOM space, it does _not_ attempt
1635
+ // to perform Text Coalition. That should only be done in DTM space.
1636
+ case Node.TEXT_NODE :
1637
+ case Node.CDATA_SECTION_NODE :
1638
+ case Node.ATTRIBUTE_NODE :
1639
+ String str = node.getNodeValue();
1640
+ if (ch instanceof org.apache.xml.dtm.ref.dom2dtm.DOM2DTM.CharacterNodeHandler) {
1641
+ ((org.apache.xml.dtm.ref.dom2dtm.DOM2DTM.CharacterNodeHandler)ch).characters(node);
1642
+ } else {
1643
+ ch.characters(str.toCharArray(), 0, str.length());
1644
+ }
1645
+ break;
1646
+ // /* case Node.PROCESSING_INSTRUCTION_NODE :
1647
+ // // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
1648
+ // break; */
1649
+ default :
1650
+ // ignore
1651
+ break;
1652
+ }
1653
+ }
1654
+
1655
+ TreeWalker m_walker = new TreeWalker(null);
1656
+
1657
+ /**
1658
+ * Directly create SAX parser events from a subtree.
1659
+ *
1660
+ * @param nodeHandle The node ID.
1661
+ * @param ch A non-null reference to a ContentHandler.
1662
+ *
1663
+ * @throws org.xml.sax.SAXException
1664
+ */
1665
+ public void
1666
+ dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch)
1667
+ throws org.xml.sax.SAXException
1668
+ {
1669
+ TreeWalker treeWalker = m_walker;
1670
+ ContentHandler prevCH = treeWalker.getContentHandler();
1671
+
1672
+ if (null != prevCH) {
1673
+ treeWalker = new TreeWalker(null);
1674
+ }
1675
+ treeWalker.setContentHandler(ch);
1676
+
1677
+ try {
1678
+ Node node = getNode(nodeHandle);
1679
+ treeWalker.traverseFragment(node);
1680
+ } finally {
1681
+ treeWalker.setContentHandler(null);
1682
+ }
1683
+ }
1684
+
1685
+ /**
1686
+ * For the moment all the run time properties are ignored by this
1687
+ * class.
1688
+ *
1689
+ * @param property a <code>String</code> value
1690
+ * @param value an <code>Object</code> value
1691
+ */
1692
+ public void
1693
+ setProperty(String property, Object value)
1694
+ {
1695
+ }
1696
+
1697
+ /**
1698
+ * No source information is available for DOM2DTM, so return
1699
+ * <code>null</code> here.
1700
+ *
1701
+ * @param node an <code>int</code> value
1702
+ * @return null
1703
+ */
1704
+ public SourceLocator
1705
+ getSourceLocatorFor(int node)
1706
+ {
1707
+ return null;
1708
+ }
1709
+
1710
+ }
1711
+
1712
+