nokogiri-backport 1.11.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (239) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1682 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +272 -0
  6. data/bin/nokogiri +118 -0
  7. data/dependencies.yml +74 -0
  8. data/ext/java/nokogiri/EncodingHandler.java +124 -0
  9. data/ext/java/nokogiri/HtmlDocument.java +178 -0
  10. data/ext/java/nokogiri/HtmlElementDescription.java +148 -0
  11. data/ext/java/nokogiri/HtmlEntityLookup.java +79 -0
  12. data/ext/java/nokogiri/HtmlSaxParserContext.java +282 -0
  13. data/ext/java/nokogiri/HtmlSaxPushParser.java +222 -0
  14. data/ext/java/nokogiri/NokogiriService.java +597 -0
  15. data/ext/java/nokogiri/XmlAttr.java +162 -0
  16. data/ext/java/nokogiri/XmlAttributeDecl.java +129 -0
  17. data/ext/java/nokogiri/XmlCdata.java +82 -0
  18. data/ext/java/nokogiri/XmlComment.java +97 -0
  19. data/ext/java/nokogiri/XmlDocument.java +633 -0
  20. data/ext/java/nokogiri/XmlDocumentFragment.java +185 -0
  21. data/ext/java/nokogiri/XmlDtd.java +481 -0
  22. data/ext/java/nokogiri/XmlElement.java +68 -0
  23. data/ext/java/nokogiri/XmlElementContent.java +382 -0
  24. data/ext/java/nokogiri/XmlElementDecl.java +147 -0
  25. data/ext/java/nokogiri/XmlEntityDecl.java +157 -0
  26. data/ext/java/nokogiri/XmlEntityReference.java +101 -0
  27. data/ext/java/nokogiri/XmlNamespace.java +199 -0
  28. data/ext/java/nokogiri/XmlNode.java +1684 -0
  29. data/ext/java/nokogiri/XmlNodeSet.java +434 -0
  30. data/ext/java/nokogiri/XmlProcessingInstruction.java +100 -0
  31. data/ext/java/nokogiri/XmlReader.java +531 -0
  32. data/ext/java/nokogiri/XmlRelaxng.java +151 -0
  33. data/ext/java/nokogiri/XmlSaxParserContext.java +374 -0
  34. data/ext/java/nokogiri/XmlSaxPushParser.java +286 -0
  35. data/ext/java/nokogiri/XmlSchema.java +388 -0
  36. data/ext/java/nokogiri/XmlSyntaxError.java +138 -0
  37. data/ext/java/nokogiri/XmlText.java +110 -0
  38. data/ext/java/nokogiri/XmlXpathContext.java +301 -0
  39. data/ext/java/nokogiri/XsltStylesheet.java +347 -0
  40. data/ext/java/nokogiri/internals/ClosedStreamException.java +10 -0
  41. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +252 -0
  42. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +20 -0
  43. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +151 -0
  44. data/ext/java/nokogiri/internals/NokogiriDomParser.java +116 -0
  45. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +121 -0
  46. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +69 -0
  47. data/ext/java/nokogiri/internals/NokogiriHandler.java +327 -0
  48. data/ext/java/nokogiri/internals/NokogiriHelpers.java +734 -0
  49. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +217 -0
  50. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +127 -0
  51. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +100 -0
  52. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +121 -0
  53. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +78 -0
  54. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +180 -0
  55. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +72 -0
  56. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +60 -0
  57. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +87 -0
  58. data/ext/java/nokogiri/internals/ParserContext.java +259 -0
  59. data/ext/java/nokogiri/internals/ReaderNode.java +488 -0
  60. data/ext/java/nokogiri/internals/SaveContextVisitor.java +778 -0
  61. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +73 -0
  62. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +168 -0
  63. data/ext/java/nokogiri/internals/XmlDeclHandler.java +42 -0
  64. data/ext/java/nokogiri/internals/XmlDomParserContext.java +274 -0
  65. data/ext/java/nokogiri/internals/XmlSaxParser.java +65 -0
  66. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +119 -0
  67. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +159 -0
  68. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +37 -0
  69. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +93 -0
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +252 -0
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +639 -0
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +38 -0
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +38 -0
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +367 -0
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +295 -0
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +40 -0
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +44 -0
  78. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +44 -0
  79. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +43 -0
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +630 -0
  81. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +173 -0
  82. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +76 -0
  83. data/ext/java/nokogiri/internals/c14n/Constants.java +42 -0
  84. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +293 -0
  85. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +93 -0
  86. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +79 -0
  87. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +166 -0
  88. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +76 -0
  89. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +402 -0
  90. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +51 -0
  91. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +179 -0
  92. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +507 -0
  93. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1745 -0
  94. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +685 -0
  95. data/ext/nokogiri/depend +477 -0
  96. data/ext/nokogiri/extconf.rb +836 -0
  97. data/ext/nokogiri/html_document.c +171 -0
  98. data/ext/nokogiri/html_document.h +10 -0
  99. data/ext/nokogiri/html_element_description.c +279 -0
  100. data/ext/nokogiri/html_element_description.h +10 -0
  101. data/ext/nokogiri/html_entity_lookup.c +32 -0
  102. data/ext/nokogiri/html_entity_lookup.h +8 -0
  103. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  104. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  105. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  106. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  107. data/ext/nokogiri/nokogiri.c +135 -0
  108. data/ext/nokogiri/nokogiri.h +130 -0
  109. data/ext/nokogiri/xml_attr.c +103 -0
  110. data/ext/nokogiri/xml_attr.h +9 -0
  111. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  112. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  113. data/ext/nokogiri/xml_cdata.c +62 -0
  114. data/ext/nokogiri/xml_cdata.h +9 -0
  115. data/ext/nokogiri/xml_comment.c +69 -0
  116. data/ext/nokogiri/xml_comment.h +9 -0
  117. data/ext/nokogiri/xml_document.c +622 -0
  118. data/ext/nokogiri/xml_document.h +23 -0
  119. data/ext/nokogiri/xml_document_fragment.c +48 -0
  120. data/ext/nokogiri/xml_document_fragment.h +10 -0
  121. data/ext/nokogiri/xml_dtd.c +202 -0
  122. data/ext/nokogiri/xml_dtd.h +10 -0
  123. data/ext/nokogiri/xml_element_content.c +123 -0
  124. data/ext/nokogiri/xml_element_content.h +10 -0
  125. data/ext/nokogiri/xml_element_decl.c +69 -0
  126. data/ext/nokogiri/xml_element_decl.h +9 -0
  127. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  128. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  129. data/ext/nokogiri/xml_entity_decl.c +110 -0
  130. data/ext/nokogiri/xml_entity_decl.h +10 -0
  131. data/ext/nokogiri/xml_entity_reference.c +52 -0
  132. data/ext/nokogiri/xml_entity_reference.h +9 -0
  133. data/ext/nokogiri/xml_io.c +63 -0
  134. data/ext/nokogiri/xml_io.h +11 -0
  135. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  136. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  137. data/ext/nokogiri/xml_namespace.c +111 -0
  138. data/ext/nokogiri/xml_namespace.h +14 -0
  139. data/ext/nokogiri/xml_node.c +1773 -0
  140. data/ext/nokogiri/xml_node.h +13 -0
  141. data/ext/nokogiri/xml_node_set.c +486 -0
  142. data/ext/nokogiri/xml_node_set.h +12 -0
  143. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  144. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  145. data/ext/nokogiri/xml_reader.c +657 -0
  146. data/ext/nokogiri/xml_reader.h +10 -0
  147. data/ext/nokogiri/xml_relax_ng.c +179 -0
  148. data/ext/nokogiri/xml_relax_ng.h +9 -0
  149. data/ext/nokogiri/xml_sax_parser.c +305 -0
  150. data/ext/nokogiri/xml_sax_parser.h +39 -0
  151. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  152. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  153. data/ext/nokogiri/xml_sax_push_parser.c +159 -0
  154. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  155. data/ext/nokogiri/xml_schema.c +276 -0
  156. data/ext/nokogiri/xml_schema.h +9 -0
  157. data/ext/nokogiri/xml_syntax_error.c +64 -0
  158. data/ext/nokogiri/xml_syntax_error.h +13 -0
  159. data/ext/nokogiri/xml_text.c +52 -0
  160. data/ext/nokogiri/xml_text.h +9 -0
  161. data/ext/nokogiri/xml_xpath_context.c +374 -0
  162. data/ext/nokogiri/xml_xpath_context.h +10 -0
  163. data/ext/nokogiri/xslt_stylesheet.c +263 -0
  164. data/ext/nokogiri/xslt_stylesheet.h +14 -0
  165. data/lib/isorelax.jar +0 -0
  166. data/lib/jing.jar +0 -0
  167. data/lib/nekodtd.jar +0 -0
  168. data/lib/nekohtml.jar +0 -0
  169. data/lib/nokogiri/css/node.rb +53 -0
  170. data/lib/nokogiri/css/parser.rb +751 -0
  171. data/lib/nokogiri/css/parser.y +272 -0
  172. data/lib/nokogiri/css/parser_extras.rb +94 -0
  173. data/lib/nokogiri/css/syntax_error.rb +8 -0
  174. data/lib/nokogiri/css/tokenizer.rb +154 -0
  175. data/lib/nokogiri/css/tokenizer.rex +55 -0
  176. data/lib/nokogiri/css/xpath_visitor.rb +260 -0
  177. data/lib/nokogiri/css.rb +28 -0
  178. data/lib/nokogiri/decorators/slop.rb +43 -0
  179. data/lib/nokogiri/html/builder.rb +36 -0
  180. data/lib/nokogiri/html/document.rb +322 -0
  181. data/lib/nokogiri/html/document_fragment.rb +50 -0
  182. data/lib/nokogiri/html/element_description.rb +24 -0
  183. data/lib/nokogiri/html/element_description_defaults.rb +672 -0
  184. data/lib/nokogiri/html/entity_lookup.rb +14 -0
  185. data/lib/nokogiri/html/sax/parser.rb +63 -0
  186. data/lib/nokogiri/html/sax/parser_context.rb +17 -0
  187. data/lib/nokogiri/html/sax/push_parser.rb +37 -0
  188. data/lib/nokogiri/html.rb +38 -0
  189. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  190. data/lib/nokogiri/syntax_error.rb +5 -0
  191. data/lib/nokogiri/version/constant.rb +5 -0
  192. data/lib/nokogiri/version/info.rb +182 -0
  193. data/lib/nokogiri/version.rb +3 -0
  194. data/lib/nokogiri/xml/attr.rb +15 -0
  195. data/lib/nokogiri/xml/attribute_decl.rb +19 -0
  196. data/lib/nokogiri/xml/builder.rb +447 -0
  197. data/lib/nokogiri/xml/cdata.rb +12 -0
  198. data/lib/nokogiri/xml/character_data.rb +8 -0
  199. data/lib/nokogiri/xml/document.rb +290 -0
  200. data/lib/nokogiri/xml/document_fragment.rb +159 -0
  201. data/lib/nokogiri/xml/dtd.rb +33 -0
  202. data/lib/nokogiri/xml/element_content.rb +37 -0
  203. data/lib/nokogiri/xml/element_decl.rb +14 -0
  204. data/lib/nokogiri/xml/entity_decl.rb +20 -0
  205. data/lib/nokogiri/xml/entity_reference.rb +19 -0
  206. data/lib/nokogiri/xml/namespace.rb +14 -0
  207. data/lib/nokogiri/xml/node/save_options.rb +62 -0
  208. data/lib/nokogiri/xml/node.rb +1240 -0
  209. data/lib/nokogiri/xml/node_set.rb +372 -0
  210. data/lib/nokogiri/xml/notation.rb +7 -0
  211. data/lib/nokogiri/xml/parse_options.rb +127 -0
  212. data/lib/nokogiri/xml/pp/character_data.rb +19 -0
  213. data/lib/nokogiri/xml/pp/node.rb +57 -0
  214. data/lib/nokogiri/xml/pp.rb +3 -0
  215. data/lib/nokogiri/xml/processing_instruction.rb +9 -0
  216. data/lib/nokogiri/xml/reader.rb +116 -0
  217. data/lib/nokogiri/xml/relax_ng.rb +37 -0
  218. data/lib/nokogiri/xml/sax/document.rb +172 -0
  219. data/lib/nokogiri/xml/sax/parser.rb +123 -0
  220. data/lib/nokogiri/xml/sax/parser_context.rb +17 -0
  221. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  222. data/lib/nokogiri/xml/sax.rb +5 -0
  223. data/lib/nokogiri/xml/schema.rb +72 -0
  224. data/lib/nokogiri/xml/searchable.rb +239 -0
  225. data/lib/nokogiri/xml/syntax_error.rb +71 -0
  226. data/lib/nokogiri/xml/text.rb +10 -0
  227. data/lib/nokogiri/xml/xpath/syntax_error.rb +12 -0
  228. data/lib/nokogiri/xml/xpath.rb +11 -0
  229. data/lib/nokogiri/xml/xpath_context.rb +17 -0
  230. data/lib/nokogiri/xml.rb +76 -0
  231. data/lib/nokogiri/xslt/stylesheet.rb +26 -0
  232. data/lib/nokogiri/xslt.rb +57 -0
  233. data/lib/nokogiri.rb +144 -0
  234. data/lib/serializer.jar +0 -0
  235. data/lib/xalan.jar +0 -0
  236. data/lib/xercesImpl.jar +0 -0
  237. data/lib/xml-apis.jar +0 -0
  238. data/lib/xsd/xmlparser/nokogiri.rb +103 -0
  239. metadata +531 -0
@@ -0,0 +1,633 @@
1
+ /**
2
+ * (The MIT License)
3
+ *
4
+ * Copyright (c) 2008 - 2014:
5
+ *
6
+ * * {Aaron Patterson}[http://tenderlovemaking.com]
7
+ * * {Mike Dalessio}[http://mike.daless.io]
8
+ * * {Charles Nutter}[http://blog.headius.com]
9
+ * * {Sergio Arbeo}[http://www.serabe.com]
10
+ * * {Patrick Mahoney}[http://polycrystal.org]
11
+ * * {Yoko Harada}[http://yokolet.blogspot.com]
12
+ *
13
+ * Permission is hereby granted, free of charge, to any person obtaining
14
+ * a copy of this software and associated documentation files (the
15
+ * 'Software'), to deal in the Software without restriction, including
16
+ * without limitation the rights to use, copy, modify, merge, publish,
17
+ * distribute, sublicense, and/or sell copies of the Software, and to
18
+ * permit persons to whom the Software is furnished to do so, subject to
19
+ * the following conditions:
20
+ *
21
+ * The above copyright notice and this permission notice shall be
22
+ * included in all copies or substantial portions of the Software.
23
+ *
24
+ * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
+ */
32
+
33
+ package nokogiri;
34
+
35
+ import static nokogiri.internals.NokogiriHelpers.clearXpathContext;
36
+ import static nokogiri.internals.NokogiriHelpers.getCachedNodeOrCreate;
37
+ import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
38
+ import static nokogiri.internals.NokogiriHelpers.isNamespace;
39
+ import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
40
+ import static nokogiri.internals.NokogiriHelpers.stringOrNil;
41
+
42
+ import java.util.List;
43
+
44
+ import javax.xml.parsers.DocumentBuilderFactory;
45
+ import javax.xml.parsers.ParserConfigurationException;
46
+
47
+ import org.jcodings.specific.USASCIIEncoding;
48
+ import org.jcodings.specific.UTF8Encoding;
49
+ import org.jruby.Ruby;
50
+ import org.jruby.RubyArray;
51
+ import org.jruby.RubyClass;
52
+ import org.jruby.RubyFixnum;
53
+ import org.jruby.RubyString;
54
+ import org.jruby.anno.JRubyClass;
55
+ import org.jruby.anno.JRubyMethod;
56
+ import org.jruby.exceptions.RaiseException;
57
+ import org.jruby.javasupport.JavaUtil;
58
+ import org.jruby.runtime.Block;
59
+ import org.jruby.runtime.Helpers;
60
+ import org.jruby.runtime.ThreadContext;
61
+ import org.jruby.runtime.Visibility;
62
+ import org.jruby.runtime.builtin.IRubyObject;
63
+ import org.jruby.util.ByteList;
64
+ import org.w3c.dom.Attr;
65
+ import org.w3c.dom.Document;
66
+ import org.w3c.dom.DocumentType;
67
+ import org.w3c.dom.NamedNodeMap;
68
+ import org.w3c.dom.Node;
69
+ import org.w3c.dom.NodeList;
70
+
71
+ import nokogiri.internals.NokogiriHelpers;
72
+ import nokogiri.internals.NokogiriNamespaceCache;
73
+ import nokogiri.internals.SaveContextVisitor;
74
+ import nokogiri.internals.XmlDomParserContext;
75
+ import nokogiri.internals.c14n.CanonicalFilter;
76
+ import nokogiri.internals.c14n.CanonicalizationException;
77
+ import nokogiri.internals.c14n.Canonicalizer;
78
+
79
+ /**
80
+ * Class for Nokogiri::XML::Document
81
+ *
82
+ * @author sergio
83
+ * @author Yoko Harada <yokolet@gmail.com>
84
+ * @author John Shahid <jvshahid@gmail.com>
85
+ */
86
+
87
+ @JRubyClass(name="Nokogiri::XML::Document", parent="Nokogiri::XML::Node")
88
+ public class XmlDocument extends XmlNode {
89
/** Namespace cache for this document; created on demand or taken from a context document. */
private NokogiriNamespaceCache nsCache;

/* Keys under which extra information is stored as user data on the document node. */
public static final String DTD_RAW_DOCUMENT = "DTD_RAW_DOCUMENT";
public static final String DTD_INTERNAL_SUBSET = "DTD_INTERNAL_SUBSET";
public static final String DTD_EXTERNAL_SUBSET = "DTD_EXTERNAL_SUBSET";

/*
 * Class name of the DocumentBuilderFactory implementation. The factory is
 * instantiated by name so that an explicit classloader can be supplied;
 * using an appropriate classloader resolves issue 380.
 */
private static final String DOCUMENTBUILDERFACTORY_IMPLE_NAME = "org.apache.xerces.jaxp.DocumentBuilderFactoryImpl";

/* Bytes of the node name reported for document nodes, tagged as US-ASCII. */
private static final ByteList DOCUMENT = ByteList.create("document");
static {
    DOCUMENT.setEncoding(USASCIIEncoding.INSTANCE);
}

/* Class-wide flags written by substitute_entities_set and load_external_subsets_set. */
private static boolean substituteEntities = false;
private static boolean loadExternalSubset = false; // TODO: Verify this.

/** Cached values: the document encoding and URL as Ruby objects. */
protected IRubyObject encoding;
protected IRubyObject url;
110
+
111
/**
 * Creates a document object of the given Ruby class that wraps a newly
 * created, empty DOM document.
 *
 * @param runtime the current Ruby runtime
 * @param klazz   the Ruby class the new object is an instance of
 */
public XmlDocument(Ruby runtime, RubyClass klazz) {
    super(runtime, klazz, createNewDocument(runtime));
}
114
+
115
/**
 * Wraps an existing DOM document as an instance of the Ruby class
 * {@code Nokogiri::XML::Document}.
 *
 * @param runtime  the current Ruby runtime
 * @param document the DOM document to wrap
 */
public XmlDocument(Ruby runtime, Document document) {
    this(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Document"), document);
}
118
+
119
/**
 * Wraps an existing DOM document as an instance of the given Ruby class
 * and then runs {@link #init(Ruby, Document)} on it.
 *
 * @param runtime  the current Ruby runtime
 * @param klass    the Ruby class the new object is an instance of
 * @param document the DOM document to wrap
 */
public XmlDocument(Ruby runtime, RubyClass klass, Document document) {
    super(runtime, klass, document);
    init(runtime, document);
}
123
+
124
+ void init(Ruby runtime, Document document) {
125
+ stabilizeTextContent(document);
126
+ if (document.getDocumentElement() != null) {
127
+ createAndCacheNamespaces(runtime, document.getDocumentElement());
128
+ }
129
+ setInstanceVariable("@decorators", runtime.getNil());
130
+ }
131
+
132
+ public final void setDocumentNode(Ruby runtime, Document node) {
133
+ super.setNode(runtime, node);
134
+ if (node != null) init(runtime, node);
135
+ else setInstanceVariable("@decorators", runtime.getNil());
136
+ }
137
+
138
+ public void setEncoding(IRubyObject encoding) {
139
+ this.encoding = encoding;
140
+ }
141
+
142
+ public IRubyObject getEncoding() {
143
+ return encoding;
144
+ }
145
+
146
+ // not sure, but like attribute values, text value will be lost
147
+ // unless it is referred once before this document is used.
148
+ // this seems to happen only when the fragment is parsed from Node#in_context.
149
+ protected static void stabilizeTextContent(Document document) {
150
+ if (document.getDocumentElement() != null) document.getDocumentElement().getTextContent();
151
+ }
152
+
153
+ private static void createAndCacheNamespaces(Ruby runtime, Node node) {
154
+ if (node.hasAttributes()) {
155
+ NamedNodeMap nodeMap = node.getAttributes();
156
+ for (int i=0; i<nodeMap.getLength(); i++) {
157
+ Node n = nodeMap.item(i);
158
+ if (n instanceof Attr) {
159
+ Attr attr = (Attr) n;
160
+ stabilizeAttr(attr);
161
+ if (isNamespace(attr.getName())) {
162
+ // create and cache
163
+ XmlNamespace.createFromAttr(runtime, attr);
164
+ }
165
+ }
166
+ }
167
+ }
168
+ NodeList children = node.getChildNodes();
169
+ for (int i=0; i<children.getLength(); i++) {
170
+ createAndCacheNamespaces(runtime, children.item(i));
171
+ }
172
+ }
173
+
174
+ static void stabilizeAttr(final Attr attr) {
175
+ // TODO not sure, but need to get value always before document is referred or lose attribute value
176
+ attr.getValue(); // don't delete this line
177
+ }
178
+
179
+ // When a document is created from fragment with a context (reference) document,
180
+ // namespace should be resolved based on the context document.
181
+ public XmlDocument(Ruby ruby, RubyClass klass, Document document, XmlDocument contextDoc) {
182
+ super(ruby, klass, document);
183
+ nsCache = contextDoc.getNamespaceCache();
184
+ String default_href = nsCache.getDefault().getHref();
185
+ resolveNamespaceIfNecessary(document.getDocumentElement(), default_href);
186
+ }
187
+
188
+ private void resolveNamespaceIfNecessary(Node node, String default_href) {
189
+ if (node == null) return;
190
+ String nodePrefix = node.getPrefix();
191
+ if (nodePrefix == null) { // default namespace
192
+ NokogiriHelpers.renameNode(node, default_href, node.getNodeName());
193
+ } else {
194
+ String href = getNamespaceCache().get(node, nodePrefix).getHref();
195
+ NokogiriHelpers.renameNode(node, href, node.getNodeName());
196
+ }
197
+ resolveNamespaceIfNecessary(node.getNextSibling(), default_href);
198
+ NodeList children = node.getChildNodes();
199
+ for (int i=0; i<children.getLength(); i++) {
200
+ resolveNamespaceIfNecessary(children.item(i), default_href);
201
+ }
202
+ }
203
+
204
+ public NokogiriNamespaceCache getNamespaceCache() {
205
+ if (nsCache == null) nsCache = new NokogiriNamespaceCache();
206
+ return nsCache;
207
+ }
208
+
209
+ public Document getDocument() {
210
+ return (Document) node;
211
+ }
212
+
213
+ @Override
214
+ protected IRubyObject getNodeName(ThreadContext context) {
215
+ if (name == null) name = RubyString.newStringShared(context.runtime, DOCUMENT);
216
+ return name;
217
+ }
218
+
219
+ public void setUrl(IRubyObject url) {
220
+ this.url = url;
221
+ }
222
+
223
+ protected IRubyObject getUrl() {
224
+ return this.url;
225
+ }
226
+
227
+ @JRubyMethod
228
+ public IRubyObject url(ThreadContext context) {
229
+ return getUrl();
230
+ }
231
+
232
+ public static Document createNewDocument(final Ruby runtime) {
233
+ try {
234
+ return DocumentBuilderFactoryHolder.INSTANCE.newDocumentBuilder().newDocument();
235
+ } catch (ParserConfigurationException e) {
236
+ throw asRuntimeError(runtime, null, e);
237
+ }
238
+ }
239
+
240
+ private static class DocumentBuilderFactoryHolder {
241
+ static final DocumentBuilderFactory INSTANCE;
242
+ static {
243
+ INSTANCE = DocumentBuilderFactory.newInstance(DOCUMENTBUILDERFACTORY_IMPLE_NAME, NokogiriService.class.getClassLoader());
244
+ }
245
+ }
246
+
247
+ static RaiseException asRuntimeError(Ruby runtime, String message, Exception cause) {
248
+ if (cause instanceof RaiseException) return (RaiseException) cause;
249
+
250
+ if (message == null) message = cause.toString();
251
+ else message = message + '(' + cause.toString() + ')';
252
+ RaiseException ex = runtime.newRuntimeError(message);
253
+ ex.initCause(cause);
254
+ return ex;
255
+ }
256
+
257
+ /*
258
+ * call-seq:
259
+ * new(version = default)
260
+ *
261
+ * Create a new document with +version+ (defaults to "1.0")
262
+ */
263
+ @JRubyMethod(name="new", meta = true, rest = true, required=0)
264
+ public static IRubyObject rbNew(ThreadContext context, IRubyObject klazz, IRubyObject[] args) {
265
+ final Ruby runtime = context.runtime;
266
+ XmlDocument xmlDocument;
267
+ try {
268
+ Document docNode = createNewDocument(runtime);
269
+ if ("Nokogiri::HTML::Document".equals(((RubyClass)klazz).getName())) {
270
+ xmlDocument = new HtmlDocument(context.runtime, (RubyClass) klazz, docNode);
271
+ } else {
272
+ xmlDocument = new XmlDocument(context.runtime, (RubyClass) klazz, docNode);
273
+ }
274
+ } catch (Exception ex) {
275
+ throw asRuntimeError(runtime, "couldn't create document: ", ex);
276
+ }
277
+
278
+ Helpers.invoke(context, xmlDocument, "initialize", args);
279
+
280
+ return xmlDocument;
281
+ }
282
+
283
+ @JRubyMethod(required=1, optional=4)
284
+ public IRubyObject create_entity(ThreadContext context, IRubyObject[] argv) {
285
+ // FIXME: Entity node should be create by some right way.
286
+ // this impl passes tests, but entity doesn't exists in DTD, which
287
+ // would cause validation failure.
288
+ if (argv.length == 0) throw context.runtime.newRuntimeError("Could not create entity");
289
+ String tagName = rubyStringToString(argv[0]);
290
+ Node node = getOwnerDocument().createElement(tagName);
291
+ return XmlEntityDecl.create(context, node, argv);
292
+ }
293
+
294
+ @Override
295
+ XmlDocument document(Ruby runtime) {
296
+ return this;
297
+ }
298
+
299
+ @JRubyMethod(name="encoding=")
300
+ public IRubyObject encoding_set(IRubyObject encoding) {
301
+ this.encoding = encoding;
302
+ return this;
303
+ }
304
+
305
+ @JRubyMethod
306
+ public IRubyObject encoding(ThreadContext context) {
307
+ if (this.encoding == null || this.encoding.isNil()) {
308
+ final String enc = getDocument().getXmlEncoding();
309
+ if (enc == null) {
310
+ this.encoding = context.nil;
311
+ } else {
312
+ this.encoding = context.runtime.newString(enc);
313
+ }
314
+ }
315
+
316
+ return this.encoding.isNil() ? this.encoding : this.encoding.asString().encode(context, context.getRuntime().newString("UTF-8"));
317
+ }
318
+
319
+ @JRubyMethod(meta = true)
320
+ public static IRubyObject load_external_subsets_set(ThreadContext context, IRubyObject cls, IRubyObject value) {
321
+ XmlDocument.loadExternalSubset = value.isTrue();
322
+ return context.nil;
323
+ }
324
+
325
+ @JRubyMethod(meta = true, required = 4)
326
+ public static IRubyObject read_io(ThreadContext context, IRubyObject klass, IRubyObject[] args) {
327
+ XmlDomParserContext ctx = new XmlDomParserContext(context.runtime, args[2], args[3]);
328
+ ctx.setIOInputSource(context, args[0], args[1]);
329
+ return ctx.parse(context, (RubyClass) klass, args[1]);
330
+ }
331
+
332
+ @JRubyMethod(meta = true, required = 4)
333
+ public static IRubyObject read_memory(ThreadContext context, IRubyObject klass, IRubyObject[] args) {
334
+ XmlDomParserContext ctx = new XmlDomParserContext(context.runtime, args[2], args[3]);
335
+ ctx.setStringInputSource(context, args[0], args[1]);
336
+ return ctx.parse(context, (RubyClass) klass, args[1]);
337
+ }
338
+
339
+ @JRubyMethod(name="remove_namespaces!")
340
+ public IRubyObject remove_namespaces(ThreadContext context) {
341
+ removeNamespaceRecursively(this);
342
+ if (nsCache != null) nsCache.clear();
343
+ clearXpathContext(getNode());
344
+ return this;
345
+ }
346
+
347
+ private void removeNamespaceRecursively(XmlNode xmlNode) {
348
+ Node node = xmlNode.node;
349
+ if (node.getNodeType() == Node.ELEMENT_NODE) {
350
+ node.setPrefix(null);
351
+ NokogiriHelpers.renameNode(node, null, node.getLocalName());
352
+ NamedNodeMap attrs = node.getAttributes();
353
+ for (int i=0; i<attrs.getLength(); i++) {
354
+ Attr attr = (Attr) attrs.item(i);
355
+ if (isNamespace(attr.getNodeName())) {
356
+ ((org.w3c.dom.Element) node).removeAttributeNode(attr);
357
+ } else {
358
+ attr.setPrefix(null);
359
+ NokogiriHelpers.renameNode(attr, null, attr.getLocalName());
360
+ }
361
+ }
362
+ }
363
+ IRubyObject[] nodes = xmlNode.getChildren();
364
+ for (int i=0; i < nodes.length; i++) {
365
+ XmlNode childNode = (XmlNode) nodes[i];
366
+ removeNamespaceRecursively(childNode);
367
+ }
368
+ }
369
+
370
+ @JRubyMethod
371
+ public IRubyObject root(ThreadContext context) {
372
+ Node rootNode = getDocument().getDocumentElement();
373
+ if (rootNode == null) return context.nil;
374
+
375
+ Object invalid = rootNode.getUserData(NokogiriHelpers.ROOT_NODE_INVALID);
376
+ if (invalid != null && ((Boolean) invalid)) return context.nil;
377
+
378
+ return getCachedNodeOrCreate(context.runtime, rootNode);
379
+ }
380
+
381
+ protected IRubyObject dup_implementation(Ruby runtime, boolean deep) {
382
+ XmlDocument doc = (XmlDocument) super.dup_implementation(runtime, deep);
383
+ // Avoid creating a new XmlDocument since we cloned one
384
+ // already. Otherwise the following test will fail:
385
+ //
386
+ // dup = doc.dup
387
+ // dup.equal?(dup.children[0].document)
388
+ //
389
+ // Since `dup.children[0].document' will end up creating a new
390
+ // XmlDocument. See #1060.
391
+ doc.resetCache();
392
+ return doc;
393
+ }
394
+
395
+ @JRubyMethod(name="root=")
396
+ public IRubyObject root_set(ThreadContext context, IRubyObject new_root) {
397
+ // in case of document fragment, temporary root node should be deleted.
398
+
399
+ // Java can't have a root whose value is null. Instead of setting null,
400
+ // the method sets user data so that other methods are able to know the root
401
+ // should be nil.
402
+ if (new_root == context.nil) {
403
+ getDocument().getDocumentElement().setUserData(NokogiriHelpers.ROOT_NODE_INVALID, Boolean.TRUE, null);
404
+ return new_root;
405
+ }
406
+ XmlNode newRoot = asXmlNode(context, new_root);
407
+
408
+ IRubyObject root = root(context);
409
+ if (root.isNil()) {
410
+ Node newRootNode;
411
+ if (getDocument() == newRoot.getOwnerDocument()) {
412
+ newRootNode = newRoot.node;
413
+ } else {
414
+ // must copy otherwise newRoot may exist in two places
415
+ // with different owner document.
416
+ newRootNode = getDocument().importNode(newRoot.node, true);
417
+ }
418
+ add_child_node(context, getCachedNodeOrCreate(context.runtime, newRootNode));
419
+ } else {
420
+ Node rootNode = asXmlNode(context, root).node;
421
+ ((XmlNode) getCachedNodeOrCreate(context.runtime, rootNode)).replace_node(context, newRoot);
422
+ }
423
+
424
+ return newRoot;
425
+ }
426
+
427
+ @JRubyMethod
428
+ public IRubyObject version(ThreadContext context) {
429
+ return stringOrNil(context.runtime, getDocument().getXmlVersion());
430
+ }
431
+
432
+ @JRubyMethod(meta = true)
433
+ public static IRubyObject substitute_entities_set(ThreadContext context, IRubyObject cls, IRubyObject value) {
434
+ XmlDocument.substituteEntities = value.isTrue();
435
+ return context.nil;
436
+ }
437
+
438
+ public IRubyObject getInternalSubset(ThreadContext context) {
439
+ IRubyObject dtd = (IRubyObject) node.getUserData(DTD_INTERNAL_SUBSET);
440
+
441
+ if (dtd == null) {
442
+ Document document = getDocument();
443
+ if (document.getUserData(XmlDocument.DTD_RAW_DOCUMENT) != null) {
444
+ dtd = XmlDtd.newFromInternalSubset(context.runtime, document);
445
+ } else if (document.getDoctype() != null) {
446
+ DocumentType docType = document.getDoctype();
447
+ IRubyObject name, publicId, systemId;
448
+ name = publicId = systemId = context.nil;
449
+ if (docType.getName() != null) {
450
+ name = context.runtime.newString(docType.getName());
451
+ }
452
+ if (docType.getPublicId() != null) {
453
+ publicId = context.runtime.newString(docType.getPublicId());
454
+ }
455
+ if (docType.getSystemId() != null) {
456
+ systemId = context.runtime.newString(docType.getSystemId());
457
+ }
458
+ dtd = XmlDtd.newEmpty(context.runtime, document, name, publicId, systemId);
459
+ } else {
460
+ dtd = context.nil;
461
+ }
462
+
463
+ setInternalSubset(dtd);
464
+ }
465
+
466
+ return dtd;
467
+ }
468
+
469
+ /**
470
+ * Assumes XmlNode#internal_subset() has returned nil. (i.e. there
471
+ * is not already an internal subset).
472
+ */
473
+ public IRubyObject createInternalSubset(ThreadContext context,
474
+ IRubyObject name,
475
+ IRubyObject external_id,
476
+ IRubyObject system_id) {
477
+ XmlDtd dtd = XmlDtd.newEmpty(context.runtime, getDocument(), name, external_id, system_id);
478
+ setInternalSubset(dtd);
479
+ return dtd;
480
+ }
481
+
482
+ protected void setInternalSubset(IRubyObject data) {
483
+ node.setUserData(DTD_INTERNAL_SUBSET, data, null);
484
+ }
485
+
486
+ public IRubyObject getExternalSubset(ThreadContext context) {
487
+ IRubyObject dtd = (IRubyObject) node.getUserData(DTD_EXTERNAL_SUBSET);
488
+
489
+ if (dtd == null) return context.nil;
490
+ return dtd;
491
+ }
492
+
493
+ /**
494
+ * Assumes XmlNode#external_subset() has returned nil. (i.e. there
495
+ * is not already an external subset).
496
+ */
497
+ public IRubyObject createExternalSubset(ThreadContext context,
498
+ IRubyObject name,
499
+ IRubyObject external_id,
500
+ IRubyObject system_id) {
501
+ XmlDtd dtd = XmlDtd.newEmpty(context.runtime, getDocument(), name, external_id, system_id);
502
+ setExternalSubset(dtd);
503
+ return dtd;
504
+ }
505
+
506
+ protected void setExternalSubset(IRubyObject data) {
507
+ node.setUserData(DTD_EXTERNAL_SUBSET, data, null);
508
+ }
509
+
510
+ @Override
511
+ public void accept(ThreadContext context, SaveContextVisitor visitor) {
512
+ Document document = getDocument();
513
+ visitor.enter(document);
514
+ NodeList children = document.getChildNodes();
515
+ for (int i=0; i<children.getLength(); i++) {
516
+ Node child = children.item(i);
517
+ short type = child.getNodeType();
518
+ if (type == Node.COMMENT_NODE) {
519
+ XmlComment xmlComment = (XmlComment) getCachedNodeOrCreate(context.runtime, child);
520
+ xmlComment.accept(context, visitor);
521
+ } else if (type == Node.DOCUMENT_TYPE_NODE) {
522
+ XmlDtd xmlDtd = (XmlDtd) getCachedNodeOrCreate(context.runtime, child);
523
+ xmlDtd.accept(context, visitor);
524
+ } else if (type == Node.PROCESSING_INSTRUCTION_NODE) {
525
+ XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction) getCachedNodeOrCreate(context.runtime, child);
526
+ xmlProcessingInstruction.accept(context, visitor);
527
+ } else if (type == Node.TEXT_NODE) {
528
+ XmlText xmlText = (XmlText) getCachedNodeOrCreate(context.runtime, child);
529
+ xmlText.accept(context, visitor);
530
+ } else if (type == Node.ELEMENT_NODE) {
531
+ XmlElement xmlElement = (XmlElement) getCachedNodeOrCreate(context.runtime, child);
532
+ xmlElement.accept(context, visitor);
533
+ }
534
+ }
535
+ visitor.leave(document);
536
+ }
537
+
538
+ @JRubyMethod(meta = true)
539
+ public static IRubyObject wrap(ThreadContext context, IRubyObject klass, IRubyObject arg) {
540
+ XmlDocument xmlDocument = new XmlDocument(context.runtime, (RubyClass) klass, (Document) arg.toJava(Document.class));
541
+ Helpers.invoke(context, xmlDocument, "initialize");
542
+ return xmlDocument;
543
+ }
544
+
545
+ @Deprecated
546
+ @JRubyMethod(meta = true, visibility = Visibility.PRIVATE)
547
+ public static IRubyObject wrapJavaDocument(ThreadContext context, IRubyObject klass, IRubyObject arg) {
548
+ return wrap(context, klass, arg);
549
+ }
550
+
551
+ @Deprecated // default to_java works (due inherited from XmlNode#toJava)
552
+ @JRubyMethod(visibility = Visibility.PRIVATE)
553
+ public IRubyObject toJavaDocument(ThreadContext context) {
554
+ return JavaUtil.convertJavaToUsableRubyObject(context.getRuntime(), node);
555
+ }
556
+
557
+ /* call-seq:
558
+ * doc.canonicalize(mode=XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
559
+ * doc.canonicalize { |obj, parent| ... }
560
+ *
561
+ * Canonicalize a document and return the results. Takes an optional block
562
+ * that takes two parameters: the +obj+ and that node's +parent+.
563
+ * The +obj+ will be either a Nokogiri::XML::Node, or a Nokogiri::XML::Namespace
564
+ * The block must return a non-nil, non-false value if the +obj+ passed in
565
+ * should be included in the canonicalized document.
566
+ */
567
+ @JRubyMethod(optional=3)
568
+ public IRubyObject canonicalize(ThreadContext context, IRubyObject[] args, Block block) {
569
+ int mode = 0;
570
+ String inclusive_namespace = null;
571
+ Boolean with_comments = false;
572
+ if (args.length > 0 && !(args[0].isNil())) {
573
+ mode = RubyFixnum.fix2int(args[0]);
574
+ }
575
+ if (args.length > 1 ) {
576
+ if (!args[1].isNil() && !(args[1] instanceof List)) {
577
+ throw context.runtime.newTypeError("Expected array");
578
+ }
579
+ if (!args[1].isNil()) {
580
+ inclusive_namespace = ((RubyArray)args[1])
581
+ .join(context, context.runtime.newString(" "))
582
+ .asString()
583
+ .asJavaString(); // OMG I wish I knew JRuby better, this is ugly
584
+ }
585
+ }
586
+ if (args.length > 2) {
587
+ with_comments = args[2].isTrue();
588
+ }
589
+ String algorithmURI = null;
590
+ switch(mode) {
591
+ case 0: // XML_C14N_1_0
592
+ if (with_comments) algorithmURI = Canonicalizer.ALGO_ID_C14N_WITH_COMMENTS;
593
+ else algorithmURI = Canonicalizer.ALGO_ID_C14N_OMIT_COMMENTS;
594
+ break;
595
+ case 1: // XML_C14N_EXCLUSIVE_1_0
596
+ if (with_comments) algorithmURI = Canonicalizer.ALGO_ID_C14N_EXCL_WITH_COMMENTS;
597
+ else algorithmURI = Canonicalizer.ALGO_ID_C14N_EXCL_OMIT_COMMENTS;
598
+ break;
599
+ case 2: // XML_C14N_1_1 = 2
600
+ if (with_comments) algorithmURI = Canonicalizer.ALGO_ID_C14N11_WITH_COMMENTS;
601
+ else algorithmURI = Canonicalizer.ALGO_ID_C14N11_OMIT_COMMENTS;
602
+ }
603
+ try {
604
+ Canonicalizer canonicalizer = Canonicalizer.getInstance(algorithmURI);
605
+ XmlNode startingNode = getStartingNode(block);
606
+ byte[] result;
607
+ CanonicalFilter filter = new CanonicalFilter(context, block);
608
+ if (inclusive_namespace == null) {
609
+ result = canonicalizer.canonicalizeSubtree(startingNode.getNode(), filter);
610
+ } else {
611
+ result = canonicalizer.canonicalizeSubtree(startingNode.getNode(), inclusive_namespace, filter);
612
+ }
613
+ return RubyString.newString(context.runtime, new ByteList(result, UTF8Encoding.INSTANCE));
614
+ } catch (CanonicalizationException e) {
615
+ // TODO Auto-generated catch block
616
+ e.printStackTrace();
617
+ }
618
+ return context.nil;
619
+ }
620
+
621
+ private XmlNode getStartingNode(Block block) {
622
+ if (block.isGiven()) {
623
+ IRubyObject boundSelf = block.getBinding().getSelf();
624
+ if (boundSelf instanceof XmlNode) return (XmlNode) boundSelf;
625
+ }
626
+ return this;
627
+ }
628
+
629
+ public void resetNamespaceCache(ThreadContext context) {
630
+ nsCache = new NokogiriNamespaceCache();
631
+ createAndCacheNamespaces(context.runtime, node);
632
+ }
633
+ }