nokogiri-backport 1.11.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (239) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1682 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +272 -0
  6. data/bin/nokogiri +118 -0
  7. data/dependencies.yml +74 -0
  8. data/ext/java/nokogiri/EncodingHandler.java +124 -0
  9. data/ext/java/nokogiri/HtmlDocument.java +178 -0
  10. data/ext/java/nokogiri/HtmlElementDescription.java +148 -0
  11. data/ext/java/nokogiri/HtmlEntityLookup.java +79 -0
  12. data/ext/java/nokogiri/HtmlSaxParserContext.java +282 -0
  13. data/ext/java/nokogiri/HtmlSaxPushParser.java +222 -0
  14. data/ext/java/nokogiri/NokogiriService.java +597 -0
  15. data/ext/java/nokogiri/XmlAttr.java +162 -0
  16. data/ext/java/nokogiri/XmlAttributeDecl.java +129 -0
  17. data/ext/java/nokogiri/XmlCdata.java +82 -0
  18. data/ext/java/nokogiri/XmlComment.java +97 -0
  19. data/ext/java/nokogiri/XmlDocument.java +633 -0
  20. data/ext/java/nokogiri/XmlDocumentFragment.java +185 -0
  21. data/ext/java/nokogiri/XmlDtd.java +481 -0
  22. data/ext/java/nokogiri/XmlElement.java +68 -0
  23. data/ext/java/nokogiri/XmlElementContent.java +382 -0
  24. data/ext/java/nokogiri/XmlElementDecl.java +147 -0
  25. data/ext/java/nokogiri/XmlEntityDecl.java +157 -0
  26. data/ext/java/nokogiri/XmlEntityReference.java +101 -0
  27. data/ext/java/nokogiri/XmlNamespace.java +199 -0
  28. data/ext/java/nokogiri/XmlNode.java +1684 -0
  29. data/ext/java/nokogiri/XmlNodeSet.java +434 -0
  30. data/ext/java/nokogiri/XmlProcessingInstruction.java +100 -0
  31. data/ext/java/nokogiri/XmlReader.java +531 -0
  32. data/ext/java/nokogiri/XmlRelaxng.java +151 -0
  33. data/ext/java/nokogiri/XmlSaxParserContext.java +374 -0
  34. data/ext/java/nokogiri/XmlSaxPushParser.java +286 -0
  35. data/ext/java/nokogiri/XmlSchema.java +388 -0
  36. data/ext/java/nokogiri/XmlSyntaxError.java +138 -0
  37. data/ext/java/nokogiri/XmlText.java +110 -0
  38. data/ext/java/nokogiri/XmlXpathContext.java +301 -0
  39. data/ext/java/nokogiri/XsltStylesheet.java +347 -0
  40. data/ext/java/nokogiri/internals/ClosedStreamException.java +10 -0
  41. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +252 -0
  42. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +20 -0
  43. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +151 -0
  44. data/ext/java/nokogiri/internals/NokogiriDomParser.java +116 -0
  45. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +121 -0
  46. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +69 -0
  47. data/ext/java/nokogiri/internals/NokogiriHandler.java +327 -0
  48. data/ext/java/nokogiri/internals/NokogiriHelpers.java +734 -0
  49. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +217 -0
  50. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +127 -0
  51. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +100 -0
  52. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +121 -0
  53. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +78 -0
  54. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +180 -0
  55. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +72 -0
  56. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +60 -0
  57. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +87 -0
  58. data/ext/java/nokogiri/internals/ParserContext.java +259 -0
  59. data/ext/java/nokogiri/internals/ReaderNode.java +488 -0
  60. data/ext/java/nokogiri/internals/SaveContextVisitor.java +778 -0
  61. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +73 -0
  62. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +168 -0
  63. data/ext/java/nokogiri/internals/XmlDeclHandler.java +42 -0
  64. data/ext/java/nokogiri/internals/XmlDomParserContext.java +274 -0
  65. data/ext/java/nokogiri/internals/XmlSaxParser.java +65 -0
  66. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +119 -0
  67. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +159 -0
  68. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +37 -0
  69. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +93 -0
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +252 -0
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +639 -0
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +38 -0
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +38 -0
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +367 -0
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +295 -0
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +40 -0
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +44 -0
  78. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +44 -0
  79. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +43 -0
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +630 -0
  81. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +173 -0
  82. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +76 -0
  83. data/ext/java/nokogiri/internals/c14n/Constants.java +42 -0
  84. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +293 -0
  85. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +93 -0
  86. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +79 -0
  87. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +166 -0
  88. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +76 -0
  89. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +402 -0
  90. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +51 -0
  91. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +179 -0
  92. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +507 -0
  93. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1745 -0
  94. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +685 -0
  95. data/ext/nokogiri/depend +477 -0
  96. data/ext/nokogiri/extconf.rb +836 -0
  97. data/ext/nokogiri/html_document.c +171 -0
  98. data/ext/nokogiri/html_document.h +10 -0
  99. data/ext/nokogiri/html_element_description.c +279 -0
  100. data/ext/nokogiri/html_element_description.h +10 -0
  101. data/ext/nokogiri/html_entity_lookup.c +32 -0
  102. data/ext/nokogiri/html_entity_lookup.h +8 -0
  103. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  104. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  105. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  106. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  107. data/ext/nokogiri/nokogiri.c +135 -0
  108. data/ext/nokogiri/nokogiri.h +130 -0
  109. data/ext/nokogiri/xml_attr.c +103 -0
  110. data/ext/nokogiri/xml_attr.h +9 -0
  111. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  112. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  113. data/ext/nokogiri/xml_cdata.c +62 -0
  114. data/ext/nokogiri/xml_cdata.h +9 -0
  115. data/ext/nokogiri/xml_comment.c +69 -0
  116. data/ext/nokogiri/xml_comment.h +9 -0
  117. data/ext/nokogiri/xml_document.c +622 -0
  118. data/ext/nokogiri/xml_document.h +23 -0
  119. data/ext/nokogiri/xml_document_fragment.c +48 -0
  120. data/ext/nokogiri/xml_document_fragment.h +10 -0
  121. data/ext/nokogiri/xml_dtd.c +202 -0
  122. data/ext/nokogiri/xml_dtd.h +10 -0
  123. data/ext/nokogiri/xml_element_content.c +123 -0
  124. data/ext/nokogiri/xml_element_content.h +10 -0
  125. data/ext/nokogiri/xml_element_decl.c +69 -0
  126. data/ext/nokogiri/xml_element_decl.h +9 -0
  127. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  128. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  129. data/ext/nokogiri/xml_entity_decl.c +110 -0
  130. data/ext/nokogiri/xml_entity_decl.h +10 -0
  131. data/ext/nokogiri/xml_entity_reference.c +52 -0
  132. data/ext/nokogiri/xml_entity_reference.h +9 -0
  133. data/ext/nokogiri/xml_io.c +63 -0
  134. data/ext/nokogiri/xml_io.h +11 -0
  135. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  136. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  137. data/ext/nokogiri/xml_namespace.c +111 -0
  138. data/ext/nokogiri/xml_namespace.h +14 -0
  139. data/ext/nokogiri/xml_node.c +1773 -0
  140. data/ext/nokogiri/xml_node.h +13 -0
  141. data/ext/nokogiri/xml_node_set.c +486 -0
  142. data/ext/nokogiri/xml_node_set.h +12 -0
  143. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  144. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  145. data/ext/nokogiri/xml_reader.c +657 -0
  146. data/ext/nokogiri/xml_reader.h +10 -0
  147. data/ext/nokogiri/xml_relax_ng.c +179 -0
  148. data/ext/nokogiri/xml_relax_ng.h +9 -0
  149. data/ext/nokogiri/xml_sax_parser.c +305 -0
  150. data/ext/nokogiri/xml_sax_parser.h +39 -0
  151. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  152. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  153. data/ext/nokogiri/xml_sax_push_parser.c +159 -0
  154. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  155. data/ext/nokogiri/xml_schema.c +276 -0
  156. data/ext/nokogiri/xml_schema.h +9 -0
  157. data/ext/nokogiri/xml_syntax_error.c +64 -0
  158. data/ext/nokogiri/xml_syntax_error.h +13 -0
  159. data/ext/nokogiri/xml_text.c +52 -0
  160. data/ext/nokogiri/xml_text.h +9 -0
  161. data/ext/nokogiri/xml_xpath_context.c +374 -0
  162. data/ext/nokogiri/xml_xpath_context.h +10 -0
  163. data/ext/nokogiri/xslt_stylesheet.c +263 -0
  164. data/ext/nokogiri/xslt_stylesheet.h +14 -0
  165. data/lib/isorelax.jar +0 -0
  166. data/lib/jing.jar +0 -0
  167. data/lib/nekodtd.jar +0 -0
  168. data/lib/nekohtml.jar +0 -0
  169. data/lib/nokogiri/css/node.rb +53 -0
  170. data/lib/nokogiri/css/parser.rb +751 -0
  171. data/lib/nokogiri/css/parser.y +272 -0
  172. data/lib/nokogiri/css/parser_extras.rb +94 -0
  173. data/lib/nokogiri/css/syntax_error.rb +8 -0
  174. data/lib/nokogiri/css/tokenizer.rb +154 -0
  175. data/lib/nokogiri/css/tokenizer.rex +55 -0
  176. data/lib/nokogiri/css/xpath_visitor.rb +260 -0
  177. data/lib/nokogiri/css.rb +28 -0
  178. data/lib/nokogiri/decorators/slop.rb +43 -0
  179. data/lib/nokogiri/html/builder.rb +36 -0
  180. data/lib/nokogiri/html/document.rb +322 -0
  181. data/lib/nokogiri/html/document_fragment.rb +50 -0
  182. data/lib/nokogiri/html/element_description.rb +24 -0
  183. data/lib/nokogiri/html/element_description_defaults.rb +672 -0
  184. data/lib/nokogiri/html/entity_lookup.rb +14 -0
  185. data/lib/nokogiri/html/sax/parser.rb +63 -0
  186. data/lib/nokogiri/html/sax/parser_context.rb +17 -0
  187. data/lib/nokogiri/html/sax/push_parser.rb +37 -0
  188. data/lib/nokogiri/html.rb +38 -0
  189. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  190. data/lib/nokogiri/syntax_error.rb +5 -0
  191. data/lib/nokogiri/version/constant.rb +5 -0
  192. data/lib/nokogiri/version/info.rb +182 -0
  193. data/lib/nokogiri/version.rb +3 -0
  194. data/lib/nokogiri/xml/attr.rb +15 -0
  195. data/lib/nokogiri/xml/attribute_decl.rb +19 -0
  196. data/lib/nokogiri/xml/builder.rb +447 -0
  197. data/lib/nokogiri/xml/cdata.rb +12 -0
  198. data/lib/nokogiri/xml/character_data.rb +8 -0
  199. data/lib/nokogiri/xml/document.rb +290 -0
  200. data/lib/nokogiri/xml/document_fragment.rb +159 -0
  201. data/lib/nokogiri/xml/dtd.rb +33 -0
  202. data/lib/nokogiri/xml/element_content.rb +37 -0
  203. data/lib/nokogiri/xml/element_decl.rb +14 -0
  204. data/lib/nokogiri/xml/entity_decl.rb +20 -0
  205. data/lib/nokogiri/xml/entity_reference.rb +19 -0
  206. data/lib/nokogiri/xml/namespace.rb +14 -0
  207. data/lib/nokogiri/xml/node/save_options.rb +62 -0
  208. data/lib/nokogiri/xml/node.rb +1240 -0
  209. data/lib/nokogiri/xml/node_set.rb +372 -0
  210. data/lib/nokogiri/xml/notation.rb +7 -0
  211. data/lib/nokogiri/xml/parse_options.rb +127 -0
  212. data/lib/nokogiri/xml/pp/character_data.rb +19 -0
  213. data/lib/nokogiri/xml/pp/node.rb +57 -0
  214. data/lib/nokogiri/xml/pp.rb +3 -0
  215. data/lib/nokogiri/xml/processing_instruction.rb +9 -0
  216. data/lib/nokogiri/xml/reader.rb +116 -0
  217. data/lib/nokogiri/xml/relax_ng.rb +37 -0
  218. data/lib/nokogiri/xml/sax/document.rb +172 -0
  219. data/lib/nokogiri/xml/sax/parser.rb +123 -0
  220. data/lib/nokogiri/xml/sax/parser_context.rb +17 -0
  221. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  222. data/lib/nokogiri/xml/sax.rb +5 -0
  223. data/lib/nokogiri/xml/schema.rb +72 -0
  224. data/lib/nokogiri/xml/searchable.rb +239 -0
  225. data/lib/nokogiri/xml/syntax_error.rb +71 -0
  226. data/lib/nokogiri/xml/text.rb +10 -0
  227. data/lib/nokogiri/xml/xpath/syntax_error.rb +12 -0
  228. data/lib/nokogiri/xml/xpath.rb +11 -0
  229. data/lib/nokogiri/xml/xpath_context.rb +17 -0
  230. data/lib/nokogiri/xml.rb +76 -0
  231. data/lib/nokogiri/xslt/stylesheet.rb +26 -0
  232. data/lib/nokogiri/xslt.rb +57 -0
  233. data/lib/nokogiri.rb +144 -0
  234. data/lib/serializer.jar +0 -0
  235. data/lib/xalan.jar +0 -0
  236. data/lib/xercesImpl.jar +0 -0
  237. data/lib/xml-apis.jar +0 -0
  238. data/lib/xsd/xmlparser/nokogiri.rb +103 -0
  239. metadata +531 -0
@@ -0,0 +1,185 @@
1
+ /**
2
+ * (The MIT License)
3
+ *
4
+ * Copyright (c) 2008 - 2011:
5
+ *
6
+ * * {Aaron Patterson}[http://tenderlovemaking.com]
7
+ * * {Mike Dalessio}[http://mike.daless.io]
8
+ * * {Charles Nutter}[http://blog.headius.com]
9
+ * * {Sergio Arbeo}[http://www.serabe.com]
10
+ * * {Patrick Mahoney}[http://polycrystal.org]
11
+ * * {Yoko Harada}[http://yokolet.blogspot.com]
12
+ *
13
+ * Permission is hereby granted, free of charge, to any person obtaining
14
+ * a copy of this software and associated documentation files (the
15
+ * 'Software'), to deal in the Software without restriction, including
16
+ * without limitation the rights to use, copy, modify, merge, publish,
17
+ * distribute, sublicense, and/or sell copies of the Software, and to
18
+ * permit persons to whom the Software is furnished to do so, subject to
19
+ * the following conditions:
20
+ *
21
+ * The above copyright notice and this permission notice shall be
22
+ * included in all copies or substantial portions of the Software.
23
+ *
24
+ * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
+ */
32
+
33
+ package nokogiri;
34
+
35
+ import static nokogiri.internals.NokogiriHelpers.getLocalNameForNamespace;
36
+ import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
37
+ import static nokogiri.internals.NokogiriHelpers.getPrefix;
38
+ import static nokogiri.internals.NokogiriHelpers.isNamespace;
39
+ import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
40
+
41
+ import java.util.HashMap;
42
+ import java.util.Map;
43
+ import java.util.regex.Matcher;
44
+ import java.util.regex.Pattern;
45
+
46
+ import org.jruby.Ruby;
47
+ import org.jruby.RubyArray;
48
+ import org.jruby.RubyClass;
49
+ import org.jruby.RubyString;
50
+ import org.jruby.anno.JRubyClass;
51
+ import org.jruby.anno.JRubyMethod;
52
+ import org.jruby.runtime.Helpers;
53
+ import org.jruby.runtime.ThreadContext;
54
+ import org.jruby.runtime.builtin.IRubyObject;
55
+ import org.jruby.util.ByteList;
56
+ import org.w3c.dom.Attr;
57
+ import org.w3c.dom.NamedNodeMap;
58
+
59
+ /**
60
+ * Class for Nokogiri::XML::DocumentFragment
61
+ *
62
+ * @author sergio
63
+ * @author Yoko Harada <yokolet@gmail.com>
64
+ */
65
+ @JRubyClass(name="Nokogiri::XML::DocumentFragment", parent="Nokogiri::XML::Node")
66
+ public class XmlDocumentFragment extends XmlNode {
67
+
68
+ private XmlElement fragmentContext;
69
+
70
+ public XmlDocumentFragment(Ruby ruby) {
71
+ this(ruby, getNokogiriClass(ruby, "Nokogiri::XML::DocumentFragment"));
72
+ }
73
+
74
+ public XmlDocumentFragment(Ruby ruby, RubyClass klazz) {
75
+ super(ruby, klazz);
76
+ }
77
+
78
+ @JRubyMethod(name="new", meta = true, required=1, optional=2)
79
+ public static IRubyObject rbNew(ThreadContext context, IRubyObject cls, IRubyObject[] args) {
80
+ if (args.length < 1) {
81
+ throw context.runtime.newArgumentError(args.length, 1);
82
+ }
83
+
84
+ if (!(args[0] instanceof XmlDocument)){
85
+ throw context.runtime.newArgumentError("first parameter must be a Nokogiri::XML::Document instance");
86
+ }
87
+
88
+ XmlDocument doc = (XmlDocument) args[0];
89
+
90
+ // make wellformed fragment, ignore invalid namespace, or add appropriate namespace to parse
91
+ if (args.length > 1 && args[1] instanceof RubyString) {
92
+ final RubyString arg1 = (RubyString) args[1];
93
+ if (XmlDocumentFragment.isTag(arg1)) {
94
+ args[1] = RubyString.newString(context.runtime, addNamespaceDeclIfNeeded(doc, rubyStringToString(arg1)));
95
+ }
96
+ }
97
+
98
+ XmlDocumentFragment fragment = (XmlDocumentFragment) NokogiriService.XML_DOCUMENT_FRAGMENT_ALLOCATOR.allocate(context.runtime, (RubyClass)cls);
99
+ fragment.setDocument(context, doc);
100
+ fragment.setNode(context.runtime, doc.getDocument().createDocumentFragment());
101
+
102
+ //TODO: Get namespace definitions from doc.
103
+ if (args.length == 3 && args[2] != null && args[2] instanceof XmlElement) {
104
+ fragment.fragmentContext = (XmlElement)args[2];
105
+ }
106
+ Helpers.invoke(context, fragment, "initialize", args);
107
+ return fragment;
108
+ }
109
+
110
+ private static final ByteList TAG_BEG = ByteList.create("<");
111
+ private static final ByteList TAG_END = ByteList.create(">");
112
+
113
+ private static boolean isTag(final RubyString str) {
114
+ return str.getByteList().startsWith(TAG_BEG) && str.getByteList().endsWith(TAG_END);
115
+ }
116
+
117
+ private static boolean isNamespaceDefined(String qName, NamedNodeMap nodeMap) {
118
+ if (isNamespace(qName.intern())) return true;
119
+ for (int i=0; i < nodeMap.getLength(); i++) {
120
+ Attr attr = (Attr)nodeMap.item(i);
121
+ if (isNamespace(attr.getNodeName())) {
122
+ String localPart = getLocalNameForNamespace(attr.getNodeName(), null);
123
+ if (getPrefix(qName).equals(localPart)) {
124
+ return true;
125
+ }
126
+ }
127
+ }
128
+ return false;
129
+ }
130
+
131
+ private static final Pattern QNAME_RE = Pattern.compile("[^</:>\\s]+:[^</:>=\\s]+");
132
+ private static final Pattern START_TAG_RE = Pattern.compile("<[^</>]+>");
133
+
134
+ private static String addNamespaceDeclIfNeeded(XmlDocument doc, String tags) {
135
+ if (doc.getDocument() == null) return tags;
136
+ if (doc.getDocument().getDocumentElement() == null) return tags;
137
+ Matcher matcher = START_TAG_RE.matcher(tags);
138
+ Map<CharSequence, CharSequence> rewriteTable = null;
139
+ while (matcher.find()) {
140
+ String start_tag = matcher.group();
141
+ Matcher matcher2 = QNAME_RE.matcher(start_tag);
142
+ while (matcher2.find()) {
143
+ String qName = matcher2.group();
144
+ NamedNodeMap nodeMap = doc.getDocument().getDocumentElement().getAttributes();
145
+ if (isNamespaceDefined(qName, nodeMap)) {
146
+ CharSequence namespaceDecl = getNamespaceDecl(getPrefix(qName), nodeMap);
147
+ if (namespaceDecl != null) {
148
+ if (rewriteTable == null) rewriteTable = new HashMap(8, 1);
149
+ StringBuilder str = new StringBuilder(qName.length() + namespaceDecl.length() + 3);
150
+ String key = str.append('<').append(qName).append('>').toString();
151
+ str.setCharAt(key.length() - 1, ' '); // (last) '>' -> ' '
152
+ rewriteTable.put(key, str.append(namespaceDecl).append('>'));
153
+ }
154
+ }
155
+ }
156
+ }
157
+ if (rewriteTable != null) {
158
+ for (Map.Entry<CharSequence, CharSequence> e : rewriteTable.entrySet()) {
159
+ tags = tags.replace(e.getKey(), e.getValue());
160
+ }
161
+ }
162
+
163
+ return tags;
164
+ }
165
+
166
+ private static CharSequence getNamespaceDecl(final String prefix, NamedNodeMap nodeMap) {
167
+ for (int i=0; i < nodeMap.getLength(); i++) {
168
+ Attr attr = (Attr) nodeMap.item(i);
169
+ if (prefix.equals(attr.getLocalName())) {
170
+ return new StringBuilder().
171
+ append(attr.getName()).append('=').append('"').append(attr.getValue()).append('"');
172
+ }
173
+ }
174
+ return null;
175
+ }
176
+
177
+ public XmlElement getFragmentContext() {
178
+ return fragmentContext;
179
+ }
180
+
181
+ @Override
182
+ public void relink_namespace(ThreadContext context) {
183
+ relink_namespace(context, getChildren());
184
+ }
185
+ }
@@ -0,0 +1,481 @@
1
+ /**
2
+ * (The MIT License)
3
+ *
4
+ * Copyright (c) 2008 - 2011:
5
+ *
6
+ * * {Aaron Patterson}[http://tenderlovemaking.com]
7
+ * * {Mike Dalessio}[http://mike.daless.io]
8
+ * * {Charles Nutter}[http://blog.headius.com]
9
+ * * {Sergio Arbeo}[http://www.serabe.com]
10
+ * * {Patrick Mahoney}[http://polycrystal.org]
11
+ * * {Yoko Harada}[http://yokolet.blogspot.com]
12
+ *
13
+ * Permission is hereby granted, free of charge, to any person obtaining
14
+ * a copy of this software and associated documentation files (the
15
+ * 'Software'), to deal in the Software without restriction, including
16
+ * without limitation the rights to use, copy, modify, merge, publish,
17
+ * distribute, sublicense, and/or sell copies of the Software, and to
18
+ * permit persons to whom the Software is furnished to do so, subject to
19
+ * the following conditions:
20
+ *
21
+ * The above copyright notice and this permission notice shall be
22
+ * included in all copies or substantial portions of the Software.
23
+ *
24
+ * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
+ */
32
+
33
+ package nokogiri;
34
+
35
+ import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
36
+ import static nokogiri.internals.NokogiriHelpers.nonEmptyStringOrNil;
37
+ import static nokogiri.internals.NokogiriHelpers.stringOrNil;
38
+ import static org.jruby.runtime.Helpers.invoke;
39
+
40
+ import java.util.ArrayList;
41
+ import java.util.Arrays;
42
+ import java.util.List;
43
+
44
+ import org.apache.xerces.xni.QName;
45
+ import org.cyberneko.dtd.DTDConfiguration;
46
+ import org.jruby.Ruby;
47
+ import org.jruby.RubyArray;
48
+ import org.jruby.RubyClass;
49
+ import org.jruby.RubyHash;
50
+ import org.jruby.anno.JRubyClass;
51
+ import org.jruby.anno.JRubyMethod;
52
+ import org.jruby.runtime.ThreadContext;
53
+ import org.jruby.runtime.builtin.IRubyObject;
54
+ import org.w3c.dom.Document;
55
+ import org.w3c.dom.DocumentType;
56
+ import org.w3c.dom.Element;
57
+ import org.w3c.dom.Node;
58
+
59
+ import nokogiri.internals.NokogiriHelpers;
60
+ import nokogiri.internals.SaveContextVisitor;
61
+
62
+ /**
63
+ * Class for Nokogiri::XML::DTD
64
+ *
65
+ * @author sergio
66
+ * @author Patrick Mahoney <pat@polycrystal.org>
67
+ * @author Yoko Harada <yokolet@gmail.com>
68
+ */
69
+
70
+ @JRubyClass(name="Nokogiri::XML::DTD", parent="Nokogiri::XML::Node")
71
+ public class XmlDtd extends XmlNode {
72
+ /** cache of children, Nokogiri::XML::NodeSet */
73
+ protected IRubyObject children = null;
74
+
75
+ /** cache of name => XmlAttributeDecl */
76
+ protected RubyHash attributes = null;
77
+
78
+ /** cache of name => XmlElementDecl */
79
+ protected RubyHash elements = null;
80
+
81
+ /** cache of name => XmlEntityDecl */
82
+ protected RubyHash entities = null;
83
+
84
+ /** cache of name => Nokogiri::XML::Notation */
85
+ protected RubyHash notations = null;
86
+ protected RubyClass notationClass;
87
+
88
+ /** temporary store of content models before they are added to
89
+ * their XmlElementDecl. */
90
+ protected RubyHash contentModels;
91
+
92
+ /** node name */
93
+ protected IRubyObject name;
94
+
95
+ /** public ID (or external ID) */
96
+ protected IRubyObject pubId;
97
+
98
+ /** system ID */
99
+ protected IRubyObject sysId;
100
+
101
/**
 * Creates a DTD wrapper with no DOM node attached yet.
 *
 * @param ruby      the JRuby runtime
 * @param rubyClass the Ruby class of the new object
 */
public XmlDtd(Ruby ruby, RubyClass rubyClass) {
    super(ruby, rubyClass);
}
104
+
105
+ public void setNode(Ruby runtime, Node dtd) {
106
+ this.node = dtd;
107
+ notationClass = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::Notation");
108
+
109
+ name = pubId = sysId = runtime.getNil();
110
+ if (dtd == null) return;
111
+
112
+ // This is the dtd declaration stored in the document; it
113
+ // contains the DTD name (root element) and public and system
114
+ // ids. The actual declarations are in the NekoDTD 'dtd'
115
+ // variable. I don't know of a way to consolidate the two.
116
+
117
+ DocumentType otherDtd = dtd.getOwnerDocument().getDoctype();
118
+ if (otherDtd != null) {
119
+ name = stringOrNil(runtime, otherDtd.getNodeName());
120
+ pubId = nonEmptyStringOrNil(runtime, otherDtd.getPublicId());
121
+ sysId = nonEmptyStringOrNil(runtime, otherDtd.getSystemId());
122
+ }
123
+ }
124
+
125
/**
 * Creates a DTD wrapper around {@code dtd} and initialises the cached
 * name and ids through {@code setNode}.
 *
 * @param ruby      the JRuby runtime
 * @param rubyClass the Ruby class of the new object
 * @param dtd       the DOM node to wrap
 */
public XmlDtd(Ruby ruby, RubyClass rubyClass, Node dtd) {
    super(ruby, rubyClass, dtd);
    setNode(ruby, dtd);
}
129
+
130
+ public static XmlDtd newEmpty(Ruby runtime,
131
+ Document doc,
132
+ IRubyObject name,
133
+ IRubyObject external_id,
134
+ IRubyObject system_id) {
135
+
136
+ DocumentType placeholder;
137
+ if (doc.getDoctype() == null) {
138
+ String javaName = NokogiriHelpers.rubyStringToString(name);
139
+ String javaExternalId = NokogiriHelpers.rubyStringToString(external_id);
140
+ String javaSystemId = NokogiriHelpers.rubyStringToString(system_id);
141
+ placeholder = doc.getImplementation().createDocumentType(javaName, javaExternalId, javaSystemId);
142
+ doc.appendChild(placeholder);
143
+ } else {
144
+ placeholder = doc.getDoctype();
145
+ }
146
+ // FIXME: what if the document had a doc type, why are we here ?
147
+ XmlDtd dtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD"));
148
+ dtd.setNode(runtime, placeholder);
149
+ dtd.name = name;
150
+ dtd.pubId = external_id;
151
+ dtd.sysId = system_id;
152
+ return dtd;
153
+ }
154
+
155
+
156
+ /**
157
+ * Create an unparented element that contains DTD declarations
158
+ * parsed from the internal subset attached as user data to
159
+ * <code>doc</code>. The attached dtd must be the tree from
160
+ * NekoDTD. The owner document of the returned tree will be
161
+ * <code>doc</doc>.
162
+ *
163
+ * NekoDTD parser returns a new document node containing elements
164
+ * representing the dtd declarations. The plan is to get the root
165
+ * element and adopt it into the correct document, stipping the
166
+ * Document provided by NekoDTD.
167
+ *
168
+ */
169
+ public static XmlDtd newFromInternalSubset(Ruby runtime, Document doc) {
170
+ Object dtdTree_ = doc.getUserData(XmlDocument.DTD_RAW_DOCUMENT);
171
+ if (dtdTree_ == null) {
172
+ XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD"));
173
+ xmlDtd.setNode(runtime, null);
174
+ return xmlDtd;
175
+ }
176
+
177
+ Node dtdTree = (Node) dtdTree_;
178
+ Node dtd = getInternalSubset(dtdTree);
179
+ if (dtd == null) {
180
+ XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD"));
181
+ xmlDtd.setNode(runtime, null);
182
+ return xmlDtd;
183
+ } else {
184
+ // Import the node into doc so it has the correct owner document.
185
+ dtd = doc.importNode(dtd, true);
186
+ XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD"));
187
+ xmlDtd.setNode(runtime, dtd);
188
+ return xmlDtd;
189
+ }
190
+ }
191
+
192
+ public static IRubyObject newFromExternalSubset(Ruby runtime, Document doc) {
193
+ Object dtdTree_ = doc.getUserData(XmlDocument.DTD_RAW_DOCUMENT);
194
+ if (dtdTree_ == null) {
195
+ return runtime.getNil();
196
+ }
197
+
198
+ Node dtdTree = (Node) dtdTree_;
199
+ Node dtd = getExternalSubset(dtdTree);
200
+ if (dtd == null) {
201
+ return runtime.getNil();
202
+ } else if (!dtd.hasChildNodes()) {
203
+ return runtime.getNil();
204
+ } else {
205
+ // Import the node into doc so it has the correct owner document.
206
+ dtd = doc.importNode(dtd, true);
207
+ XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD"));
208
+ xmlDtd.setNode(runtime, dtd);
209
+ return xmlDtd;
210
+ }
211
+ }
212
+
213
+ /*
214
+ * <code>dtd</code> is the document node of a NekoDTD tree.
215
+ * NekoDTD tree looks like this:
216
+ *
217
+ * <code><pre>
218
+ * [#document: null]
219
+ * [#comment: ...]
220
+ * [#comment: ...]
221
+ * [dtd: null] // a DocumentType; isDTD(node) => false
222
+ * [dtd: null] // root of dtd, an Element node; isDTD(node) => true
223
+ * ... decls, content models, etc. ...
224
+ * [externalSubset: null] pubid="the pubid" sysid="the sysid"
225
+ * ... external subset decls, etc. ...
226
+ * </pre></code>
227
+ */
228
+ protected static Node getInternalSubset(Node dtdTree) {
229
+ Node root;
230
+ for (root = dtdTree.getFirstChild(); ; root = root.getNextSibling()) {
231
+ if (root == null)
232
+ return null;
233
+ else if (isDTD(root))
234
+ return root; // we have second dtd which is root
235
+ }
236
+ }
237
+
238
+ protected static Node getExternalSubset(Node dtdTree) {
239
+ Node dtd = getInternalSubset(dtdTree);
240
+ if (dtd == null) return null;
241
+ for (Node ext = dtd.getFirstChild(); ; ext = ext.getNextSibling()) {
242
+ if (ext == null)
243
+ return null;
244
+ else if (isExternalSubset(ext))
245
+ return ext;
246
+ }
247
+ }
248
+
249
+ /**
250
+ * This overrides the #attributes method defined in
251
+ * lib/nokogiri/xml/node.rb.
252
+ */
253
+ @JRubyMethod
254
+ public IRubyObject attributes(ThreadContext context) {
255
+ if (attributes == null) extractDecls(context);
256
+
257
+ return attributes;
258
+ }
259
+
260
+ @JRubyMethod
261
+ public IRubyObject elements(ThreadContext context) {
262
+ if (elements == null) extractDecls(context);
263
+
264
+ return elements;
265
+ }
266
+
267
+ @JRubyMethod
268
+ public IRubyObject entities(ThreadContext context) {
269
+ if (entities == null) extractDecls(context);
270
+
271
+ return entities;
272
+ }
273
+
274
+ @JRubyMethod
275
+ public IRubyObject notations(ThreadContext context) {
276
+ if (notations == null) extractDecls(context);
277
+
278
+ return notations;
279
+ }
280
+
281
+ /**
282
+ * Our "node" object is as-returned by NekoDTD. The actual
283
+ * "children" that we're interested in (Attribute declarations,
284
+ * etc.) are a few layers deep.
285
+ */
286
+ @Override
287
+ @JRubyMethod
288
+ public IRubyObject children(ThreadContext context) {
289
+ if (children == null) extractDecls(context);
290
+
291
+ return children;
292
+ }
293
+
294
+ /**
295
+ * Returns the name of the dtd.
296
+ */
297
+ @Override
298
+ @JRubyMethod
299
+ public IRubyObject node_name(ThreadContext context) {
300
+ return name;
301
+ }
302
+
303
+ @Override
304
+ @JRubyMethod(name = "node_name=")
305
+ public IRubyObject node_name_set(ThreadContext context, IRubyObject name) {
306
+ throw context.getRuntime()
307
+ .newRuntimeError("cannot change name of DTD");
308
+ }
309
+
310
+ @JRubyMethod
311
+ public IRubyObject system_id(ThreadContext context) {
312
+ return sysId;
313
+ }
314
+
315
+ @JRubyMethod
316
+ public IRubyObject external_id(ThreadContext context) {
317
+ return pubId;
318
+ }
319
+
320
+ @JRubyMethod
321
+ public IRubyObject validate(ThreadContext context, IRubyObject doc) {
322
+ RubyArray errors = RubyArray.newArray(context.getRuntime());
323
+ if (doc instanceof XmlDocument) {
324
+ errors = (RubyArray) ((XmlDocument)doc).getInstanceVariable("@errors");
325
+ }
326
+ return errors;
327
+ }
328
+
329
+ public static boolean nameEquals(Node node, QName name) {
330
+ return name.localpart.equals(node.getNodeName());
331
+ }
332
+
333
+ public static boolean isExternalSubset(Node node) {
334
+ return nameEquals(node, DTDConfiguration.E_EXTERNAL_SUBSET);
335
+ }
336
+
337
+ /**
338
+ * Checks instanceof Element so we return false for a DocumentType
339
+ * node (NekoDTD uses Element for all its nodes).
340
+ */
341
+ public static boolean isDTD(Node node) {
342
+ return (node instanceof Element &&
343
+ nameEquals(node, DTDConfiguration.E_DTD));
344
+ }
345
+
346
+ public static boolean isAttributeDecl(Node node) {
347
+ return nameEquals(node, DTDConfiguration.E_ATTRIBUTE_DECL);
348
+ }
349
+
350
+ public static boolean isElementDecl(Node node) {
351
+ return nameEquals(node, DTDConfiguration.E_ELEMENT_DECL);
352
+ }
353
+
354
+ public static boolean isEntityDecl(Node node) {
355
+ return (nameEquals(node, DTDConfiguration.E_INTERNAL_ENTITY_DECL) ||
356
+ nameEquals(node, DTDConfiguration.E_UNPARSED_ENTITY_DECL));
357
+ }
358
+
359
+ public static boolean isNotationDecl(Node node) {
360
+ return nameEquals(node, DTDConfiguration.E_NOTATION_DECL);
361
+ }
362
+
363
+ public static boolean isContentModel(Node node) {
364
+ return nameEquals(node, DTDConfiguration.E_CONTENT_MODEL);
365
+ }
366
+
367
+ /**
368
+ * Recursively extract various DTD declarations and store them in
369
+ * the various collections.
370
+ */
371
+ protected void extractDecls(ThreadContext context) {
372
+ Ruby runtime = context.runtime;
373
+
374
+ // initialize data structures
375
+ attributes = RubyHash.newHash(runtime);
376
+ elements = RubyHash.newHash(runtime);
377
+ entities = RubyHash.newHash(runtime);
378
+ notations = RubyHash.newHash(runtime);
379
+ contentModels = RubyHash.newHash(runtime);
380
+ children = runtime.getNil();
381
+
382
+ // recursively extract decls
383
+ if (node == null) return; // leave all the decl hash's empty
384
+
385
+ // convert allDecls to a NodeSet
386
+ children = XmlNodeSet.newNodeSet(runtime, extractDecls(context, node.getFirstChild()));
387
+
388
+ // add attribute decls as attributes to the matching element decl
389
+ RubyArray keys = attributes.keys();
390
+ for (int i = 0; i < keys.getLength(); ++i) {
391
+ IRubyObject akey = keys.entry(i);
392
+ IRubyObject val;
393
+
394
+ val = attributes.op_aref(context, akey);
395
+ if (val.isNil()) continue;
396
+ XmlAttributeDecl attrDecl = (XmlAttributeDecl) val;
397
+ IRubyObject ekey = attrDecl.element_name(context);
398
+ val = elements.op_aref(context, ekey);
399
+ if (val.isNil()) continue;
400
+ XmlElementDecl elemDecl = (XmlElementDecl) val;
401
+
402
+ elemDecl.appendAttrDecl(attrDecl);
403
+ }
404
+
405
+ // add content models to the matching element decl
406
+ keys = contentModels.keys();
407
+ for (int i = 0; i < keys.getLength(); ++i) {
408
+ IRubyObject key = keys.entry(i);
409
+ IRubyObject cm = contentModels.op_aref(context, key);
410
+
411
+ IRubyObject elem = elements.op_aref(context, key);
412
+ if (elem.isNil()) continue;
413
+ if (((XmlElementDecl)elem).isEmpty()) continue;
414
+ ((XmlElementDecl) elem).setContentModel(cm);
415
+ }
416
+ }
417
+
418
+ /**
419
+ * The <code>node</code> is either the first child of the root dtd
420
+ * node (as returned by getInternalSubset()) or the first child of
421
+ * the external subset node (as returned by getExternalSubset()).
422
+ *
423
+ * This recursive function will not descend into an
424
+ * 'externalSubset' node, thus for an internal subset it only
425
+ * extracts nodes in the internal subset, and for an external
426
+ * subset it extracts everything and assumess <code>node</code>
427
+ * and all children are part of the external subset.
428
+ */
429
+ protected IRubyObject[] extractDecls(ThreadContext context, Node node) {
430
+ List<IRubyObject> decls = new ArrayList<IRubyObject>();
431
+ while (node != null) {
432
+ if (isExternalSubset(node)) {
433
+ break;
434
+ } else if (isAttributeDecl(node)) {
435
+ XmlAttributeDecl decl = XmlAttributeDecl.create(context, node);
436
+ attributes.op_aset(context, decl.attribute_name(context), decl);
437
+ decls.add(decl);
438
+ } else if (isElementDecl(node)) {
439
+ XmlElementDecl decl = XmlElementDecl.create(context, node);
440
+ elements.op_aset(context, decl.element_name(context), decl);
441
+ decls.add(decl);
442
+ } else if (isEntityDecl(node)) {
443
+ XmlEntityDecl decl = XmlEntityDecl.create(context, node);
444
+ entities.op_aset(context, decl.node_name(context), decl);
445
+ decls.add(decl);
446
+ } else if (isNotationDecl(node)) {
447
+ XmlNode tmp = (XmlNode)
448
+ NokogiriHelpers.constructNode(context.getRuntime(), node);
449
+ IRubyObject decl = invoke(context, notationClass, "new",
450
+ tmp.getAttribute(context, "name"),
451
+ tmp.getAttribute(context, "pubid"),
452
+ tmp.getAttribute(context, "sysid"));
453
+ notations.op_aset(context,
454
+ tmp.getAttribute(context, "name"), decl);
455
+ decls.add(decl);
456
+ } else if (isContentModel(node)) {
457
+ XmlElementContent cm =
458
+ new XmlElementContent(context.getRuntime(),
459
+ (XmlDocument) document(context),
460
+ node);
461
+ contentModels.op_aset(context, cm.element_name(context), cm);
462
+ } else {
463
+ // recurse
464
+ decls.addAll(Arrays.asList(extractDecls(context, node.getFirstChild())));
465
+ }
466
+
467
+ node = node.getNextSibling();
468
+ }
469
+
470
+ return decls.toArray(new IRubyObject[decls.size()]);
471
+ }
472
+
473
+ @Override
474
+ public void accept(ThreadContext context, SaveContextVisitor visitor) {
475
+ // since we use nekoDTD to parse dtd, node might be ElementImpl type
476
+ // An external subset doesn't need to show up, so this method just see docType.
477
+ DocumentType docType = node.getOwnerDocument().getDoctype();
478
+ visitor.enter(docType);
479
+ visitor.leave(docType);
480
+ }
481
+ }