nokogiri 1.11.0.rc3-java → 1.11.4-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (187) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/LICENSE.md +1 -1
  5. data/README.md +168 -91
  6. data/dependencies.yml +12 -12
  7. data/ext/java/nokogiri/EncodingHandler.java +76 -89
  8. data/ext/java/nokogiri/HtmlDocument.java +135 -144
  9. data/ext/java/nokogiri/HtmlElementDescription.java +102 -117
  10. data/ext/java/nokogiri/HtmlEntityLookup.java +33 -60
  11. data/ext/java/nokogiri/HtmlSaxParserContext.java +218 -222
  12. data/ext/java/nokogiri/HtmlSaxPushParser.java +162 -169
  13. data/ext/java/nokogiri/NokogiriService.java +595 -556
  14. data/ext/java/nokogiri/XmlAttr.java +118 -126
  15. data/ext/java/nokogiri/XmlAttributeDecl.java +95 -106
  16. data/ext/java/nokogiri/XmlCdata.java +35 -58
  17. data/ext/java/nokogiri/XmlComment.java +46 -67
  18. data/ext/java/nokogiri/XmlDocument.java +645 -572
  19. data/ext/java/nokogiri/XmlDocumentFragment.java +125 -137
  20. data/ext/java/nokogiri/XmlDtd.java +448 -414
  21. data/ext/java/nokogiri/XmlElement.java +23 -48
  22. data/ext/java/nokogiri/XmlElementContent.java +343 -316
  23. data/ext/java/nokogiri/XmlElementDecl.java +124 -125
  24. data/ext/java/nokogiri/XmlEntityDecl.java +119 -127
  25. data/ext/java/nokogiri/XmlEntityReference.java +49 -72
  26. data/ext/java/nokogiri/XmlNamespace.java +175 -175
  27. data/ext/java/nokogiri/XmlNode.java +1843 -1622
  28. data/ext/java/nokogiri/XmlNodeSet.java +361 -331
  29. data/ext/java/nokogiri/XmlProcessingInstruction.java +47 -69
  30. data/ext/java/nokogiri/XmlReader.java +513 -450
  31. data/ext/java/nokogiri/XmlRelaxng.java +89 -101
  32. data/ext/java/nokogiri/XmlSaxParserContext.java +328 -310
  33. data/ext/java/nokogiri/XmlSaxPushParser.java +227 -220
  34. data/ext/java/nokogiri/XmlSchema.java +335 -242
  35. data/ext/java/nokogiri/XmlSyntaxError.java +113 -119
  36. data/ext/java/nokogiri/XmlText.java +55 -76
  37. data/ext/java/nokogiri/XmlXpathContext.java +242 -210
  38. data/ext/java/nokogiri/XsltStylesheet.java +280 -269
  39. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  40. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +201 -190
  41. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  42. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  43. data/ext/java/nokogiri/internals/NokogiriDomParser.java +63 -80
  44. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  45. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +27 -52
  46. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -286
  47. data/ext/java/nokogiri/internals/NokogiriHelpers.java +736 -652
  48. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +184 -173
  49. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +79 -89
  50. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +64 -79
  51. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +84 -99
  52. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +48 -65
  53. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +119 -78
  54. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -54
  55. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +23 -46
  56. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +55 -72
  57. data/ext/java/nokogiri/internals/ParserContext.java +206 -211
  58. data/ext/java/nokogiri/internals/ReaderNode.java +478 -403
  59. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -739
  60. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +31 -54
  61. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  62. data/ext/java/nokogiri/internals/XmlDeclHandler.java +3 -34
  63. data/ext/java/nokogiri/internals/XmlDomParserContext.java +206 -207
  64. data/ext/java/nokogiri/internals/XmlSaxParser.java +22 -47
  65. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  66. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  67. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  68. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  78. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  81. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  82. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  83. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  84. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  85. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  86. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  87. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  88. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  89. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  90. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  91. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  92. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  93. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -574
  94. data/ext/nokogiri/depend +37 -358
  95. data/ext/nokogiri/extconf.rb +581 -374
  96. data/ext/nokogiri/html_document.c +78 -82
  97. data/ext/nokogiri/html_element_description.c +84 -71
  98. data/ext/nokogiri/html_entity_lookup.c +21 -16
  99. data/ext/nokogiri/html_sax_parser_context.c +69 -66
  100. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  101. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  102. data/ext/nokogiri/nokogiri.c +192 -93
  103. data/ext/nokogiri/test_global_handlers.c +40 -0
  104. data/ext/nokogiri/xml_attr.c +15 -15
  105. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  106. data/ext/nokogiri/xml_cdata.c +13 -18
  107. data/ext/nokogiri/xml_comment.c +19 -26
  108. data/ext/nokogiri/xml_document.c +246 -188
  109. data/ext/nokogiri/xml_document_fragment.c +13 -15
  110. data/ext/nokogiri/xml_dtd.c +54 -48
  111. data/ext/nokogiri/xml_element_content.c +30 -27
  112. data/ext/nokogiri/xml_element_decl.c +22 -22
  113. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  114. data/ext/nokogiri/xml_entity_decl.c +32 -30
  115. data/ext/nokogiri/xml_entity_reference.c +16 -18
  116. data/ext/nokogiri/xml_namespace.c +56 -49
  117. data/ext/nokogiri/xml_node.c +371 -320
  118. data/ext/nokogiri/xml_node_set.c +168 -156
  119. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  120. data/ext/nokogiri/xml_reader.c +191 -157
  121. data/ext/nokogiri/xml_relax_ng.c +52 -28
  122. data/ext/nokogiri/xml_sax_parser.c +118 -118
  123. data/ext/nokogiri/xml_sax_parser_context.c +103 -86
  124. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  125. data/ext/nokogiri/xml_schema.c +95 -47
  126. data/ext/nokogiri/xml_syntax_error.c +42 -21
  127. data/ext/nokogiri/xml_text.c +13 -17
  128. data/ext/nokogiri/xml_xpath_context.c +206 -123
  129. data/ext/nokogiri/xslt_stylesheet.c +158 -161
  130. data/lib/nokogiri.rb +3 -7
  131. data/lib/nokogiri/css/parser.rb +3 -3
  132. data/lib/nokogiri/css/parser.y +2 -2
  133. data/lib/nokogiri/css/xpath_visitor.rb +70 -42
  134. data/lib/nokogiri/extension.rb +26 -0
  135. data/lib/nokogiri/html/document.rb +12 -26
  136. data/lib/nokogiri/html/document_fragment.rb +15 -15
  137. data/lib/nokogiri/nokogiri.jar +0 -0
  138. data/lib/nokogiri/version.rb +2 -149
  139. data/lib/nokogiri/version/constant.rb +5 -0
  140. data/lib/nokogiri/version/info.rb +205 -0
  141. data/lib/nokogiri/xml/document.rb +91 -35
  142. data/lib/nokogiri/xml/document_fragment.rb +4 -6
  143. data/lib/nokogiri/xml/node.rb +89 -69
  144. data/lib/nokogiri/xml/parse_options.rb +6 -0
  145. data/lib/nokogiri/xml/reader.rb +2 -9
  146. data/lib/nokogiri/xml/relax_ng.rb +6 -2
  147. data/lib/nokogiri/xml/schema.rb +12 -4
  148. data/lib/nokogiri/xml/searchable.rb +3 -1
  149. data/lib/nokogiri/xml/xpath.rb +1 -3
  150. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  151. metadata +86 -177
  152. data/ext/nokogiri/html_document.h +0 -10
  153. data/ext/nokogiri/html_element_description.h +0 -10
  154. data/ext/nokogiri/html_entity_lookup.h +0 -8
  155. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  156. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  157. data/ext/nokogiri/nokogiri.h +0 -134
  158. data/ext/nokogiri/xml_attr.h +0 -9
  159. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  160. data/ext/nokogiri/xml_cdata.h +0 -9
  161. data/ext/nokogiri/xml_comment.h +0 -9
  162. data/ext/nokogiri/xml_document.h +0 -23
  163. data/ext/nokogiri/xml_document_fragment.h +0 -10
  164. data/ext/nokogiri/xml_dtd.h +0 -10
  165. data/ext/nokogiri/xml_element_content.h +0 -10
  166. data/ext/nokogiri/xml_element_decl.h +0 -9
  167. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  168. data/ext/nokogiri/xml_entity_decl.h +0 -10
  169. data/ext/nokogiri/xml_entity_reference.h +0 -9
  170. data/ext/nokogiri/xml_io.c +0 -63
  171. data/ext/nokogiri/xml_io.h +0 -11
  172. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  173. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  174. data/ext/nokogiri/xml_namespace.h +0 -14
  175. data/ext/nokogiri/xml_node.h +0 -13
  176. data/ext/nokogiri/xml_node_set.h +0 -12
  177. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  178. data/ext/nokogiri/xml_reader.h +0 -10
  179. data/ext/nokogiri/xml_relax_ng.h +0 -9
  180. data/ext/nokogiri/xml_sax_parser.h +0 -39
  181. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  182. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  183. data/ext/nokogiri/xml_schema.h +0 -9
  184. data/ext/nokogiri/xml_syntax_error.h +0 -13
  185. data/ext/nokogiri/xml_text.h +0 -9
  186. data/ext/nokogiri/xml_xpath_context.h +0 -10
  187. data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -1,35 +1,3 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2011:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
1
  package nokogiri;
34
2
 
35
3
  import static nokogiri.internals.NokogiriHelpers.getLocalNameForNamespace;
@@ -58,128 +26,148 @@ import org.w3c.dom.NamedNodeMap;
58
26
 
59
27
  /**
60
28
  * Class for Nokogiri::XML::DocumentFragment
61
- *
29
+ *
62
30
  * @author sergio
63
31
  * @author Yoko Harada <yokolet@gmail.com>
64
32
  */
65
- @JRubyClass(name="Nokogiri::XML::DocumentFragment", parent="Nokogiri::XML::Node")
66
- public class XmlDocumentFragment extends XmlNode {
67
-
68
- private XmlElement fragmentContext;
69
-
70
- public XmlDocumentFragment(Ruby ruby) {
71
- this(ruby, getNokogiriClass(ruby, "Nokogiri::XML::DocumentFragment"));
33
+ @JRubyClass(name = "Nokogiri::XML::DocumentFragment", parent = "Nokogiri::XML::Node")
34
+ public class XmlDocumentFragment extends XmlNode
35
+ {
36
+
37
+ private XmlElement fragmentContext;
38
+
39
+ public
40
+ XmlDocumentFragment(Ruby ruby)
41
+ {
42
+ this(ruby, getNokogiriClass(ruby, "Nokogiri::XML::DocumentFragment"));
43
+ }
44
+
45
+ public
46
+ XmlDocumentFragment(Ruby ruby, RubyClass klazz)
47
+ {
48
+ super(ruby, klazz);
49
+ }
50
+
51
+ @JRubyMethod(name = "new", meta = true, required = 1, optional = 2)
52
+ public static IRubyObject
53
+ rbNew(ThreadContext context, IRubyObject cls, IRubyObject[] args)
54
+ {
55
+ if (args.length < 1) {
56
+ throw context.runtime.newArgumentError(args.length, 1);
72
57
  }
73
58
 
74
- public XmlDocumentFragment(Ruby ruby, RubyClass klazz) {
75
- super(ruby, klazz);
59
+ if (!(args[0] instanceof XmlDocument)) {
60
+ throw context.runtime.newArgumentError("first parameter must be a Nokogiri::XML::Document instance");
76
61
  }
77
62
 
78
- @JRubyMethod(name="new", meta = true, required=1, optional=2)
79
- public static IRubyObject rbNew(ThreadContext context, IRubyObject cls, IRubyObject[] args) {
80
- if (args.length < 1) {
81
- throw context.runtime.newArgumentError(args.length, 1);
82
- }
83
-
84
- if (!(args[0] instanceof XmlDocument)){
85
- throw context.runtime.newArgumentError("first parameter must be a Nokogiri::XML::Document instance");
86
- }
87
-
88
- XmlDocument doc = (XmlDocument) args[0];
89
-
90
- // make wellformed fragment, ignore invalid namespace, or add appropriate namespace to parse
91
- if (args.length > 1 && args[1] instanceof RubyString) {
92
- final RubyString arg1 = (RubyString) args[1];
93
- if (XmlDocumentFragment.isTag(arg1)) {
94
- args[1] = RubyString.newString(context.runtime, addNamespaceDeclIfNeeded(doc, rubyStringToString(arg1)));
95
- }
96
- }
63
+ XmlDocument doc = (XmlDocument) args[0];
97
64
 
98
- XmlDocumentFragment fragment = (XmlDocumentFragment) NokogiriService.XML_DOCUMENT_FRAGMENT_ALLOCATOR.allocate(context.runtime, (RubyClass)cls);
99
- fragment.setDocument(context, doc);
100
- fragment.setNode(context.runtime, doc.getDocument().createDocumentFragment());
101
-
102
- //TODO: Get namespace definitions from doc.
103
- if (args.length == 3 && args[2] != null && args[2] instanceof XmlElement) {
104
- fragment.fragmentContext = (XmlElement)args[2];
105
- }
106
- Helpers.invoke(context, fragment, "initialize", args);
107
- return fragment;
65
+ // make wellformed fragment, ignore invalid namespace, or add appropriate namespace to parse
66
+ if (args.length > 1 && args[1] instanceof RubyString) {
67
+ final RubyString arg1 = (RubyString) args[1];
68
+ if (XmlDocumentFragment.isTag(arg1)) {
69
+ args[1] = RubyString.newString(context.runtime, addNamespaceDeclIfNeeded(doc, rubyStringToString(arg1)));
70
+ }
108
71
  }
109
72
 
110
- private static final ByteList TAG_BEG = ByteList.create("<");
111
- private static final ByteList TAG_END = ByteList.create(">");
73
+ XmlDocumentFragment fragment = (XmlDocumentFragment) NokogiriService.XML_DOCUMENT_FRAGMENT_ALLOCATOR.allocate(
74
+ context.runtime, (RubyClass)cls);
75
+ fragment.setDocument(context, doc);
76
+ fragment.setNode(context.runtime, doc.getDocument().createDocumentFragment());
112
77
 
113
- private static boolean isTag(final RubyString str) {
114
- return str.getByteList().startsWith(TAG_BEG) && str.getByteList().endsWith(TAG_END);
78
+ //TODO: Get namespace definitions from doc.
79
+ if (args.length == 3 && args[2] != null && args[2] instanceof XmlElement) {
80
+ fragment.fragmentContext = (XmlElement)args[2];
115
81
  }
116
-
117
- private static boolean isNamespaceDefined(String qName, NamedNodeMap nodeMap) {
118
- if (isNamespace(qName.intern())) return true;
119
- for (int i=0; i < nodeMap.getLength(); i++) {
120
- Attr attr = (Attr)nodeMap.item(i);
121
- if (isNamespace(attr.getNodeName())) {
122
- String localPart = getLocalNameForNamespace(attr.getNodeName(), null);
123
- if (getPrefix(qName).equals(localPart)) {
124
- return true;
125
- }
126
- }
82
+ Helpers.invoke(context, fragment, "initialize", args);
83
+ return fragment;
84
+ }
85
+
86
+ private static final ByteList TAG_BEG = ByteList.create("<");
87
+ private static final ByteList TAG_END = ByteList.create(">");
88
+
89
+ private static boolean
90
+ isTag(final RubyString str)
91
+ {
92
+ return str.getByteList().startsWith(TAG_BEG) && str.getByteList().endsWith(TAG_END);
93
+ }
94
+
95
+ private static boolean
96
+ isNamespaceDefined(String qName, NamedNodeMap nodeMap)
97
+ {
98
+ if (isNamespace(qName.intern())) { return true; }
99
+ for (int i = 0; i < nodeMap.getLength(); i++) {
100
+ Attr attr = (Attr)nodeMap.item(i);
101
+ if (isNamespace(attr.getNodeName())) {
102
+ String localPart = getLocalNameForNamespace(attr.getNodeName(), null);
103
+ if (getPrefix(qName).equals(localPart)) {
104
+ return true;
127
105
  }
128
- return false;
106
+ }
129
107
  }
130
-
131
- private static final Pattern QNAME_RE = Pattern.compile("[^</:>\\s]+:[^</:>=\\s]+");
132
- private static final Pattern START_TAG_RE = Pattern.compile("<[^</>]+>");
133
-
134
- private static String addNamespaceDeclIfNeeded(XmlDocument doc, String tags) {
135
- if (doc.getDocument() == null) return tags;
136
- if (doc.getDocument().getDocumentElement() == null) return tags;
137
- Matcher matcher = START_TAG_RE.matcher(tags);
138
- Map<CharSequence, CharSequence> rewriteTable = null;
139
- while (matcher.find()) {
140
- String start_tag = matcher.group();
141
- Matcher matcher2 = QNAME_RE.matcher(start_tag);
142
- while (matcher2.find()) {
143
- String qName = matcher2.group();
144
- NamedNodeMap nodeMap = doc.getDocument().getDocumentElement().getAttributes();
145
- if (isNamespaceDefined(qName, nodeMap)) {
146
- CharSequence namespaceDecl = getNamespaceDecl(getPrefix(qName), nodeMap);
147
- if (namespaceDecl != null) {
148
- if (rewriteTable == null) rewriteTable = new HashMap(8, 1);
149
- StringBuilder str = new StringBuilder(qName.length() + namespaceDecl.length() + 3);
150
- String key = str.append('<').append(qName).append('>').toString();
151
- str.setCharAt(key.length() - 1, ' '); // (last) '>' -> ' '
152
- rewriteTable.put(key, str.append(namespaceDecl).append('>'));
153
- }
154
- }
155
- }
156
- }
157
- if (rewriteTable != null) {
158
- for (Map.Entry<CharSequence, CharSequence> e : rewriteTable.entrySet()) {
159
- tags = tags.replace(e.getKey(), e.getValue());
160
- }
108
+ return false;
109
+ }
110
+
111
+ private static final Pattern QNAME_RE = Pattern.compile("[^</:>\\s]+:[^</:>=\\s]+");
112
+ private static final Pattern START_TAG_RE = Pattern.compile("<[^</>]+>");
113
+
114
+ private static String
115
+ addNamespaceDeclIfNeeded(XmlDocument doc, String tags)
116
+ {
117
+ if (doc.getDocument() == null) { return tags; }
118
+ if (doc.getDocument().getDocumentElement() == null) { return tags; }
119
+ Matcher matcher = START_TAG_RE.matcher(tags);
120
+ Map<CharSequence, CharSequence> rewriteTable = null;
121
+ while (matcher.find()) {
122
+ String start_tag = matcher.group();
123
+ Matcher matcher2 = QNAME_RE.matcher(start_tag);
124
+ while (matcher2.find()) {
125
+ String qName = matcher2.group();
126
+ NamedNodeMap nodeMap = doc.getDocument().getDocumentElement().getAttributes();
127
+ if (isNamespaceDefined(qName, nodeMap)) {
128
+ CharSequence namespaceDecl = getNamespaceDecl(getPrefix(qName), nodeMap);
129
+ if (namespaceDecl != null) {
130
+ if (rewriteTable == null) { rewriteTable = new HashMap(8, 1); }
131
+ StringBuilder str = new StringBuilder(qName.length() + namespaceDecl.length() + 3);
132
+ String key = str.append('<').append(qName).append('>').toString();
133
+ str.setCharAt(key.length() - 1, ' '); // (last) '>' -> ' '
134
+ rewriteTable.put(key, str.append(namespaceDecl).append('>'));
135
+ }
161
136
  }
162
-
163
- return tags;
137
+ }
164
138
  }
165
-
166
- private static CharSequence getNamespaceDecl(final String prefix, NamedNodeMap nodeMap) {
167
- for (int i=0; i < nodeMap.getLength(); i++) {
168
- Attr attr = (Attr) nodeMap.item(i);
169
- if (prefix.equals(attr.getLocalName())) {
170
- return new StringBuilder().
171
- append(attr.getName()).append('=').append('"').append(attr.getValue()).append('"');
172
- }
173
- }
174
- return null;
175
- }
176
-
177
- public XmlElement getFragmentContext() {
178
- return fragmentContext;
139
+ if (rewriteTable != null) {
140
+ for (Map.Entry<CharSequence, CharSequence> e : rewriteTable.entrySet()) {
141
+ tags = tags.replace(e.getKey(), e.getValue());
142
+ }
179
143
  }
180
144
 
181
- @Override
182
- public void relink_namespace(ThreadContext context) {
183
- relink_namespace(context, getChildren());
145
+ return tags;
146
+ }
147
+
148
+ private static CharSequence
149
+ getNamespaceDecl(final String prefix, NamedNodeMap nodeMap)
150
+ {
151
+ for (int i = 0; i < nodeMap.getLength(); i++) {
152
+ Attr attr = (Attr) nodeMap.item(i);
153
+ if (prefix.equals(attr.getLocalName())) {
154
+ return new StringBuilder().
155
+ append(attr.getName()).append('=').append('"').append(attr.getValue()).append('"');
156
+ }
184
157
  }
158
+ return null;
159
+ }
160
+
161
+ public XmlElement
162
+ getFragmentContext()
163
+ {
164
+ return fragmentContext;
165
+ }
166
+
167
+ @Override
168
+ public void
169
+ relink_namespace(ThreadContext context)
170
+ {
171
+ relink_namespace(context, getChildren());
172
+ }
185
173
  }
@@ -1,35 +1,3 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2011:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
1
  package nokogiri;
34
2
 
35
3
  import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
@@ -61,421 +29,487 @@ import nokogiri.internals.SaveContextVisitor;
61
29
 
62
30
  /**
63
31
  * Class for Nokogiri::XML::DTD
64
- *
32
+ *
65
33
  * @author sergio
66
34
  * @author Patrick Mahoney <pat@polycrystal.org>
67
35
  * @author Yoko Harada <yokolet@gmail.com>
68
36
  */
69
37
 
70
- @JRubyClass(name="Nokogiri::XML::DTD", parent="Nokogiri::XML::Node")
71
- public class XmlDtd extends XmlNode {
72
- /** cache of children, Nokogiri::XML::NodeSet */
73
- protected IRubyObject children = null;
74
-
75
- /** cache of name => XmlAttributeDecl */
76
- protected RubyHash attributes = null;
77
-
78
- /** cache of name => XmlElementDecl */
79
- protected RubyHash elements = null;
80
-
81
- /** cache of name => XmlEntityDecl */
82
- protected RubyHash entities = null;
83
-
84
- /** cache of name => Nokogiri::XML::Notation */
85
- protected RubyHash notations = null;
86
- protected RubyClass notationClass;
87
-
88
- /** temporary store of content models before they are added to
89
- * their XmlElementDecl. */
90
- protected RubyHash contentModels;
91
-
92
- /** node name */
93
- protected IRubyObject name;
94
-
95
- /** public ID (or external ID) */
96
- protected IRubyObject pubId;
97
-
98
- /** system ID */
99
- protected IRubyObject sysId;
100
-
101
- public XmlDtd(Ruby ruby, RubyClass rubyClass) {
102
- super(ruby, rubyClass);
103
- }
104
-
105
- public void setNode(Ruby runtime, Node dtd) {
106
- this.node = dtd;
107
- notationClass = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::Notation");
108
-
109
- name = pubId = sysId = runtime.getNil();
110
- if (dtd == null) return;
111
-
112
- // This is the dtd declaration stored in the document; it
113
- // contains the DTD name (root element) and public and system
114
- // ids. The actual declarations are in the NekoDTD 'dtd'
115
- // variable. I don't know of a way to consolidate the two.
116
-
117
- DocumentType otherDtd = dtd.getOwnerDocument().getDoctype();
118
- if (otherDtd != null) {
119
- name = stringOrNil(runtime, otherDtd.getNodeName());
120
- pubId = nonEmptyStringOrNil(runtime, otherDtd.getPublicId());
121
- sysId = nonEmptyStringOrNil(runtime, otherDtd.getSystemId());
122
- }
123
- }
124
-
125
- public XmlDtd(Ruby ruby, RubyClass rubyClass, Node dtd) {
126
- super(ruby, rubyClass, dtd);
127
- setNode(ruby, dtd);
128
- }
129
-
130
- public static XmlDtd newEmpty(Ruby runtime,
131
- Document doc,
132
- IRubyObject name,
133
- IRubyObject external_id,
134
- IRubyObject system_id) {
135
-
136
- DocumentType placeholder;
137
- if (doc.getDoctype() == null) {
138
- String javaName = NokogiriHelpers.rubyStringToString(name);
139
- String javaExternalId = NokogiriHelpers.rubyStringToString(external_id);
140
- String javaSystemId = NokogiriHelpers.rubyStringToString(system_id);
141
- placeholder = doc.getImplementation().createDocumentType(javaName, javaExternalId, javaSystemId);
142
- doc.appendChild(placeholder);
143
- } else {
144
- placeholder = doc.getDoctype();
145
- }
146
- // FIXME: what if the document had a doc type, why are we here ?
147
- XmlDtd dtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD"));
148
- dtd.setNode(runtime, placeholder);
149
- dtd.name = name;
150
- dtd.pubId = external_id;
151
- dtd.sysId = system_id;
152
- return dtd;
153
- }
154
-
38
+ @JRubyClass(name = "Nokogiri::XML::DTD", parent = "Nokogiri::XML::Node")
39
+ public class XmlDtd extends XmlNode
40
+ {
41
+ /** cache of children, Nokogiri::XML::NodeSet */
42
+ protected IRubyObject children = null;
155
43
 
156
- /**
157
- * Create an unparented element that contains DTD declarations
158
- * parsed from the internal subset attached as user data to
159
- * <code>doc</code>. The attached dtd must be the tree from
160
- * NekoDTD. The owner document of the returned tree will be
161
- * <code>doc</doc>.
162
- *
163
- * NekoDTD parser returns a new document node containing elements
164
- * representing the dtd declarations. The plan is to get the root
165
- * element and adopt it into the correct document, stipping the
166
- * Document provided by NekoDTD.
167
- *
168
- */
169
- public static XmlDtd newFromInternalSubset(Ruby runtime, Document doc) {
170
- Object dtdTree_ = doc.getUserData(XmlDocument.DTD_RAW_DOCUMENT);
171
- if (dtdTree_ == null) {
172
- XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD"));
173
- xmlDtd.setNode(runtime, null);
174
- return xmlDtd;
175
- }
176
-
177
- Node dtdTree = (Node) dtdTree_;
178
- Node dtd = getInternalSubset(dtdTree);
179
- if (dtd == null) {
180
- XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD"));
181
- xmlDtd.setNode(runtime, null);
182
- return xmlDtd;
183
- } else {
184
- // Import the node into doc so it has the correct owner document.
185
- dtd = doc.importNode(dtd, true);
186
- XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD"));
187
- xmlDtd.setNode(runtime, dtd);
188
- return xmlDtd;
189
- }
190
- }
191
-
192
- public static IRubyObject newFromExternalSubset(Ruby runtime, Document doc) {
193
- Object dtdTree_ = doc.getUserData(XmlDocument.DTD_RAW_DOCUMENT);
194
- if (dtdTree_ == null) {
195
- return runtime.getNil();
196
- }
197
-
198
- Node dtdTree = (Node) dtdTree_;
199
- Node dtd = getExternalSubset(dtdTree);
200
- if (dtd == null) {
201
- return runtime.getNil();
202
- } else if (!dtd.hasChildNodes()) {
203
- return runtime.getNil();
204
- } else {
205
- // Import the node into doc so it has the correct owner document.
206
- dtd = doc.importNode(dtd, true);
207
- XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD"));
208
- xmlDtd.setNode(runtime, dtd);
209
- return xmlDtd;
210
- }
211
- }
44
+ /** cache of name => XmlAttributeDecl */
45
+ protected RubyHash attributes = null;
212
46
 
213
- /*
214
- * <code>dtd</code> is the document node of a NekoDTD tree.
215
- * NekoDTD tree looks like this:
216
- *
217
- * <code><pre>
218
- * [#document: null]
219
- * [#comment: ...]
220
- * [#comment: ...]
221
- * [dtd: null] // a DocumentType; isDTD(node) => false
222
- * [dtd: null] // root of dtd, an Element node; isDTD(node) => true
223
- * ... decls, content models, etc. ...
224
- * [externalSubset: null] pubid="the pubid" sysid="the sysid"
225
- * ... external subset decls, etc. ...
226
- * </pre></code>
227
- */
228
- protected static Node getInternalSubset(Node dtdTree) {
229
- Node root;
230
- for (root = dtdTree.getFirstChild(); ; root = root.getNextSibling()) {
231
- if (root == null)
232
- return null;
233
- else if (isDTD(root))
234
- return root; // we have second dtd which is root
235
- }
236
- }
47
+ /** cache of name => XmlElementDecl */
48
+ protected RubyHash elements = null;
237
49
 
238
- protected static Node getExternalSubset(Node dtdTree) {
239
- Node dtd = getInternalSubset(dtdTree);
240
- if (dtd == null) return null;
241
- for (Node ext = dtd.getFirstChild(); ; ext = ext.getNextSibling()) {
242
- if (ext == null)
243
- return null;
244
- else if (isExternalSubset(ext))
245
- return ext;
246
- }
247
- }
50
+ /** cache of name => XmlEntityDecl */
51
+ protected RubyHash entities = null;
248
52
 
249
- /**
250
- * This overrides the #attributes method defined in
251
- * lib/nokogiri/xml/node.rb.
252
- */
253
- @JRubyMethod
254
- public IRubyObject attributes(ThreadContext context) {
255
- if (attributes == null) extractDecls(context);
53
+ /** cache of name => Nokogiri::XML::Notation */
54
+ protected RubyHash notations = null;
55
+ protected RubyClass notationClass;
256
56
 
257
- return attributes;
258
- }
57
+ /** temporary store of content models before they are added to
58
+ * their XmlElementDecl. */
59
+ protected RubyHash contentModels;
259
60
 
260
- @JRubyMethod
261
- public IRubyObject elements(ThreadContext context) {
262
- if (elements == null) extractDecls(context);
61
+ /** node name */
62
+ protected IRubyObject name;
263
63
 
264
- return elements;
265
- }
64
+ /** public ID (or external ID) */
65
+ protected IRubyObject pubId;
266
66
 
267
- @JRubyMethod
268
- public IRubyObject entities(ThreadContext context) {
269
- if (entities == null) extractDecls(context);
67
+ /** system ID */
68
+ protected IRubyObject sysId;
270
69
 
271
- return entities;
272
- }
70
+ public
71
+ XmlDtd(Ruby ruby, RubyClass rubyClass)
72
+ {
73
+ super(ruby, rubyClass);
74
+ }
273
75
 
274
- @JRubyMethod
275
- public IRubyObject notations(ThreadContext context) {
276
- if (notations == null) extractDecls(context);
76
+ public void
77
+ setNode(Ruby runtime, Node dtd)
78
+ {
79
+ this.node = dtd;
80
+ notationClass = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::Notation");
277
81
 
278
- return notations;
279
- }
82
+ name = pubId = sysId = runtime.getNil();
83
+ if (dtd == null) { return; }
280
84
 
281
- /**
282
- * Our "node" object is as-returned by NekoDTD. The actual
283
- * "children" that we're interested in (Attribute declarations,
284
- * etc.) are a few layers deep.
285
- */
286
- @Override
287
- @JRubyMethod
288
- public IRubyObject children(ThreadContext context) {
289
- if (children == null) extractDecls(context);
290
-
291
- return children;
292
- }
85
+ // This is the dtd declaration stored in the document; it
86
+ // contains the DTD name (root element) and public and system
87
+ // ids. The actual declarations are in the NekoDTD 'dtd'
88
+ // variable. I don't know of a way to consolidate the two.
293
89
 
294
- /**
295
- * Returns the name of the dtd.
296
- */
297
- @Override
298
- @JRubyMethod
299
- public IRubyObject node_name(ThreadContext context) {
300
- return name;
90
+ DocumentType otherDtd = dtd.getOwnerDocument().getDoctype();
91
+ if (otherDtd != null) {
92
+ name = stringOrNil(runtime, otherDtd.getNodeName());
93
+ pubId = nonEmptyStringOrNil(runtime, otherDtd.getPublicId());
94
+ sysId = nonEmptyStringOrNil(runtime, otherDtd.getSystemId());
301
95
  }
302
-
303
- @Override
304
- @JRubyMethod(name = "node_name=")
305
- public IRubyObject node_name_set(ThreadContext context, IRubyObject name) {
306
- throw context.getRuntime()
307
- .newRuntimeError("cannot change name of DTD");
96
+ }
97
+
98
+ public
99
+ XmlDtd(Ruby ruby, RubyClass rubyClass, Node dtd)
100
+ {
101
+ super(ruby, rubyClass, dtd);
102
+ setNode(ruby, dtd);
103
+ }
104
+
105
+ public static XmlDtd
106
+ newEmpty(Ruby runtime,
107
+ Document doc,
108
+ IRubyObject name,
109
+ IRubyObject external_id,
110
+ IRubyObject system_id)
111
+ {
112
+
113
+ DocumentType placeholder;
114
+ if (doc.getDoctype() == null) {
115
+ String javaName = NokogiriHelpers.rubyStringToString(name);
116
+ String javaExternalId = NokogiriHelpers.rubyStringToString(external_id);
117
+ String javaSystemId = NokogiriHelpers.rubyStringToString(system_id);
118
+ placeholder = doc.getImplementation().createDocumentType(javaName, javaExternalId, javaSystemId);
119
+ doc.appendChild(placeholder);
120
+ } else {
121
+ placeholder = doc.getDoctype();
308
122
  }
309
-
310
- @JRubyMethod
311
- public IRubyObject system_id(ThreadContext context) {
312
- return sysId;
123
+ // FIXME: what if the document had a doc type, why are we here ?
124
+ XmlDtd dtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
125
+ "Nokogiri::XML::DTD"));
126
+ dtd.setNode(runtime, placeholder);
127
+ dtd.name = name;
128
+ dtd.pubId = external_id;
129
+ dtd.sysId = system_id;
130
+ return dtd;
131
+ }
132
+
133
+
134
+ /**
135
+ * Create an unparented element that contains DTD declarations
136
+ * parsed from the internal subset attached as user data to
137
+ * <code>doc</code>. The attached dtd must be the tree from
138
+ * NekoDTD. The owner document of the returned tree will be
139
+ * <code>doc</code>.
140
+ *
141
+ * NekoDTD parser returns a new document node containing elements
142
+ * representing the dtd declarations. The plan is to get the root
143
+ * element and adopt it into the correct document, stripping the
144
+ * Document provided by NekoDTD.
145
+ *
146
+ */
147
+ public static XmlDtd
148
+ newFromInternalSubset(Ruby runtime, Document doc)
149
+ {
150
+ Object dtdTree_ = doc.getUserData(XmlDocument.DTD_RAW_DOCUMENT);
151
+ if (dtdTree_ == null) {
152
+ XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
153
+ "Nokogiri::XML::DTD"));
154
+ xmlDtd.setNode(runtime, null);
155
+ return xmlDtd;
313
156
  }
314
157
 
315
- @JRubyMethod
316
- public IRubyObject external_id(ThreadContext context) {
317
- return pubId;
158
+ Node dtdTree = (Node) dtdTree_;
159
+ Node dtd = getInternalSubset(dtdTree);
160
+ if (dtd == null) {
161
+ XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
162
+ "Nokogiri::XML::DTD"));
163
+ xmlDtd.setNode(runtime, null);
164
+ return xmlDtd;
165
+ } else {
166
+ // Import the node into doc so it has the correct owner document.
167
+ dtd = doc.importNode(dtd, true);
168
+ XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
169
+ "Nokogiri::XML::DTD"));
170
+ xmlDtd.setNode(runtime, dtd);
171
+ return xmlDtd;
318
172
  }
319
-
320
- @JRubyMethod
321
- public IRubyObject validate(ThreadContext context, IRubyObject doc) {
322
- RubyArray errors = RubyArray.newArray(context.getRuntime());
323
- if (doc instanceof XmlDocument) {
324
- errors = (RubyArray) ((XmlDocument)doc).getInstanceVariable("@errors");
325
- }
326
- return errors;
173
+ }
174
+
175
+ public static IRubyObject
176
+ newFromExternalSubset(Ruby runtime, Document doc)
177
+ {
178
+ Object dtdTree_ = doc.getUserData(XmlDocument.DTD_RAW_DOCUMENT);
179
+ if (dtdTree_ == null) {
180
+ return runtime.getNil();
327
181
  }
328
182
 
329
- public static boolean nameEquals(Node node, QName name) {
330
- return name.localpart.equals(node.getNodeName());
183
+ Node dtdTree = (Node) dtdTree_;
184
+ Node dtd = getExternalSubset(dtdTree);
185
+ if (dtd == null) {
186
+ return runtime.getNil();
187
+ } else if (!dtd.hasChildNodes()) {
188
+ return runtime.getNil();
189
+ } else {
190
+ // Import the node into doc so it has the correct owner document.
191
+ dtd = doc.importNode(dtd, true);
192
+ XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
193
+ "Nokogiri::XML::DTD"));
194
+ xmlDtd.setNode(runtime, dtd);
195
+ return xmlDtd;
331
196
  }
332
-
333
- public static boolean isExternalSubset(Node node) {
334
- return nameEquals(node, DTDConfiguration.E_EXTERNAL_SUBSET);
197
+ }
198
+
199
+ /*
200
+ * <code>dtd</code> is the document node of a NekoDTD tree.
201
+ * NekoDTD tree looks like this:
202
+ *
203
+ * <code><pre>
204
+ * [#document: null]
205
+ * [#comment: ...]
206
+ * [#comment: ...]
207
+ * [dtd: null] // a DocumentType; isDTD(node) => false
208
+ * [dtd: null] // root of dtd, an Element node; isDTD(node) => true
209
+ * ... decls, content models, etc. ...
210
+ * [externalSubset: null] pubid="the pubid" sysid="the sysid"
211
+ * ... external subset decls, etc. ...
212
+ * </pre></code>
213
+ */
214
+ protected static Node
215
+ getInternalSubset(Node dtdTree)
216
+ {
217
+ Node root;
218
+ for (root = dtdTree.getFirstChild(); ; root = root.getNextSibling()) {
219
+ if (root == null) {
220
+ return null;
221
+ } else if (isDTD(root)) {
222
+ return root; // we have second dtd which is root
223
+ }
335
224
  }
336
-
337
- /**
338
- * Checks instanceof Element so we return false for a DocumentType
339
- * node (NekoDTD uses Element for all its nodes).
340
- */
341
- public static boolean isDTD(Node node) {
342
- return (node instanceof Element &&
343
- nameEquals(node, DTDConfiguration.E_DTD));
225
+ }
226
+
227
+ protected static Node
228
+ getExternalSubset(Node dtdTree)
229
+ {
230
+ Node dtd = getInternalSubset(dtdTree);
231
+ if (dtd == null) { return null; }
232
+ for (Node ext = dtd.getFirstChild(); ; ext = ext.getNextSibling()) {
233
+ if (ext == null) {
234
+ return null;
235
+ } else if (isExternalSubset(ext)) {
236
+ return ext;
237
+ }
344
238
  }
345
-
346
- public static boolean isAttributeDecl(Node node) {
347
- return nameEquals(node, DTDConfiguration.E_ATTRIBUTE_DECL);
239
+ }
240
+
241
+ /**
242
+ * This overrides the #attributes method defined in
243
+ * lib/nokogiri/xml/node.rb.
244
+ */
245
+ @JRubyMethod
246
+ public IRubyObject
247
+ attributes(ThreadContext context)
248
+ {
249
+ if (attributes == null) { extractDecls(context); }
250
+
251
+ return attributes;
252
+ }
253
+
254
+ @JRubyMethod
255
+ public IRubyObject
256
+ elements(ThreadContext context)
257
+ {
258
+ if (elements == null) { extractDecls(context); }
259
+
260
+ return elements;
261
+ }
262
+
263
+ @JRubyMethod
264
+ public IRubyObject
265
+ entities(ThreadContext context)
266
+ {
267
+ if (entities == null) { extractDecls(context); }
268
+
269
+ return entities;
270
+ }
271
+
272
+ @JRubyMethod
273
+ public IRubyObject
274
+ notations(ThreadContext context)
275
+ {
276
+ if (notations == null) { extractDecls(context); }
277
+
278
+ return notations;
279
+ }
280
+
281
+ /**
282
+ * Our "node" object is as-returned by NekoDTD. The actual
283
+ * "children" that we're interested in (Attribute declarations,
284
+ * etc.) are a few layers deep.
285
+ */
286
+ @Override
287
+ @JRubyMethod
288
+ public IRubyObject
289
+ children(ThreadContext context)
290
+ {
291
+ if (children == null) { extractDecls(context); }
292
+
293
+ return children;
294
+ }
295
+
296
+ /**
297
+ * Returns the name of the dtd.
298
+ */
299
+ @Override
300
+ @JRubyMethod
301
+ public IRubyObject
302
+ node_name(ThreadContext context)
303
+ {
304
+ return name;
305
+ }
306
+
307
+ @Override
308
+ @JRubyMethod(name = "node_name=")
309
+ public IRubyObject
310
+ node_name_set(ThreadContext context, IRubyObject name)
311
+ {
312
+ throw context.getRuntime()
313
+ .newRuntimeError("cannot change name of DTD");
314
+ }
315
+
316
+ @JRubyMethod
317
+ public IRubyObject
318
+ system_id(ThreadContext context)
319
+ {
320
+ return sysId;
321
+ }
322
+
323
+ @JRubyMethod
324
+ public IRubyObject
325
+ external_id(ThreadContext context)
326
+ {
327
+ return pubId;
328
+ }
329
+
330
+ @JRubyMethod
331
+ public IRubyObject
332
+ validate(ThreadContext context, IRubyObject doc)
333
+ {
334
+ RubyArray errors = RubyArray.newArray(context.getRuntime());
335
+ if (doc instanceof XmlDocument) {
336
+ errors = (RubyArray)((XmlDocument)doc).getInstanceVariable("@errors");
348
337
  }
349
-
350
- public static boolean isElementDecl(Node node) {
351
- return nameEquals(node, DTDConfiguration.E_ELEMENT_DECL);
338
+ return errors;
339
+ }
340
+
341
+ public static boolean
342
+ nameEquals(Node node, QName name)
343
+ {
344
+ return name.localpart.equals(node.getNodeName());
345
+ }
346
+
347
+ public static boolean
348
+ isExternalSubset(Node node)
349
+ {
350
+ return nameEquals(node, DTDConfiguration.E_EXTERNAL_SUBSET);
351
+ }
352
+
353
+ /**
354
+ * Checks instanceof Element so we return false for a DocumentType
355
+ * node (NekoDTD uses Element for all its nodes).
356
+ */
357
+ public static boolean
358
+ isDTD(Node node)
359
+ {
360
+ return (node instanceof Element &&
361
+ nameEquals(node, DTDConfiguration.E_DTD));
362
+ }
363
+
364
+ public static boolean
365
+ isAttributeDecl(Node node)
366
+ {
367
+ return nameEquals(node, DTDConfiguration.E_ATTRIBUTE_DECL);
368
+ }
369
+
370
+ public static boolean
371
+ isElementDecl(Node node)
372
+ {
373
+ return nameEquals(node, DTDConfiguration.E_ELEMENT_DECL);
374
+ }
375
+
376
+ public static boolean
377
+ isEntityDecl(Node node)
378
+ {
379
+ return (nameEquals(node, DTDConfiguration.E_INTERNAL_ENTITY_DECL) ||
380
+ nameEquals(node, DTDConfiguration.E_UNPARSED_ENTITY_DECL));
381
+ }
382
+
383
+ public static boolean
384
+ isNotationDecl(Node node)
385
+ {
386
+ return nameEquals(node, DTDConfiguration.E_NOTATION_DECL);
387
+ }
388
+
389
+ public static boolean
390
+ isContentModel(Node node)
391
+ {
392
+ return nameEquals(node, DTDConfiguration.E_CONTENT_MODEL);
393
+ }
394
+
395
+ /**
396
+ * Recursively extract various DTD declarations and store them in
397
+ * the various collections.
398
+ */
399
+ protected void
400
+ extractDecls(ThreadContext context)
401
+ {
402
+ Ruby runtime = context.runtime;
403
+
404
+ // initialize data structures
405
+ attributes = RubyHash.newHash(runtime);
406
+ elements = RubyHash.newHash(runtime);
407
+ entities = RubyHash.newHash(runtime);
408
+ notations = RubyHash.newHash(runtime);
409
+ contentModels = RubyHash.newHash(runtime);
410
+ children = runtime.getNil();
411
+
412
+ // recursively extract decls
413
+ if (node == null) { return; } // leave all the decl hashes empty
414
+
415
+ // convert allDecls to a NodeSet
416
+ children = XmlNodeSet.newNodeSet(runtime, extractDecls(context, node.getFirstChild()));
417
+
418
+ // add attribute decls as attributes to the matching element decl
419
+ RubyArray keys = attributes.keys();
420
+ for (int i = 0; i < keys.getLength(); ++i) {
421
+ IRubyObject akey = keys.entry(i);
422
+ IRubyObject val;
423
+
424
+ val = attributes.op_aref(context, akey);
425
+ if (val.isNil()) { continue; }
426
+ XmlAttributeDecl attrDecl = (XmlAttributeDecl) val;
427
+ IRubyObject ekey = attrDecl.element_name(context);
428
+ val = elements.op_aref(context, ekey);
429
+ if (val.isNil()) { continue; }
430
+ XmlElementDecl elemDecl = (XmlElementDecl) val;
431
+
432
+ elemDecl.appendAttrDecl(attrDecl);
352
433
  }
353
434
 
354
- public static boolean isEntityDecl(Node node) {
355
- return (nameEquals(node, DTDConfiguration.E_INTERNAL_ENTITY_DECL) ||
356
- nameEquals(node, DTDConfiguration.E_UNPARSED_ENTITY_DECL));
357
- }
435
+ // add content models to the matching element decl
436
+ keys = contentModels.keys();
437
+ for (int i = 0; i < keys.getLength(); ++i) {
438
+ IRubyObject key = keys.entry(i);
439
+ IRubyObject cm = contentModels.op_aref(context, key);
358
440
 
359
- public static boolean isNotationDecl(Node node) {
360
- return nameEquals(node, DTDConfiguration.E_NOTATION_DECL);
441
+ IRubyObject elem = elements.op_aref(context, key);
442
+ if (elem.isNil()) { continue; }
443
+ if (((XmlElementDecl)elem).isEmpty()) { continue; }
444
+ ((XmlElementDecl) elem).setContentModel(cm);
361
445
  }
362
-
363
- public static boolean isContentModel(Node node) {
364
- return nameEquals(node, DTDConfiguration.E_CONTENT_MODEL);
446
+ }
447
+
448
+ /**
449
+ * The <code>node</code> is either the first child of the root dtd
450
+ * node (as returned by getInternalSubset()) or the first child of
451
+ * the external subset node (as returned by getExternalSubset()).
452
+ *
453
+ * This recursive function will not descend into an
454
+ * 'externalSubset' node, thus for an internal subset it only
455
+ * extracts nodes in the internal subset, and for an external
456
+ * subset it extracts everything and assumes <code>node</code>
457
+ * and all children are part of the external subset.
458
+ */
459
+ protected IRubyObject[]
460
+ extractDecls(ThreadContext context, Node node)
461
+ {
462
+ List<IRubyObject> decls = new ArrayList<IRubyObject>();
463
+ while (node != null) {
464
+ if (isExternalSubset(node)) {
465
+ break;
466
+ } else if (isAttributeDecl(node)) {
467
+ XmlAttributeDecl decl = XmlAttributeDecl.create(context, node);
468
+ attributes.op_aset(context, decl.attribute_name(context), decl);
469
+ decls.add(decl);
470
+ } else if (isElementDecl(node)) {
471
+ XmlElementDecl decl = XmlElementDecl.create(context, node);
472
+ elements.op_aset(context, decl.element_name(context), decl);
473
+ decls.add(decl);
474
+ } else if (isEntityDecl(node)) {
475
+ XmlEntityDecl decl = XmlEntityDecl.create(context, node);
476
+ entities.op_aset(context, decl.node_name(context), decl);
477
+ decls.add(decl);
478
+ } else if (isNotationDecl(node)) {
479
+ XmlNode tmp = (XmlNode)
480
+ NokogiriHelpers.constructNode(context.getRuntime(), node);
481
+ IRubyObject decl = invoke(context, notationClass, "new",
482
+ tmp.getAttribute(context, "name"),
483
+ tmp.getAttribute(context, "pubid"),
484
+ tmp.getAttribute(context, "sysid"));
485
+ notations.op_aset(context,
486
+ tmp.getAttribute(context, "name"), decl);
487
+ decls.add(decl);
488
+ } else if (isContentModel(node)) {
489
+ XmlElementContent cm =
490
+ new XmlElementContent(context.getRuntime(),
491
+ (XmlDocument) document(context),
492
+ node);
493
+ contentModels.op_aset(context, cm.element_name(context), cm);
494
+ } else {
495
+ // recurse
496
+ decls.addAll(Arrays.asList(extractDecls(context, node.getFirstChild())));
497
+ }
498
+
499
+ node = node.getNextSibling();
365
500
  }
366
501
 
367
- /**
368
- * Recursively extract various DTD declarations and store them in
369
- * the various collections.
370
- */
371
- protected void extractDecls(ThreadContext context) {
372
- Ruby runtime = context.runtime;
373
-
374
- // initialize data structures
375
- attributes = RubyHash.newHash(runtime);
376
- elements = RubyHash.newHash(runtime);
377
- entities = RubyHash.newHash(runtime);
378
- notations = RubyHash.newHash(runtime);
379
- contentModels = RubyHash.newHash(runtime);
380
- children = runtime.getNil();
381
-
382
- // recursively extract decls
383
- if (node == null) return; // leave all the decl hash's empty
384
-
385
- // convert allDecls to a NodeSet
386
- children = XmlNodeSet.newNodeSet(runtime, extractDecls(context, node.getFirstChild()));
387
-
388
- // add attribute decls as attributes to the matching element decl
389
- RubyArray keys = attributes.keys();
390
- for (int i = 0; i < keys.getLength(); ++i) {
391
- IRubyObject akey = keys.entry(i);
392
- IRubyObject val;
393
-
394
- val = attributes.op_aref(context, akey);
395
- if (val.isNil()) continue;
396
- XmlAttributeDecl attrDecl = (XmlAttributeDecl) val;
397
- IRubyObject ekey = attrDecl.element_name(context);
398
- val = elements.op_aref(context, ekey);
399
- if (val.isNil()) continue;
400
- XmlElementDecl elemDecl = (XmlElementDecl) val;
401
-
402
- elemDecl.appendAttrDecl(attrDecl);
403
- }
404
-
405
- // add content models to the matching element decl
406
- keys = contentModels.keys();
407
- for (int i = 0; i < keys.getLength(); ++i) {
408
- IRubyObject key = keys.entry(i);
409
- IRubyObject cm = contentModels.op_aref(context, key);
410
-
411
- IRubyObject elem = elements.op_aref(context, key);
412
- if (elem.isNil()) continue;
413
- if (((XmlElementDecl)elem).isEmpty()) continue;
414
- ((XmlElementDecl) elem).setContentModel(cm);
415
- }
416
- }
417
-
418
- /**
419
- * The <code>node</code> is either the first child of the root dtd
420
- * node (as returned by getInternalSubset()) or the first child of
421
- * the external subset node (as returned by getExternalSubset()).
422
- *
423
- * This recursive function will not descend into an
424
- * 'externalSubset' node, thus for an internal subset it only
425
- * extracts nodes in the internal subset, and for an external
426
- * subset it extracts everything and assumess <code>node</code>
427
- * and all children are part of the external subset.
428
- */
429
- protected IRubyObject[] extractDecls(ThreadContext context, Node node) {
430
- List<IRubyObject> decls = new ArrayList<IRubyObject>();
431
- while (node != null) {
432
- if (isExternalSubset(node)) {
433
- break;
434
- } else if (isAttributeDecl(node)) {
435
- XmlAttributeDecl decl = XmlAttributeDecl.create(context, node);
436
- attributes.op_aset(context, decl.attribute_name(context), decl);
437
- decls.add(decl);
438
- } else if (isElementDecl(node)) {
439
- XmlElementDecl decl = XmlElementDecl.create(context, node);
440
- elements.op_aset(context, decl.element_name(context), decl);
441
- decls.add(decl);
442
- } else if (isEntityDecl(node)) {
443
- XmlEntityDecl decl = XmlEntityDecl.create(context, node);
444
- entities.op_aset(context, decl.node_name(context), decl);
445
- decls.add(decl);
446
- } else if (isNotationDecl(node)) {
447
- XmlNode tmp = (XmlNode)
448
- NokogiriHelpers.constructNode(context.getRuntime(), node);
449
- IRubyObject decl = invoke(context, notationClass, "new",
450
- tmp.getAttribute(context, "name"),
451
- tmp.getAttribute(context, "pubid"),
452
- tmp.getAttribute(context, "sysid"));
453
- notations.op_aset(context,
454
- tmp.getAttribute(context, "name"), decl);
455
- decls.add(decl);
456
- } else if (isContentModel(node)) {
457
- XmlElementContent cm =
458
- new XmlElementContent(context.getRuntime(),
459
- (XmlDocument) document(context),
460
- node);
461
- contentModels.op_aset(context, cm.element_name(context), cm);
462
- } else {
463
- // recurse
464
- decls.addAll(Arrays.asList(extractDecls(context, node.getFirstChild())));
465
- }
466
-
467
- node = node.getNextSibling();
468
- }
469
-
470
- return decls.toArray(new IRubyObject[decls.size()]);
471
- }
472
-
473
- @Override
474
- public void accept(ThreadContext context, SaveContextVisitor visitor) {
475
- // since we use nekoDTD to parse dtd, node might be ElementImpl type
476
- // An external subset doesn't need to show up, so this method just see docType.
477
- DocumentType docType = node.getOwnerDocument().getDoctype();
478
- visitor.enter(docType);
479
- visitor.leave(docType);
480
- }
502
+ return decls.toArray(new IRubyObject[decls.size()]);
503
+ }
504
+
505
+ @Override
506
+ public void
507
+ accept(ThreadContext context, SaveContextVisitor visitor)
508
+ {
509
+ // since we use nekoDTD to parse dtd, node might be ElementImpl type
510
+ // An external subset doesn't need to show up, so this method only visits the docType.
511
+ DocumentType docType = node.getOwnerDocument().getDoctype();
512
+ visitor.enter(docType);
513
+ visitor.leave(docType);
514
+ }
481
515
  }