nokogiri 1.11.1-java → 1.11.2-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (142)
  1. checksums.yaml +4 -4
  2. data/LICENSE-DEPENDENCIES.md +12 -12
  3. data/LICENSE.md +1 -1
  4. data/README.md +20 -15
  5. data/ext/java/nokogiri/EncodingHandler.java +78 -59
  6. data/ext/java/nokogiri/HtmlDocument.java +137 -114
  7. data/ext/java/nokogiri/HtmlElementDescription.java +104 -87
  8. data/ext/java/nokogiri/HtmlEntityLookup.java +31 -26
  9. data/ext/java/nokogiri/HtmlSaxParserContext.java +220 -192
  10. data/ext/java/nokogiri/HtmlSaxPushParser.java +164 -139
  11. data/ext/java/nokogiri/NokogiriService.java +597 -526
  12. data/ext/java/nokogiri/XmlAttr.java +120 -96
  13. data/ext/java/nokogiri/XmlAttributeDecl.java +97 -76
  14. data/ext/java/nokogiri/XmlCdata.java +35 -26
  15. data/ext/java/nokogiri/XmlComment.java +48 -37
  16. data/ext/java/nokogiri/XmlDocument.java +642 -540
  17. data/ext/java/nokogiri/XmlDocumentFragment.java +127 -107
  18. data/ext/java/nokogiri/XmlDtd.java +450 -384
  19. data/ext/java/nokogiri/XmlElement.java +25 -18
  20. data/ext/java/nokogiri/XmlElementContent.java +345 -286
  21. data/ext/java/nokogiri/XmlElementDecl.java +126 -95
  22. data/ext/java/nokogiri/XmlEntityDecl.java +121 -97
  23. data/ext/java/nokogiri/XmlEntityReference.java +51 -42
  24. data/ext/java/nokogiri/XmlNamespace.java +177 -145
  25. data/ext/java/nokogiri/XmlNode.java +1843 -1588
  26. data/ext/java/nokogiri/XmlNodeSet.java +361 -299
  27. data/ext/java/nokogiri/XmlProcessingInstruction.java +49 -39
  28. data/ext/java/nokogiri/XmlReader.java +513 -418
  29. data/ext/java/nokogiri/XmlRelaxng.java +91 -78
  30. data/ext/java/nokogiri/XmlSaxParserContext.java +330 -285
  31. data/ext/java/nokogiri/XmlSaxPushParser.java +229 -190
  32. data/ext/java/nokogiri/XmlSchema.java +328 -263
  33. data/ext/java/nokogiri/XmlSyntaxError.java +113 -83
  34. data/ext/java/nokogiri/XmlText.java +57 -46
  35. data/ext/java/nokogiri/XmlXpathContext.java +240 -206
  36. data/ext/java/nokogiri/XsltStylesheet.java +282 -239
  37. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  38. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +199 -168
  39. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  40. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  41. data/ext/java/nokogiri/internals/NokogiriDomParser.java +65 -50
  42. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  43. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +25 -18
  44. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -254
  45. data/ext/java/nokogiri/internals/NokogiriHelpers.java +738 -622
  46. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +186 -143
  47. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +83 -68
  48. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +66 -49
  49. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +86 -69
  50. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +44 -29
  51. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +118 -101
  52. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -24
  53. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +25 -17
  54. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +57 -42
  55. data/ext/java/nokogiri/internals/ParserContext.java +206 -179
  56. data/ext/java/nokogiri/internals/ReaderNode.java +478 -371
  57. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -707
  58. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +28 -19
  59. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  60. data/ext/java/nokogiri/internals/XmlDeclHandler.java +5 -4
  61. data/ext/java/nokogiri/internals/XmlDomParserContext.java +208 -177
  62. data/ext/java/nokogiri/internals/XmlSaxParser.java +24 -17
  63. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  64. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  65. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  66. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  67. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  68. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  77. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  78. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  80. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  81. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  82. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  83. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  84. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  85. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  86. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  87. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  88. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  89. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  90. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  91. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -570
  92. data/ext/nokogiri/depend +34 -474
  93. data/ext/nokogiri/extconf.rb +253 -183
  94. data/ext/nokogiri/html_document.c +10 -15
  95. data/ext/nokogiri/html_element_description.c +84 -71
  96. data/ext/nokogiri/html_entity_lookup.c +21 -16
  97. data/ext/nokogiri/html_sax_parser_context.c +66 -65
  98. data/ext/nokogiri/html_sax_push_parser.c +29 -27
  99. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  100. data/ext/nokogiri/nokogiri.c +171 -63
  101. data/ext/nokogiri/test_global_handlers.c +3 -4
  102. data/ext/nokogiri/xml_attr.c +15 -15
  103. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  104. data/ext/nokogiri/xml_cdata.c +13 -18
  105. data/ext/nokogiri/xml_comment.c +19 -26
  106. data/ext/nokogiri/xml_document.c +221 -164
  107. data/ext/nokogiri/xml_document_fragment.c +13 -15
  108. data/ext/nokogiri/xml_dtd.c +54 -48
  109. data/ext/nokogiri/xml_element_content.c +30 -27
  110. data/ext/nokogiri/xml_element_decl.c +22 -22
  111. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  112. data/ext/nokogiri/xml_entity_decl.c +32 -30
  113. data/ext/nokogiri/xml_entity_reference.c +16 -18
  114. data/ext/nokogiri/xml_namespace.c +56 -49
  115. data/ext/nokogiri/xml_node.c +338 -286
  116. data/ext/nokogiri/xml_node_set.c +168 -156
  117. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  118. data/ext/nokogiri/xml_reader.c +191 -157
  119. data/ext/nokogiri/xml_relax_ng.c +29 -23
  120. data/ext/nokogiri/xml_sax_parser.c +117 -112
  121. data/ext/nokogiri/xml_sax_parser_context.c +100 -85
  122. data/ext/nokogiri/xml_sax_push_parser.c +34 -27
  123. data/ext/nokogiri/xml_schema.c +48 -42
  124. data/ext/nokogiri/xml_syntax_error.c +21 -23
  125. data/ext/nokogiri/xml_text.c +13 -17
  126. data/ext/nokogiri/xml_xpath_context.c +134 -127
  127. data/ext/nokogiri/xslt_stylesheet.c +157 -157
  128. data/lib/nokogiri.rb +1 -22
  129. data/lib/nokogiri/css/parser.rb +1 -1
  130. data/lib/nokogiri/extension.rb +26 -0
  131. data/lib/nokogiri/html/document_fragment.rb +15 -15
  132. data/lib/nokogiri/nokogiri.jar +0 -0
  133. data/lib/nokogiri/version/constant.rb +1 -1
  134. data/lib/nokogiri/version/info.rb +31 -8
  135. data/lib/nokogiri/xml/document.rb +31 -11
  136. data/lib/nokogiri/xml/node.rb +38 -42
  137. data/lib/nokogiri/xml/reader.rb +2 -9
  138. data/lib/nokogiri/xml/xpath.rb +1 -3
  139. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  140. metadata +7 -8
  141. data/ext/nokogiri/xml_io.c +0 -63
  142. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
@@ -17,10 +17,10 @@
17
17
  * distribute, sublicense, and/or sell copies of the Software, and to
18
18
  * permit persons to whom the Software is furnished to do so, subject to
19
19
  * the following conditions:
20
- *
20
+ *
21
21
  * The above copyright notice and this permission notice shall be
22
22
  * included in all copies or substantial portions of the Software.
23
- *
23
+ *
24
24
  * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
25
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
26
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -74,661 +74,777 @@ import nokogiri.XmlXpathContext;
74
74
 
75
75
  /**
76
76
  * A class for various utility methods.
77
- *
77
+ *
78
78
  * @author serabe
79
79
  * @author Patrick Mahoney <pat@polycrystal.org>
80
80
  * @author Yoko Harada <yokolet@gmail.com>
81
81
  */
82
- public class NokogiriHelpers {
83
- public static final String CACHED_NODE = "NOKOGIRI_CACHED_NODE";
84
- public static final String ROOT_NODE_INVALID = "NOKOGIRI_ROOT_NODE_INVALID";
85
- public static final String ENCODED_STRING = "NOKOGIRI_ENCODED_STRING";
86
-
87
- public static XmlNode getCachedNode(Node node) {
88
- return (XmlNode) node.getUserData(CACHED_NODE);
89
- }
90
-
91
- public static void clearCachedNode(Node node) {
92
- node.setUserData(CACHED_NODE, null, null);
93
- }
94
-
95
- public static void clearXpathContext(Node node) {
96
- if (node == null) return;
97
-
98
- Node ownerDocument = node.getOwnerDocument();
99
- if (ownerDocument == null) {
100
- ownerDocument = node;
82
+ public class NokogiriHelpers
83
+ {
84
+ public static final String CACHED_NODE = "NOKOGIRI_CACHED_NODE";
85
+ public static final String ROOT_NODE_INVALID = "NOKOGIRI_ROOT_NODE_INVALID";
86
+ public static final String ENCODED_STRING = "NOKOGIRI_ENCODED_STRING";
87
+
88
+ public static XmlNode
89
+ getCachedNode(Node node)
90
+ {
91
+ return (XmlNode) node.getUserData(CACHED_NODE);
92
+ }
93
+
94
+ public static void
95
+ clearCachedNode(Node node)
96
+ {
97
+ node.setUserData(CACHED_NODE, null, null);
98
+ }
99
+
100
+ public static void
101
+ clearXpathContext(Node node)
102
+ {
103
+ if (node == null) { return; }
104
+
105
+ Node ownerDocument = node.getOwnerDocument();
106
+ if (ownerDocument == null) {
107
+ ownerDocument = node;
108
+ }
109
+ ownerDocument.setUserData(XmlXpathContext.XPATH_CONTEXT, null, null);
110
+ }
111
+
112
+ /**
113
+ * Get the XmlNode associated with the underlying
114
+ * <code>node</code>. Creates a new XmlNode (or appropriate subclass)
115
+ * or XmlNamespace wrapping <code>node</code> if there is no cached
116
+ * value.
117
+ */
118
+ public static IRubyObject
119
+ getCachedNodeOrCreate(Ruby runtime, Node node)
120
+ {
121
+ if (node == null) { return runtime.getNil(); }
122
+ if (node.getNodeType() == Node.ATTRIBUTE_NODE && isNamespace(node.getNodeName())) {
123
+ XmlDocument xmlDocument = (XmlDocument) node.getOwnerDocument().getUserData(CACHED_NODE);
124
+ if (!(xmlDocument instanceof HtmlDocument)) {
125
+ String prefix = getLocalNameForNamespace(((Attr) node).getName(), null);
126
+ String href = ((Attr) node).getValue();
127
+ XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefix, href);
128
+ if (xmlNamespace != null) { return xmlNamespace; }
129
+ return XmlNamespace.createFromAttr(runtime, (Attr) node);
130
+ }
131
+ }
132
+ XmlNode xmlNode = getCachedNode(node);
133
+ if (xmlNode == null) {
134
+ xmlNode = (XmlNode) constructNode(runtime, node);
135
+ node.setUserData(CACHED_NODE, xmlNode, null);
136
+ }
137
+ return xmlNode;
138
+ }
139
+
140
+ /**
141
+ * Construct a new XmlNode wrapping <code>node</code>. The proper
142
+ * subclass of XmlNode is chosen based on the type of
143
+ * <code>node</code>.
144
+ */
145
+ public static IRubyObject
146
+ constructNode(Ruby runtime, Node node)
147
+ {
148
+ if (node == null) { return runtime.getNil(); }
149
+ // this is slow; need a way to cache nokogiri classes/modules somewhere
150
+ switch (node.getNodeType()) {
151
+ case Node.ELEMENT_NODE:
152
+ XmlElement xmlElement = (XmlElement) NokogiriService.XML_ELEMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
153
+ "Nokogiri::XML::Element"));
154
+ xmlElement.setNode(runtime, node);
155
+ return xmlElement;
156
+ case Node.ATTRIBUTE_NODE:
157
+ XmlAttr xmlAttr = (XmlAttr) NokogiriService.XML_ATTR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
158
+ "Nokogiri::XML::Attr"));
159
+ xmlAttr.setNode(runtime, node);
160
+ return xmlAttr;
161
+ case Node.TEXT_NODE:
162
+ XmlText xmlText = (XmlText) NokogiriService.XML_TEXT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
163
+ "Nokogiri::XML::Text"));
164
+ xmlText.setNode(runtime, node);
165
+ return xmlText;
166
+ case Node.COMMENT_NODE:
167
+ XmlComment xmlComment = (XmlComment) NokogiriService.XML_COMMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
168
+ "Nokogiri::XML::Comment"));
169
+ xmlComment.setNode(runtime, node);
170
+ return xmlComment;
171
+ case Node.ENTITY_NODE:
172
+ return new XmlNode(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityDecl"), node);
173
+ case Node.ENTITY_REFERENCE_NODE:
174
+ XmlEntityReference xmlEntityRef = (XmlEntityReference) NokogiriService.XML_ENTITY_REFERENCE_ALLOCATOR.allocate(runtime,
175
+ getNokogiriClass(runtime, "Nokogiri::XML::EntityReference"));
176
+ xmlEntityRef.setNode(runtime, node);
177
+ return xmlEntityRef;
178
+ case Node.PROCESSING_INSTRUCTION_NODE:
179
+ XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction)
180
+ NokogiriService.XML_PROCESSING_INSTRUCTION_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
181
+ "Nokogiri::XML::ProcessingInstruction"));
182
+ xmlProcessingInstruction.setNode(runtime, node);
183
+ return xmlProcessingInstruction;
184
+ case Node.CDATA_SECTION_NODE:
185
+ XmlCdata xmlCdata = (XmlCdata) NokogiriService.XML_CDATA_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
186
+ "Nokogiri::XML::CDATA"));
187
+ xmlCdata.setNode(runtime, node);
188
+ return xmlCdata;
189
+ case Node.DOCUMENT_NODE:
190
+ XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(runtime,
191
+ getNokogiriClass(runtime, "Nokogiri::XML::Document"));
192
+ xmlDocument.setDocumentNode(runtime, (Document) node);
193
+ return xmlDocument;
194
+ case Node.DOCUMENT_TYPE_NODE:
195
+ XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
196
+ "Nokogiri::XML::DTD"));
197
+ xmlDtd.setNode(runtime, node);
198
+ return xmlDtd;
199
+ default:
200
+ XmlNode xmlNode = (XmlNode) NokogiriService.XML_NODE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
201
+ "Nokogiri::XML::Node"));
202
+ xmlNode.setNode(runtime, node);
203
+ return xmlNode;
204
+ }
205
+ }
206
+
207
+ public static RubyClass
208
+ getNokogiriClass(Ruby ruby, String name)
209
+ {
210
+ return NokogiriService.getNokogiriClassCache(ruby).get(name);
211
+ }
212
+
213
+ public static IRubyObject
214
+ stringOrNil(Ruby runtime, String str)
215
+ {
216
+ return str == null ? runtime.getNil() : convertString(runtime, str);
217
+ }
218
+
219
+ public static IRubyObject
220
+ stringOrNil(Ruby runtime, CharSequence str)
221
+ {
222
+ return str == null ? runtime.getNil() : convertString(runtime, str);
223
+ }
224
+
225
+ public static IRubyObject
226
+ stringOrNil(Ruby runtime, byte[] bytes)
227
+ {
228
+ return bytes == null ? runtime.getNil() : RubyString.newString(runtime, bytes);
229
+ }
230
+
231
+ public static IRubyObject
232
+ stringOrBlank(Ruby runtime, String str)
233
+ {
234
+ return str == null ? runtime.newString() : convertString(runtime, str);
235
+ }
236
+
237
+ public static RubyString
238
+ convertString(Ruby runtime, String str)
239
+ {
240
+ return RubyString.newUTF8String(runtime, str);
241
+ }
242
+
243
+ public static RubyString
244
+ convertString(Ruby runtime, CharSequence str)
245
+ {
246
+ return RubyString.newUTF8String(runtime, str);
247
+ }
248
+
249
+ /**
250
+ * Convert <code>s</code> to a RubyString, or if s is null or
251
+ * empty return RubyNil.
252
+ */
253
+ public static IRubyObject
254
+ nonEmptyStringOrNil(Ruby runtime, String s)
255
+ {
256
+ if (s == null || s.length() == 0) { return runtime.getNil(); }
257
+ return RubyString.newString(runtime, s);
258
+ }
259
+
260
+ /**
261
+ * Return the prefix of a qualified name like "prefix:local".
262
+ * Returns null if there is no prefix.
263
+ */
264
+ public static String
265
+ getPrefix(String qName)
266
+ {
267
+ if (qName == null) { return null; }
268
+
269
+ final int pos = qName.indexOf(':');
270
+ return pos > 0 ? qName.substring(0, pos) : null;
271
+ }
272
+
273
+ /**
274
+ * Return the local part of a qualified name like "prefix:local".
275
+ * Returns <code>qName</code> if there is no prefix.
276
+ */
277
+ public static String
278
+ getLocalPart(String qName)
279
+ {
280
+ if (qName == null) { return null; }
281
+
282
+ final int pos = qName.indexOf(':');
283
+ return pos > 0 ? qName.substring(pos + 1) : qName;
284
+ }
285
+
286
+ public static String
287
+ getLocalNameForNamespace(String name, String defValue)
288
+ {
289
+ String localName = getLocalPart(name);
290
+ return ("xmlns".equals(localName)) ? defValue : localName;
291
+ }
292
+
293
+ public static String
294
+ rubyStringToString(IRubyObject str)
295
+ {
296
+ if (str.isNil()) { return null; }
297
+ return str.convertToString().decodeString();
298
+ }
299
+
300
+ public static String
301
+ rubyStringToString(RubyString str)
302
+ {
303
+ return str.decodeString(); // if encoding UTF-8 will decode UTF-8
304
+ }
305
+
306
+ public static ByteArrayInputStream
307
+ stringBytesToStream(final IRubyObject str)
308
+ {
309
+ if (str instanceof RubyString || str.respondsTo("to_str")) {
310
+ final ByteList bytes = str.convertToString().getByteList();
311
+ return new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length());
312
+ }
313
+ return null;
314
+ }
315
+
316
+ public static String
317
+ getNodeCompletePath(Node node)
318
+ {
319
+
320
+ Node cur, tmp, next;
321
+
322
+ String buffer = "";
323
+
324
+ cur = node;
325
+
326
+ do {
327
+ String name = "";
328
+ String sep = "?";
329
+ int occur = 0;
330
+ boolean generic = false;
331
+
332
+ if (cur.getNodeType() == Node.DOCUMENT_NODE) {
333
+ if (buffer.startsWith("/")) { break; }
334
+
335
+ sep = "/";
336
+ next = null;
337
+ } else if (cur.getNodeType() == Node.ELEMENT_NODE) {
338
+ generic = false;
339
+ sep = "/";
340
+
341
+ name = cur.getLocalName();
342
+ if (name == null) { name = cur.getNodeName(); }
343
+ if (cur.getNamespaceURI() != null) {
344
+ if (cur.getPrefix() != null) {
345
+ name = cur.getPrefix() + ":" + name;
346
+ } else {
347
+ generic = true;
348
+ name = "*";
349
+ }
101
350
  }
102
- ownerDocument.setUserData(XmlXpathContext.XPATH_CONTEXT, null, null);
103
- }
104
351
 
105
- /**
106
- * Get the XmlNode associated with the underlying
107
- * <code>node</code>. Creates a new XmlNode (or appropriate subclass)
108
- * or XmlNamespace wrapping <code>node</code> if there is no cached
109
- * value.
110
- */
111
- public static IRubyObject getCachedNodeOrCreate(Ruby runtime, Node node) {
112
- if (node == null) return runtime.getNil();
113
- if (node.getNodeType() == Node.ATTRIBUTE_NODE && isNamespace(node.getNodeName())) {
114
- XmlDocument xmlDocument = (XmlDocument) node.getOwnerDocument().getUserData(CACHED_NODE);
115
- if (!(xmlDocument instanceof HtmlDocument)) {
116
- String prefix = getLocalNameForNamespace(((Attr) node).getName(), null);
117
- String href = ((Attr) node).getValue();
118
- XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefix, href);
119
- if (xmlNamespace != null) return xmlNamespace;
120
- return XmlNamespace.createFromAttr(runtime, (Attr) node);
121
- }
122
- }
123
- XmlNode xmlNode = getCachedNode(node);
124
- if (xmlNode == null) {
125
- xmlNode = (XmlNode) constructNode(runtime, node);
126
- node.setUserData(CACHED_NODE, xmlNode, null);
127
- }
128
- return xmlNode;
129
- }
352
+ next = cur.getParentNode();
130
353
 
131
- /**
132
- * Construct a new XmlNode wrapping <code>node</code>. The proper
133
- * subclass of XmlNode is chosen based on the type of
134
- * <code>node</code>.
135
- */
136
- public static IRubyObject constructNode(Ruby runtime, Node node) {
137
- if (node == null) return runtime.getNil();
138
- // this is slow; need a way to cache nokogiri classes/modules somewhere
139
- switch (node.getNodeType()) {
140
- case Node.ELEMENT_NODE:
141
- XmlElement xmlElement = (XmlElement) NokogiriService.XML_ELEMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Element"));
142
- xmlElement.setNode(runtime, node);
143
- return xmlElement;
144
- case Node.ATTRIBUTE_NODE:
145
- XmlAttr xmlAttr = (XmlAttr) NokogiriService.XML_ATTR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Attr"));
146
- xmlAttr.setNode(runtime, node);
147
- return xmlAttr;
148
- case Node.TEXT_NODE:
149
- XmlText xmlText = (XmlText) NokogiriService.XML_TEXT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Text"));
150
- xmlText.setNode(runtime, node);
151
- return xmlText;
152
- case Node.COMMENT_NODE:
153
- XmlComment xmlComment = (XmlComment) NokogiriService.XML_COMMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Comment"));
154
- xmlComment.setNode(runtime, node);
155
- return xmlComment;
156
- case Node.ENTITY_NODE:
157
- return new XmlNode(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityDecl"), node);
158
- case Node.ENTITY_REFERENCE_NODE:
159
- XmlEntityReference xmlEntityRef = (XmlEntityReference) NokogiriService.XML_ENTITY_REFERENCE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityReference"));
160
- xmlEntityRef.setNode(runtime, node);
161
- return xmlEntityRef;
162
- case Node.PROCESSING_INSTRUCTION_NODE:
163
- XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction) NokogiriService.XML_PROCESSING_INSTRUCTION_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::ProcessingInstruction"));
164
- xmlProcessingInstruction.setNode(runtime, node);
165
- return xmlProcessingInstruction;
166
- case Node.CDATA_SECTION_NODE:
167
- XmlCdata xmlCdata = (XmlCdata) NokogiriService.XML_CDATA_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::CDATA"));
168
- xmlCdata.setNode(runtime, node);
169
- return xmlCdata;
170
- case Node.DOCUMENT_NODE:
171
- XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Document"));
172
- xmlDocument.setDocumentNode(runtime, (Document) node);
173
- return xmlDocument;
174
- case Node.DOCUMENT_TYPE_NODE:
175
- XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD"));
176
- xmlDtd.setNode(runtime, node);
177
- return xmlDtd;
178
- default:
179
- XmlNode xmlNode = (XmlNode) NokogiriService.XML_NODE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Node"));
180
- xmlNode.setNode(runtime, node);
181
- return xmlNode;
182
- }
183
- }
184
-
185
- public static RubyClass getNokogiriClass(Ruby ruby, String name) {
186
- return NokogiriService.getNokogiriClassCache(ruby).get(name);
187
- }
188
-
189
- public static IRubyObject stringOrNil(Ruby runtime, String str) {
190
- return str == null ? runtime.getNil() : convertString(runtime, str);
191
- }
192
-
193
- public static IRubyObject stringOrNil(Ruby runtime, CharSequence str) {
194
- return str == null ? runtime.getNil() : convertString(runtime, str);
195
- }
354
+ /*
355
+ * Thumbler index computation
356
+ */
196
357
 
197
- public static IRubyObject stringOrNil(Ruby runtime, byte[] bytes) {
198
- return bytes == null ? runtime.getNil() : RubyString.newString(runtime, bytes);
199
- }
200
-
201
- public static IRubyObject stringOrBlank(Ruby runtime, String str) {
202
- return str == null ? runtime.newString() : convertString(runtime, str);
203
- }
204
-
205
- public static RubyString convertString(Ruby runtime, String str) {
206
- return RubyString.newUTF8String(runtime, str);
207
- }
208
-
209
- public static RubyString convertString(Ruby runtime, CharSequence str) {
210
- return RubyString.newUTF8String(runtime, str);
211
- }
358
+ tmp = cur.getPreviousSibling();
212
359
 
213
- /**
214
- * Convert <code>s</code> to a RubyString, or if s is null or
215
- * empty return RubyNil.
216
- */
217
- public static IRubyObject nonEmptyStringOrNil(Ruby runtime, String s) {
218
- if (s == null || s.length() == 0) return runtime.getNil();
219
- return RubyString.newString(runtime, s);
220
- }
221
-
222
- /**
223
- * Return the prefix of a qualified name like "prefix:local".
224
- * Returns null if there is no prefix.
225
- */
226
- public static String getPrefix(String qName) {
227
- if (qName == null) return null;
228
-
229
- final int pos = qName.indexOf(':');
230
- return pos > 0 ? qName.substring(0, pos) : null;
231
- }
232
-
233
- /**
234
- * Return the local part of a qualified name like "prefix:local".
235
- * Returns <code>qName</code> if there is no prefix.
236
- */
237
- public static String getLocalPart(String qName) {
238
- if (qName == null) return null;
239
-
240
- final int pos = qName.indexOf(':');
241
- return pos > 0 ? qName.substring(pos + 1) : qName;
242
- }
243
-
244
- public static String getLocalNameForNamespace(String name, String defValue) {
245
- String localName = getLocalPart(name);
246
- return ("xmlns".equals(localName)) ? defValue : localName;
247
- }
248
-
249
- public static String rubyStringToString(IRubyObject str) {
250
- if (str.isNil()) return null;
251
- return str.convertToString().decodeString();
252
- }
253
-
254
- public static String rubyStringToString(RubyString str) {
255
- return str.decodeString(); // if encoding UTF-8 will decode UTF-8
256
- }
257
-
258
- public static ByteArrayInputStream stringBytesToStream(final IRubyObject str) {
259
- if (str instanceof RubyString || str.respondsTo("to_str")) {
260
- final ByteList bytes = str.convertToString().getByteList();
261
- return new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length());
360
+ while (tmp != null) {
361
+ if ((tmp.getNodeType() == Node.ELEMENT_NODE) &&
362
+ (generic || fullNamesMatch(tmp, cur))) {
363
+ occur++;
364
+ }
365
+ tmp = tmp.getPreviousSibling();
262
366
  }
263
- return null;
264
- }
265
367
 
266
- public static String getNodeCompletePath(Node node) {
267
-
268
- Node cur, tmp, next;
269
-
270
- String buffer = "";
271
-
272
- cur = node;
273
-
274
- do {
275
- String name = "";
276
- String sep = "?";
277
- int occur = 0;
278
- boolean generic = false;
279
-
280
- if(cur.getNodeType() == Node.DOCUMENT_NODE) {
281
- if(buffer.startsWith("/")) break;
282
-
283
- sep = "/";
284
- next = null;
285
- } else if(cur.getNodeType() == Node.ELEMENT_NODE) {
286
- generic = false;
287
- sep = "/";
288
-
289
- name = cur.getLocalName();
290
- if (name == null) name = cur.getNodeName();
291
- if(cur.getNamespaceURI() != null) {
292
- if(cur.getPrefix() != null) {
293
- name = cur.getPrefix() + ":" + name;
294
- } else {
295
- generic = true;
296
- name = "*";
297
- }
298
- }
299
-
300
- next = cur.getParentNode();
301
-
302
- /*
303
- * Thumbler index computation
304
- */
305
-
306
- tmp = cur.getPreviousSibling();
307
-
308
- while(tmp != null) {
309
- if((tmp.getNodeType() == Node.ELEMENT_NODE) &&
310
- (generic || fullNamesMatch(tmp, cur))) {
311
- occur++;
312
- }
313
- tmp = tmp.getPreviousSibling();
314
- }
315
-
316
- if(occur == 0) {
317
- tmp = cur.getNextSibling();
318
-
319
- while(tmp != null && occur == 0) {
320
- if((tmp.getNodeType() == Node.ELEMENT_NODE) &&
321
- (generic || fullNamesMatch(tmp,cur))) {
322
- occur++;
323
- }
324
- tmp = tmp.getNextSibling();
325
- }
326
-
327
- if(occur != 0) occur = 1;
328
-
329
- } else {
330
- occur++;
331
- }
332
- } else if(cur.getNodeType() == Node.COMMENT_NODE) {
333
- sep = "/";
334
- name = "comment()";
335
- next = cur.getParentNode();
336
-
337
- /*
338
- * Thumbler index computation.
339
- */
340
-
341
- tmp = cur.getPreviousSibling();
342
-
343
- while(tmp != null) {
344
- if(tmp.getNodeType() == Node.COMMENT_NODE) {
345
- occur++;
346
- }
347
- tmp = tmp.getPreviousSibling();
348
- }
349
-
350
- if(occur == 0) {
351
- tmp = cur.getNextSibling();
352
- while(tmp != null && occur == 0) {
353
- if(tmp.getNodeType() == Node.COMMENT_NODE) {
354
- occur++;
355
- }
356
- tmp = tmp.getNextSibling();
357
- }
358
- if(occur != 0) occur = 1;
359
- } else {
360
- occur = 1;
361
- }
362
-
363
- } else if(cur.getNodeType() == Node.TEXT_NODE ||
364
- cur.getNodeType() == Node.CDATA_SECTION_NODE) {
365
- // I'm here. gist:129
366
- // http://gist.github.com/144923
367
-
368
- sep = "/";
369
- name = "text()";
370
- next = cur.getParentNode();
371
-
372
- /*
373
- * Thumbler index computation.
374
- */
375
-
376
- tmp = cur.getPreviousSibling();
377
- while(tmp != null) {
378
- if(tmp.getNodeType() == Node.TEXT_NODE ||
379
- tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
380
- occur++;
381
- }
382
- tmp = tmp.getPreviousSibling();
383
- }
384
-
385
- if(occur == 0) {
386
- tmp = cur.getNextSibling();
387
-
388
- while(tmp != null && occur == 0) {
389
- if(tmp.getNodeType() == Node.TEXT_NODE ||
390
- tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
391
- occur++;
392
- }
393
- tmp = tmp.getNextSibling();
394
- }
395
- } else {
396
- occur++;
397
- }
398
-
399
- } else if(cur.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
400
- sep = "/";
401
- name = "processing-instruction('"+cur.getLocalName()+"')";
402
- next = cur.getParentNode();
403
-
404
- /*
405
- * Thumbler index computation.
406
- */
407
-
408
- tmp = cur.getParentNode();
409
-
410
- while(tmp != null) {
411
- if(tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
412
- tmp.getLocalName().equals(cur.getLocalName())) {
413
- occur++;
414
- }
415
- tmp = tmp.getPreviousSibling();
416
- }
417
-
418
- if(occur == 0) {
419
- tmp = cur.getNextSibling();
420
-
421
- while(tmp != null && occur == 0) {
422
- if(tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
423
- tmp.getLocalName().equals(cur.getLocalName())){
424
- occur++;
425
- }
426
- tmp = tmp.getNextSibling();
427
- }
428
-
429
- if(occur != 0) {
430
- occur = 1;
431
- }
432
-
433
- } else {
434
- occur++;
435
- }
436
-
437
- } else if(cur.getNodeType() == Node.ATTRIBUTE_NODE) {
438
- sep = "/@";
439
- name = cur.getLocalName();
440
-
441
- if(cur.getNamespaceURI() != null) {
442
- if(cur.getPrefix() != null) {
443
- name = cur.getPrefix() + ":" + name;
444
- }
445
- }
446
-
447
- next = ((Attr) cur).getOwnerElement();
448
-
449
- } else {
450
- next = cur.getParentNode();
451
- }
368
+ if (occur == 0) {
369
+ tmp = cur.getNextSibling();
452
370
 
453
- if(occur == 0){
454
- buffer = sep+name+buffer;
455
- } else {
456
- buffer = sep+name+"["+occur+"]"+buffer;
371
+ while (tmp != null && occur == 0) {
372
+ if ((tmp.getNodeType() == Node.ELEMENT_NODE) &&
373
+ (generic || fullNamesMatch(tmp, cur))) {
374
+ occur++;
457
375
  }
376
+ tmp = tmp.getNextSibling();
377
+ }
458
378
 
459
- cur = next;
460
-
461
- } while(cur != null);
379
+ if (occur != 0) { occur = 1; }
462
380
 
463
- return buffer;
464
- }
465
-
466
- static boolean compareTwoNodes(Node m, Node n) {
467
- return nodesAreEqual(m.getLocalName(), n.getLocalName()) &&
468
- nodesAreEqual(m.getPrefix(), n.getPrefix());
469
- }
470
-
471
- private static boolean nodesAreEqual(Object a, Object b) {
472
- return (((a == null) && (b == null)) ||
473
- ((a != null) && (b != null) && (b.equals(a))));
474
- }
475
-
476
- private static boolean fullNamesMatch(Node a, Node b) {
477
- return a.getNodeName().equals(b.getNodeName());
478
- }
479
-
480
- private static final Pattern encoded_pattern = Pattern.compile("&amp;|&gt;|&lt;|&#13;");
481
- private static final String[] encoded = {"&amp;", "&gt;", "&lt;", "&#13;"};
482
- private static final Pattern decoded_pattern = Pattern.compile("&|>|<|\r");
483
- private static final String[] decoded = {"&", ">", "<", "\r"};
484
-
485
- private static StringBuffer convert(Pattern ptn, CharSequence input, String[] oldChars, String[] newChars) {
486
- Matcher matcher = ptn.matcher(input);
487
- boolean result = matcher.find();
488
- StringBuffer sb = new StringBuffer(input.length() + 8);
489
- while (result) {
490
- String matched = matcher.group();
491
- String replacement = "";
492
- for (int i=0; i<oldChars.length; i++) {
493
- if (matched.contains(oldChars[i])) {
494
- replacement = matched.replace(oldChars[i], newChars[i]);
495
- break;
496
- }
497
- }
498
- matcher.appendReplacement(sb, replacement);
499
- result = matcher.find();
381
+ } else {
382
+ occur++;
383
+ }
384
+ } else if (cur.getNodeType() == Node.COMMENT_NODE) {
385
+ sep = "/";
386
+ name = "comment()";
387
+ next = cur.getParentNode();
388
+
389
+ /*
390
+ * Thumbler index computation.
391
+ */
392
+
393
+ tmp = cur.getPreviousSibling();
394
+
395
+ while (tmp != null) {
396
+ if (tmp.getNodeType() == Node.COMMENT_NODE) {
397
+ occur++;
398
+ }
399
+ tmp = tmp.getPreviousSibling();
500
400
  }
501
- matcher.appendTail(sb);
502
- return sb;
503
- }
504
-
505
- public static CharSequence encodeJavaString(CharSequence str) {
506
- return convert(decoded_pattern, str, decoded, encoded);
507
- }
508
-
509
- public static CharSequence decodeJavaString(CharSequence str) {
510
- return convert(encoded_pattern, str, encoded, decoded);
511
- }
512
-
513
- public static final String XMLNS_URI = "http://www.w3.org/2000/xmlns/";
514
- public static boolean isNamespace(Node node) {
515
- return (XMLNS_URI.equals(node.getNamespaceURI()) || isNamespace(node.getNodeName()));
516
- }
517
-
518
- public static boolean isNamespace(String nodeName) {
519
- return (nodeName.startsWith("xmlns"));
520
- }
521
-
522
- public static boolean isNonDefaultNamespace(Node node) {
523
- return (isNamespace(node) && ! "xmlns".equals(node.getNodeName()));
524
- }
525
-
526
- public static boolean isXmlBase(String attrName) {
527
- return "xml:base".equals(attrName) || "xlink:href".equals(attrName);
528
- }
529
-
530
- public static boolean isBlank(IRubyObject obj) {
531
- if ( !(obj instanceof XmlText) ) return false;
532
-
533
- CharSequence content = ((XmlNode) obj).getContentImpl();
534
- return content == null || isBlank(content);
535
- }
536
-
537
- public static boolean isBlank(CharSequence str) {
538
- int len = str.length(); int beg = 0;
539
- while ((beg < len) && (str.charAt(beg) <= ' ')) beg++;
540
- return beg == len;
541
- }
542
-
543
- public static boolean isBlank(String str) {
544
- return str.isEmpty() || isBlank((CharSequence) str);
545
- }
546
-
547
- public static boolean isNullOrEmpty(String str) {
548
- return str == null || str.isEmpty();
549
- }
550
401
 
551
- public static CharSequence canonicalizeWhitespace(CharSequence str) {
552
- final int len = str.length();
553
- StringBuilder sb = new StringBuilder(len);
554
- boolean newline_added = false;
555
- for ( int i = 0; i < len; i++ ) {
556
- char c = str.charAt(i);
557
- if ( c == '\n' ) {
558
- if ( ! newline_added ) {
559
- sb.append(c); newline_added = true;
560
- }
561
- } else {
562
- sb.append(c);
402
+ if (occur == 0) {
403
+ tmp = cur.getNextSibling();
404
+ while (tmp != null && occur == 0) {
405
+ if (tmp.getNodeType() == Node.COMMENT_NODE) {
406
+ occur++;
563
407
  }
408
+ tmp = tmp.getNextSibling();
409
+ }
410
+ if (occur != 0) { occur = 1; }
411
+ } else {
412
+ occur = 1;
564
413
  }
565
- return sb;
566
- }
567
-
568
- public static String newQName(String newPrefix, Node node) {
569
- String tagName = getLocalPart(node.getNodeName());
570
- if (newPrefix == null) return tagName;
571
- return newPrefix + ':' + tagName;
572
- }
573
414
 
574
- public static IRubyObject[] nodeListToRubyArray(Ruby runtime, NodeList nodes) {
575
- IRubyObject[] array = new IRubyObject[nodes.getLength()];
576
- for (int i = 0; i < nodes.getLength(); i++) {
577
- array[i] = NokogiriHelpers.getCachedNodeOrCreate(runtime, nodes.item(i));
415
+ } else if (cur.getNodeType() == Node.TEXT_NODE ||
416
+ cur.getNodeType() == Node.CDATA_SECTION_NODE) {
417
+ // I'm here. gist:129
418
+ // http://gist.github.com/144923
419
+
420
+ sep = "/";
421
+ name = "text()";
422
+ next = cur.getParentNode();
423
+
424
+ /*
425
+ * Thumbler index computation.
426
+ */
427
+
428
+ tmp = cur.getPreviousSibling();
429
+ while (tmp != null) {
430
+ if (tmp.getNodeType() == Node.TEXT_NODE ||
431
+ tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
432
+ occur++;
433
+ }
434
+ tmp = tmp.getPreviousSibling();
578
435
  }
579
- return array;
580
- }
581
436
 
582
- public static IRubyObject[] nodeListToArray(Ruby ruby, List<Node> nodes) {
583
- IRubyObject[] result = new IRubyObject[nodes.size()];
584
- for (int i = 0; i < result.length; i++) {
585
- result[i] = NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes.get(i));
586
- }
587
- return result;
588
- }
437
+ if (occur == 0) {
438
+ tmp = cur.getNextSibling();
589
439
 
590
- public static RubyArray nodeArrayToRubyArray(Ruby ruby, Node[] nodes) {
591
- RubyArray n = RubyArray.newArray(ruby, nodes.length);
592
- for (int i = 0; i < nodes.length; i++) {
593
- n.append(NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes[i]));
440
+ while (tmp != null && occur == 0) {
441
+ if (tmp.getNodeType() == Node.TEXT_NODE ||
442
+ tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
443
+ occur++;
444
+ }
445
+ tmp = tmp.getNextSibling();
446
+ }
447
+ } else {
448
+ occur++;
594
449
  }
595
- return n;
596
- }
597
450
 
598
- public static String getValidEncodingOrNull(IRubyObject encoding) {
599
- if (encoding.isNil()) return null; // charsetNames does not like contains(null)
600
- String enc = rubyStringToString(encoding.convertToString());
601
- if (CharsetNames.contains(enc)) return enc;
602
- return null;
603
- }
451
+ } else if (cur.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
452
+ sep = "/";
453
+ name = "processing-instruction('" + cur.getLocalName() + "')";
454
+ next = cur.getParentNode();
604
455
 
605
- public static String getValidEncoding(IRubyObject encoding) {
606
- String validEncoding = getValidEncodingOrNull(encoding);
607
- if (validEncoding != null) return validEncoding;
608
- return Charset.defaultCharset().name();
609
- }
456
+ /*
457
+ * Thumbler index computation.
458
+ */
610
459
 
611
- private static final Set<String> CharsetNames = Charset.availableCharsets().keySet();
612
-
613
- public static String adjustSystemIdIfNecessary(String currentDir, String scriptFileName, String baseURI, String systemId) {
614
- if (systemId == null) return systemId;
615
- File file = new File(systemId);
616
- if (file.isAbsolute()) return systemId;
617
- String path = resolveSystemId(baseURI, systemId);
618
- if (path != null) return path;
619
- path = resolveSystemId(currentDir, systemId);
620
- if (path != null) return path;
621
- return resolveSystemId(scriptFileName, systemId);
622
- }
623
-
624
- private static String resolveSystemId(String baseName, String systemId) {
625
- if (baseName == null || baseName.length() < 1) return null;
626
- String parentName;
627
- baseName = baseName.replace("%20", " ");
628
- File base = new File(baseName);
629
- if (base.isDirectory()) parentName = baseName;
630
- else parentName = base.getParent();
631
- if (parentName == null) return null;
632
- if (parentName.toLowerCase().startsWith("file:")) parentName = parentName.substring("file:".length());
633
- File dtdFile = new File(parentName + "/" + systemId);
634
- if (dtdFile.exists()) return dtdFile.getPath();
635
- return null;
636
- }
460
+ tmp = cur.getParentNode();
637
461
 
638
- private static final Charset UTF8 = Charset.forName("UTF-8");
639
-
640
- public static boolean isUTF8(String encoding) {
641
- if (encoding == null) return true; // no need to convert encoding
462
+ while (tmp != null) {
463
+ if (tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
464
+ tmp.getLocalName().equals(cur.getLocalName())) {
465
+ occur++;
466
+ }
467
+ tmp = tmp.getPreviousSibling();
468
+ }
642
469
 
643
- if ("UTF-8".equals(encoding)) return true;
644
- return UTF8.aliases().contains(encoding);
645
- }
470
+ if (occur == 0) {
471
+ tmp = cur.getNextSibling();
646
472
 
647
- public static ByteBuffer convertEncoding(Charset output_charset, CharSequence input_string) {
648
- return output_charset.encode(CharBuffer.wrap(input_string)); // does replace implicitly on un-mappable characters
649
- }
473
+ while (tmp != null && occur == 0) {
474
+ if (tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
475
+ tmp.getLocalName().equals(cur.getLocalName())) {
476
+ occur++;
477
+ }
478
+ tmp = tmp.getNextSibling();
479
+ }
650
480
 
651
- public static CharSequence convertEncodingByNKFIfNecessary(ThreadContext context, XmlDocument doc, CharSequence str) {
652
- if (!(doc instanceof HtmlDocument)) return str;
653
- String parsed_encoding = ((HtmlDocument)doc).getPraedEncoding();
654
- if (parsed_encoding == null) return str;
655
- String ruby_encoding = rubyStringToString(doc.getEncoding());
656
- if (ruby_encoding == null) return str;
657
- Charset encoding = Charset.forName(ruby_encoding);
658
- if (Charset.forName(parsed_encoding).compareTo(encoding) == 0) return str;
659
- if (str.length() == 0) return str; // no need to convert
660
- return NokogiriHelpers.nkf(context, encoding, str);
661
- }
481
+ if (occur != 0) {
482
+ occur = 1;
483
+ }
662
484
 
663
- private static final ByteList _Sw = new ByteList(new byte[] { '-','S','w' }, false);
664
- private static final ByteList _Jw = new ByteList(new byte[] { '-','J','w' }, false);
665
- private static final ByteList _Ew = new ByteList(new byte[] { '-','E','w' }, false);
666
- private static final ByteList _Ww = new ByteList(new byte[] { '-','W','w' }, false);
667
-
668
- // This method is used from HTML documents. HTML meta tag with encoding specification
669
- // might appear after non-ascii characters are used. For example, a title tag before
670
- // a meta tag. In such a case, Xerces encodes characters in UTF-8 without seeing meta tag.
671
- // Nokogiri uses NKF library to convert characters correct encoding. This means the method
672
- // works only for JIS/Shift_JIS/EUC-JP.
673
- private static CharSequence nkf(ThreadContext context, Charset encoding, CharSequence str) {
674
- final Ruby runtime = context.getRuntime();
675
- final ByteList opt;
676
- if (NokogiriHelpers.Shift_JIS.compareTo(encoding) == 0) opt = _Sw;
677
- else if (NokogiriHelpers.ISO_2022_JP.compareTo(encoding) == 0) opt = _Jw;
678
- else if (NokogiriHelpers.EUC_JP.compareTo(encoding) == 0) opt = _Ew;
679
- else opt = _Ww; // should not come here. should be treated before this method.
680
-
681
- Class nkfClass;
682
- try {
683
- // JRuby 1.7 and later
684
- nkfClass = runtime.getClassLoader().loadClass("org.jruby.ext.nkf.RubyNKF");
685
- } catch (ClassNotFoundException e1) {
686
- return str;
687
- }
688
- Method nkf_method;
689
- try {
690
- nkf_method = nkfClass.getMethod("nkf", ThreadContext.class, IRubyObject.class, IRubyObject.class, IRubyObject.class);
691
- RubyString r_str =
692
- (RubyString)nkf_method.invoke(null, context, null, runtime.newString(opt), runtime.newString(str.toString()));
693
- return NokogiriHelpers.rubyStringToString(r_str);
694
- } catch (SecurityException e) {
695
- return str;
696
- } catch (NoSuchMethodException e) {
697
- return str;
698
- } catch (IllegalArgumentException e) {
699
- return str;
700
- } catch (IllegalAccessException e) {
701
- return str;
702
- } catch (InvocationTargetException e) {
703
- return str;
485
+ } else {
486
+ occur++;
704
487
  }
705
- }
706
-
707
- private static final Charset Shift_JIS = Charset.forName("Shift_JIS");
708
- private static final Charset ISO_2022_JP = Charset.forName("ISO-2022-JP"); // JIS
709
- private static final Charset EUC_JP = Charset.forName("EUC-JP");
710
488
 
711
- public static boolean shouldEncode(Node text) {
712
- final Boolean encoded = (Boolean) text.getUserData(NokogiriHelpers.ENCODED_STRING);
713
- return encoded == null || ! encoded;
714
- }
489
+ } else if (cur.getNodeType() == Node.ATTRIBUTE_NODE) {
490
+ sep = "/@";
491
+ name = cur.getLocalName();
715
492
 
716
- public static boolean shouldDecode(Node text) {
717
- return !shouldEncode(text);
718
- }
719
-
720
- public static NokogiriNamespaceCache getNamespaceCache(Node node) {
721
- XmlDocument xmlDoc = (XmlDocument) getCachedNode(node.getOwnerDocument());
722
- return xmlDoc.getNamespaceCache();
723
- }
493
+ if (cur.getNamespaceURI() != null) {
494
+ if (cur.getPrefix() != null) {
495
+ name = cur.getPrefix() + ":" + name;
496
+ }
497
+ }
724
498
 
725
- public static Node renameNode(Node node, String namespaceURI, String qualifiedName) throws DOMException {
726
- Document doc = node.getOwnerDocument();
727
- NokogiriNamespaceCache nsCache = getNamespaceCache(node);
728
- Node result = doc.renameNode(node, namespaceURI, qualifiedName);
729
- if (result != node) {
730
- nsCache.replaceNode(node, result);
499
+ next = ((Attr) cur).getOwnerElement();
500
+
501
+ } else {
502
+ next = cur.getParentNode();
503
+ }
504
+
505
+ if (occur == 0) {
506
+ buffer = sep + name + buffer;
507
+ } else {
508
+ buffer = sep + name + "[" + occur + "]" + buffer;
509
+ }
510
+
511
+ cur = next;
512
+
513
+ } while (cur != null);
514
+
515
+ return buffer;
516
+ }
517
+
518
+ static boolean
519
+ compareTwoNodes(Node m, Node n)
520
+ {
521
+ return nodesAreEqual(m.getLocalName(), n.getLocalName()) &&
522
+ nodesAreEqual(m.getPrefix(), n.getPrefix());
523
+ }
524
+
525
+ private static boolean
526
+ nodesAreEqual(Object a, Object b)
527
+ {
528
+ return (((a == null) && (b == null)) ||
529
+ ((a != null) && (b != null) && (b.equals(a))));
530
+ }
531
+
532
+ private static boolean
533
+ fullNamesMatch(Node a, Node b)
534
+ {
535
+ return a.getNodeName().equals(b.getNodeName());
536
+ }
537
+
538
+ private static final Pattern encoded_pattern = Pattern.compile("&amp;|&gt;|&lt;|&#13;");
539
+ private static final String[] encoded = {"&amp;", "&gt;", "&lt;", "&#13;"};
540
+ private static final Pattern decoded_pattern = Pattern.compile("&|>|<|\r");
541
+ private static final String[] decoded = {"&", ">", "<", "\r"};
542
+
543
+ private static StringBuffer
544
+ convert(Pattern ptn, CharSequence input, String[] oldChars, String[] newChars)
545
+ {
546
+ Matcher matcher = ptn.matcher(input);
547
+ boolean result = matcher.find();
548
+ StringBuffer sb = new StringBuffer(input.length() + 8);
549
+ while (result) {
550
+ String matched = matcher.group();
551
+ String replacement = "";
552
+ for (int i = 0; i < oldChars.length; i++) {
553
+ if (matched.contains(oldChars[i])) {
554
+ replacement = matched.replace(oldChars[i], newChars[i]);
555
+ break;
731
556
  }
732
- return result;
733
- }
557
+ }
558
+ matcher.appendReplacement(sb, replacement);
559
+ result = matcher.find();
560
+ }
561
+ matcher.appendTail(sb);
562
+ return sb;
563
+ }
564
+
565
+ public static CharSequence
566
+ encodeJavaString(CharSequence str)
567
+ {
568
+ return convert(decoded_pattern, str, decoded, encoded);
569
+ }
570
+
571
+ public static CharSequence
572
+ decodeJavaString(CharSequence str)
573
+ {
574
+ return convert(encoded_pattern, str, encoded, decoded);
575
+ }
576
+
577
+ public static final String XMLNS_URI = "http://www.w3.org/2000/xmlns/";
578
+ public static boolean
579
+ isNamespace(Node node)
580
+ {
581
+ return (XMLNS_URI.equals(node.getNamespaceURI()) || isNamespace(node.getNodeName()));
582
+ }
583
+
584
+ public static boolean
585
+ isNamespace(String nodeName)
586
+ {
587
+ return (nodeName.startsWith("xmlns"));
588
+ }
589
+
590
+ public static boolean
591
+ isNonDefaultNamespace(Node node)
592
+ {
593
+ return (isNamespace(node) && ! "xmlns".equals(node.getNodeName()));
594
+ }
595
+
596
+ public static boolean
597
+ isXmlBase(String attrName)
598
+ {
599
+ return "xml:base".equals(attrName) || "xlink:href".equals(attrName);
600
+ }
601
+
602
+ public static boolean
603
+ isBlank(IRubyObject obj)
604
+ {
605
+ if (!(obj instanceof XmlText)) { return false; }
606
+
607
+ CharSequence content = ((XmlNode) obj).getContentImpl();
608
+ return content == null || isBlank(content);
609
+ }
610
+
611
+ public static boolean
612
+ isBlank(CharSequence str)
613
+ {
614
+ int len = str.length();
615
+ int beg = 0;
616
+ while ((beg < len) && (str.charAt(beg) <= ' ')) { beg++; }
617
+ return beg == len;
618
+ }
619
+
620
+ public static boolean
621
+ isBlank(String str)
622
+ {
623
+ return str.isEmpty() || isBlank((CharSequence) str);
624
+ }
625
+
626
+ public static boolean
627
+ isNullOrEmpty(String str)
628
+ {
629
+ return str == null || str.isEmpty();
630
+ }
631
+
632
+ public static CharSequence
633
+ canonicalizeWhitespace(CharSequence str)
634
+ {
635
+ final int len = str.length();
636
+ StringBuilder sb = new StringBuilder(len);
637
+ boolean newline_added = false;
638
+ for (int i = 0; i < len; i++) {
639
+ char c = str.charAt(i);
640
+ if (c == '\n') {
641
+ if (! newline_added) {
642
+ sb.append(c);
643
+ newline_added = true;
644
+ }
645
+ } else {
646
+ sb.append(c);
647
+ }
648
+ }
649
+ return sb;
650
+ }
651
+
652
+ public static String
653
+ newQName(String newPrefix, Node node)
654
+ {
655
+ String tagName = getLocalPart(node.getNodeName());
656
+ if (newPrefix == null) { return tagName; }
657
+ return newPrefix + ':' + tagName;
658
+ }
659
+
660
+ public static IRubyObject[]
661
+ nodeListToRubyArray(Ruby runtime, NodeList nodes)
662
+ {
663
+ IRubyObject[] array = new IRubyObject[nodes.getLength()];
664
+ for (int i = 0; i < nodes.getLength(); i++) {
665
+ array[i] = NokogiriHelpers.getCachedNodeOrCreate(runtime, nodes.item(i));
666
+ }
667
+ return array;
668
+ }
669
+
670
+ public static IRubyObject[]
671
+ nodeListToArray(Ruby ruby, List<Node> nodes)
672
+ {
673
+ IRubyObject[] result = new IRubyObject[nodes.size()];
674
+ for (int i = 0; i < result.length; i++) {
675
+ result[i] = NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes.get(i));
676
+ }
677
+ return result;
678
+ }
679
+
680
+ public static RubyArray
681
+ nodeArrayToRubyArray(Ruby ruby, Node[] nodes)
682
+ {
683
+ RubyArray n = RubyArray.newArray(ruby, nodes.length);
684
+ for (int i = 0; i < nodes.length; i++) {
685
+ n.append(NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes[i]));
686
+ }
687
+ return n;
688
+ }
689
+
690
+ public static String
691
+ getValidEncodingOrNull(IRubyObject encoding)
692
+ {
693
+ if (encoding.isNil()) { return null; } // charsetNames does not like contains(null)
694
+ String enc = rubyStringToString(encoding.convertToString());
695
+ if (CharsetNames.contains(enc)) { return enc; }
696
+ return null;
697
+ }
698
+
699
+ public static String
700
+ getValidEncoding(IRubyObject encoding)
701
+ {
702
+ String validEncoding = getValidEncodingOrNull(encoding);
703
+ if (validEncoding != null) { return validEncoding; }
704
+ return Charset.defaultCharset().name();
705
+ }
706
+
707
+ private static final Set<String> CharsetNames = Charset.availableCharsets().keySet();
708
+
709
+ public static String
710
+ adjustSystemIdIfNecessary(String currentDir, String scriptFileName, String baseURI, String systemId)
711
+ {
712
+ if (systemId == null) { return systemId; }
713
+ File file = new File(systemId);
714
+ if (file.isAbsolute()) { return systemId; }
715
+ String path = resolveSystemId(baseURI, systemId);
716
+ if (path != null) { return path; }
717
+ path = resolveSystemId(currentDir, systemId);
718
+ if (path != null) { return path; }
719
+ return resolveSystemId(scriptFileName, systemId);
720
+ }
721
+
722
+ private static String
723
+ resolveSystemId(String baseName, String systemId)
724
+ {
725
+ if (baseName == null || baseName.length() < 1) { return null; }
726
+ String parentName;
727
+ baseName = baseName.replace("%20", " ");
728
+ File base = new File(baseName);
729
+ if (base.isDirectory()) { parentName = baseName; }
730
+ else { parentName = base.getParent(); }
731
+ if (parentName == null) { return null; }
732
+ if (parentName.toLowerCase().startsWith("file:")) { parentName = parentName.substring("file:".length()); }
733
+ File dtdFile = new File(parentName + "/" + systemId);
734
+ if (dtdFile.exists()) { return dtdFile.getPath(); }
735
+ return null;
736
+ }
737
+
738
+ private static final Charset UTF8 = Charset.forName("UTF-8");
739
+
740
+ public static boolean
741
+ isUTF8(String encoding)
742
+ {
743
+ if (encoding == null) { return true; } // no need to convert encoding
744
+
745
+ if ("UTF-8".equals(encoding)) { return true; }
746
+ return UTF8.aliases().contains(encoding);
747
+ }
748
+
749
+ public static ByteBuffer
750
+ convertEncoding(Charset output_charset, CharSequence input_string)
751
+ {
752
+ return output_charset.encode(CharBuffer.wrap(input_string)); // does replace implicitly on un-mappable characters
753
+ }
754
+
755
+ public static CharSequence
756
+ convertEncodingByNKFIfNecessary(ThreadContext context, XmlDocument doc, CharSequence str)
757
+ {
758
+ if (!(doc instanceof HtmlDocument)) { return str; }
759
+ String parsed_encoding = ((HtmlDocument)doc).getPraedEncoding();
760
+ if (parsed_encoding == null) { return str; }
761
+ String ruby_encoding = rubyStringToString(doc.getEncoding());
762
+ if (ruby_encoding == null) { return str; }
763
+ Charset encoding = Charset.forName(ruby_encoding);
764
+ if (Charset.forName(parsed_encoding).compareTo(encoding) == 0) { return str; }
765
+ if (str.length() == 0) { return str; } // no need to convert
766
+ return NokogiriHelpers.nkf(context, encoding, str);
767
+ }
768
+
769
+ private static final ByteList _Sw = new ByteList(new byte[] { '-', 'S', 'w' }, false);
770
+ private static final ByteList _Jw = new ByteList(new byte[] { '-', 'J', 'w' }, false);
771
+ private static final ByteList _Ew = new ByteList(new byte[] { '-', 'E', 'w' }, false);
772
+ private static final ByteList _Ww = new ByteList(new byte[] { '-', 'W', 'w' }, false);
773
+
774
+ // This method is used from HTML documents. HTML meta tag with encoding specification
775
+ // might appear after non-ascii characters are used. For example, a title tag before
776
+ // a meta tag. In such a case, Xerces encodes characters in UTF-8 without seeing meta tag.
777
+ // Nokogiri uses NKF library to convert characters correct encoding. This means the method
778
+ // works only for JIS/Shift_JIS/EUC-JP.
779
+ private static CharSequence
780
+ nkf(ThreadContext context, Charset encoding, CharSequence str)
781
+ {
782
+ final Ruby runtime = context.getRuntime();
783
+ final ByteList opt;
784
+ if (NokogiriHelpers.Shift_JIS.compareTo(encoding) == 0) { opt = _Sw; }
785
+ else if (NokogiriHelpers.ISO_2022_JP.compareTo(encoding) == 0) { opt = _Jw; }
786
+ else if (NokogiriHelpers.EUC_JP.compareTo(encoding) == 0) { opt = _Ew; }
787
+ else { opt = _Ww; } // should not come here. should be treated before this method.
788
+
789
+ Class nkfClass;
790
+ try {
791
+ // JRuby 1.7 and later
792
+ nkfClass = runtime.getClassLoader().loadClass("org.jruby.ext.nkf.RubyNKF");
793
+ } catch (ClassNotFoundException e1) {
794
+ return str;
795
+ }
796
+ Method nkf_method;
797
+ try {
798
+ nkf_method = nkfClass.getMethod("nkf", ThreadContext.class, IRubyObject.class, IRubyObject.class, IRubyObject.class);
799
+ RubyString r_str =
800
+ (RubyString)nkf_method.invoke(null, context, null, runtime.newString(opt), runtime.newString(str.toString()));
801
+ return NokogiriHelpers.rubyStringToString(r_str);
802
+ } catch (SecurityException e) {
803
+ return str;
804
+ } catch (NoSuchMethodException e) {
805
+ return str;
806
+ } catch (IllegalArgumentException e) {
807
+ return str;
808
+ } catch (IllegalAccessException e) {
809
+ return str;
810
+ } catch (InvocationTargetException e) {
811
+ return str;
812
+ }
813
+ }
814
+
815
+ private static final Charset Shift_JIS = Charset.forName("Shift_JIS");
816
+ private static final Charset ISO_2022_JP = Charset.forName("ISO-2022-JP"); // JIS
817
+ private static final Charset EUC_JP = Charset.forName("EUC-JP");
818
+
819
+ public static boolean
820
+ shouldEncode(Node text)
821
+ {
822
+ final Boolean encoded = (Boolean) text.getUserData(NokogiriHelpers.ENCODED_STRING);
823
+ return encoded == null || ! encoded;
824
+ }
825
+
826
+ public static boolean
827
+ shouldDecode(Node text)
828
+ {
829
+ return !shouldEncode(text);
830
+ }
831
+
832
+ public static NokogiriNamespaceCache
833
+ getNamespaceCache(Node node)
834
+ {
835
+ XmlDocument xmlDoc = (XmlDocument) getCachedNode(node.getOwnerDocument());
836
+ return xmlDoc.getNamespaceCache();
837
+ }
838
+
839
+ public static Node
840
+ renameNode(Node node, String namespaceURI, String qualifiedName) throws DOMException
841
+ {
842
+ Document doc = node.getOwnerDocument();
843
+ NokogiriNamespaceCache nsCache = getNamespaceCache(node);
844
+ Node result = doc.renameNode(node, namespaceURI, qualifiedName);
845
+ if (result != node) {
846
+ nsCache.replaceNode(node, result);
847
+ }
848
+ return result;
849
+ }
734
850
  }