nokogiri 1.11.0.rc1-java → 1.11.2-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (188)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/LICENSE.md +1 -1
  5. data/README.md +171 -94
  6. data/ext/java/nokogiri/EncodingHandler.java +78 -59
  7. data/ext/java/nokogiri/HtmlDocument.java +137 -114
  8. data/ext/java/nokogiri/HtmlElementDescription.java +104 -87
  9. data/ext/java/nokogiri/HtmlEntityLookup.java +31 -26
  10. data/ext/java/nokogiri/HtmlSaxParserContext.java +220 -192
  11. data/ext/java/nokogiri/HtmlSaxPushParser.java +164 -139
  12. data/ext/java/nokogiri/NokogiriService.java +597 -526
  13. data/ext/java/nokogiri/XmlAttr.java +120 -96
  14. data/ext/java/nokogiri/XmlAttributeDecl.java +97 -76
  15. data/ext/java/nokogiri/XmlCdata.java +35 -26
  16. data/ext/java/nokogiri/XmlComment.java +48 -37
  17. data/ext/java/nokogiri/XmlDocument.java +642 -540
  18. data/ext/java/nokogiri/XmlDocumentFragment.java +127 -107
  19. data/ext/java/nokogiri/XmlDtd.java +450 -384
  20. data/ext/java/nokogiri/XmlElement.java +25 -18
  21. data/ext/java/nokogiri/XmlElementContent.java +345 -286
  22. data/ext/java/nokogiri/XmlElementDecl.java +126 -95
  23. data/ext/java/nokogiri/XmlEntityDecl.java +121 -97
  24. data/ext/java/nokogiri/XmlEntityReference.java +51 -42
  25. data/ext/java/nokogiri/XmlNamespace.java +177 -145
  26. data/ext/java/nokogiri/XmlNode.java +1843 -1590
  27. data/ext/java/nokogiri/XmlNodeSet.java +361 -299
  28. data/ext/java/nokogiri/XmlProcessingInstruction.java +49 -39
  29. data/ext/java/nokogiri/XmlReader.java +513 -418
  30. data/ext/java/nokogiri/XmlRelaxng.java +92 -72
  31. data/ext/java/nokogiri/XmlSaxParserContext.java +330 -280
  32. data/ext/java/nokogiri/XmlSaxPushParser.java +229 -190
  33. data/ext/java/nokogiri/XmlSchema.java +335 -210
  34. data/ext/java/nokogiri/XmlSyntaxError.java +113 -87
  35. data/ext/java/nokogiri/XmlText.java +57 -46
  36. data/ext/java/nokogiri/XmlXpathContext.java +242 -178
  37. data/ext/java/nokogiri/XsltStylesheet.java +282 -239
  38. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  39. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +203 -160
  40. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  41. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  42. data/ext/java/nokogiri/internals/NokogiriDomParser.java +65 -50
  43. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  44. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +25 -18
  45. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -254
  46. data/ext/java/nokogiri/internals/NokogiriHelpers.java +738 -622
  47. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +186 -143
  48. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +81 -59
  49. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +66 -49
  50. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +86 -69
  51. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +44 -29
  52. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +121 -48
  53. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -22
  54. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +25 -17
  55. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +57 -42
  56. data/ext/java/nokogiri/internals/ParserContext.java +206 -179
  57. data/ext/java/nokogiri/internals/ReaderNode.java +478 -371
  58. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -707
  59. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +28 -19
  60. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  61. data/ext/java/nokogiri/internals/XmlDeclHandler.java +5 -4
  62. data/ext/java/nokogiri/internals/XmlDomParserContext.java +208 -177
  63. data/ext/java/nokogiri/internals/XmlSaxParser.java +24 -17
  64. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  65. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  66. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  67. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  68. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  78. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  81. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  82. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  83. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  84. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  85. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  86. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  87. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  88. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  89. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  90. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  91. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  92. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -570
  93. data/ext/nokogiri/depend +37 -358
  94. data/ext/nokogiri/extconf.rb +585 -374
  95. data/ext/nokogiri/html_document.c +78 -82
  96. data/ext/nokogiri/html_element_description.c +84 -71
  97. data/ext/nokogiri/html_entity_lookup.c +21 -16
  98. data/ext/nokogiri/html_sax_parser_context.c +69 -66
  99. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  100. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  101. data/ext/nokogiri/nokogiri.c +192 -93
  102. data/ext/nokogiri/test_global_handlers.c +40 -0
  103. data/ext/nokogiri/xml_attr.c +15 -15
  104. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  105. data/ext/nokogiri/xml_cdata.c +13 -18
  106. data/ext/nokogiri/xml_comment.c +19 -26
  107. data/ext/nokogiri/xml_document.c +225 -163
  108. data/ext/nokogiri/xml_document_fragment.c +13 -15
  109. data/ext/nokogiri/xml_dtd.c +54 -48
  110. data/ext/nokogiri/xml_element_content.c +30 -27
  111. data/ext/nokogiri/xml_element_decl.c +22 -22
  112. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  113. data/ext/nokogiri/xml_entity_decl.c +32 -30
  114. data/ext/nokogiri/xml_entity_reference.c +16 -18
  115. data/ext/nokogiri/xml_namespace.c +56 -49
  116. data/ext/nokogiri/xml_node.c +338 -286
  117. data/ext/nokogiri/xml_node_set.c +168 -156
  118. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  119. data/ext/nokogiri/xml_reader.c +195 -172
  120. data/ext/nokogiri/xml_relax_ng.c +52 -28
  121. data/ext/nokogiri/xml_sax_parser.c +118 -118
  122. data/ext/nokogiri/xml_sax_parser_context.c +103 -86
  123. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  124. data/ext/nokogiri/xml_schema.c +111 -34
  125. data/ext/nokogiri/xml_syntax_error.c +42 -21
  126. data/ext/nokogiri/xml_text.c +13 -17
  127. data/ext/nokogiri/xml_xpath_context.c +206 -123
  128. data/ext/nokogiri/xslt_stylesheet.c +158 -161
  129. data/lib/nokogiri.rb +4 -8
  130. data/lib/nokogiri/css/parser.rb +62 -62
  131. data/lib/nokogiri/css/parser.y +2 -2
  132. data/lib/nokogiri/css/parser_extras.rb +38 -36
  133. data/lib/nokogiri/css/xpath_visitor.rb +70 -42
  134. data/lib/nokogiri/extension.rb +26 -0
  135. data/lib/nokogiri/html/document.rb +12 -26
  136. data/lib/nokogiri/html/document_fragment.rb +15 -15
  137. data/lib/nokogiri/nokogiri.jar +0 -0
  138. data/lib/nokogiri/version.rb +2 -148
  139. data/lib/nokogiri/version/constant.rb +5 -0
  140. data/lib/nokogiri/version/info.rb +205 -0
  141. data/lib/nokogiri/xml/builder.rb +2 -2
  142. data/lib/nokogiri/xml/document.rb +48 -18
  143. data/lib/nokogiri/xml/document_fragment.rb +4 -6
  144. data/lib/nokogiri/xml/node.rb +599 -279
  145. data/lib/nokogiri/xml/parse_options.rb +6 -0
  146. data/lib/nokogiri/xml/reader.rb +2 -9
  147. data/lib/nokogiri/xml/relax_ng.rb +6 -2
  148. data/lib/nokogiri/xml/schema.rb +12 -4
  149. data/lib/nokogiri/xml/searchable.rb +24 -16
  150. data/lib/nokogiri/xml/xpath.rb +1 -3
  151. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  152. metadata +87 -158
  153. data/ext/nokogiri/html_document.h +0 -10
  154. data/ext/nokogiri/html_element_description.h +0 -10
  155. data/ext/nokogiri/html_entity_lookup.h +0 -8
  156. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  157. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  158. data/ext/nokogiri/nokogiri.h +0 -122
  159. data/ext/nokogiri/xml_attr.h +0 -9
  160. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  161. data/ext/nokogiri/xml_cdata.h +0 -9
  162. data/ext/nokogiri/xml_comment.h +0 -9
  163. data/ext/nokogiri/xml_document.h +0 -23
  164. data/ext/nokogiri/xml_document_fragment.h +0 -10
  165. data/ext/nokogiri/xml_dtd.h +0 -10
  166. data/ext/nokogiri/xml_element_content.h +0 -10
  167. data/ext/nokogiri/xml_element_decl.h +0 -9
  168. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  169. data/ext/nokogiri/xml_entity_decl.h +0 -10
  170. data/ext/nokogiri/xml_entity_reference.h +0 -9
  171. data/ext/nokogiri/xml_io.c +0 -61
  172. data/ext/nokogiri/xml_io.h +0 -11
  173. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  174. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  175. data/ext/nokogiri/xml_namespace.h +0 -14
  176. data/ext/nokogiri/xml_node.h +0 -13
  177. data/ext/nokogiri/xml_node_set.h +0 -12
  178. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  179. data/ext/nokogiri/xml_reader.h +0 -10
  180. data/ext/nokogiri/xml_relax_ng.h +0 -9
  181. data/ext/nokogiri/xml_sax_parser.h +0 -39
  182. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  183. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  184. data/ext/nokogiri/xml_schema.h +0 -9
  185. data/ext/nokogiri/xml_syntax_error.h +0 -13
  186. data/ext/nokogiri/xml_text.h +0 -9
  187. data/ext/nokogiri/xml_xpath_context.h +0 -10
  188. data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -17,10 +17,10 @@
17
17
  * distribute, sublicense, and/or sell copies of the Software, and to
18
18
  * permit persons to whom the Software is furnished to do so, subject to
19
19
  * the following conditions:
20
- *
20
+ *
21
21
  * The above copyright notice and this permission notice shall be
22
22
  * included in all copies or substantial portions of the Software.
23
- *
23
+ *
24
24
  * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
25
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
26
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -74,661 +74,777 @@ import nokogiri.XmlXpathContext;
74
74
 
75
75
  /**
76
76
  * A class for various utility methods.
77
- *
77
+ *
78
78
  * @author serabe
79
79
  * @author Patrick Mahoney <pat@polycrystal.org>
80
80
  * @author Yoko Harada <yokolet@gmail.com>
81
81
  */
82
- public class NokogiriHelpers {
83
- public static final String CACHED_NODE = "NOKOGIRI_CACHED_NODE";
84
- public static final String ROOT_NODE_INVALID = "NOKOGIRI_ROOT_NODE_INVALID";
85
- public static final String ENCODED_STRING = "NOKOGIRI_ENCODED_STRING";
86
-
87
- public static XmlNode getCachedNode(Node node) {
88
- return (XmlNode) node.getUserData(CACHED_NODE);
89
- }
90
-
91
- public static void clearCachedNode(Node node) {
92
- node.setUserData(CACHED_NODE, null, null);
93
- }
94
-
95
- public static void clearXpathContext(Node node) {
96
- if (node == null) return;
97
-
98
- Node ownerDocument = node.getOwnerDocument();
99
- if (ownerDocument == null) {
100
- ownerDocument = node;
82
+ public class NokogiriHelpers
83
+ {
84
+ public static final String CACHED_NODE = "NOKOGIRI_CACHED_NODE";
85
+ public static final String ROOT_NODE_INVALID = "NOKOGIRI_ROOT_NODE_INVALID";
86
+ public static final String ENCODED_STRING = "NOKOGIRI_ENCODED_STRING";
87
+
88
+ public static XmlNode
89
+ getCachedNode(Node node)
90
+ {
91
+ return (XmlNode) node.getUserData(CACHED_NODE);
92
+ }
93
+
94
+ public static void
95
+ clearCachedNode(Node node)
96
+ {
97
+ node.setUserData(CACHED_NODE, null, null);
98
+ }
99
+
100
+ public static void
101
+ clearXpathContext(Node node)
102
+ {
103
+ if (node == null) { return; }
104
+
105
+ Node ownerDocument = node.getOwnerDocument();
106
+ if (ownerDocument == null) {
107
+ ownerDocument = node;
108
+ }
109
+ ownerDocument.setUserData(XmlXpathContext.XPATH_CONTEXT, null, null);
110
+ }
111
+
112
+ /**
113
+ * Get the XmlNode associated with the underlying
114
+ * <code>node</code>. Creates a new XmlNode (or appropriate subclass)
115
+ * or XmlNamespace wrapping <code>node</code> if there is no cached
116
+ * value.
117
+ */
118
+ public static IRubyObject
119
+ getCachedNodeOrCreate(Ruby runtime, Node node)
120
+ {
121
+ if (node == null) { return runtime.getNil(); }
122
+ if (node.getNodeType() == Node.ATTRIBUTE_NODE && isNamespace(node.getNodeName())) {
123
+ XmlDocument xmlDocument = (XmlDocument) node.getOwnerDocument().getUserData(CACHED_NODE);
124
+ if (!(xmlDocument instanceof HtmlDocument)) {
125
+ String prefix = getLocalNameForNamespace(((Attr) node).getName(), null);
126
+ String href = ((Attr) node).getValue();
127
+ XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefix, href);
128
+ if (xmlNamespace != null) { return xmlNamespace; }
129
+ return XmlNamespace.createFromAttr(runtime, (Attr) node);
130
+ }
131
+ }
132
+ XmlNode xmlNode = getCachedNode(node);
133
+ if (xmlNode == null) {
134
+ xmlNode = (XmlNode) constructNode(runtime, node);
135
+ node.setUserData(CACHED_NODE, xmlNode, null);
136
+ }
137
+ return xmlNode;
138
+ }
139
+
140
+ /**
141
+ * Construct a new XmlNode wrapping <code>node</code>. The proper
142
+ * subclass of XmlNode is chosen based on the type of
143
+ * <code>node</code>.
144
+ */
145
+ public static IRubyObject
146
+ constructNode(Ruby runtime, Node node)
147
+ {
148
+ if (node == null) { return runtime.getNil(); }
149
+ // this is slow; need a way to cache nokogiri classes/modules somewhere
150
+ switch (node.getNodeType()) {
151
+ case Node.ELEMENT_NODE:
152
+ XmlElement xmlElement = (XmlElement) NokogiriService.XML_ELEMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
153
+ "Nokogiri::XML::Element"));
154
+ xmlElement.setNode(runtime, node);
155
+ return xmlElement;
156
+ case Node.ATTRIBUTE_NODE:
157
+ XmlAttr xmlAttr = (XmlAttr) NokogiriService.XML_ATTR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
158
+ "Nokogiri::XML::Attr"));
159
+ xmlAttr.setNode(runtime, node);
160
+ return xmlAttr;
161
+ case Node.TEXT_NODE:
162
+ XmlText xmlText = (XmlText) NokogiriService.XML_TEXT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
163
+ "Nokogiri::XML::Text"));
164
+ xmlText.setNode(runtime, node);
165
+ return xmlText;
166
+ case Node.COMMENT_NODE:
167
+ XmlComment xmlComment = (XmlComment) NokogiriService.XML_COMMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
168
+ "Nokogiri::XML::Comment"));
169
+ xmlComment.setNode(runtime, node);
170
+ return xmlComment;
171
+ case Node.ENTITY_NODE:
172
+ return new XmlNode(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityDecl"), node);
173
+ case Node.ENTITY_REFERENCE_NODE:
174
+ XmlEntityReference xmlEntityRef = (XmlEntityReference) NokogiriService.XML_ENTITY_REFERENCE_ALLOCATOR.allocate(runtime,
175
+ getNokogiriClass(runtime, "Nokogiri::XML::EntityReference"));
176
+ xmlEntityRef.setNode(runtime, node);
177
+ return xmlEntityRef;
178
+ case Node.PROCESSING_INSTRUCTION_NODE:
179
+ XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction)
180
+ NokogiriService.XML_PROCESSING_INSTRUCTION_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
181
+ "Nokogiri::XML::ProcessingInstruction"));
182
+ xmlProcessingInstruction.setNode(runtime, node);
183
+ return xmlProcessingInstruction;
184
+ case Node.CDATA_SECTION_NODE:
185
+ XmlCdata xmlCdata = (XmlCdata) NokogiriService.XML_CDATA_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
186
+ "Nokogiri::XML::CDATA"));
187
+ xmlCdata.setNode(runtime, node);
188
+ return xmlCdata;
189
+ case Node.DOCUMENT_NODE:
190
+ XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(runtime,
191
+ getNokogiriClass(runtime, "Nokogiri::XML::Document"));
192
+ xmlDocument.setDocumentNode(runtime, (Document) node);
193
+ return xmlDocument;
194
+ case Node.DOCUMENT_TYPE_NODE:
195
+ XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
196
+ "Nokogiri::XML::DTD"));
197
+ xmlDtd.setNode(runtime, node);
198
+ return xmlDtd;
199
+ default:
200
+ XmlNode xmlNode = (XmlNode) NokogiriService.XML_NODE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
201
+ "Nokogiri::XML::Node"));
202
+ xmlNode.setNode(runtime, node);
203
+ return xmlNode;
204
+ }
205
+ }
206
+
207
+ public static RubyClass
208
+ getNokogiriClass(Ruby ruby, String name)
209
+ {
210
+ return NokogiriService.getNokogiriClassCache(ruby).get(name);
211
+ }
212
+
213
+ public static IRubyObject
214
+ stringOrNil(Ruby runtime, String str)
215
+ {
216
+ return str == null ? runtime.getNil() : convertString(runtime, str);
217
+ }
218
+
219
+ public static IRubyObject
220
+ stringOrNil(Ruby runtime, CharSequence str)
221
+ {
222
+ return str == null ? runtime.getNil() : convertString(runtime, str);
223
+ }
224
+
225
+ public static IRubyObject
226
+ stringOrNil(Ruby runtime, byte[] bytes)
227
+ {
228
+ return bytes == null ? runtime.getNil() : RubyString.newString(runtime, bytes);
229
+ }
230
+
231
+ public static IRubyObject
232
+ stringOrBlank(Ruby runtime, String str)
233
+ {
234
+ return str == null ? runtime.newString() : convertString(runtime, str);
235
+ }
236
+
237
+ public static RubyString
238
+ convertString(Ruby runtime, String str)
239
+ {
240
+ return RubyString.newUTF8String(runtime, str);
241
+ }
242
+
243
+ public static RubyString
244
+ convertString(Ruby runtime, CharSequence str)
245
+ {
246
+ return RubyString.newUTF8String(runtime, str);
247
+ }
248
+
249
+ /**
250
+ * Convert <code>s</code> to a RubyString, or if s is null or
251
+ * empty return RubyNil.
252
+ */
253
+ public static IRubyObject
254
+ nonEmptyStringOrNil(Ruby runtime, String s)
255
+ {
256
+ if (s == null || s.length() == 0) { return runtime.getNil(); }
257
+ return RubyString.newString(runtime, s);
258
+ }
259
+
260
+ /**
261
+ * Return the prefix of a qualified name like "prefix:local".
262
+ * Returns null if there is no prefix.
263
+ */
264
+ public static String
265
+ getPrefix(String qName)
266
+ {
267
+ if (qName == null) { return null; }
268
+
269
+ final int pos = qName.indexOf(':');
270
+ return pos > 0 ? qName.substring(0, pos) : null;
271
+ }
272
+
273
+ /**
274
+ * Return the local part of a qualified name like "prefix:local".
275
+ * Returns <code>qName</code> if there is no prefix.
276
+ */
277
+ public static String
278
+ getLocalPart(String qName)
279
+ {
280
+ if (qName == null) { return null; }
281
+
282
+ final int pos = qName.indexOf(':');
283
+ return pos > 0 ? qName.substring(pos + 1) : qName;
284
+ }
285
+
286
+ public static String
287
+ getLocalNameForNamespace(String name, String defValue)
288
+ {
289
+ String localName = getLocalPart(name);
290
+ return ("xmlns".equals(localName)) ? defValue : localName;
291
+ }
292
+
293
+ public static String
294
+ rubyStringToString(IRubyObject str)
295
+ {
296
+ if (str.isNil()) { return null; }
297
+ return str.convertToString().decodeString();
298
+ }
299
+
300
+ public static String
301
+ rubyStringToString(RubyString str)
302
+ {
303
+ return str.decodeString(); // if encoding UTF-8 will decode UTF-8
304
+ }
305
+
306
+ public static ByteArrayInputStream
307
+ stringBytesToStream(final IRubyObject str)
308
+ {
309
+ if (str instanceof RubyString || str.respondsTo("to_str")) {
310
+ final ByteList bytes = str.convertToString().getByteList();
311
+ return new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length());
312
+ }
313
+ return null;
314
+ }
315
+
316
+ public static String
317
+ getNodeCompletePath(Node node)
318
+ {
319
+
320
+ Node cur, tmp, next;
321
+
322
+ String buffer = "";
323
+
324
+ cur = node;
325
+
326
+ do {
327
+ String name = "";
328
+ String sep = "?";
329
+ int occur = 0;
330
+ boolean generic = false;
331
+
332
+ if (cur.getNodeType() == Node.DOCUMENT_NODE) {
333
+ if (buffer.startsWith("/")) { break; }
334
+
335
+ sep = "/";
336
+ next = null;
337
+ } else if (cur.getNodeType() == Node.ELEMENT_NODE) {
338
+ generic = false;
339
+ sep = "/";
340
+
341
+ name = cur.getLocalName();
342
+ if (name == null) { name = cur.getNodeName(); }
343
+ if (cur.getNamespaceURI() != null) {
344
+ if (cur.getPrefix() != null) {
345
+ name = cur.getPrefix() + ":" + name;
346
+ } else {
347
+ generic = true;
348
+ name = "*";
349
+ }
101
350
  }
102
- ownerDocument.setUserData(XmlXpathContext.XPATH_CONTEXT, null, null);
103
- }
104
351
 
105
- /**
106
- * Get the XmlNode associated with the underlying
107
- * <code>node</code>. Creates a new XmlNode (or appropriate subclass)
108
- * or XmlNamespace wrapping <code>node</code> if there is no cached
109
- * value.
110
- */
111
- public static IRubyObject getCachedNodeOrCreate(Ruby runtime, Node node) {
112
- if (node == null) return runtime.getNil();
113
- if (node.getNodeType() == Node.ATTRIBUTE_NODE && isNamespace(node.getNodeName())) {
114
- XmlDocument xmlDocument = (XmlDocument) node.getOwnerDocument().getUserData(CACHED_NODE);
115
- if (!(xmlDocument instanceof HtmlDocument)) {
116
- String prefix = getLocalNameForNamespace(((Attr) node).getName(), null);
117
- String href = ((Attr) node).getValue();
118
- XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefix, href);
119
- if (xmlNamespace != null) return xmlNamespace;
120
- return XmlNamespace.createFromAttr(runtime, (Attr) node);
121
- }
122
- }
123
- XmlNode xmlNode = getCachedNode(node);
124
- if (xmlNode == null) {
125
- xmlNode = (XmlNode) constructNode(runtime, node);
126
- node.setUserData(CACHED_NODE, xmlNode, null);
127
- }
128
- return xmlNode;
129
- }
352
+ next = cur.getParentNode();
130
353
 
131
- /**
132
- * Construct a new XmlNode wrapping <code>node</code>. The proper
133
- * subclass of XmlNode is chosen based on the type of
134
- * <code>node</code>.
135
- */
136
- public static IRubyObject constructNode(Ruby runtime, Node node) {
137
- if (node == null) return runtime.getNil();
138
- // this is slow; need a way to cache nokogiri classes/modules somewhere
139
- switch (node.getNodeType()) {
140
- case Node.ELEMENT_NODE:
141
- XmlElement xmlElement = (XmlElement) NokogiriService.XML_ELEMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Element"));
142
- xmlElement.setNode(runtime, node);
143
- return xmlElement;
144
- case Node.ATTRIBUTE_NODE:
145
- XmlAttr xmlAttr = (XmlAttr) NokogiriService.XML_ATTR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Attr"));
146
- xmlAttr.setNode(runtime, node);
147
- return xmlAttr;
148
- case Node.TEXT_NODE:
149
- XmlText xmlText = (XmlText) NokogiriService.XML_TEXT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Text"));
150
- xmlText.setNode(runtime, node);
151
- return xmlText;
152
- case Node.COMMENT_NODE:
153
- XmlComment xmlComment = (XmlComment) NokogiriService.XML_COMMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Comment"));
154
- xmlComment.setNode(runtime, node);
155
- return xmlComment;
156
- case Node.ENTITY_NODE:
157
- return new XmlNode(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityDecl"), node);
158
- case Node.ENTITY_REFERENCE_NODE:
159
- XmlEntityReference xmlEntityRef = (XmlEntityReference) NokogiriService.XML_ENTITY_REFERENCE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityReference"));
160
- xmlEntityRef.setNode(runtime, node);
161
- return xmlEntityRef;
162
- case Node.PROCESSING_INSTRUCTION_NODE:
163
- XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction) NokogiriService.XML_PROCESSING_INSTRUCTION_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::ProcessingInstruction"));
164
- xmlProcessingInstruction.setNode(runtime, node);
165
- return xmlProcessingInstruction;
166
- case Node.CDATA_SECTION_NODE:
167
- XmlCdata xmlCdata = (XmlCdata) NokogiriService.XML_CDATA_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::CDATA"));
168
- xmlCdata.setNode(runtime, node);
169
- return xmlCdata;
170
- case Node.DOCUMENT_NODE:
171
- XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Document"));
172
- xmlDocument.setDocumentNode(runtime, (Document) node);
173
- return xmlDocument;
174
- case Node.DOCUMENT_TYPE_NODE:
175
- XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD"));
176
- xmlDtd.setNode(runtime, node);
177
- return xmlDtd;
178
- default:
179
- XmlNode xmlNode = (XmlNode) NokogiriService.XML_NODE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Node"));
180
- xmlNode.setNode(runtime, node);
181
- return xmlNode;
182
- }
183
- }
184
-
185
- public static RubyClass getNokogiriClass(Ruby ruby, String name) {
186
- return NokogiriService.getNokogiriClassCache(ruby).get(name);
187
- }
188
-
189
- public static IRubyObject stringOrNil(Ruby runtime, String str) {
190
- return str == null ? runtime.getNil() : convertString(runtime, str);
191
- }
192
-
193
- public static IRubyObject stringOrNil(Ruby runtime, CharSequence str) {
194
- return str == null ? runtime.getNil() : convertString(runtime, str);
195
- }
354
+ /*
355
+ * Thumbler index computation
356
+ */
196
357
 
197
- public static IRubyObject stringOrNil(Ruby runtime, byte[] bytes) {
198
- return bytes == null ? runtime.getNil() : RubyString.newString(runtime, bytes);
199
- }
200
-
201
- public static IRubyObject stringOrBlank(Ruby runtime, String str) {
202
- return str == null ? runtime.newString() : convertString(runtime, str);
203
- }
204
-
205
- public static RubyString convertString(Ruby runtime, String str) {
206
- return RubyString.newUTF8String(runtime, str);
207
- }
208
-
209
- public static RubyString convertString(Ruby runtime, CharSequence str) {
210
- return RubyString.newUTF8String(runtime, str);
211
- }
358
+ tmp = cur.getPreviousSibling();
212
359
 
213
- /**
214
- * Convert <code>s</code> to a RubyString, or if s is null or
215
- * empty return RubyNil.
216
- */
217
- public static IRubyObject nonEmptyStringOrNil(Ruby runtime, String s) {
218
- if (s == null || s.length() == 0) return runtime.getNil();
219
- return RubyString.newString(runtime, s);
220
- }
221
-
222
- /**
223
- * Return the prefix of a qualified name like "prefix:local".
224
- * Returns null if there is no prefix.
225
- */
226
- public static String getPrefix(String qName) {
227
- if (qName == null) return null;
228
-
229
- final int pos = qName.indexOf(':');
230
- return pos > 0 ? qName.substring(0, pos) : null;
231
- }
232
-
233
- /**
234
- * Return the local part of a qualified name like "prefix:local".
235
- * Returns <code>qName</code> if there is no prefix.
236
- */
237
- public static String getLocalPart(String qName) {
238
- if (qName == null) return null;
239
-
240
- final int pos = qName.indexOf(':');
241
- return pos > 0 ? qName.substring(pos + 1) : qName;
242
- }
243
-
244
- public static String getLocalNameForNamespace(String name, String defValue) {
245
- String localName = getLocalPart(name);
246
- return ("xmlns".equals(localName)) ? defValue : localName;
247
- }
248
-
249
- public static String rubyStringToString(IRubyObject str) {
250
- if (str.isNil()) return null;
251
- return str.convertToString().decodeString();
252
- }
253
-
254
- public static String rubyStringToString(RubyString str) {
255
- return str.decodeString(); // if encoding UTF-8 will decode UTF-8
256
- }
257
-
258
- public static ByteArrayInputStream stringBytesToStream(final IRubyObject str) {
259
- if (str instanceof RubyString || str.respondsTo("to_str")) {
260
- final ByteList bytes = str.convertToString().getByteList();
261
- return new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length());
360
+ while (tmp != null) {
361
+ if ((tmp.getNodeType() == Node.ELEMENT_NODE) &&
362
+ (generic || fullNamesMatch(tmp, cur))) {
363
+ occur++;
364
+ }
365
+ tmp = tmp.getPreviousSibling();
262
366
  }
263
- return null;
264
- }
265
367
 
266
- public static String getNodeCompletePath(Node node) {
267
-
268
- Node cur, tmp, next;
269
-
270
- String buffer = "";
271
-
272
- cur = node;
273
-
274
- do {
275
- String name = "";
276
- String sep = "?";
277
- int occur = 0;
278
- boolean generic = false;
279
-
280
- if(cur.getNodeType() == Node.DOCUMENT_NODE) {
281
- if(buffer.startsWith("/")) break;
282
-
283
- sep = "/";
284
- next = null;
285
- } else if(cur.getNodeType() == Node.ELEMENT_NODE) {
286
- generic = false;
287
- sep = "/";
288
-
289
- name = cur.getLocalName();
290
- if (name == null) name = cur.getNodeName();
291
- if(cur.getNamespaceURI() != null) {
292
- if(cur.getPrefix() != null) {
293
- name = cur.getPrefix() + ":" + name;
294
- } else {
295
- generic = true;
296
- name = "*";
297
- }
298
- }
299
-
300
- next = cur.getParentNode();
301
-
302
- /*
303
- * Thumbler index computation
304
- */
305
-
306
- tmp = cur.getPreviousSibling();
307
-
308
- while(tmp != null) {
309
- if((tmp.getNodeType() == Node.ELEMENT_NODE) &&
310
- (generic || fullNamesMatch(tmp, cur))) {
311
- occur++;
312
- }
313
- tmp = tmp.getPreviousSibling();
314
- }
315
-
316
- if(occur == 0) {
317
- tmp = cur.getNextSibling();
318
-
319
- while(tmp != null && occur == 0) {
320
- if((tmp.getNodeType() == Node.ELEMENT_NODE) &&
321
- (generic || fullNamesMatch(tmp,cur))) {
322
- occur++;
323
- }
324
- tmp = tmp.getNextSibling();
325
- }
326
-
327
- if(occur != 0) occur = 1;
328
-
329
- } else {
330
- occur++;
331
- }
332
- } else if(cur.getNodeType() == Node.COMMENT_NODE) {
333
- sep = "/";
334
- name = "comment()";
335
- next = cur.getParentNode();
336
-
337
- /*
338
- * Thumbler index computation.
339
- */
340
-
341
- tmp = cur.getPreviousSibling();
342
-
343
- while(tmp != null) {
344
- if(tmp.getNodeType() == Node.COMMENT_NODE) {
345
- occur++;
346
- }
347
- tmp = tmp.getPreviousSibling();
348
- }
349
-
350
- if(occur == 0) {
351
- tmp = cur.getNextSibling();
352
- while(tmp != null && occur == 0) {
353
- if(tmp.getNodeType() == Node.COMMENT_NODE) {
354
- occur++;
355
- }
356
- tmp = tmp.getNextSibling();
357
- }
358
- if(occur != 0) occur = 1;
359
- } else {
360
- occur = 1;
361
- }
362
-
363
- } else if(cur.getNodeType() == Node.TEXT_NODE ||
364
- cur.getNodeType() == Node.CDATA_SECTION_NODE) {
365
- // I'm here. gist:129
366
- // http://gist.github.com/144923
367
-
368
- sep = "/";
369
- name = "text()";
370
- next = cur.getParentNode();
371
-
372
- /*
373
- * Thumbler index computation.
374
- */
375
-
376
- tmp = cur.getPreviousSibling();
377
- while(tmp != null) {
378
- if(tmp.getNodeType() == Node.TEXT_NODE ||
379
- tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
380
- occur++;
381
- }
382
- tmp = tmp.getPreviousSibling();
383
- }
384
-
385
- if(occur == 0) {
386
- tmp = cur.getNextSibling();
387
-
388
- while(tmp != null && occur == 0) {
389
- if(tmp.getNodeType() == Node.TEXT_NODE ||
390
- tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
391
- occur++;
392
- }
393
- tmp = tmp.getNextSibling();
394
- }
395
- } else {
396
- occur++;
397
- }
398
-
399
- } else if(cur.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
400
- sep = "/";
401
- name = "processing-instruction('"+cur.getLocalName()+"')";
402
- next = cur.getParentNode();
403
-
404
- /*
405
- * Thumbler index computation.
406
- */
407
-
408
- tmp = cur.getParentNode();
409
-
410
- while(tmp != null) {
411
- if(tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
412
- tmp.getLocalName().equals(cur.getLocalName())) {
413
- occur++;
414
- }
415
- tmp = tmp.getPreviousSibling();
416
- }
417
-
418
- if(occur == 0) {
419
- tmp = cur.getNextSibling();
420
-
421
- while(tmp != null && occur == 0) {
422
- if(tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
423
- tmp.getLocalName().equals(cur.getLocalName())){
424
- occur++;
425
- }
426
- tmp = tmp.getNextSibling();
427
- }
428
-
429
- if(occur != 0) {
430
- occur = 1;
431
- }
432
-
433
- } else {
434
- occur++;
435
- }
436
-
437
- } else if(cur.getNodeType() == Node.ATTRIBUTE_NODE) {
438
- sep = "/@";
439
- name = cur.getLocalName();
440
-
441
- if(cur.getNamespaceURI() != null) {
442
- if(cur.getPrefix() != null) {
443
- name = cur.getPrefix() + ":" + name;
444
- }
445
- }
446
-
447
- next = ((Attr) cur).getOwnerElement();
448
-
449
- } else {
450
- next = cur.getParentNode();
451
- }
368
+ if (occur == 0) {
369
+ tmp = cur.getNextSibling();
452
370
 
453
- if(occur == 0){
454
- buffer = sep+name+buffer;
455
- } else {
456
- buffer = sep+name+"["+occur+"]"+buffer;
371
+ while (tmp != null && occur == 0) {
372
+ if ((tmp.getNodeType() == Node.ELEMENT_NODE) &&
373
+ (generic || fullNamesMatch(tmp, cur))) {
374
+ occur++;
457
375
  }
376
+ tmp = tmp.getNextSibling();
377
+ }
458
378
 
459
- cur = next;
460
-
461
- } while(cur != null);
379
+ if (occur != 0) { occur = 1; }
462
380
 
463
- return buffer;
464
- }
465
-
466
- static boolean compareTwoNodes(Node m, Node n) {
467
- return nodesAreEqual(m.getLocalName(), n.getLocalName()) &&
468
- nodesAreEqual(m.getPrefix(), n.getPrefix());
469
- }
470
-
471
- private static boolean nodesAreEqual(Object a, Object b) {
472
- return (((a == null) && (b == null)) ||
473
- ((a != null) && (b != null) && (b.equals(a))));
474
- }
475
-
476
- private static boolean fullNamesMatch(Node a, Node b) {
477
- return a.getNodeName().equals(b.getNodeName());
478
- }
479
-
480
- private static final Pattern encoded_pattern = Pattern.compile("&amp;|&gt;|&lt;|&#13;");
481
- private static final String[] encoded = {"&amp;", "&gt;", "&lt;", "&#13;"};
482
- private static final Pattern decoded_pattern = Pattern.compile("&|>|<|\r");
483
- private static final String[] decoded = {"&", ">", "<", "\r"};
484
-
485
- private static StringBuffer convert(Pattern ptn, CharSequence input, String[] oldChars, String[] newChars) {
486
- Matcher matcher = ptn.matcher(input);
487
- boolean result = matcher.find();
488
- StringBuffer sb = new StringBuffer(input.length() + 8);
489
- while (result) {
490
- String matched = matcher.group();
491
- String replacement = "";
492
- for (int i=0; i<oldChars.length; i++) {
493
- if (matched.contains(oldChars[i])) {
494
- replacement = matched.replace(oldChars[i], newChars[i]);
495
- break;
496
- }
497
- }
498
- matcher.appendReplacement(sb, replacement);
499
- result = matcher.find();
381
+ } else {
382
+ occur++;
383
+ }
384
+ } else if (cur.getNodeType() == Node.COMMENT_NODE) {
385
+ sep = "/";
386
+ name = "comment()";
387
+ next = cur.getParentNode();
388
+
389
+ /*
390
+ * Thumbler index computation.
391
+ */
392
+
393
+ tmp = cur.getPreviousSibling();
394
+
395
+ while (tmp != null) {
396
+ if (tmp.getNodeType() == Node.COMMENT_NODE) {
397
+ occur++;
398
+ }
399
+ tmp = tmp.getPreviousSibling();
500
400
  }
501
- matcher.appendTail(sb);
502
- return sb;
503
- }
504
-
505
- public static CharSequence encodeJavaString(CharSequence str) {
506
- return convert(decoded_pattern, str, decoded, encoded);
507
- }
508
-
509
- public static CharSequence decodeJavaString(CharSequence str) {
510
- return convert(encoded_pattern, str, encoded, decoded);
511
- }
512
-
513
- public static final String XMLNS_URI = "http://www.w3.org/2000/xmlns/";
514
- public static boolean isNamespace(Node node) {
515
- return (XMLNS_URI.equals(node.getNamespaceURI()) || isNamespace(node.getNodeName()));
516
- }
517
-
518
- public static boolean isNamespace(String nodeName) {
519
- return (nodeName.startsWith("xmlns"));
520
- }
521
-
522
- public static boolean isNonDefaultNamespace(Node node) {
523
- return (isNamespace(node) && ! "xmlns".equals(node.getNodeName()));
524
- }
525
-
526
- public static boolean isXmlBase(String attrName) {
527
- return "xml:base".equals(attrName) || "xlink:href".equals(attrName);
528
- }
529
-
530
- public static boolean isBlank(IRubyObject obj) {
531
- if ( !(obj instanceof XmlText) ) return false;
532
-
533
- CharSequence content = ((XmlNode) obj).getContentImpl();
534
- return content == null || isBlank(content);
535
- }
536
-
537
- public static boolean isBlank(CharSequence str) {
538
- int len = str.length(); int beg = 0;
539
- while ((beg < len) && (str.charAt(beg) <= ' ')) beg++;
540
- return beg == len;
541
- }
542
-
543
- public static boolean isBlank(String str) {
544
- return str.isEmpty() || isBlank((CharSequence) str);
545
- }
546
-
547
- public static boolean isNullOrEmpty(String str) {
548
- return str == null || str.isEmpty();
549
- }
550
401
 
551
- public static CharSequence canonicalizeWhitespace(CharSequence str) {
552
- final int len = str.length();
553
- StringBuilder sb = new StringBuilder(len);
554
- boolean newline_added = false;
555
- for ( int i = 0; i < len; i++ ) {
556
- char c = str.charAt(i);
557
- if ( c == '\n' ) {
558
- if ( ! newline_added ) {
559
- sb.append(c); newline_added = true;
560
- }
561
- } else {
562
- sb.append(c);
402
+ if (occur == 0) {
403
+ tmp = cur.getNextSibling();
404
+ while (tmp != null && occur == 0) {
405
+ if (tmp.getNodeType() == Node.COMMENT_NODE) {
406
+ occur++;
563
407
  }
408
+ tmp = tmp.getNextSibling();
409
+ }
410
+ if (occur != 0) { occur = 1; }
411
+ } else {
412
+ occur = 1;
564
413
  }
565
- return sb;
566
- }
567
-
568
- public static String newQName(String newPrefix, Node node) {
569
- String tagName = getLocalPart(node.getNodeName());
570
- if (newPrefix == null) return tagName;
571
- return newPrefix + ':' + tagName;
572
- }
573
414
 
574
- public static IRubyObject[] nodeListToRubyArray(Ruby runtime, NodeList nodes) {
575
- IRubyObject[] array = new IRubyObject[nodes.getLength()];
576
- for (int i = 0; i < nodes.getLength(); i++) {
577
- array[i] = NokogiriHelpers.getCachedNodeOrCreate(runtime, nodes.item(i));
415
+ } else if (cur.getNodeType() == Node.TEXT_NODE ||
416
+ cur.getNodeType() == Node.CDATA_SECTION_NODE) {
417
+ // I'm here. gist:129
418
+ // http://gist.github.com/144923
419
+
420
+ sep = "/";
421
+ name = "text()";
422
+ next = cur.getParentNode();
423
+
424
+ /*
425
+ * Thumbler index computation.
426
+ */
427
+
428
+ tmp = cur.getPreviousSibling();
429
+ while (tmp != null) {
430
+ if (tmp.getNodeType() == Node.TEXT_NODE ||
431
+ tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
432
+ occur++;
433
+ }
434
+ tmp = tmp.getPreviousSibling();
578
435
  }
579
- return array;
580
- }
581
436
 
582
- public static IRubyObject[] nodeListToArray(Ruby ruby, List<Node> nodes) {
583
- IRubyObject[] result = new IRubyObject[nodes.size()];
584
- for (int i = 0; i < result.length; i++) {
585
- result[i] = NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes.get(i));
586
- }
587
- return result;
588
- }
437
+ if (occur == 0) {
438
+ tmp = cur.getNextSibling();
589
439
 
590
- public static RubyArray nodeArrayToRubyArray(Ruby ruby, Node[] nodes) {
591
- RubyArray n = RubyArray.newArray(ruby, nodes.length);
592
- for (int i = 0; i < nodes.length; i++) {
593
- n.append(NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes[i]));
440
+ while (tmp != null && occur == 0) {
441
+ if (tmp.getNodeType() == Node.TEXT_NODE ||
442
+ tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
443
+ occur++;
444
+ }
445
+ tmp = tmp.getNextSibling();
446
+ }
447
+ } else {
448
+ occur++;
594
449
  }
595
- return n;
596
- }
597
450
 
598
- public static String getValidEncodingOrNull(IRubyObject encoding) {
599
- if (encoding.isNil()) return null; // charsetNames does not like contains(null)
600
- String enc = rubyStringToString(encoding.convertToString());
601
- if (CharsetNames.contains(enc)) return enc;
602
- return null;
603
- }
451
+ } else if (cur.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
452
+ sep = "/";
453
+ name = "processing-instruction('" + cur.getLocalName() + "')";
454
+ next = cur.getParentNode();
604
455
 
605
- public static String getValidEncoding(IRubyObject encoding) {
606
- String validEncoding = getValidEncodingOrNull(encoding);
607
- if (validEncoding != null) return validEncoding;
608
- return Charset.defaultCharset().name();
609
- }
456
+ /*
457
+ * Thumbler index computation.
458
+ */
610
459
 
611
- private static final Set<String> CharsetNames = Charset.availableCharsets().keySet();
612
-
613
- public static String adjustSystemIdIfNecessary(String currentDir, String scriptFileName, String baseURI, String systemId) {
614
- if (systemId == null) return systemId;
615
- File file = new File(systemId);
616
- if (file.isAbsolute()) return systemId;
617
- String path = resolveSystemId(baseURI, systemId);
618
- if (path != null) return path;
619
- path = resolveSystemId(currentDir, systemId);
620
- if (path != null) return path;
621
- return resolveSystemId(scriptFileName, systemId);
622
- }
623
-
624
- private static String resolveSystemId(String baseName, String systemId) {
625
- if (baseName == null || baseName.length() < 1) return null;
626
- String parentName;
627
- baseName = baseName.replace("%20", " ");
628
- File base = new File(baseName);
629
- if (base.isDirectory()) parentName = baseName;
630
- else parentName = base.getParent();
631
- if (parentName == null) return null;
632
- if (parentName.toLowerCase().startsWith("file:")) parentName = parentName.substring("file:".length());
633
- File dtdFile = new File(parentName + "/" + systemId);
634
- if (dtdFile.exists()) return dtdFile.getPath();
635
- return null;
636
- }
460
+ tmp = cur.getParentNode();
637
461
 
638
- private static final Charset UTF8 = Charset.forName("UTF-8");
639
-
640
- public static boolean isUTF8(String encoding) {
641
- if (encoding == null) return true; // no need to convert encoding
462
+ while (tmp != null) {
463
+ if (tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
464
+ tmp.getLocalName().equals(cur.getLocalName())) {
465
+ occur++;
466
+ }
467
+ tmp = tmp.getPreviousSibling();
468
+ }
642
469
 
643
- if ("UTF-8".equals(encoding)) return true;
644
- return UTF8.aliases().contains(encoding);
645
- }
470
+ if (occur == 0) {
471
+ tmp = cur.getNextSibling();
646
472
 
647
- public static ByteBuffer convertEncoding(Charset output_charset, CharSequence input_string) {
648
- return output_charset.encode(CharBuffer.wrap(input_string)); // does replace implicitly on un-mappable characters
649
- }
473
+ while (tmp != null && occur == 0) {
474
+ if (tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
475
+ tmp.getLocalName().equals(cur.getLocalName())) {
476
+ occur++;
477
+ }
478
+ tmp = tmp.getNextSibling();
479
+ }
650
480
 
651
- public static CharSequence convertEncodingByNKFIfNecessary(ThreadContext context, XmlDocument doc, CharSequence str) {
652
- if (!(doc instanceof HtmlDocument)) return str;
653
- String parsed_encoding = ((HtmlDocument)doc).getPraedEncoding();
654
- if (parsed_encoding == null) return str;
655
- String ruby_encoding = rubyStringToString(doc.getEncoding());
656
- if (ruby_encoding == null) return str;
657
- Charset encoding = Charset.forName(ruby_encoding);
658
- if (Charset.forName(parsed_encoding).compareTo(encoding) == 0) return str;
659
- if (str.length() == 0) return str; // no need to convert
660
- return NokogiriHelpers.nkf(context, encoding, str);
661
- }
481
+ if (occur != 0) {
482
+ occur = 1;
483
+ }
662
484
 
663
- private static final ByteList _Sw = new ByteList(new byte[] { '-','S','w' }, false);
664
- private static final ByteList _Jw = new ByteList(new byte[] { '-','J','w' }, false);
665
- private static final ByteList _Ew = new ByteList(new byte[] { '-','E','w' }, false);
666
- private static final ByteList _Ww = new ByteList(new byte[] { '-','W','w' }, false);
667
-
668
- // This method is used from HTML documents. HTML meta tag with encoding specification
669
- // might appear after non-ascii characters are used. For example, a title tag before
670
- // a meta tag. In such a case, Xerces encodes characters in UTF-8 without seeing meta tag.
671
- // Nokogiri uses NKF library to convert characters correct encoding. This means the method
672
- // works only for JIS/Shift_JIS/EUC-JP.
673
- private static CharSequence nkf(ThreadContext context, Charset encoding, CharSequence str) {
674
- final Ruby runtime = context.getRuntime();
675
- final ByteList opt;
676
- if (NokogiriHelpers.Shift_JIS.compareTo(encoding) == 0) opt = _Sw;
677
- else if (NokogiriHelpers.ISO_2022_JP.compareTo(encoding) == 0) opt = _Jw;
678
- else if (NokogiriHelpers.EUC_JP.compareTo(encoding) == 0) opt = _Ew;
679
- else opt = _Ww; // should not come here. should be treated before this method.
680
-
681
- Class nkfClass;
682
- try {
683
- // JRuby 1.7 and later
684
- nkfClass = runtime.getClassLoader().loadClass("org.jruby.ext.nkf.RubyNKF");
685
- } catch (ClassNotFoundException e1) {
686
- return str;
687
- }
688
- Method nkf_method;
689
- try {
690
- nkf_method = nkfClass.getMethod("nkf", ThreadContext.class, IRubyObject.class, IRubyObject.class, IRubyObject.class);
691
- RubyString r_str =
692
- (RubyString)nkf_method.invoke(null, context, null, runtime.newString(opt), runtime.newString(str.toString()));
693
- return NokogiriHelpers.rubyStringToString(r_str);
694
- } catch (SecurityException e) {
695
- return str;
696
- } catch (NoSuchMethodException e) {
697
- return str;
698
- } catch (IllegalArgumentException e) {
699
- return str;
700
- } catch (IllegalAccessException e) {
701
- return str;
702
- } catch (InvocationTargetException e) {
703
- return str;
485
+ } else {
486
+ occur++;
704
487
  }
705
- }
706
-
707
- private static final Charset Shift_JIS = Charset.forName("Shift_JIS");
708
- private static final Charset ISO_2022_JP = Charset.forName("ISO-2022-JP"); // JIS
709
- private static final Charset EUC_JP = Charset.forName("EUC-JP");
710
488
 
711
- public static boolean shouldEncode(Node text) {
712
- final Boolean encoded = (Boolean) text.getUserData(NokogiriHelpers.ENCODED_STRING);
713
- return encoded == null || ! encoded;
714
- }
489
+ } else if (cur.getNodeType() == Node.ATTRIBUTE_NODE) {
490
+ sep = "/@";
491
+ name = cur.getLocalName();
715
492
 
716
- public static boolean shouldDecode(Node text) {
717
- return !shouldEncode(text);
718
- }
719
-
720
- public static NokogiriNamespaceCache getNamespaceCache(Node node) {
721
- XmlDocument xmlDoc = (XmlDocument) getCachedNode(node.getOwnerDocument());
722
- return xmlDoc.getNamespaceCache();
723
- }
493
+ if (cur.getNamespaceURI() != null) {
494
+ if (cur.getPrefix() != null) {
495
+ name = cur.getPrefix() + ":" + name;
496
+ }
497
+ }
724
498
 
725
- public static Node renameNode(Node node, String namespaceURI, String qualifiedName) throws DOMException {
726
- Document doc = node.getOwnerDocument();
727
- NokogiriNamespaceCache nsCache = getNamespaceCache(node);
728
- Node result = doc.renameNode(node, namespaceURI, qualifiedName);
729
- if (result != node) {
730
- nsCache.replaceNode(node, result);
499
+ next = ((Attr) cur).getOwnerElement();
500
+
501
+ } else {
502
+ next = cur.getParentNode();
503
+ }
504
+
505
+ if (occur == 0) {
506
+ buffer = sep + name + buffer;
507
+ } else {
508
+ buffer = sep + name + "[" + occur + "]" + buffer;
509
+ }
510
+
511
+ cur = next;
512
+
513
+ } while (cur != null);
514
+
515
+ return buffer;
516
+ }
517
+
518
+ static boolean
519
+ compareTwoNodes(Node m, Node n)
520
+ {
521
+ return nodesAreEqual(m.getLocalName(), n.getLocalName()) &&
522
+ nodesAreEqual(m.getPrefix(), n.getPrefix());
523
+ }
524
+
525
+ private static boolean
526
+ nodesAreEqual(Object a, Object b)
527
+ {
528
+ return (((a == null) && (b == null)) ||
529
+ ((a != null) && (b != null) && (b.equals(a))));
530
+ }
531
+
532
+ private static boolean
533
+ fullNamesMatch(Node a, Node b)
534
+ {
535
+ return a.getNodeName().equals(b.getNodeName());
536
+ }
537
+
538
+ private static final Pattern encoded_pattern = Pattern.compile("&amp;|&gt;|&lt;|&#13;");
539
+ private static final String[] encoded = {"&amp;", "&gt;", "&lt;", "&#13;"};
540
+ private static final Pattern decoded_pattern = Pattern.compile("&|>|<|\r");
541
+ private static final String[] decoded = {"&", ">", "<", "\r"};
542
+
543
+ private static StringBuffer
544
+ convert(Pattern ptn, CharSequence input, String[] oldChars, String[] newChars)
545
+ {
546
+ Matcher matcher = ptn.matcher(input);
547
+ boolean result = matcher.find();
548
+ StringBuffer sb = new StringBuffer(input.length() + 8);
549
+ while (result) {
550
+ String matched = matcher.group();
551
+ String replacement = "";
552
+ for (int i = 0; i < oldChars.length; i++) {
553
+ if (matched.contains(oldChars[i])) {
554
+ replacement = matched.replace(oldChars[i], newChars[i]);
555
+ break;
731
556
  }
732
- return result;
733
- }
557
+ }
558
+ matcher.appendReplacement(sb, replacement);
559
+ result = matcher.find();
560
+ }
561
+ matcher.appendTail(sb);
562
+ return sb;
563
+ }
564
+
565
+ public static CharSequence
566
+ encodeJavaString(CharSequence str)
567
+ {
568
+ return convert(decoded_pattern, str, decoded, encoded);
569
+ }
570
+
571
+ public static CharSequence
572
+ decodeJavaString(CharSequence str)
573
+ {
574
+ return convert(encoded_pattern, str, encoded, decoded);
575
+ }
576
+
577
+ public static final String XMLNS_URI = "http://www.w3.org/2000/xmlns/";
578
+ public static boolean
579
+ isNamespace(Node node)
580
+ {
581
+ return (XMLNS_URI.equals(node.getNamespaceURI()) || isNamespace(node.getNodeName()));
582
+ }
583
+
584
+ public static boolean
585
+ isNamespace(String nodeName)
586
+ {
587
+ return (nodeName.startsWith("xmlns"));
588
+ }
589
+
590
+ public static boolean
591
+ isNonDefaultNamespace(Node node)
592
+ {
593
+ return (isNamespace(node) && ! "xmlns".equals(node.getNodeName()));
594
+ }
595
+
596
+ public static boolean
597
+ isXmlBase(String attrName)
598
+ {
599
+ return "xml:base".equals(attrName) || "xlink:href".equals(attrName);
600
+ }
601
+
602
+ public static boolean
603
+ isBlank(IRubyObject obj)
604
+ {
605
+ if (!(obj instanceof XmlText)) { return false; }
606
+
607
+ CharSequence content = ((XmlNode) obj).getContentImpl();
608
+ return content == null || isBlank(content);
609
+ }
610
+
611
+ public static boolean
612
+ isBlank(CharSequence str)
613
+ {
614
+ int len = str.length();
615
+ int beg = 0;
616
+ while ((beg < len) && (str.charAt(beg) <= ' ')) { beg++; }
617
+ return beg == len;
618
+ }
619
+
620
+ public static boolean
621
+ isBlank(String str)
622
+ {
623
+ return str.isEmpty() || isBlank((CharSequence) str);
624
+ }
625
+
626
+ public static boolean
627
+ isNullOrEmpty(String str)
628
+ {
629
+ return str == null || str.isEmpty();
630
+ }
631
+
632
+ public static CharSequence
633
+ canonicalizeWhitespace(CharSequence str)
634
+ {
635
+ final int len = str.length();
636
+ StringBuilder sb = new StringBuilder(len);
637
+ boolean newline_added = false;
638
+ for (int i = 0; i < len; i++) {
639
+ char c = str.charAt(i);
640
+ if (c == '\n') {
641
+ if (! newline_added) {
642
+ sb.append(c);
643
+ newline_added = true;
644
+ }
645
+ } else {
646
+ sb.append(c);
647
+ }
648
+ }
649
+ return sb;
650
+ }
651
+
652
+ public static String
653
+ newQName(String newPrefix, Node node)
654
+ {
655
+ String tagName = getLocalPart(node.getNodeName());
656
+ if (newPrefix == null) { return tagName; }
657
+ return newPrefix + ':' + tagName;
658
+ }
659
+
660
+ public static IRubyObject[]
661
+ nodeListToRubyArray(Ruby runtime, NodeList nodes)
662
+ {
663
+ IRubyObject[] array = new IRubyObject[nodes.getLength()];
664
+ for (int i = 0; i < nodes.getLength(); i++) {
665
+ array[i] = NokogiriHelpers.getCachedNodeOrCreate(runtime, nodes.item(i));
666
+ }
667
+ return array;
668
+ }
669
+
670
+ public static IRubyObject[]
671
+ nodeListToArray(Ruby ruby, List<Node> nodes)
672
+ {
673
+ IRubyObject[] result = new IRubyObject[nodes.size()];
674
+ for (int i = 0; i < result.length; i++) {
675
+ result[i] = NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes.get(i));
676
+ }
677
+ return result;
678
+ }
679
+
680
+ public static RubyArray
681
+ nodeArrayToRubyArray(Ruby ruby, Node[] nodes)
682
+ {
683
+ RubyArray n = RubyArray.newArray(ruby, nodes.length);
684
+ for (int i = 0; i < nodes.length; i++) {
685
+ n.append(NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes[i]));
686
+ }
687
+ return n;
688
+ }
689
+
690
+ public static String
691
+ getValidEncodingOrNull(IRubyObject encoding)
692
+ {
693
+ if (encoding.isNil()) { return null; } // charsetNames does not like contains(null)
694
+ String enc = rubyStringToString(encoding.convertToString());
695
+ if (CharsetNames.contains(enc)) { return enc; }
696
+ return null;
697
+ }
698
+
699
+ public static String
700
+ getValidEncoding(IRubyObject encoding)
701
+ {
702
+ String validEncoding = getValidEncodingOrNull(encoding);
703
+ if (validEncoding != null) { return validEncoding; }
704
+ return Charset.defaultCharset().name();
705
+ }
706
+
707
+ private static final Set<String> CharsetNames = Charset.availableCharsets().keySet();
708
+
709
+ public static String
710
+ adjustSystemIdIfNecessary(String currentDir, String scriptFileName, String baseURI, String systemId)
711
+ {
712
+ if (systemId == null) { return systemId; }
713
+ File file = new File(systemId);
714
+ if (file.isAbsolute()) { return systemId; }
715
+ String path = resolveSystemId(baseURI, systemId);
716
+ if (path != null) { return path; }
717
+ path = resolveSystemId(currentDir, systemId);
718
+ if (path != null) { return path; }
719
+ return resolveSystemId(scriptFileName, systemId);
720
+ }
721
+
722
+ private static String
723
+ resolveSystemId(String baseName, String systemId)
724
+ {
725
+ if (baseName == null || baseName.length() < 1) { return null; }
726
+ String parentName;
727
+ baseName = baseName.replace("%20", " ");
728
+ File base = new File(baseName);
729
+ if (base.isDirectory()) { parentName = baseName; }
730
+ else { parentName = base.getParent(); }
731
+ if (parentName == null) { return null; }
732
+ if (parentName.toLowerCase().startsWith("file:")) { parentName = parentName.substring("file:".length()); }
733
+ File dtdFile = new File(parentName + "/" + systemId);
734
+ if (dtdFile.exists()) { return dtdFile.getPath(); }
735
+ return null;
736
+ }
737
+
738
+ private static final Charset UTF8 = Charset.forName("UTF-8");
739
+
740
+ public static boolean
741
+ isUTF8(String encoding)
742
+ {
743
+ if (encoding == null) { return true; } // no need to convert encoding
744
+
745
+ if ("UTF-8".equals(encoding)) { return true; }
746
+ return UTF8.aliases().contains(encoding);
747
+ }
748
+
749
+ public static ByteBuffer
750
+ convertEncoding(Charset output_charset, CharSequence input_string)
751
+ {
752
+ return output_charset.encode(CharBuffer.wrap(input_string)); // does replace implicitly on un-mappable characters
753
+ }
754
+
755
+ public static CharSequence
756
+ convertEncodingByNKFIfNecessary(ThreadContext context, XmlDocument doc, CharSequence str)
757
+ {
758
+ if (!(doc instanceof HtmlDocument)) { return str; }
759
+ String parsed_encoding = ((HtmlDocument)doc).getPraedEncoding();
760
+ if (parsed_encoding == null) { return str; }
761
+ String ruby_encoding = rubyStringToString(doc.getEncoding());
762
+ if (ruby_encoding == null) { return str; }
763
+ Charset encoding = Charset.forName(ruby_encoding);
764
+ if (Charset.forName(parsed_encoding).compareTo(encoding) == 0) { return str; }
765
+ if (str.length() == 0) { return str; } // no need to convert
766
+ return NokogiriHelpers.nkf(context, encoding, str);
767
+ }
768
+
769
+ private static final ByteList _Sw = new ByteList(new byte[] { '-', 'S', 'w' }, false);
770
+ private static final ByteList _Jw = new ByteList(new byte[] { '-', 'J', 'w' }, false);
771
+ private static final ByteList _Ew = new ByteList(new byte[] { '-', 'E', 'w' }, false);
772
+ private static final ByteList _Ww = new ByteList(new byte[] { '-', 'W', 'w' }, false);
773
+
774
+ // This method is used from HTML documents. HTML meta tag with encoding specification
775
+ // might appear after non-ascii characters are used. For example, a title tag before
776
+ // a meta tag. In such a case, Xerces encodes characters in UTF-8 without seeing meta tag.
777
+ // Nokogiri uses NKF library to convert characters correct encoding. This means the method
778
+ // works only for JIS/Shift_JIS/EUC-JP.
779
+ private static CharSequence
780
+ nkf(ThreadContext context, Charset encoding, CharSequence str)
781
+ {
782
+ final Ruby runtime = context.getRuntime();
783
+ final ByteList opt;
784
+ if (NokogiriHelpers.Shift_JIS.compareTo(encoding) == 0) { opt = _Sw; }
785
+ else if (NokogiriHelpers.ISO_2022_JP.compareTo(encoding) == 0) { opt = _Jw; }
786
+ else if (NokogiriHelpers.EUC_JP.compareTo(encoding) == 0) { opt = _Ew; }
787
+ else { opt = _Ww; } // should not come here. should be treated before this method.
788
+
789
+ Class nkfClass;
790
+ try {
791
+ // JRuby 1.7 and later
792
+ nkfClass = runtime.getClassLoader().loadClass("org.jruby.ext.nkf.RubyNKF");
793
+ } catch (ClassNotFoundException e1) {
794
+ return str;
795
+ }
796
+ Method nkf_method;
797
+ try {
798
+ nkf_method = nkfClass.getMethod("nkf", ThreadContext.class, IRubyObject.class, IRubyObject.class, IRubyObject.class);
799
+ RubyString r_str =
800
+ (RubyString)nkf_method.invoke(null, context, null, runtime.newString(opt), runtime.newString(str.toString()));
801
+ return NokogiriHelpers.rubyStringToString(r_str);
802
+ } catch (SecurityException e) {
803
+ return str;
804
+ } catch (NoSuchMethodException e) {
805
+ return str;
806
+ } catch (IllegalArgumentException e) {
807
+ return str;
808
+ } catch (IllegalAccessException e) {
809
+ return str;
810
+ } catch (InvocationTargetException e) {
811
+ return str;
812
+ }
813
+ }
814
+
815
+ private static final Charset Shift_JIS = Charset.forName("Shift_JIS");
816
+ private static final Charset ISO_2022_JP = Charset.forName("ISO-2022-JP"); // JIS
817
+ private static final Charset EUC_JP = Charset.forName("EUC-JP");
818
+
819
+ public static boolean
820
+ shouldEncode(Node text)
821
+ {
822
+ final Boolean encoded = (Boolean) text.getUserData(NokogiriHelpers.ENCODED_STRING);
823
+ return encoded == null || ! encoded;
824
+ }
825
+
826
+ public static boolean
827
+ shouldDecode(Node text)
828
+ {
829
+ return !shouldEncode(text);
830
+ }
831
+
832
+ public static NokogiriNamespaceCache
833
+ getNamespaceCache(Node node)
834
+ {
835
+ XmlDocument xmlDoc = (XmlDocument) getCachedNode(node.getOwnerDocument());
836
+ return xmlDoc.getNamespaceCache();
837
+ }
838
+
839
+ public static Node
840
+ renameNode(Node node, String namespaceURI, String qualifiedName) throws DOMException
841
+ {
842
+ Document doc = node.getOwnerDocument();
843
+ NokogiriNamespaceCache nsCache = getNamespaceCache(node);
844
+ Node result = doc.renameNode(node, namespaceURI, qualifiedName);
845
+ if (result != node) {
846
+ nsCache.replaceNode(node, result);
847
+ }
848
+ return result;
849
+ }
734
850
  }