nokogiri 1.11.1-java → 1.11.2-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +12 -12
- data/LICENSE.md +1 -1
- data/README.md +20 -15
- data/ext/java/nokogiri/EncodingHandler.java +78 -59
- data/ext/java/nokogiri/HtmlDocument.java +137 -114
- data/ext/java/nokogiri/HtmlElementDescription.java +104 -87
- data/ext/java/nokogiri/HtmlEntityLookup.java +31 -26
- data/ext/java/nokogiri/HtmlSaxParserContext.java +220 -192
- data/ext/java/nokogiri/HtmlSaxPushParser.java +164 -139
- data/ext/java/nokogiri/NokogiriService.java +597 -526
- data/ext/java/nokogiri/XmlAttr.java +120 -96
- data/ext/java/nokogiri/XmlAttributeDecl.java +97 -76
- data/ext/java/nokogiri/XmlCdata.java +35 -26
- data/ext/java/nokogiri/XmlComment.java +48 -37
- data/ext/java/nokogiri/XmlDocument.java +642 -540
- data/ext/java/nokogiri/XmlDocumentFragment.java +127 -107
- data/ext/java/nokogiri/XmlDtd.java +450 -384
- data/ext/java/nokogiri/XmlElement.java +25 -18
- data/ext/java/nokogiri/XmlElementContent.java +345 -286
- data/ext/java/nokogiri/XmlElementDecl.java +126 -95
- data/ext/java/nokogiri/XmlEntityDecl.java +121 -97
- data/ext/java/nokogiri/XmlEntityReference.java +51 -42
- data/ext/java/nokogiri/XmlNamespace.java +177 -145
- data/ext/java/nokogiri/XmlNode.java +1843 -1588
- data/ext/java/nokogiri/XmlNodeSet.java +361 -299
- data/ext/java/nokogiri/XmlProcessingInstruction.java +49 -39
- data/ext/java/nokogiri/XmlReader.java +513 -418
- data/ext/java/nokogiri/XmlRelaxng.java +91 -78
- data/ext/java/nokogiri/XmlSaxParserContext.java +330 -285
- data/ext/java/nokogiri/XmlSaxPushParser.java +229 -190
- data/ext/java/nokogiri/XmlSchema.java +328 -263
- data/ext/java/nokogiri/XmlSyntaxError.java +113 -83
- data/ext/java/nokogiri/XmlText.java +57 -46
- data/ext/java/nokogiri/XmlXpathContext.java +240 -206
- data/ext/java/nokogiri/XsltStylesheet.java +282 -239
- data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +199 -168
- data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
- data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
- data/ext/java/nokogiri/internals/NokogiriDomParser.java +65 -50
- data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
- data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +25 -18
- data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -254
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +738 -622
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +186 -143
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +83 -68
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +66 -49
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +86 -69
- data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +44 -29
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +118 -101
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -24
- data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +25 -17
- data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +57 -42
- data/ext/java/nokogiri/internals/ParserContext.java +206 -179
- data/ext/java/nokogiri/internals/ReaderNode.java +478 -371
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -707
- data/ext/java/nokogiri/internals/SchemaErrorHandler.java +28 -19
- data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
- data/ext/java/nokogiri/internals/XmlDeclHandler.java +5 -4
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +208 -177
- data/ext/java/nokogiri/internals/XmlSaxParser.java +24 -17
- data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
- data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
- data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
- data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
- data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
- data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
- data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
- data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
- data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
- data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
- data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
- data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
- data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
- data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
- data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
- data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
- data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
- data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -570
- data/ext/nokogiri/depend +34 -474
- data/ext/nokogiri/extconf.rb +253 -183
- data/ext/nokogiri/html_document.c +10 -15
- data/ext/nokogiri/html_element_description.c +84 -71
- data/ext/nokogiri/html_entity_lookup.c +21 -16
- data/ext/nokogiri/html_sax_parser_context.c +66 -65
- data/ext/nokogiri/html_sax_push_parser.c +29 -27
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +171 -63
- data/ext/nokogiri/test_global_handlers.c +3 -4
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +221 -164
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +30 -27
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +17 -11
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +56 -49
- data/ext/nokogiri/xml_node.c +338 -286
- data/ext/nokogiri/xml_node_set.c +168 -156
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +191 -157
- data/ext/nokogiri/xml_relax_ng.c +29 -23
- data/ext/nokogiri/xml_sax_parser.c +117 -112
- data/ext/nokogiri/xml_sax_parser_context.c +100 -85
- data/ext/nokogiri/xml_sax_push_parser.c +34 -27
- data/ext/nokogiri/xml_schema.c +48 -42
- data/ext/nokogiri/xml_syntax_error.c +21 -23
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +134 -127
- data/ext/nokogiri/xslt_stylesheet.c +157 -157
- data/lib/nokogiri.rb +1 -22
- data/lib/nokogiri/css/parser.rb +1 -1
- data/lib/nokogiri/extension.rb +26 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -15
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +31 -8
- data/lib/nokogiri/xml/document.rb +31 -11
- data/lib/nokogiri/xml/node.rb +38 -42
- data/lib/nokogiri/xml/reader.rb +2 -9
- data/lib/nokogiri/xml/xpath.rb +1 -3
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
- metadata +7 -8
- data/ext/nokogiri/xml_io.c +0 -63
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
@@ -17,10 +17,10 @@
|
|
17
17
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
18
|
* permit persons to whom the Software is furnished to do so, subject to
|
19
19
|
* the following conditions:
|
20
|
-
*
|
20
|
+
*
|
21
21
|
* The above copyright notice and this permission notice shall be
|
22
22
|
* included in all copies or substantial portions of the Software.
|
23
|
-
*
|
23
|
+
*
|
24
24
|
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
25
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
26
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
@@ -74,661 +74,777 @@ import nokogiri.XmlXpathContext;
|
|
74
74
|
|
75
75
|
/**
|
76
76
|
* A class for various utility methods.
|
77
|
-
*
|
77
|
+
*
|
78
78
|
* @author serabe
|
79
79
|
* @author Patrick Mahoney <pat@polycrystal.org>
|
80
80
|
* @author Yoko Harada <yokolet@gmail.com>
|
81
81
|
*/
|
82
|
-
public class NokogiriHelpers
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
82
|
+
public class NokogiriHelpers
|
83
|
+
{
|
84
|
+
public static final String CACHED_NODE = "NOKOGIRI_CACHED_NODE";
|
85
|
+
public static final String ROOT_NODE_INVALID = "NOKOGIRI_ROOT_NODE_INVALID";
|
86
|
+
public static final String ENCODED_STRING = "NOKOGIRI_ENCODED_STRING";
|
87
|
+
|
88
|
+
public static XmlNode
|
89
|
+
getCachedNode(Node node)
|
90
|
+
{
|
91
|
+
return (XmlNode) node.getUserData(CACHED_NODE);
|
92
|
+
}
|
93
|
+
|
94
|
+
public static void
|
95
|
+
clearCachedNode(Node node)
|
96
|
+
{
|
97
|
+
node.setUserData(CACHED_NODE, null, null);
|
98
|
+
}
|
99
|
+
|
100
|
+
public static void
|
101
|
+
clearXpathContext(Node node)
|
102
|
+
{
|
103
|
+
if (node == null) { return; }
|
104
|
+
|
105
|
+
Node ownerDocument = node.getOwnerDocument();
|
106
|
+
if (ownerDocument == null) {
|
107
|
+
ownerDocument = node;
|
108
|
+
}
|
109
|
+
ownerDocument.setUserData(XmlXpathContext.XPATH_CONTEXT, null, null);
|
110
|
+
}
|
111
|
+
|
112
|
+
/**
|
113
|
+
* Get the XmlNode associated with the underlying
|
114
|
+
* <code>node</code>. Creates a new XmlNode (or appropriate subclass)
|
115
|
+
* or XmlNamespace wrapping <code>node</code> if there is no cached
|
116
|
+
* value.
|
117
|
+
*/
|
118
|
+
public static IRubyObject
|
119
|
+
getCachedNodeOrCreate(Ruby runtime, Node node)
|
120
|
+
{
|
121
|
+
if (node == null) { return runtime.getNil(); }
|
122
|
+
if (node.getNodeType() == Node.ATTRIBUTE_NODE && isNamespace(node.getNodeName())) {
|
123
|
+
XmlDocument xmlDocument = (XmlDocument) node.getOwnerDocument().getUserData(CACHED_NODE);
|
124
|
+
if (!(xmlDocument instanceof HtmlDocument)) {
|
125
|
+
String prefix = getLocalNameForNamespace(((Attr) node).getName(), null);
|
126
|
+
String href = ((Attr) node).getValue();
|
127
|
+
XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefix, href);
|
128
|
+
if (xmlNamespace != null) { return xmlNamespace; }
|
129
|
+
return XmlNamespace.createFromAttr(runtime, (Attr) node);
|
130
|
+
}
|
131
|
+
}
|
132
|
+
XmlNode xmlNode = getCachedNode(node);
|
133
|
+
if (xmlNode == null) {
|
134
|
+
xmlNode = (XmlNode) constructNode(runtime, node);
|
135
|
+
node.setUserData(CACHED_NODE, xmlNode, null);
|
136
|
+
}
|
137
|
+
return xmlNode;
|
138
|
+
}
|
139
|
+
|
140
|
+
/**
|
141
|
+
* Construct a new XmlNode wrapping <code>node</code>. The proper
|
142
|
+
* subclass of XmlNode is chosen based on the type of
|
143
|
+
* <code>node</code>.
|
144
|
+
*/
|
145
|
+
public static IRubyObject
|
146
|
+
constructNode(Ruby runtime, Node node)
|
147
|
+
{
|
148
|
+
if (node == null) { return runtime.getNil(); }
|
149
|
+
// this is slow; need a way to cache nokogiri classes/modules somewhere
|
150
|
+
switch (node.getNodeType()) {
|
151
|
+
case Node.ELEMENT_NODE:
|
152
|
+
XmlElement xmlElement = (XmlElement) NokogiriService.XML_ELEMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
|
153
|
+
"Nokogiri::XML::Element"));
|
154
|
+
xmlElement.setNode(runtime, node);
|
155
|
+
return xmlElement;
|
156
|
+
case Node.ATTRIBUTE_NODE:
|
157
|
+
XmlAttr xmlAttr = (XmlAttr) NokogiriService.XML_ATTR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
|
158
|
+
"Nokogiri::XML::Attr"));
|
159
|
+
xmlAttr.setNode(runtime, node);
|
160
|
+
return xmlAttr;
|
161
|
+
case Node.TEXT_NODE:
|
162
|
+
XmlText xmlText = (XmlText) NokogiriService.XML_TEXT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
|
163
|
+
"Nokogiri::XML::Text"));
|
164
|
+
xmlText.setNode(runtime, node);
|
165
|
+
return xmlText;
|
166
|
+
case Node.COMMENT_NODE:
|
167
|
+
XmlComment xmlComment = (XmlComment) NokogiriService.XML_COMMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
|
168
|
+
"Nokogiri::XML::Comment"));
|
169
|
+
xmlComment.setNode(runtime, node);
|
170
|
+
return xmlComment;
|
171
|
+
case Node.ENTITY_NODE:
|
172
|
+
return new XmlNode(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityDecl"), node);
|
173
|
+
case Node.ENTITY_REFERENCE_NODE:
|
174
|
+
XmlEntityReference xmlEntityRef = (XmlEntityReference) NokogiriService.XML_ENTITY_REFERENCE_ALLOCATOR.allocate(runtime,
|
175
|
+
getNokogiriClass(runtime, "Nokogiri::XML::EntityReference"));
|
176
|
+
xmlEntityRef.setNode(runtime, node);
|
177
|
+
return xmlEntityRef;
|
178
|
+
case Node.PROCESSING_INSTRUCTION_NODE:
|
179
|
+
XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction)
|
180
|
+
NokogiriService.XML_PROCESSING_INSTRUCTION_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
|
181
|
+
"Nokogiri::XML::ProcessingInstruction"));
|
182
|
+
xmlProcessingInstruction.setNode(runtime, node);
|
183
|
+
return xmlProcessingInstruction;
|
184
|
+
case Node.CDATA_SECTION_NODE:
|
185
|
+
XmlCdata xmlCdata = (XmlCdata) NokogiriService.XML_CDATA_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
|
186
|
+
"Nokogiri::XML::CDATA"));
|
187
|
+
xmlCdata.setNode(runtime, node);
|
188
|
+
return xmlCdata;
|
189
|
+
case Node.DOCUMENT_NODE:
|
190
|
+
XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(runtime,
|
191
|
+
getNokogiriClass(runtime, "Nokogiri::XML::Document"));
|
192
|
+
xmlDocument.setDocumentNode(runtime, (Document) node);
|
193
|
+
return xmlDocument;
|
194
|
+
case Node.DOCUMENT_TYPE_NODE:
|
195
|
+
XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
|
196
|
+
"Nokogiri::XML::DTD"));
|
197
|
+
xmlDtd.setNode(runtime, node);
|
198
|
+
return xmlDtd;
|
199
|
+
default:
|
200
|
+
XmlNode xmlNode = (XmlNode) NokogiriService.XML_NODE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
|
201
|
+
"Nokogiri::XML::Node"));
|
202
|
+
xmlNode.setNode(runtime, node);
|
203
|
+
return xmlNode;
|
204
|
+
}
|
205
|
+
}
|
206
|
+
|
207
|
+
public static RubyClass
|
208
|
+
getNokogiriClass(Ruby ruby, String name)
|
209
|
+
{
|
210
|
+
return NokogiriService.getNokogiriClassCache(ruby).get(name);
|
211
|
+
}
|
212
|
+
|
213
|
+
public static IRubyObject
|
214
|
+
stringOrNil(Ruby runtime, String str)
|
215
|
+
{
|
216
|
+
return str == null ? runtime.getNil() : convertString(runtime, str);
|
217
|
+
}
|
218
|
+
|
219
|
+
public static IRubyObject
|
220
|
+
stringOrNil(Ruby runtime, CharSequence str)
|
221
|
+
{
|
222
|
+
return str == null ? runtime.getNil() : convertString(runtime, str);
|
223
|
+
}
|
224
|
+
|
225
|
+
public static IRubyObject
|
226
|
+
stringOrNil(Ruby runtime, byte[] bytes)
|
227
|
+
{
|
228
|
+
return bytes == null ? runtime.getNil() : RubyString.newString(runtime, bytes);
|
229
|
+
}
|
230
|
+
|
231
|
+
public static IRubyObject
|
232
|
+
stringOrBlank(Ruby runtime, String str)
|
233
|
+
{
|
234
|
+
return str == null ? runtime.newString() : convertString(runtime, str);
|
235
|
+
}
|
236
|
+
|
237
|
+
public static RubyString
|
238
|
+
convertString(Ruby runtime, String str)
|
239
|
+
{
|
240
|
+
return RubyString.newUTF8String(runtime, str);
|
241
|
+
}
|
242
|
+
|
243
|
+
public static RubyString
|
244
|
+
convertString(Ruby runtime, CharSequence str)
|
245
|
+
{
|
246
|
+
return RubyString.newUTF8String(runtime, str);
|
247
|
+
}
|
248
|
+
|
249
|
+
/**
|
250
|
+
* Convert <code>s</code> to a RubyString, or if s is null or
|
251
|
+
* empty return RubyNil.
|
252
|
+
*/
|
253
|
+
public static IRubyObject
|
254
|
+
nonEmptyStringOrNil(Ruby runtime, String s)
|
255
|
+
{
|
256
|
+
if (s == null || s.length() == 0) { return runtime.getNil(); }
|
257
|
+
return RubyString.newString(runtime, s);
|
258
|
+
}
|
259
|
+
|
260
|
+
/**
|
261
|
+
* Return the prefix of a qualified name like "prefix:local".
|
262
|
+
* Returns null if there is no prefix.
|
263
|
+
*/
|
264
|
+
public static String
|
265
|
+
getPrefix(String qName)
|
266
|
+
{
|
267
|
+
if (qName == null) { return null; }
|
268
|
+
|
269
|
+
final int pos = qName.indexOf(':');
|
270
|
+
return pos > 0 ? qName.substring(0, pos) : null;
|
271
|
+
}
|
272
|
+
|
273
|
+
/**
|
274
|
+
* Return the local part of a qualified name like "prefix:local".
|
275
|
+
* Returns <code>qName</code> if there is no prefix.
|
276
|
+
*/
|
277
|
+
public static String
|
278
|
+
getLocalPart(String qName)
|
279
|
+
{
|
280
|
+
if (qName == null) { return null; }
|
281
|
+
|
282
|
+
final int pos = qName.indexOf(':');
|
283
|
+
return pos > 0 ? qName.substring(pos + 1) : qName;
|
284
|
+
}
|
285
|
+
|
286
|
+
public static String
|
287
|
+
getLocalNameForNamespace(String name, String defValue)
|
288
|
+
{
|
289
|
+
String localName = getLocalPart(name);
|
290
|
+
return ("xmlns".equals(localName)) ? defValue : localName;
|
291
|
+
}
|
292
|
+
|
293
|
+
public static String
|
294
|
+
rubyStringToString(IRubyObject str)
|
295
|
+
{
|
296
|
+
if (str.isNil()) { return null; }
|
297
|
+
return str.convertToString().decodeString();
|
298
|
+
}
|
299
|
+
|
300
|
+
public static String
|
301
|
+
rubyStringToString(RubyString str)
|
302
|
+
{
|
303
|
+
return str.decodeString(); // if encoding UTF-8 will decode UTF-8
|
304
|
+
}
|
305
|
+
|
306
|
+
public static ByteArrayInputStream
|
307
|
+
stringBytesToStream(final IRubyObject str)
|
308
|
+
{
|
309
|
+
if (str instanceof RubyString || str.respondsTo("to_str")) {
|
310
|
+
final ByteList bytes = str.convertToString().getByteList();
|
311
|
+
return new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length());
|
312
|
+
}
|
313
|
+
return null;
|
314
|
+
}
|
315
|
+
|
316
|
+
public static String
|
317
|
+
getNodeCompletePath(Node node)
|
318
|
+
{
|
319
|
+
|
320
|
+
Node cur, tmp, next;
|
321
|
+
|
322
|
+
String buffer = "";
|
323
|
+
|
324
|
+
cur = node;
|
325
|
+
|
326
|
+
do {
|
327
|
+
String name = "";
|
328
|
+
String sep = "?";
|
329
|
+
int occur = 0;
|
330
|
+
boolean generic = false;
|
331
|
+
|
332
|
+
if (cur.getNodeType() == Node.DOCUMENT_NODE) {
|
333
|
+
if (buffer.startsWith("/")) { break; }
|
334
|
+
|
335
|
+
sep = "/";
|
336
|
+
next = null;
|
337
|
+
} else if (cur.getNodeType() == Node.ELEMENT_NODE) {
|
338
|
+
generic = false;
|
339
|
+
sep = "/";
|
340
|
+
|
341
|
+
name = cur.getLocalName();
|
342
|
+
if (name == null) { name = cur.getNodeName(); }
|
343
|
+
if (cur.getNamespaceURI() != null) {
|
344
|
+
if (cur.getPrefix() != null) {
|
345
|
+
name = cur.getPrefix() + ":" + name;
|
346
|
+
} else {
|
347
|
+
generic = true;
|
348
|
+
name = "*";
|
349
|
+
}
|
101
350
|
}
|
102
|
-
ownerDocument.setUserData(XmlXpathContext.XPATH_CONTEXT, null, null);
|
103
|
-
}
|
104
351
|
|
105
|
-
|
106
|
-
* Get the XmlNode associated with the underlying
|
107
|
-
* <code>node</code>. Creates a new XmlNode (or appropriate subclass)
|
108
|
-
* or XmlNamespace wrapping <code>node</code> if there is no cached
|
109
|
-
* value.
|
110
|
-
*/
|
111
|
-
public static IRubyObject getCachedNodeOrCreate(Ruby runtime, Node node) {
|
112
|
-
if (node == null) return runtime.getNil();
|
113
|
-
if (node.getNodeType() == Node.ATTRIBUTE_NODE && isNamespace(node.getNodeName())) {
|
114
|
-
XmlDocument xmlDocument = (XmlDocument) node.getOwnerDocument().getUserData(CACHED_NODE);
|
115
|
-
if (!(xmlDocument instanceof HtmlDocument)) {
|
116
|
-
String prefix = getLocalNameForNamespace(((Attr) node).getName(), null);
|
117
|
-
String href = ((Attr) node).getValue();
|
118
|
-
XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefix, href);
|
119
|
-
if (xmlNamespace != null) return xmlNamespace;
|
120
|
-
return XmlNamespace.createFromAttr(runtime, (Attr) node);
|
121
|
-
}
|
122
|
-
}
|
123
|
-
XmlNode xmlNode = getCachedNode(node);
|
124
|
-
if (xmlNode == null) {
|
125
|
-
xmlNode = (XmlNode) constructNode(runtime, node);
|
126
|
-
node.setUserData(CACHED_NODE, xmlNode, null);
|
127
|
-
}
|
128
|
-
return xmlNode;
|
129
|
-
}
|
352
|
+
next = cur.getParentNode();
|
130
353
|
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
* <code>node</code>.
|
135
|
-
*/
|
136
|
-
public static IRubyObject constructNode(Ruby runtime, Node node) {
|
137
|
-
if (node == null) return runtime.getNil();
|
138
|
-
// this is slow; need a way to cache nokogiri classes/modules somewhere
|
139
|
-
switch (node.getNodeType()) {
|
140
|
-
case Node.ELEMENT_NODE:
|
141
|
-
XmlElement xmlElement = (XmlElement) NokogiriService.XML_ELEMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Element"));
|
142
|
-
xmlElement.setNode(runtime, node);
|
143
|
-
return xmlElement;
|
144
|
-
case Node.ATTRIBUTE_NODE:
|
145
|
-
XmlAttr xmlAttr = (XmlAttr) NokogiriService.XML_ATTR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Attr"));
|
146
|
-
xmlAttr.setNode(runtime, node);
|
147
|
-
return xmlAttr;
|
148
|
-
case Node.TEXT_NODE:
|
149
|
-
XmlText xmlText = (XmlText) NokogiriService.XML_TEXT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Text"));
|
150
|
-
xmlText.setNode(runtime, node);
|
151
|
-
return xmlText;
|
152
|
-
case Node.COMMENT_NODE:
|
153
|
-
XmlComment xmlComment = (XmlComment) NokogiriService.XML_COMMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Comment"));
|
154
|
-
xmlComment.setNode(runtime, node);
|
155
|
-
return xmlComment;
|
156
|
-
case Node.ENTITY_NODE:
|
157
|
-
return new XmlNode(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityDecl"), node);
|
158
|
-
case Node.ENTITY_REFERENCE_NODE:
|
159
|
-
XmlEntityReference xmlEntityRef = (XmlEntityReference) NokogiriService.XML_ENTITY_REFERENCE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityReference"));
|
160
|
-
xmlEntityRef.setNode(runtime, node);
|
161
|
-
return xmlEntityRef;
|
162
|
-
case Node.PROCESSING_INSTRUCTION_NODE:
|
163
|
-
XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction) NokogiriService.XML_PROCESSING_INSTRUCTION_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::ProcessingInstruction"));
|
164
|
-
xmlProcessingInstruction.setNode(runtime, node);
|
165
|
-
return xmlProcessingInstruction;
|
166
|
-
case Node.CDATA_SECTION_NODE:
|
167
|
-
XmlCdata xmlCdata = (XmlCdata) NokogiriService.XML_CDATA_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::CDATA"));
|
168
|
-
xmlCdata.setNode(runtime, node);
|
169
|
-
return xmlCdata;
|
170
|
-
case Node.DOCUMENT_NODE:
|
171
|
-
XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Document"));
|
172
|
-
xmlDocument.setDocumentNode(runtime, (Document) node);
|
173
|
-
return xmlDocument;
|
174
|
-
case Node.DOCUMENT_TYPE_NODE:
|
175
|
-
XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD"));
|
176
|
-
xmlDtd.setNode(runtime, node);
|
177
|
-
return xmlDtd;
|
178
|
-
default:
|
179
|
-
XmlNode xmlNode = (XmlNode) NokogiriService.XML_NODE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Node"));
|
180
|
-
xmlNode.setNode(runtime, node);
|
181
|
-
return xmlNode;
|
182
|
-
}
|
183
|
-
}
|
184
|
-
|
185
|
-
public static RubyClass getNokogiriClass(Ruby ruby, String name) {
|
186
|
-
return NokogiriService.getNokogiriClassCache(ruby).get(name);
|
187
|
-
}
|
188
|
-
|
189
|
-
public static IRubyObject stringOrNil(Ruby runtime, String str) {
|
190
|
-
return str == null ? runtime.getNil() : convertString(runtime, str);
|
191
|
-
}
|
192
|
-
|
193
|
-
public static IRubyObject stringOrNil(Ruby runtime, CharSequence str) {
|
194
|
-
return str == null ? runtime.getNil() : convertString(runtime, str);
|
195
|
-
}
|
354
|
+
/*
|
355
|
+
* Thumbler index computation
|
356
|
+
*/
|
196
357
|
|
197
|
-
|
198
|
-
return bytes == null ? runtime.getNil() : RubyString.newString(runtime, bytes);
|
199
|
-
}
|
200
|
-
|
201
|
-
public static IRubyObject stringOrBlank(Ruby runtime, String str) {
|
202
|
-
return str == null ? runtime.newString() : convertString(runtime, str);
|
203
|
-
}
|
204
|
-
|
205
|
-
public static RubyString convertString(Ruby runtime, String str) {
|
206
|
-
return RubyString.newUTF8String(runtime, str);
|
207
|
-
}
|
208
|
-
|
209
|
-
public static RubyString convertString(Ruby runtime, CharSequence str) {
|
210
|
-
return RubyString.newUTF8String(runtime, str);
|
211
|
-
}
|
358
|
+
tmp = cur.getPreviousSibling();
|
212
359
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
return RubyString.newString(runtime, s);
|
220
|
-
}
|
221
|
-
|
222
|
-
/**
|
223
|
-
* Return the prefix of a qualified name like "prefix:local".
|
224
|
-
* Returns null if there is no prefix.
|
225
|
-
*/
|
226
|
-
public static String getPrefix(String qName) {
|
227
|
-
if (qName == null) return null;
|
228
|
-
|
229
|
-
final int pos = qName.indexOf(':');
|
230
|
-
return pos > 0 ? qName.substring(0, pos) : null;
|
231
|
-
}
|
232
|
-
|
233
|
-
/**
|
234
|
-
* Return the local part of a qualified name like "prefix:local".
|
235
|
-
* Returns <code>qName</code> if there is no prefix.
|
236
|
-
*/
|
237
|
-
public static String getLocalPart(String qName) {
|
238
|
-
if (qName == null) return null;
|
239
|
-
|
240
|
-
final int pos = qName.indexOf(':');
|
241
|
-
return pos > 0 ? qName.substring(pos + 1) : qName;
|
242
|
-
}
|
243
|
-
|
244
|
-
public static String getLocalNameForNamespace(String name, String defValue) {
|
245
|
-
String localName = getLocalPart(name);
|
246
|
-
return ("xmlns".equals(localName)) ? defValue : localName;
|
247
|
-
}
|
248
|
-
|
249
|
-
public static String rubyStringToString(IRubyObject str) {
|
250
|
-
if (str.isNil()) return null;
|
251
|
-
return str.convertToString().decodeString();
|
252
|
-
}
|
253
|
-
|
254
|
-
public static String rubyStringToString(RubyString str) {
|
255
|
-
return str.decodeString(); // if encoding UTF-8 will decode UTF-8
|
256
|
-
}
|
257
|
-
|
258
|
-
public static ByteArrayInputStream stringBytesToStream(final IRubyObject str) {
|
259
|
-
if (str instanceof RubyString || str.respondsTo("to_str")) {
|
260
|
-
final ByteList bytes = str.convertToString().getByteList();
|
261
|
-
return new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length());
|
360
|
+
while (tmp != null) {
|
361
|
+
if ((tmp.getNodeType() == Node.ELEMENT_NODE) &&
|
362
|
+
(generic || fullNamesMatch(tmp, cur))) {
|
363
|
+
occur++;
|
364
|
+
}
|
365
|
+
tmp = tmp.getPreviousSibling();
|
262
366
|
}
|
263
|
-
return null;
|
264
|
-
}
|
265
367
|
|
266
|
-
|
267
|
-
|
268
|
-
Node cur, tmp, next;
|
269
|
-
|
270
|
-
String buffer = "";
|
271
|
-
|
272
|
-
cur = node;
|
273
|
-
|
274
|
-
do {
|
275
|
-
String name = "";
|
276
|
-
String sep = "?";
|
277
|
-
int occur = 0;
|
278
|
-
boolean generic = false;
|
279
|
-
|
280
|
-
if(cur.getNodeType() == Node.DOCUMENT_NODE) {
|
281
|
-
if(buffer.startsWith("/")) break;
|
282
|
-
|
283
|
-
sep = "/";
|
284
|
-
next = null;
|
285
|
-
} else if(cur.getNodeType() == Node.ELEMENT_NODE) {
|
286
|
-
generic = false;
|
287
|
-
sep = "/";
|
288
|
-
|
289
|
-
name = cur.getLocalName();
|
290
|
-
if (name == null) name = cur.getNodeName();
|
291
|
-
if(cur.getNamespaceURI() != null) {
|
292
|
-
if(cur.getPrefix() != null) {
|
293
|
-
name = cur.getPrefix() + ":" + name;
|
294
|
-
} else {
|
295
|
-
generic = true;
|
296
|
-
name = "*";
|
297
|
-
}
|
298
|
-
}
|
299
|
-
|
300
|
-
next = cur.getParentNode();
|
301
|
-
|
302
|
-
/*
|
303
|
-
* Thumbler index computation
|
304
|
-
*/
|
305
|
-
|
306
|
-
tmp = cur.getPreviousSibling();
|
307
|
-
|
308
|
-
while(tmp != null) {
|
309
|
-
if((tmp.getNodeType() == Node.ELEMENT_NODE) &&
|
310
|
-
(generic || fullNamesMatch(tmp, cur))) {
|
311
|
-
occur++;
|
312
|
-
}
|
313
|
-
tmp = tmp.getPreviousSibling();
|
314
|
-
}
|
315
|
-
|
316
|
-
if(occur == 0) {
|
317
|
-
tmp = cur.getNextSibling();
|
318
|
-
|
319
|
-
while(tmp != null && occur == 0) {
|
320
|
-
if((tmp.getNodeType() == Node.ELEMENT_NODE) &&
|
321
|
-
(generic || fullNamesMatch(tmp,cur))) {
|
322
|
-
occur++;
|
323
|
-
}
|
324
|
-
tmp = tmp.getNextSibling();
|
325
|
-
}
|
326
|
-
|
327
|
-
if(occur != 0) occur = 1;
|
328
|
-
|
329
|
-
} else {
|
330
|
-
occur++;
|
331
|
-
}
|
332
|
-
} else if(cur.getNodeType() == Node.COMMENT_NODE) {
|
333
|
-
sep = "/";
|
334
|
-
name = "comment()";
|
335
|
-
next = cur.getParentNode();
|
336
|
-
|
337
|
-
/*
|
338
|
-
* Thumbler index computation.
|
339
|
-
*/
|
340
|
-
|
341
|
-
tmp = cur.getPreviousSibling();
|
342
|
-
|
343
|
-
while(tmp != null) {
|
344
|
-
if(tmp.getNodeType() == Node.COMMENT_NODE) {
|
345
|
-
occur++;
|
346
|
-
}
|
347
|
-
tmp = tmp.getPreviousSibling();
|
348
|
-
}
|
349
|
-
|
350
|
-
if(occur == 0) {
|
351
|
-
tmp = cur.getNextSibling();
|
352
|
-
while(tmp != null && occur == 0) {
|
353
|
-
if(tmp.getNodeType() == Node.COMMENT_NODE) {
|
354
|
-
occur++;
|
355
|
-
}
|
356
|
-
tmp = tmp.getNextSibling();
|
357
|
-
}
|
358
|
-
if(occur != 0) occur = 1;
|
359
|
-
} else {
|
360
|
-
occur = 1;
|
361
|
-
}
|
362
|
-
|
363
|
-
} else if(cur.getNodeType() == Node.TEXT_NODE ||
|
364
|
-
cur.getNodeType() == Node.CDATA_SECTION_NODE) {
|
365
|
-
// I'm here. gist:129
|
366
|
-
// http://gist.github.com/144923
|
367
|
-
|
368
|
-
sep = "/";
|
369
|
-
name = "text()";
|
370
|
-
next = cur.getParentNode();
|
371
|
-
|
372
|
-
/*
|
373
|
-
* Thumbler index computation.
|
374
|
-
*/
|
375
|
-
|
376
|
-
tmp = cur.getPreviousSibling();
|
377
|
-
while(tmp != null) {
|
378
|
-
if(tmp.getNodeType() == Node.TEXT_NODE ||
|
379
|
-
tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
|
380
|
-
occur++;
|
381
|
-
}
|
382
|
-
tmp = tmp.getPreviousSibling();
|
383
|
-
}
|
384
|
-
|
385
|
-
if(occur == 0) {
|
386
|
-
tmp = cur.getNextSibling();
|
387
|
-
|
388
|
-
while(tmp != null && occur == 0) {
|
389
|
-
if(tmp.getNodeType() == Node.TEXT_NODE ||
|
390
|
-
tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
|
391
|
-
occur++;
|
392
|
-
}
|
393
|
-
tmp = tmp.getNextSibling();
|
394
|
-
}
|
395
|
-
} else {
|
396
|
-
occur++;
|
397
|
-
}
|
398
|
-
|
399
|
-
} else if(cur.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
|
400
|
-
sep = "/";
|
401
|
-
name = "processing-instruction('"+cur.getLocalName()+"')";
|
402
|
-
next = cur.getParentNode();
|
403
|
-
|
404
|
-
/*
|
405
|
-
* Thumbler index computation.
|
406
|
-
*/
|
407
|
-
|
408
|
-
tmp = cur.getParentNode();
|
409
|
-
|
410
|
-
while(tmp != null) {
|
411
|
-
if(tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
|
412
|
-
tmp.getLocalName().equals(cur.getLocalName())) {
|
413
|
-
occur++;
|
414
|
-
}
|
415
|
-
tmp = tmp.getPreviousSibling();
|
416
|
-
}
|
417
|
-
|
418
|
-
if(occur == 0) {
|
419
|
-
tmp = cur.getNextSibling();
|
420
|
-
|
421
|
-
while(tmp != null && occur == 0) {
|
422
|
-
if(tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
|
423
|
-
tmp.getLocalName().equals(cur.getLocalName())){
|
424
|
-
occur++;
|
425
|
-
}
|
426
|
-
tmp = tmp.getNextSibling();
|
427
|
-
}
|
428
|
-
|
429
|
-
if(occur != 0) {
|
430
|
-
occur = 1;
|
431
|
-
}
|
432
|
-
|
433
|
-
} else {
|
434
|
-
occur++;
|
435
|
-
}
|
436
|
-
|
437
|
-
} else if(cur.getNodeType() == Node.ATTRIBUTE_NODE) {
|
438
|
-
sep = "/@";
|
439
|
-
name = cur.getLocalName();
|
440
|
-
|
441
|
-
if(cur.getNamespaceURI() != null) {
|
442
|
-
if(cur.getPrefix() != null) {
|
443
|
-
name = cur.getPrefix() + ":" + name;
|
444
|
-
}
|
445
|
-
}
|
446
|
-
|
447
|
-
next = ((Attr) cur).getOwnerElement();
|
448
|
-
|
449
|
-
} else {
|
450
|
-
next = cur.getParentNode();
|
451
|
-
}
|
368
|
+
if (occur == 0) {
|
369
|
+
tmp = cur.getNextSibling();
|
452
370
|
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
371
|
+
while (tmp != null && occur == 0) {
|
372
|
+
if ((tmp.getNodeType() == Node.ELEMENT_NODE) &&
|
373
|
+
(generic || fullNamesMatch(tmp, cur))) {
|
374
|
+
occur++;
|
457
375
|
}
|
376
|
+
tmp = tmp.getNextSibling();
|
377
|
+
}
|
458
378
|
|
459
|
-
|
460
|
-
|
461
|
-
} while(cur != null);
|
379
|
+
if (occur != 0) { occur = 1; }
|
462
380
|
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
private static final Pattern decoded_pattern = Pattern.compile("&|>|<|\r");
|
483
|
-
private static final String[] decoded = {"&", ">", "<", "\r"};
|
484
|
-
|
485
|
-
private static StringBuffer convert(Pattern ptn, CharSequence input, String[] oldChars, String[] newChars) {
|
486
|
-
Matcher matcher = ptn.matcher(input);
|
487
|
-
boolean result = matcher.find();
|
488
|
-
StringBuffer sb = new StringBuffer(input.length() + 8);
|
489
|
-
while (result) {
|
490
|
-
String matched = matcher.group();
|
491
|
-
String replacement = "";
|
492
|
-
for (int i=0; i<oldChars.length; i++) {
|
493
|
-
if (matched.contains(oldChars[i])) {
|
494
|
-
replacement = matched.replace(oldChars[i], newChars[i]);
|
495
|
-
break;
|
496
|
-
}
|
497
|
-
}
|
498
|
-
matcher.appendReplacement(sb, replacement);
|
499
|
-
result = matcher.find();
|
381
|
+
} else {
|
382
|
+
occur++;
|
383
|
+
}
|
384
|
+
} else if (cur.getNodeType() == Node.COMMENT_NODE) {
|
385
|
+
sep = "/";
|
386
|
+
name = "comment()";
|
387
|
+
next = cur.getParentNode();
|
388
|
+
|
389
|
+
/*
|
390
|
+
* Thumbler index computation.
|
391
|
+
*/
|
392
|
+
|
393
|
+
tmp = cur.getPreviousSibling();
|
394
|
+
|
395
|
+
while (tmp != null) {
|
396
|
+
if (tmp.getNodeType() == Node.COMMENT_NODE) {
|
397
|
+
occur++;
|
398
|
+
}
|
399
|
+
tmp = tmp.getPreviousSibling();
|
500
400
|
}
|
501
|
-
matcher.appendTail(sb);
|
502
|
-
return sb;
|
503
|
-
}
|
504
|
-
|
505
|
-
public static CharSequence encodeJavaString(CharSequence str) {
|
506
|
-
return convert(decoded_pattern, str, decoded, encoded);
|
507
|
-
}
|
508
|
-
|
509
|
-
public static CharSequence decodeJavaString(CharSequence str) {
|
510
|
-
return convert(encoded_pattern, str, encoded, decoded);
|
511
|
-
}
|
512
|
-
|
513
|
-
public static final String XMLNS_URI = "http://www.w3.org/2000/xmlns/";
|
514
|
-
public static boolean isNamespace(Node node) {
|
515
|
-
return (XMLNS_URI.equals(node.getNamespaceURI()) || isNamespace(node.getNodeName()));
|
516
|
-
}
|
517
|
-
|
518
|
-
public static boolean isNamespace(String nodeName) {
|
519
|
-
return (nodeName.startsWith("xmlns"));
|
520
|
-
}
|
521
|
-
|
522
|
-
public static boolean isNonDefaultNamespace(Node node) {
|
523
|
-
return (isNamespace(node) && ! "xmlns".equals(node.getNodeName()));
|
524
|
-
}
|
525
|
-
|
526
|
-
public static boolean isXmlBase(String attrName) {
|
527
|
-
return "xml:base".equals(attrName) || "xlink:href".equals(attrName);
|
528
|
-
}
|
529
|
-
|
530
|
-
public static boolean isBlank(IRubyObject obj) {
|
531
|
-
if ( !(obj instanceof XmlText) ) return false;
|
532
|
-
|
533
|
-
CharSequence content = ((XmlNode) obj).getContentImpl();
|
534
|
-
return content == null || isBlank(content);
|
535
|
-
}
|
536
|
-
|
537
|
-
public static boolean isBlank(CharSequence str) {
|
538
|
-
int len = str.length(); int beg = 0;
|
539
|
-
while ((beg < len) && (str.charAt(beg) <= ' ')) beg++;
|
540
|
-
return beg == len;
|
541
|
-
}
|
542
|
-
|
543
|
-
public static boolean isBlank(String str) {
|
544
|
-
return str.isEmpty() || isBlank((CharSequence) str);
|
545
|
-
}
|
546
|
-
|
547
|
-
public static boolean isNullOrEmpty(String str) {
|
548
|
-
return str == null || str.isEmpty();
|
549
|
-
}
|
550
401
|
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
char c = str.charAt(i);
|
557
|
-
if ( c == '\n' ) {
|
558
|
-
if ( ! newline_added ) {
|
559
|
-
sb.append(c); newline_added = true;
|
560
|
-
}
|
561
|
-
} else {
|
562
|
-
sb.append(c);
|
402
|
+
if (occur == 0) {
|
403
|
+
tmp = cur.getNextSibling();
|
404
|
+
while (tmp != null && occur == 0) {
|
405
|
+
if (tmp.getNodeType() == Node.COMMENT_NODE) {
|
406
|
+
occur++;
|
563
407
|
}
|
408
|
+
tmp = tmp.getNextSibling();
|
409
|
+
}
|
410
|
+
if (occur != 0) { occur = 1; }
|
411
|
+
} else {
|
412
|
+
occur = 1;
|
564
413
|
}
|
565
|
-
return sb;
|
566
|
-
}
|
567
|
-
|
568
|
-
public static String newQName(String newPrefix, Node node) {
|
569
|
-
String tagName = getLocalPart(node.getNodeName());
|
570
|
-
if (newPrefix == null) return tagName;
|
571
|
-
return newPrefix + ':' + tagName;
|
572
|
-
}
|
573
414
|
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
415
|
+
} else if (cur.getNodeType() == Node.TEXT_NODE ||
|
416
|
+
cur.getNodeType() == Node.CDATA_SECTION_NODE) {
|
417
|
+
// I'm here. gist:129
|
418
|
+
// http://gist.github.com/144923
|
419
|
+
|
420
|
+
sep = "/";
|
421
|
+
name = "text()";
|
422
|
+
next = cur.getParentNode();
|
423
|
+
|
424
|
+
/*
|
425
|
+
* Thumbler index computation.
|
426
|
+
*/
|
427
|
+
|
428
|
+
tmp = cur.getPreviousSibling();
|
429
|
+
while (tmp != null) {
|
430
|
+
if (tmp.getNodeType() == Node.TEXT_NODE ||
|
431
|
+
tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
|
432
|
+
occur++;
|
433
|
+
}
|
434
|
+
tmp = tmp.getPreviousSibling();
|
578
435
|
}
|
579
|
-
return array;
|
580
|
-
}
|
581
436
|
|
582
|
-
|
583
|
-
|
584
|
-
for (int i = 0; i < result.length; i++) {
|
585
|
-
result[i] = NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes.get(i));
|
586
|
-
}
|
587
|
-
return result;
|
588
|
-
}
|
437
|
+
if (occur == 0) {
|
438
|
+
tmp = cur.getNextSibling();
|
589
439
|
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
440
|
+
while (tmp != null && occur == 0) {
|
441
|
+
if (tmp.getNodeType() == Node.TEXT_NODE ||
|
442
|
+
tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
|
443
|
+
occur++;
|
444
|
+
}
|
445
|
+
tmp = tmp.getNextSibling();
|
446
|
+
}
|
447
|
+
} else {
|
448
|
+
occur++;
|
594
449
|
}
|
595
|
-
return n;
|
596
|
-
}
|
597
450
|
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
return null;
|
603
|
-
}
|
451
|
+
} else if (cur.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
|
452
|
+
sep = "/";
|
453
|
+
name = "processing-instruction('" + cur.getLocalName() + "')";
|
454
|
+
next = cur.getParentNode();
|
604
455
|
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
return Charset.defaultCharset().name();
|
609
|
-
}
|
456
|
+
/*
|
457
|
+
* Thumbler index computation.
|
458
|
+
*/
|
610
459
|
|
611
|
-
|
612
|
-
|
613
|
-
public static String adjustSystemIdIfNecessary(String currentDir, String scriptFileName, String baseURI, String systemId) {
|
614
|
-
if (systemId == null) return systemId;
|
615
|
-
File file = new File(systemId);
|
616
|
-
if (file.isAbsolute()) return systemId;
|
617
|
-
String path = resolveSystemId(baseURI, systemId);
|
618
|
-
if (path != null) return path;
|
619
|
-
path = resolveSystemId(currentDir, systemId);
|
620
|
-
if (path != null) return path;
|
621
|
-
return resolveSystemId(scriptFileName, systemId);
|
622
|
-
}
|
623
|
-
|
624
|
-
private static String resolveSystemId(String baseName, String systemId) {
|
625
|
-
if (baseName == null || baseName.length() < 1) return null;
|
626
|
-
String parentName;
|
627
|
-
baseName = baseName.replace("%20", " ");
|
628
|
-
File base = new File(baseName);
|
629
|
-
if (base.isDirectory()) parentName = baseName;
|
630
|
-
else parentName = base.getParent();
|
631
|
-
if (parentName == null) return null;
|
632
|
-
if (parentName.toLowerCase().startsWith("file:")) parentName = parentName.substring("file:".length());
|
633
|
-
File dtdFile = new File(parentName + "/" + systemId);
|
634
|
-
if (dtdFile.exists()) return dtdFile.getPath();
|
635
|
-
return null;
|
636
|
-
}
|
460
|
+
tmp = cur.getParentNode();
|
637
461
|
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
462
|
+
while (tmp != null) {
|
463
|
+
if (tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
|
464
|
+
tmp.getLocalName().equals(cur.getLocalName())) {
|
465
|
+
occur++;
|
466
|
+
}
|
467
|
+
tmp = tmp.getPreviousSibling();
|
468
|
+
}
|
642
469
|
|
643
|
-
if (
|
644
|
-
|
645
|
-
}
|
470
|
+
if (occur == 0) {
|
471
|
+
tmp = cur.getNextSibling();
|
646
472
|
|
647
|
-
|
648
|
-
|
649
|
-
|
473
|
+
while (tmp != null && occur == 0) {
|
474
|
+
if (tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
|
475
|
+
tmp.getLocalName().equals(cur.getLocalName())) {
|
476
|
+
occur++;
|
477
|
+
}
|
478
|
+
tmp = tmp.getNextSibling();
|
479
|
+
}
|
650
480
|
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
if (parsed_encoding == null) return str;
|
655
|
-
String ruby_encoding = rubyStringToString(doc.getEncoding());
|
656
|
-
if (ruby_encoding == null) return str;
|
657
|
-
Charset encoding = Charset.forName(ruby_encoding);
|
658
|
-
if (Charset.forName(parsed_encoding).compareTo(encoding) == 0) return str;
|
659
|
-
if (str.length() == 0) return str; // no need to convert
|
660
|
-
return NokogiriHelpers.nkf(context, encoding, str);
|
661
|
-
}
|
481
|
+
if (occur != 0) {
|
482
|
+
occur = 1;
|
483
|
+
}
|
662
484
|
|
663
|
-
|
664
|
-
|
665
|
-
private static final ByteList _Ew = new ByteList(new byte[] { '-','E','w' }, false);
|
666
|
-
private static final ByteList _Ww = new ByteList(new byte[] { '-','W','w' }, false);
|
667
|
-
|
668
|
-
// This method is used from HTML documents. HTML meta tag with encoding specification
|
669
|
-
// might appear after non-ascii characters are used. For example, a title tag before
|
670
|
-
// a meta tag. In such a case, Xerces encodes characters in UTF-8 without seeing meta tag.
|
671
|
-
// Nokogiri uses NKF library to convert characters correct encoding. This means the method
|
672
|
-
// works only for JIS/Shift_JIS/EUC-JP.
|
673
|
-
private static CharSequence nkf(ThreadContext context, Charset encoding, CharSequence str) {
|
674
|
-
final Ruby runtime = context.getRuntime();
|
675
|
-
final ByteList opt;
|
676
|
-
if (NokogiriHelpers.Shift_JIS.compareTo(encoding) == 0) opt = _Sw;
|
677
|
-
else if (NokogiriHelpers.ISO_2022_JP.compareTo(encoding) == 0) opt = _Jw;
|
678
|
-
else if (NokogiriHelpers.EUC_JP.compareTo(encoding) == 0) opt = _Ew;
|
679
|
-
else opt = _Ww; // should not come here. should be treated before this method.
|
680
|
-
|
681
|
-
Class nkfClass;
|
682
|
-
try {
|
683
|
-
// JRuby 1.7 and later
|
684
|
-
nkfClass = runtime.getClassLoader().loadClass("org.jruby.ext.nkf.RubyNKF");
|
685
|
-
} catch (ClassNotFoundException e1) {
|
686
|
-
return str;
|
687
|
-
}
|
688
|
-
Method nkf_method;
|
689
|
-
try {
|
690
|
-
nkf_method = nkfClass.getMethod("nkf", ThreadContext.class, IRubyObject.class, IRubyObject.class, IRubyObject.class);
|
691
|
-
RubyString r_str =
|
692
|
-
(RubyString)nkf_method.invoke(null, context, null, runtime.newString(opt), runtime.newString(str.toString()));
|
693
|
-
return NokogiriHelpers.rubyStringToString(r_str);
|
694
|
-
} catch (SecurityException e) {
|
695
|
-
return str;
|
696
|
-
} catch (NoSuchMethodException e) {
|
697
|
-
return str;
|
698
|
-
} catch (IllegalArgumentException e) {
|
699
|
-
return str;
|
700
|
-
} catch (IllegalAccessException e) {
|
701
|
-
return str;
|
702
|
-
} catch (InvocationTargetException e) {
|
703
|
-
return str;
|
485
|
+
} else {
|
486
|
+
occur++;
|
704
487
|
}
|
705
|
-
}
|
706
|
-
|
707
|
-
private static final Charset Shift_JIS = Charset.forName("Shift_JIS");
|
708
|
-
private static final Charset ISO_2022_JP = Charset.forName("ISO-2022-JP"); // JIS
|
709
|
-
private static final Charset EUC_JP = Charset.forName("EUC-JP");
|
710
488
|
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
}
|
489
|
+
} else if (cur.getNodeType() == Node.ATTRIBUTE_NODE) {
|
490
|
+
sep = "/@";
|
491
|
+
name = cur.getLocalName();
|
715
492
|
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
XmlDocument xmlDoc = (XmlDocument) getCachedNode(node.getOwnerDocument());
|
722
|
-
return xmlDoc.getNamespaceCache();
|
723
|
-
}
|
493
|
+
if (cur.getNamespaceURI() != null) {
|
494
|
+
if (cur.getPrefix() != null) {
|
495
|
+
name = cur.getPrefix() + ":" + name;
|
496
|
+
}
|
497
|
+
}
|
724
498
|
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
499
|
+
next = ((Attr) cur).getOwnerElement();
|
500
|
+
|
501
|
+
} else {
|
502
|
+
next = cur.getParentNode();
|
503
|
+
}
|
504
|
+
|
505
|
+
if (occur == 0) {
|
506
|
+
buffer = sep + name + buffer;
|
507
|
+
} else {
|
508
|
+
buffer = sep + name + "[" + occur + "]" + buffer;
|
509
|
+
}
|
510
|
+
|
511
|
+
cur = next;
|
512
|
+
|
513
|
+
} while (cur != null);
|
514
|
+
|
515
|
+
return buffer;
|
516
|
+
}
|
517
|
+
|
518
|
+
static boolean
|
519
|
+
compareTwoNodes(Node m, Node n)
|
520
|
+
{
|
521
|
+
return nodesAreEqual(m.getLocalName(), n.getLocalName()) &&
|
522
|
+
nodesAreEqual(m.getPrefix(), n.getPrefix());
|
523
|
+
}
|
524
|
+
|
525
|
+
private static boolean
|
526
|
+
nodesAreEqual(Object a, Object b)
|
527
|
+
{
|
528
|
+
return (((a == null) && (b == null)) ||
|
529
|
+
((a != null) && (b != null) && (b.equals(a))));
|
530
|
+
}
|
531
|
+
|
532
|
+
private static boolean
|
533
|
+
fullNamesMatch(Node a, Node b)
|
534
|
+
{
|
535
|
+
return a.getNodeName().equals(b.getNodeName());
|
536
|
+
}
|
537
|
+
|
538
|
+
private static final Pattern encoded_pattern = Pattern.compile("&|>|<| ");
|
539
|
+
private static final String[] encoded = {"&", ">", "<", " "};
|
540
|
+
private static final Pattern decoded_pattern = Pattern.compile("&|>|<|\r");
|
541
|
+
private static final String[] decoded = {"&", ">", "<", "\r"};
|
542
|
+
|
543
|
+
private static StringBuffer
|
544
|
+
convert(Pattern ptn, CharSequence input, String[] oldChars, String[] newChars)
|
545
|
+
{
|
546
|
+
Matcher matcher = ptn.matcher(input);
|
547
|
+
boolean result = matcher.find();
|
548
|
+
StringBuffer sb = new StringBuffer(input.length() + 8);
|
549
|
+
while (result) {
|
550
|
+
String matched = matcher.group();
|
551
|
+
String replacement = "";
|
552
|
+
for (int i = 0; i < oldChars.length; i++) {
|
553
|
+
if (matched.contains(oldChars[i])) {
|
554
|
+
replacement = matched.replace(oldChars[i], newChars[i]);
|
555
|
+
break;
|
731
556
|
}
|
732
|
-
|
733
|
-
|
557
|
+
}
|
558
|
+
matcher.appendReplacement(sb, replacement);
|
559
|
+
result = matcher.find();
|
560
|
+
}
|
561
|
+
matcher.appendTail(sb);
|
562
|
+
return sb;
|
563
|
+
}
|
564
|
+
|
565
|
+
public static CharSequence
|
566
|
+
encodeJavaString(CharSequence str)
|
567
|
+
{
|
568
|
+
return convert(decoded_pattern, str, decoded, encoded);
|
569
|
+
}
|
570
|
+
|
571
|
+
public static CharSequence
|
572
|
+
decodeJavaString(CharSequence str)
|
573
|
+
{
|
574
|
+
return convert(encoded_pattern, str, encoded, decoded);
|
575
|
+
}
|
576
|
+
|
577
|
+
public static final String XMLNS_URI = "http://www.w3.org/2000/xmlns/";
|
578
|
+
public static boolean
|
579
|
+
isNamespace(Node node)
|
580
|
+
{
|
581
|
+
return (XMLNS_URI.equals(node.getNamespaceURI()) || isNamespace(node.getNodeName()));
|
582
|
+
}
|
583
|
+
|
584
|
+
public static boolean
|
585
|
+
isNamespace(String nodeName)
|
586
|
+
{
|
587
|
+
return (nodeName.startsWith("xmlns"));
|
588
|
+
}
|
589
|
+
|
590
|
+
public static boolean
|
591
|
+
isNonDefaultNamespace(Node node)
|
592
|
+
{
|
593
|
+
return (isNamespace(node) && ! "xmlns".equals(node.getNodeName()));
|
594
|
+
}
|
595
|
+
|
596
|
+
public static boolean
|
597
|
+
isXmlBase(String attrName)
|
598
|
+
{
|
599
|
+
return "xml:base".equals(attrName) || "xlink:href".equals(attrName);
|
600
|
+
}
|
601
|
+
|
602
|
+
public static boolean
|
603
|
+
isBlank(IRubyObject obj)
|
604
|
+
{
|
605
|
+
if (!(obj instanceof XmlText)) { return false; }
|
606
|
+
|
607
|
+
CharSequence content = ((XmlNode) obj).getContentImpl();
|
608
|
+
return content == null || isBlank(content);
|
609
|
+
}
|
610
|
+
|
611
|
+
public static boolean
|
612
|
+
isBlank(CharSequence str)
|
613
|
+
{
|
614
|
+
int len = str.length();
|
615
|
+
int beg = 0;
|
616
|
+
while ((beg < len) && (str.charAt(beg) <= ' ')) { beg++; }
|
617
|
+
return beg == len;
|
618
|
+
}
|
619
|
+
|
620
|
+
public static boolean
|
621
|
+
isBlank(String str)
|
622
|
+
{
|
623
|
+
return str.isEmpty() || isBlank((CharSequence) str);
|
624
|
+
}
|
625
|
+
|
626
|
+
public static boolean
|
627
|
+
isNullOrEmpty(String str)
|
628
|
+
{
|
629
|
+
return str == null || str.isEmpty();
|
630
|
+
}
|
631
|
+
|
632
|
+
public static CharSequence
|
633
|
+
canonicalizeWhitespace(CharSequence str)
|
634
|
+
{
|
635
|
+
final int len = str.length();
|
636
|
+
StringBuilder sb = new StringBuilder(len);
|
637
|
+
boolean newline_added = false;
|
638
|
+
for (int i = 0; i < len; i++) {
|
639
|
+
char c = str.charAt(i);
|
640
|
+
if (c == '\n') {
|
641
|
+
if (! newline_added) {
|
642
|
+
sb.append(c);
|
643
|
+
newline_added = true;
|
644
|
+
}
|
645
|
+
} else {
|
646
|
+
sb.append(c);
|
647
|
+
}
|
648
|
+
}
|
649
|
+
return sb;
|
650
|
+
}
|
651
|
+
|
652
|
+
public static String
|
653
|
+
newQName(String newPrefix, Node node)
|
654
|
+
{
|
655
|
+
String tagName = getLocalPart(node.getNodeName());
|
656
|
+
if (newPrefix == null) { return tagName; }
|
657
|
+
return newPrefix + ':' + tagName;
|
658
|
+
}
|
659
|
+
|
660
|
+
public static IRubyObject[]
|
661
|
+
nodeListToRubyArray(Ruby runtime, NodeList nodes)
|
662
|
+
{
|
663
|
+
IRubyObject[] array = new IRubyObject[nodes.getLength()];
|
664
|
+
for (int i = 0; i < nodes.getLength(); i++) {
|
665
|
+
array[i] = NokogiriHelpers.getCachedNodeOrCreate(runtime, nodes.item(i));
|
666
|
+
}
|
667
|
+
return array;
|
668
|
+
}
|
669
|
+
|
670
|
+
public static IRubyObject[]
|
671
|
+
nodeListToArray(Ruby ruby, List<Node> nodes)
|
672
|
+
{
|
673
|
+
IRubyObject[] result = new IRubyObject[nodes.size()];
|
674
|
+
for (int i = 0; i < result.length; i++) {
|
675
|
+
result[i] = NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes.get(i));
|
676
|
+
}
|
677
|
+
return result;
|
678
|
+
}
|
679
|
+
|
680
|
+
public static RubyArray
|
681
|
+
nodeArrayToRubyArray(Ruby ruby, Node[] nodes)
|
682
|
+
{
|
683
|
+
RubyArray n = RubyArray.newArray(ruby, nodes.length);
|
684
|
+
for (int i = 0; i < nodes.length; i++) {
|
685
|
+
n.append(NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes[i]));
|
686
|
+
}
|
687
|
+
return n;
|
688
|
+
}
|
689
|
+
|
690
|
+
public static String
|
691
|
+
getValidEncodingOrNull(IRubyObject encoding)
|
692
|
+
{
|
693
|
+
if (encoding.isNil()) { return null; } // charsetNames does not like contains(null)
|
694
|
+
String enc = rubyStringToString(encoding.convertToString());
|
695
|
+
if (CharsetNames.contains(enc)) { return enc; }
|
696
|
+
return null;
|
697
|
+
}
|
698
|
+
|
699
|
+
public static String
|
700
|
+
getValidEncoding(IRubyObject encoding)
|
701
|
+
{
|
702
|
+
String validEncoding = getValidEncodingOrNull(encoding);
|
703
|
+
if (validEncoding != null) { return validEncoding; }
|
704
|
+
return Charset.defaultCharset().name();
|
705
|
+
}
|
706
|
+
|
707
|
+
// Key set of Charset.availableCharsets(): the names of the charsets this JVM reports as available.
private static final Set<String> CharsetNames = Charset.availableCharsets().keySet();
|
708
|
+
|
709
|
+
public static String
|
710
|
+
adjustSystemIdIfNecessary(String currentDir, String scriptFileName, String baseURI, String systemId)
|
711
|
+
{
|
712
|
+
if (systemId == null) { return systemId; }
|
713
|
+
File file = new File(systemId);
|
714
|
+
if (file.isAbsolute()) { return systemId; }
|
715
|
+
String path = resolveSystemId(baseURI, systemId);
|
716
|
+
if (path != null) { return path; }
|
717
|
+
path = resolveSystemId(currentDir, systemId);
|
718
|
+
if (path != null) { return path; }
|
719
|
+
return resolveSystemId(scriptFileName, systemId);
|
720
|
+
}
|
721
|
+
|
722
|
+
private static String
|
723
|
+
resolveSystemId(String baseName, String systemId)
|
724
|
+
{
|
725
|
+
if (baseName == null || baseName.length() < 1) { return null; }
|
726
|
+
String parentName;
|
727
|
+
baseName = baseName.replace("%20", " ");
|
728
|
+
File base = new File(baseName);
|
729
|
+
if (base.isDirectory()) { parentName = baseName; }
|
730
|
+
else { parentName = base.getParent(); }
|
731
|
+
if (parentName == null) { return null; }
|
732
|
+
if (parentName.toLowerCase().startsWith("file:")) { parentName = parentName.substring("file:".length()); }
|
733
|
+
File dtdFile = new File(parentName + "/" + systemId);
|
734
|
+
if (dtdFile.exists()) { return dtdFile.getPath(); }
|
735
|
+
return null;
|
736
|
+
}
|
737
|
+
|
738
|
+
private static final Charset UTF8 = Charset.forName("UTF-8");
|
739
|
+
|
740
|
+
public static boolean
|
741
|
+
isUTF8(String encoding)
|
742
|
+
{
|
743
|
+
if (encoding == null) { return true; } // no need to convert encoding
|
744
|
+
|
745
|
+
if ("UTF-8".equals(encoding)) { return true; }
|
746
|
+
return UTF8.aliases().contains(encoding);
|
747
|
+
}
|
748
|
+
|
749
|
+
public static ByteBuffer
|
750
|
+
convertEncoding(Charset output_charset, CharSequence input_string)
|
751
|
+
{
|
752
|
+
return output_charset.encode(CharBuffer.wrap(input_string)); // does replace implicitly on un-mappable characters
|
753
|
+
}
|
754
|
+
|
755
|
+
public static CharSequence
|
756
|
+
convertEncodingByNKFIfNecessary(ThreadContext context, XmlDocument doc, CharSequence str)
|
757
|
+
{
|
758
|
+
if (!(doc instanceof HtmlDocument)) { return str; }
|
759
|
+
String parsed_encoding = ((HtmlDocument)doc).getPraedEncoding();
|
760
|
+
if (parsed_encoding == null) { return str; }
|
761
|
+
String ruby_encoding = rubyStringToString(doc.getEncoding());
|
762
|
+
if (ruby_encoding == null) { return str; }
|
763
|
+
Charset encoding = Charset.forName(ruby_encoding);
|
764
|
+
if (Charset.forName(parsed_encoding).compareTo(encoding) == 0) { return str; }
|
765
|
+
if (str.length() == 0) { return str; } // no need to convert
|
766
|
+
return NokogiriHelpers.nkf(context, encoding, str);
|
767
|
+
}
|
768
|
+
|
769
|
+
private static final ByteList _Sw = new ByteList(new byte[] { '-', 'S', 'w' }, false);
|
770
|
+
private static final ByteList _Jw = new ByteList(new byte[] { '-', 'J', 'w' }, false);
|
771
|
+
private static final ByteList _Ew = new ByteList(new byte[] { '-', 'E', 'w' }, false);
|
772
|
+
private static final ByteList _Ww = new ByteList(new byte[] { '-', 'W', 'w' }, false);
|
773
|
+
|
774
|
+
// This method is used from HTML documents. HTML meta tag with encoding specification
|
775
|
+
// might appear after non-ascii characters are used. For example, a title tag before
|
776
|
+
// a meta tag. In such a case, Xerces encodes characters in UTF-8 without seeing meta tag.
|
777
|
+
// Nokogiri uses NKF library to convert characters correct encoding. This means the method
|
778
|
+
// works only for JIS/Shift_JIS/EUC-JP.
|
779
|
+
private static CharSequence
|
780
|
+
nkf(ThreadContext context, Charset encoding, CharSequence str)
|
781
|
+
{
|
782
|
+
final Ruby runtime = context.getRuntime();
|
783
|
+
final ByteList opt;
|
784
|
+
if (NokogiriHelpers.Shift_JIS.compareTo(encoding) == 0) { opt = _Sw; }
|
785
|
+
else if (NokogiriHelpers.ISO_2022_JP.compareTo(encoding) == 0) { opt = _Jw; }
|
786
|
+
else if (NokogiriHelpers.EUC_JP.compareTo(encoding) == 0) { opt = _Ew; }
|
787
|
+
else { opt = _Ww; } // should not come here. should be treated before this method.
|
788
|
+
|
789
|
+
Class nkfClass;
|
790
|
+
try {
|
791
|
+
// JRuby 1.7 and later
|
792
|
+
nkfClass = runtime.getClassLoader().loadClass("org.jruby.ext.nkf.RubyNKF");
|
793
|
+
} catch (ClassNotFoundException e1) {
|
794
|
+
return str;
|
795
|
+
}
|
796
|
+
Method nkf_method;
|
797
|
+
try {
|
798
|
+
nkf_method = nkfClass.getMethod("nkf", ThreadContext.class, IRubyObject.class, IRubyObject.class, IRubyObject.class);
|
799
|
+
RubyString r_str =
|
800
|
+
(RubyString)nkf_method.invoke(null, context, null, runtime.newString(opt), runtime.newString(str.toString()));
|
801
|
+
return NokogiriHelpers.rubyStringToString(r_str);
|
802
|
+
} catch (SecurityException e) {
|
803
|
+
return str;
|
804
|
+
} catch (NoSuchMethodException e) {
|
805
|
+
return str;
|
806
|
+
} catch (IllegalArgumentException e) {
|
807
|
+
return str;
|
808
|
+
} catch (IllegalAccessException e) {
|
809
|
+
return str;
|
810
|
+
} catch (InvocationTargetException e) {
|
811
|
+
return str;
|
812
|
+
}
|
813
|
+
}
|
814
|
+
|
815
|
+
private static final Charset Shift_JIS = Charset.forName("Shift_JIS");
|
816
|
+
private static final Charset ISO_2022_JP = Charset.forName("ISO-2022-JP"); // JIS
|
817
|
+
private static final Charset EUC_JP = Charset.forName("EUC-JP");
|
818
|
+
|
819
|
+
public static boolean
|
820
|
+
shouldEncode(Node text)
|
821
|
+
{
|
822
|
+
final Boolean encoded = (Boolean) text.getUserData(NokogiriHelpers.ENCODED_STRING);
|
823
|
+
return encoded == null || ! encoded;
|
824
|
+
}
|
825
|
+
|
826
|
+
public static boolean
|
827
|
+
shouldDecode(Node text)
|
828
|
+
{
|
829
|
+
return !shouldEncode(text);
|
830
|
+
}
|
831
|
+
|
832
|
+
public static NokogiriNamespaceCache
|
833
|
+
getNamespaceCache(Node node)
|
834
|
+
{
|
835
|
+
XmlDocument xmlDoc = (XmlDocument) getCachedNode(node.getOwnerDocument());
|
836
|
+
return xmlDoc.getNamespaceCache();
|
837
|
+
}
|
838
|
+
|
839
|
+
public static Node
|
840
|
+
renameNode(Node node, String namespaceURI, String qualifiedName) throws DOMException
|
841
|
+
{
|
842
|
+
Document doc = node.getOwnerDocument();
|
843
|
+
NokogiriNamespaceCache nsCache = getNamespaceCache(node);
|
844
|
+
Node result = doc.renameNode(node, namespaceURI, qualifiedName);
|
845
|
+
if (result != node) {
|
846
|
+
nsCache.replaceNode(node, result);
|
847
|
+
}
|
848
|
+
return result;
|
849
|
+
}
|
734
850
|
}
|