nokogiri 1.11.0.rc3-java → 1.11.4-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/LICENSE-DEPENDENCIES.md +1015 -947
- data/LICENSE.md +1 -1
- data/README.md +168 -91
- data/dependencies.yml +12 -12
- data/ext/java/nokogiri/EncodingHandler.java +76 -89
- data/ext/java/nokogiri/HtmlDocument.java +135 -144
- data/ext/java/nokogiri/HtmlElementDescription.java +102 -117
- data/ext/java/nokogiri/HtmlEntityLookup.java +33 -60
- data/ext/java/nokogiri/HtmlSaxParserContext.java +218 -222
- data/ext/java/nokogiri/HtmlSaxPushParser.java +162 -169
- data/ext/java/nokogiri/NokogiriService.java +595 -556
- data/ext/java/nokogiri/XmlAttr.java +118 -126
- data/ext/java/nokogiri/XmlAttributeDecl.java +95 -106
- data/ext/java/nokogiri/XmlCdata.java +35 -58
- data/ext/java/nokogiri/XmlComment.java +46 -67
- data/ext/java/nokogiri/XmlDocument.java +645 -572
- data/ext/java/nokogiri/XmlDocumentFragment.java +125 -137
- data/ext/java/nokogiri/XmlDtd.java +448 -414
- data/ext/java/nokogiri/XmlElement.java +23 -48
- data/ext/java/nokogiri/XmlElementContent.java +343 -316
- data/ext/java/nokogiri/XmlElementDecl.java +124 -125
- data/ext/java/nokogiri/XmlEntityDecl.java +119 -127
- data/ext/java/nokogiri/XmlEntityReference.java +49 -72
- data/ext/java/nokogiri/XmlNamespace.java +175 -175
- data/ext/java/nokogiri/XmlNode.java +1843 -1622
- data/ext/java/nokogiri/XmlNodeSet.java +361 -331
- data/ext/java/nokogiri/XmlProcessingInstruction.java +47 -69
- data/ext/java/nokogiri/XmlReader.java +513 -450
- data/ext/java/nokogiri/XmlRelaxng.java +89 -101
- data/ext/java/nokogiri/XmlSaxParserContext.java +328 -310
- data/ext/java/nokogiri/XmlSaxPushParser.java +227 -220
- data/ext/java/nokogiri/XmlSchema.java +335 -242
- data/ext/java/nokogiri/XmlSyntaxError.java +113 -119
- data/ext/java/nokogiri/XmlText.java +55 -76
- data/ext/java/nokogiri/XmlXpathContext.java +242 -210
- data/ext/java/nokogiri/XsltStylesheet.java +280 -269
- data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +201 -190
- data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
- data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
- data/ext/java/nokogiri/internals/NokogiriDomParser.java +63 -80
- data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
- data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +27 -52
- data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -286
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +736 -652
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +184 -173
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +79 -89
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +64 -79
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +84 -99
- data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +48 -65
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +119 -78
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -54
- data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +23 -46
- data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +55 -72
- data/ext/java/nokogiri/internals/ParserContext.java +206 -211
- data/ext/java/nokogiri/internals/ReaderNode.java +478 -403
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -739
- data/ext/java/nokogiri/internals/SchemaErrorHandler.java +31 -54
- data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
- data/ext/java/nokogiri/internals/XmlDeclHandler.java +3 -34
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +206 -207
- data/ext/java/nokogiri/internals/XmlSaxParser.java +22 -47
- data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
- data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
- data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
- data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
- data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
- data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
- data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
- data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
- data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
- data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
- data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
- data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
- data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
- data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
- data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
- data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
- data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
- data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -574
- data/ext/nokogiri/depend +37 -358
- data/ext/nokogiri/extconf.rb +581 -374
- data/ext/nokogiri/html_document.c +78 -82
- data/ext/nokogiri/html_element_description.c +84 -71
- data/ext/nokogiri/html_entity_lookup.c +21 -16
- data/ext/nokogiri/html_sax_parser_context.c +69 -66
- data/ext/nokogiri/html_sax_push_parser.c +42 -34
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +192 -93
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +246 -188
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +30 -27
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +17 -11
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +56 -49
- data/ext/nokogiri/xml_node.c +371 -320
- data/ext/nokogiri/xml_node_set.c +168 -156
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +191 -157
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +118 -118
- data/ext/nokogiri/xml_sax_parser_context.c +103 -86
- data/ext/nokogiri/xml_sax_push_parser.c +36 -27
- data/ext/nokogiri/xml_schema.c +95 -47
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +206 -123
- data/ext/nokogiri/xslt_stylesheet.c +158 -161
- data/lib/nokogiri.rb +3 -7
- data/lib/nokogiri/css/parser.rb +3 -3
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/xpath_visitor.rb +70 -42
- data/lib/nokogiri/extension.rb +26 -0
- data/lib/nokogiri/html/document.rb +12 -26
- data/lib/nokogiri/html/document_fragment.rb +15 -15
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +2 -149
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +205 -0
- data/lib/nokogiri/xml/document.rb +91 -35
- data/lib/nokogiri/xml/document_fragment.rb +4 -6
- data/lib/nokogiri/xml/node.rb +89 -69
- data/lib/nokogiri/xml/parse_options.rb +6 -0
- data/lib/nokogiri/xml/reader.rb +2 -9
- data/lib/nokogiri/xml/relax_ng.rb +6 -2
- data/lib/nokogiri/xml/schema.rb +12 -4
- data/lib/nokogiri/xml/searchable.rb +3 -1
- data/lib/nokogiri/xml/xpath.rb +1 -3
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
- metadata +86 -177
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/nokogiri.h +0 -134
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -63
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -1,35 +1,3 @@
|
|
1
|
-
/**
|
2
|
-
* (The MIT License)
|
3
|
-
*
|
4
|
-
* Copyright (c) 2008 - 2014:
|
5
|
-
*
|
6
|
-
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
7
|
-
* * {Mike Dalessio}[http://mike.daless.io]
|
8
|
-
* * {Charles Nutter}[http://blog.headius.com]
|
9
|
-
* * {Sergio Arbeo}[http://www.serabe.com]
|
10
|
-
* * {Patrick Mahoney}[http://polycrystal.org]
|
11
|
-
* * {Yoko Harada}[http://yokolet.blogspot.com]
|
12
|
-
*
|
13
|
-
* Permission is hereby granted, free of charge, to any person obtaining
|
14
|
-
* a copy of this software and associated documentation files (the
|
15
|
-
* 'Software'), to deal in the Software without restriction, including
|
16
|
-
* without limitation the rights to use, copy, modify, merge, publish,
|
17
|
-
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
-
* permit persons to whom the Software is furnished to do so, subject to
|
19
|
-
* the following conditions:
|
20
|
-
*
|
21
|
-
* The above copyright notice and this permission notice shall be
|
22
|
-
* included in all copies or substantial portions of the Software.
|
23
|
-
*
|
24
|
-
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
|
-
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
-
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
27
|
-
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
28
|
-
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
29
|
-
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
30
|
-
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
-
*/
|
32
|
-
|
33
1
|
package nokogiri.internals;
|
34
2
|
|
35
3
|
import java.io.ByteArrayInputStream;
|
@@ -74,661 +42,777 @@ import nokogiri.XmlXpathContext;
|
|
74
42
|
|
75
43
|
/**
|
76
44
|
* A class for various utility methods.
|
77
|
-
*
|
45
|
+
*
|
78
46
|
* @author serabe
|
79
47
|
* @author Patrick Mahoney <pat@polycrystal.org>
|
80
48
|
* @author Yoko Harada <yokolet@gmail.com>
|
81
49
|
*/
|
82
|
-
public class NokogiriHelpers
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
}
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
return
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
50
|
+
public class NokogiriHelpers
|
51
|
+
{
|
52
|
+
public static final String CACHED_NODE = "NOKOGIRI_CACHED_NODE";
|
53
|
+
public static final String ROOT_NODE_INVALID = "NOKOGIRI_ROOT_NODE_INVALID";
|
54
|
+
public static final String ENCODED_STRING = "NOKOGIRI_ENCODED_STRING";
|
55
|
+
|
56
|
+
public static XmlNode
|
57
|
+
getCachedNode(Node node)
|
58
|
+
{
|
59
|
+
return (XmlNode) node.getUserData(CACHED_NODE);
|
60
|
+
}
|
61
|
+
|
62
|
+
public static void
|
63
|
+
clearCachedNode(Node node)
|
64
|
+
{
|
65
|
+
node.setUserData(CACHED_NODE, null, null);
|
66
|
+
}
|
67
|
+
|
68
|
+
public static void
|
69
|
+
clearXpathContext(Node node)
|
70
|
+
{
|
71
|
+
if (node == null) { return; }
|
72
|
+
|
73
|
+
Node ownerDocument = node.getOwnerDocument();
|
74
|
+
if (ownerDocument == null) {
|
75
|
+
ownerDocument = node;
|
76
|
+
}
|
77
|
+
ownerDocument.setUserData(XmlXpathContext.XPATH_CONTEXT, null, null);
|
78
|
+
}
|
79
|
+
|
80
|
+
/**
|
81
|
+
* Get the XmlNode associated with the underlying
|
82
|
+
* <code>node</code>. Creates a new XmlNode (or appropriate subclass)
|
83
|
+
* or XmlNamespace wrapping <code>node</code> if there is no cached
|
84
|
+
* value.
|
85
|
+
*/
|
86
|
+
public static IRubyObject
|
87
|
+
getCachedNodeOrCreate(Ruby runtime, Node node)
|
88
|
+
{
|
89
|
+
if (node == null) { return runtime.getNil(); }
|
90
|
+
if (node.getNodeType() == Node.ATTRIBUTE_NODE && isNamespace(node.getNodeName())) {
|
91
|
+
XmlDocument xmlDocument = (XmlDocument) node.getOwnerDocument().getUserData(CACHED_NODE);
|
92
|
+
if (!(xmlDocument instanceof HtmlDocument)) {
|
93
|
+
String prefix = getLocalNameForNamespace(((Attr) node).getName(), null);
|
94
|
+
String href = ((Attr) node).getValue();
|
95
|
+
XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefix, href);
|
96
|
+
if (xmlNamespace != null) { return xmlNamespace; }
|
97
|
+
return XmlNamespace.createFromAttr(runtime, (Attr) node);
|
98
|
+
}
|
99
|
+
}
|
100
|
+
XmlNode xmlNode = getCachedNode(node);
|
101
|
+
if (xmlNode == null) {
|
102
|
+
xmlNode = (XmlNode) constructNode(runtime, node);
|
103
|
+
node.setUserData(CACHED_NODE, xmlNode, null);
|
104
|
+
}
|
105
|
+
return xmlNode;
|
106
|
+
}
|
107
|
+
|
108
|
+
/**
|
109
|
+
* Construct a new XmlNode wrapping <code>node</code>. The proper
|
110
|
+
* subclass of XmlNode is chosen based on the type of
|
111
|
+
* <code>node</code>.
|
112
|
+
*/
|
113
|
+
public static IRubyObject
|
114
|
+
constructNode(Ruby runtime, Node node)
|
115
|
+
{
|
116
|
+
if (node == null) { return runtime.getNil(); }
|
117
|
+
// this is slow; need a way to cache nokogiri classes/modules somewhere
|
118
|
+
switch (node.getNodeType()) {
|
119
|
+
case Node.ELEMENT_NODE:
|
120
|
+
XmlElement xmlElement = (XmlElement) NokogiriService.XML_ELEMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
|
121
|
+
"Nokogiri::XML::Element"));
|
122
|
+
xmlElement.setNode(runtime, node);
|
123
|
+
return xmlElement;
|
124
|
+
case Node.ATTRIBUTE_NODE:
|
125
|
+
XmlAttr xmlAttr = (XmlAttr) NokogiriService.XML_ATTR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
|
126
|
+
"Nokogiri::XML::Attr"));
|
127
|
+
xmlAttr.setNode(runtime, node);
|
128
|
+
return xmlAttr;
|
129
|
+
case Node.TEXT_NODE:
|
130
|
+
XmlText xmlText = (XmlText) NokogiriService.XML_TEXT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
|
131
|
+
"Nokogiri::XML::Text"));
|
132
|
+
xmlText.setNode(runtime, node);
|
133
|
+
return xmlText;
|
134
|
+
case Node.COMMENT_NODE:
|
135
|
+
XmlComment xmlComment = (XmlComment) NokogiriService.XML_COMMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
|
136
|
+
"Nokogiri::XML::Comment"));
|
137
|
+
xmlComment.setNode(runtime, node);
|
138
|
+
return xmlComment;
|
139
|
+
case Node.ENTITY_NODE:
|
140
|
+
return new XmlNode(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityDecl"), node);
|
141
|
+
case Node.ENTITY_REFERENCE_NODE:
|
142
|
+
XmlEntityReference xmlEntityRef = (XmlEntityReference) NokogiriService.XML_ENTITY_REFERENCE_ALLOCATOR.allocate(runtime,
|
143
|
+
getNokogiriClass(runtime, "Nokogiri::XML::EntityReference"));
|
144
|
+
xmlEntityRef.setNode(runtime, node);
|
145
|
+
return xmlEntityRef;
|
146
|
+
case Node.PROCESSING_INSTRUCTION_NODE:
|
147
|
+
XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction)
|
148
|
+
NokogiriService.XML_PROCESSING_INSTRUCTION_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
|
149
|
+
"Nokogiri::XML::ProcessingInstruction"));
|
150
|
+
xmlProcessingInstruction.setNode(runtime, node);
|
151
|
+
return xmlProcessingInstruction;
|
152
|
+
case Node.CDATA_SECTION_NODE:
|
153
|
+
XmlCdata xmlCdata = (XmlCdata) NokogiriService.XML_CDATA_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
|
154
|
+
"Nokogiri::XML::CDATA"));
|
155
|
+
xmlCdata.setNode(runtime, node);
|
156
|
+
return xmlCdata;
|
157
|
+
case Node.DOCUMENT_NODE:
|
158
|
+
XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(runtime,
|
159
|
+
getNokogiriClass(runtime, "Nokogiri::XML::Document"));
|
160
|
+
xmlDocument.setDocumentNode(runtime, (Document) node);
|
161
|
+
return xmlDocument;
|
162
|
+
case Node.DOCUMENT_TYPE_NODE:
|
163
|
+
XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
|
164
|
+
"Nokogiri::XML::DTD"));
|
165
|
+
xmlDtd.setNode(runtime, node);
|
166
|
+
return xmlDtd;
|
167
|
+
default:
|
168
|
+
XmlNode xmlNode = (XmlNode) NokogiriService.XML_NODE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
|
169
|
+
"Nokogiri::XML::Node"));
|
170
|
+
xmlNode.setNode(runtime, node);
|
171
|
+
return xmlNode;
|
172
|
+
}
|
173
|
+
}
|
174
|
+
|
175
|
+
public static RubyClass
|
176
|
+
getNokogiriClass(Ruby ruby, String name)
|
177
|
+
{
|
178
|
+
return NokogiriService.getNokogiriClassCache(ruby).get(name);
|
179
|
+
}
|
180
|
+
|
181
|
+
public static IRubyObject
|
182
|
+
stringOrNil(Ruby runtime, String str)
|
183
|
+
{
|
184
|
+
return str == null ? runtime.getNil() : convertString(runtime, str);
|
185
|
+
}
|
186
|
+
|
187
|
+
public static IRubyObject
|
188
|
+
stringOrNil(Ruby runtime, CharSequence str)
|
189
|
+
{
|
190
|
+
return str == null ? runtime.getNil() : convertString(runtime, str);
|
191
|
+
}
|
192
|
+
|
193
|
+
public static IRubyObject
|
194
|
+
stringOrNil(Ruby runtime, byte[] bytes)
|
195
|
+
{
|
196
|
+
return bytes == null ? runtime.getNil() : RubyString.newString(runtime, bytes);
|
197
|
+
}
|
198
|
+
|
199
|
+
public static IRubyObject
|
200
|
+
stringOrBlank(Ruby runtime, String str)
|
201
|
+
{
|
202
|
+
return str == null ? runtime.newString() : convertString(runtime, str);
|
203
|
+
}
|
204
|
+
|
205
|
+
public static RubyString
|
206
|
+
convertString(Ruby runtime, String str)
|
207
|
+
{
|
208
|
+
return RubyString.newUTF8String(runtime, str);
|
209
|
+
}
|
210
|
+
|
211
|
+
public static RubyString
|
212
|
+
convertString(Ruby runtime, CharSequence str)
|
213
|
+
{
|
214
|
+
return RubyString.newUTF8String(runtime, str);
|
215
|
+
}
|
216
|
+
|
217
|
+
/**
|
218
|
+
* Convert <code>s</code> to a RubyString, or if s is null or
|
219
|
+
* empty return RubyNil.
|
220
|
+
*/
|
221
|
+
public static IRubyObject
|
222
|
+
nonEmptyStringOrNil(Ruby runtime, String s)
|
223
|
+
{
|
224
|
+
if (s == null || s.length() == 0) { return runtime.getNil(); }
|
225
|
+
return RubyString.newString(runtime, s);
|
226
|
+
}
|
227
|
+
|
228
|
+
/**
|
229
|
+
* Return the prefix of a qualified name like "prefix:local".
|
230
|
+
* Returns null if there is no prefix.
|
231
|
+
*/
|
232
|
+
public static String
|
233
|
+
getPrefix(String qName)
|
234
|
+
{
|
235
|
+
if (qName == null) { return null; }
|
236
|
+
|
237
|
+
final int pos = qName.indexOf(':');
|
238
|
+
return pos > 0 ? qName.substring(0, pos) : null;
|
239
|
+
}
|
240
|
+
|
241
|
+
/**
|
242
|
+
* Return the local part of a qualified name like "prefix:local".
|
243
|
+
* Returns <code>qName</code> if there is no prefix.
|
244
|
+
*/
|
245
|
+
public static String
|
246
|
+
getLocalPart(String qName)
|
247
|
+
{
|
248
|
+
if (qName == null) { return null; }
|
249
|
+
|
250
|
+
final int pos = qName.indexOf(':');
|
251
|
+
return pos > 0 ? qName.substring(pos + 1) : qName;
|
252
|
+
}
|
253
|
+
|
254
|
+
public static String
|
255
|
+
getLocalNameForNamespace(String name, String defValue)
|
256
|
+
{
|
257
|
+
String localName = getLocalPart(name);
|
258
|
+
return ("xmlns".equals(localName)) ? defValue : localName;
|
259
|
+
}
|
260
|
+
|
261
|
+
public static String
|
262
|
+
rubyStringToString(IRubyObject str)
|
263
|
+
{
|
264
|
+
if (str.isNil()) { return null; }
|
265
|
+
return str.convertToString().decodeString();
|
266
|
+
}
|
267
|
+
|
268
|
+
public static String
|
269
|
+
rubyStringToString(RubyString str)
|
270
|
+
{
|
271
|
+
return str.decodeString(); // if encoding UTF-8 will decode UTF-8
|
272
|
+
}
|
273
|
+
|
274
|
+
public static ByteArrayInputStream
|
275
|
+
stringBytesToStream(final IRubyObject str)
|
276
|
+
{
|
277
|
+
if (str instanceof RubyString || str.respondsTo("to_str")) {
|
278
|
+
final ByteList bytes = str.convertToString().getByteList();
|
279
|
+
return new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length());
|
280
|
+
}
|
281
|
+
return null;
|
282
|
+
}
|
283
|
+
|
284
|
+
public static String
|
285
|
+
getNodeCompletePath(Node node)
|
286
|
+
{
|
287
|
+
|
288
|
+
Node cur, tmp, next;
|
289
|
+
|
290
|
+
String buffer = "";
|
291
|
+
|
292
|
+
cur = node;
|
293
|
+
|
294
|
+
do {
|
295
|
+
String name = "";
|
296
|
+
String sep = "?";
|
297
|
+
int occur = 0;
|
298
|
+
boolean generic = false;
|
299
|
+
|
300
|
+
if (cur.getNodeType() == Node.DOCUMENT_NODE) {
|
301
|
+
if (buffer.startsWith("/")) { break; }
|
302
|
+
|
303
|
+
sep = "/";
|
304
|
+
next = null;
|
305
|
+
} else if (cur.getNodeType() == Node.ELEMENT_NODE) {
|
306
|
+
generic = false;
|
307
|
+
sep = "/";
|
308
|
+
|
309
|
+
name = cur.getLocalName();
|
310
|
+
if (name == null) { name = cur.getNodeName(); }
|
311
|
+
if (cur.getNamespaceURI() != null) {
|
312
|
+
if (cur.getPrefix() != null) {
|
313
|
+
name = cur.getPrefix() + ":" + name;
|
314
|
+
} else {
|
315
|
+
generic = true;
|
316
|
+
name = "*";
|
317
|
+
}
|
182
318
|
}
|
183
|
-
}
|
184
|
-
|
185
|
-
public static RubyClass getNokogiriClass(Ruby ruby, String name) {
|
186
|
-
return NokogiriService.getNokogiriClassCache(ruby).get(name);
|
187
|
-
}
|
188
319
|
|
189
|
-
|
190
|
-
return str == null ? runtime.getNil() : convertString(runtime, str);
|
191
|
-
}
|
320
|
+
next = cur.getParentNode();
|
192
321
|
|
193
|
-
|
194
|
-
|
195
|
-
|
322
|
+
/*
|
323
|
+
* Thumbler index computation
|
324
|
+
*/
|
196
325
|
|
197
|
-
|
198
|
-
return bytes == null ? runtime.getNil() : RubyString.newString(runtime, bytes);
|
199
|
-
}
|
326
|
+
tmp = cur.getPreviousSibling();
|
200
327
|
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
}
|
208
|
-
|
209
|
-
public static RubyString convertString(Ruby runtime, CharSequence str) {
|
210
|
-
return RubyString.newUTF8String(runtime, str);
|
211
|
-
}
|
212
|
-
|
213
|
-
/**
|
214
|
-
* Convert <code>s</code> to a RubyString, or if s is null or
|
215
|
-
* empty return RubyNil.
|
216
|
-
*/
|
217
|
-
public static IRubyObject nonEmptyStringOrNil(Ruby runtime, String s) {
|
218
|
-
if (s == null || s.length() == 0) return runtime.getNil();
|
219
|
-
return RubyString.newString(runtime, s);
|
220
|
-
}
|
221
|
-
|
222
|
-
/**
|
223
|
-
* Return the prefix of a qualified name like "prefix:local".
|
224
|
-
* Returns null if there is no prefix.
|
225
|
-
*/
|
226
|
-
public static String getPrefix(String qName) {
|
227
|
-
if (qName == null) return null;
|
228
|
-
|
229
|
-
final int pos = qName.indexOf(':');
|
230
|
-
return pos > 0 ? qName.substring(0, pos) : null;
|
231
|
-
}
|
232
|
-
|
233
|
-
/**
|
234
|
-
* Return the local part of a qualified name like "prefix:local".
|
235
|
-
* Returns <code>qName</code> if there is no prefix.
|
236
|
-
*/
|
237
|
-
public static String getLocalPart(String qName) {
|
238
|
-
if (qName == null) return null;
|
239
|
-
|
240
|
-
final int pos = qName.indexOf(':');
|
241
|
-
return pos > 0 ? qName.substring(pos + 1) : qName;
|
242
|
-
}
|
243
|
-
|
244
|
-
public static String getLocalNameForNamespace(String name, String defValue) {
|
245
|
-
String localName = getLocalPart(name);
|
246
|
-
return ("xmlns".equals(localName)) ? defValue : localName;
|
247
|
-
}
|
248
|
-
|
249
|
-
public static String rubyStringToString(IRubyObject str) {
|
250
|
-
if (str.isNil()) return null;
|
251
|
-
return str.convertToString().decodeString();
|
252
|
-
}
|
253
|
-
|
254
|
-
public static String rubyStringToString(RubyString str) {
|
255
|
-
return str.decodeString(); // if encoding UTF-8 will decode UTF-8
|
256
|
-
}
|
257
|
-
|
258
|
-
public static ByteArrayInputStream stringBytesToStream(final IRubyObject str) {
|
259
|
-
if (str instanceof RubyString || str.respondsTo("to_str")) {
|
260
|
-
final ByteList bytes = str.convertToString().getByteList();
|
261
|
-
return new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length());
|
328
|
+
while (tmp != null) {
|
329
|
+
if ((tmp.getNodeType() == Node.ELEMENT_NODE) &&
|
330
|
+
(generic || fullNamesMatch(tmp, cur))) {
|
331
|
+
occur++;
|
332
|
+
}
|
333
|
+
tmp = tmp.getPreviousSibling();
|
262
334
|
}
|
263
|
-
return null;
|
264
|
-
}
|
265
335
|
|
266
|
-
|
267
|
-
|
268
|
-
Node cur, tmp, next;
|
269
|
-
|
270
|
-
String buffer = "";
|
271
|
-
|
272
|
-
cur = node;
|
273
|
-
|
274
|
-
do {
|
275
|
-
String name = "";
|
276
|
-
String sep = "?";
|
277
|
-
int occur = 0;
|
278
|
-
boolean generic = false;
|
279
|
-
|
280
|
-
if(cur.getNodeType() == Node.DOCUMENT_NODE) {
|
281
|
-
if(buffer.startsWith("/")) break;
|
282
|
-
|
283
|
-
sep = "/";
|
284
|
-
next = null;
|
285
|
-
} else if(cur.getNodeType() == Node.ELEMENT_NODE) {
|
286
|
-
generic = false;
|
287
|
-
sep = "/";
|
288
|
-
|
289
|
-
name = cur.getLocalName();
|
290
|
-
if (name == null) name = cur.getNodeName();
|
291
|
-
if(cur.getNamespaceURI() != null) {
|
292
|
-
if(cur.getPrefix() != null) {
|
293
|
-
name = cur.getPrefix() + ":" + name;
|
294
|
-
} else {
|
295
|
-
generic = true;
|
296
|
-
name = "*";
|
297
|
-
}
|
298
|
-
}
|
299
|
-
|
300
|
-
next = cur.getParentNode();
|
301
|
-
|
302
|
-
/*
|
303
|
-
* Thumbler index computation
|
304
|
-
*/
|
305
|
-
|
306
|
-
tmp = cur.getPreviousSibling();
|
307
|
-
|
308
|
-
while(tmp != null) {
|
309
|
-
if((tmp.getNodeType() == Node.ELEMENT_NODE) &&
|
310
|
-
(generic || fullNamesMatch(tmp, cur))) {
|
311
|
-
occur++;
|
312
|
-
}
|
313
|
-
tmp = tmp.getPreviousSibling();
|
314
|
-
}
|
315
|
-
|
316
|
-
if(occur == 0) {
|
317
|
-
tmp = cur.getNextSibling();
|
318
|
-
|
319
|
-
while(tmp != null && occur == 0) {
|
320
|
-
if((tmp.getNodeType() == Node.ELEMENT_NODE) &&
|
321
|
-
(generic || fullNamesMatch(tmp,cur))) {
|
322
|
-
occur++;
|
323
|
-
}
|
324
|
-
tmp = tmp.getNextSibling();
|
325
|
-
}
|
326
|
-
|
327
|
-
if(occur != 0) occur = 1;
|
328
|
-
|
329
|
-
} else {
|
330
|
-
occur++;
|
331
|
-
}
|
332
|
-
} else if(cur.getNodeType() == Node.COMMENT_NODE) {
|
333
|
-
sep = "/";
|
334
|
-
name = "comment()";
|
335
|
-
next = cur.getParentNode();
|
336
|
-
|
337
|
-
/*
|
338
|
-
* Thumbler index computation.
|
339
|
-
*/
|
340
|
-
|
341
|
-
tmp = cur.getPreviousSibling();
|
342
|
-
|
343
|
-
while(tmp != null) {
|
344
|
-
if(tmp.getNodeType() == Node.COMMENT_NODE) {
|
345
|
-
occur++;
|
346
|
-
}
|
347
|
-
tmp = tmp.getPreviousSibling();
|
348
|
-
}
|
349
|
-
|
350
|
-
if(occur == 0) {
|
351
|
-
tmp = cur.getNextSibling();
|
352
|
-
while(tmp != null && occur == 0) {
|
353
|
-
if(tmp.getNodeType() == Node.COMMENT_NODE) {
|
354
|
-
occur++;
|
355
|
-
}
|
356
|
-
tmp = tmp.getNextSibling();
|
357
|
-
}
|
358
|
-
if(occur != 0) occur = 1;
|
359
|
-
} else {
|
360
|
-
occur = 1;
|
361
|
-
}
|
362
|
-
|
363
|
-
} else if(cur.getNodeType() == Node.TEXT_NODE ||
|
364
|
-
cur.getNodeType() == Node.CDATA_SECTION_NODE) {
|
365
|
-
// I'm here. gist:129
|
366
|
-
// http://gist.github.com/144923
|
367
|
-
|
368
|
-
sep = "/";
|
369
|
-
name = "text()";
|
370
|
-
next = cur.getParentNode();
|
371
|
-
|
372
|
-
/*
|
373
|
-
* Thumbler index computation.
|
374
|
-
*/
|
375
|
-
|
376
|
-
tmp = cur.getPreviousSibling();
|
377
|
-
while(tmp != null) {
|
378
|
-
if(tmp.getNodeType() == Node.TEXT_NODE ||
|
379
|
-
tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
|
380
|
-
occur++;
|
381
|
-
}
|
382
|
-
tmp = tmp.getPreviousSibling();
|
383
|
-
}
|
384
|
-
|
385
|
-
if(occur == 0) {
|
386
|
-
tmp = cur.getNextSibling();
|
387
|
-
|
388
|
-
while(tmp != null && occur == 0) {
|
389
|
-
if(tmp.getNodeType() == Node.TEXT_NODE ||
|
390
|
-
tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
|
391
|
-
occur++;
|
392
|
-
}
|
393
|
-
tmp = tmp.getNextSibling();
|
394
|
-
}
|
395
|
-
} else {
|
396
|
-
occur++;
|
397
|
-
}
|
398
|
-
|
399
|
-
} else if(cur.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
|
400
|
-
sep = "/";
|
401
|
-
name = "processing-instruction('"+cur.getLocalName()+"')";
|
402
|
-
next = cur.getParentNode();
|
403
|
-
|
404
|
-
/*
|
405
|
-
* Thumbler index computation.
|
406
|
-
*/
|
407
|
-
|
408
|
-
tmp = cur.getParentNode();
|
409
|
-
|
410
|
-
while(tmp != null) {
|
411
|
-
if(tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
|
412
|
-
tmp.getLocalName().equals(cur.getLocalName())) {
|
413
|
-
occur++;
|
414
|
-
}
|
415
|
-
tmp = tmp.getPreviousSibling();
|
416
|
-
}
|
417
|
-
|
418
|
-
if(occur == 0) {
|
419
|
-
tmp = cur.getNextSibling();
|
420
|
-
|
421
|
-
while(tmp != null && occur == 0) {
|
422
|
-
if(tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
|
423
|
-
tmp.getLocalName().equals(cur.getLocalName())){
|
424
|
-
occur++;
|
425
|
-
}
|
426
|
-
tmp = tmp.getNextSibling();
|
427
|
-
}
|
428
|
-
|
429
|
-
if(occur != 0) {
|
430
|
-
occur = 1;
|
431
|
-
}
|
432
|
-
|
433
|
-
} else {
|
434
|
-
occur++;
|
435
|
-
}
|
436
|
-
|
437
|
-
} else if(cur.getNodeType() == Node.ATTRIBUTE_NODE) {
|
438
|
-
sep = "/@";
|
439
|
-
name = cur.getLocalName();
|
440
|
-
|
441
|
-
if(cur.getNamespaceURI() != null) {
|
442
|
-
if(cur.getPrefix() != null) {
|
443
|
-
name = cur.getPrefix() + ":" + name;
|
444
|
-
}
|
445
|
-
}
|
446
|
-
|
447
|
-
next = ((Attr) cur).getOwnerElement();
|
448
|
-
|
449
|
-
} else {
|
450
|
-
next = cur.getParentNode();
|
451
|
-
}
|
336
|
+
if (occur == 0) {
|
337
|
+
tmp = cur.getNextSibling();
|
452
338
|
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
339
|
+
while (tmp != null && occur == 0) {
|
340
|
+
if ((tmp.getNodeType() == Node.ELEMENT_NODE) &&
|
341
|
+
(generic || fullNamesMatch(tmp, cur))) {
|
342
|
+
occur++;
|
457
343
|
}
|
344
|
+
tmp = tmp.getNextSibling();
|
345
|
+
}
|
458
346
|
|
459
|
-
|
460
|
-
|
461
|
-
} while(cur != null);
|
347
|
+
if (occur != 0) { occur = 1; }
|
462
348
|
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
private static final Pattern decoded_pattern = Pattern.compile("&|>|<|\r");
|
483
|
-
private static final String[] decoded = {"&", ">", "<", "\r"};
|
484
|
-
|
485
|
-
private static StringBuffer convert(Pattern ptn, CharSequence input, String[] oldChars, String[] newChars) {
|
486
|
-
Matcher matcher = ptn.matcher(input);
|
487
|
-
boolean result = matcher.find();
|
488
|
-
StringBuffer sb = new StringBuffer(input.length() + 8);
|
489
|
-
while (result) {
|
490
|
-
String matched = matcher.group();
|
491
|
-
String replacement = "";
|
492
|
-
for (int i=0; i<oldChars.length; i++) {
|
493
|
-
if (matched.contains(oldChars[i])) {
|
494
|
-
replacement = matched.replace(oldChars[i], newChars[i]);
|
495
|
-
break;
|
496
|
-
}
|
497
|
-
}
|
498
|
-
matcher.appendReplacement(sb, replacement);
|
499
|
-
result = matcher.find();
|
349
|
+
} else {
|
350
|
+
occur++;
|
351
|
+
}
|
352
|
+
} else if (cur.getNodeType() == Node.COMMENT_NODE) {
|
353
|
+
sep = "/";
|
354
|
+
name = "comment()";
|
355
|
+
next = cur.getParentNode();
|
356
|
+
|
357
|
+
/*
|
358
|
+
* Thumbler index computation.
|
359
|
+
*/
|
360
|
+
|
361
|
+
tmp = cur.getPreviousSibling();
|
362
|
+
|
363
|
+
while (tmp != null) {
|
364
|
+
if (tmp.getNodeType() == Node.COMMENT_NODE) {
|
365
|
+
occur++;
|
366
|
+
}
|
367
|
+
tmp = tmp.getPreviousSibling();
|
500
368
|
}
|
501
|
-
matcher.appendTail(sb);
|
502
|
-
return sb;
|
503
|
-
}
|
504
|
-
|
505
|
-
public static CharSequence encodeJavaString(CharSequence str) {
|
506
|
-
return convert(decoded_pattern, str, decoded, encoded);
|
507
|
-
}
|
508
|
-
|
509
|
-
public static CharSequence decodeJavaString(CharSequence str) {
|
510
|
-
return convert(encoded_pattern, str, encoded, decoded);
|
511
|
-
}
|
512
|
-
|
513
|
-
public static final String XMLNS_URI = "http://www.w3.org/2000/xmlns/";
|
514
|
-
public static boolean isNamespace(Node node) {
|
515
|
-
return (XMLNS_URI.equals(node.getNamespaceURI()) || isNamespace(node.getNodeName()));
|
516
|
-
}
|
517
|
-
|
518
|
-
public static boolean isNamespace(String nodeName) {
|
519
|
-
return (nodeName.startsWith("xmlns"));
|
520
|
-
}
|
521
|
-
|
522
|
-
public static boolean isNonDefaultNamespace(Node node) {
|
523
|
-
return (isNamespace(node) && ! "xmlns".equals(node.getNodeName()));
|
524
|
-
}
|
525
|
-
|
526
|
-
public static boolean isXmlBase(String attrName) {
|
527
|
-
return "xml:base".equals(attrName) || "xlink:href".equals(attrName);
|
528
|
-
}
|
529
|
-
|
530
|
-
public static boolean isBlank(IRubyObject obj) {
|
531
|
-
if ( !(obj instanceof XmlText) ) return false;
|
532
|
-
|
533
|
-
CharSequence content = ((XmlNode) obj).getContentImpl();
|
534
|
-
return content == null || isBlank(content);
|
535
|
-
}
|
536
|
-
|
537
|
-
public static boolean isBlank(CharSequence str) {
|
538
|
-
int len = str.length(); int beg = 0;
|
539
|
-
while ((beg < len) && (str.charAt(beg) <= ' ')) beg++;
|
540
|
-
return beg == len;
|
541
|
-
}
|
542
|
-
|
543
|
-
public static boolean isBlank(String str) {
|
544
|
-
return str.isEmpty() || isBlank((CharSequence) str);
|
545
|
-
}
|
546
|
-
|
547
|
-
public static boolean isNullOrEmpty(String str) {
|
548
|
-
return str == null || str.isEmpty();
|
549
|
-
}
|
550
369
|
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
char c = str.charAt(i);
|
557
|
-
if ( c == '\n' ) {
|
558
|
-
if ( ! newline_added ) {
|
559
|
-
sb.append(c); newline_added = true;
|
560
|
-
}
|
561
|
-
} else {
|
562
|
-
sb.append(c);
|
370
|
+
if (occur == 0) {
|
371
|
+
tmp = cur.getNextSibling();
|
372
|
+
while (tmp != null && occur == 0) {
|
373
|
+
if (tmp.getNodeType() == Node.COMMENT_NODE) {
|
374
|
+
occur++;
|
563
375
|
}
|
376
|
+
tmp = tmp.getNextSibling();
|
377
|
+
}
|
378
|
+
if (occur != 0) { occur = 1; }
|
379
|
+
} else {
|
380
|
+
occur = 1;
|
564
381
|
}
|
565
|
-
return sb;
|
566
|
-
}
|
567
|
-
|
568
|
-
public static String newQName(String newPrefix, Node node) {
|
569
|
-
String tagName = getLocalPart(node.getNodeName());
|
570
|
-
if (newPrefix == null) return tagName;
|
571
|
-
return newPrefix + ':' + tagName;
|
572
|
-
}
|
573
382
|
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
383
|
+
} else if (cur.getNodeType() == Node.TEXT_NODE ||
|
384
|
+
cur.getNodeType() == Node.CDATA_SECTION_NODE) {
|
385
|
+
// I'm here. gist:129
|
386
|
+
// http://gist.github.com/144923
|
387
|
+
|
388
|
+
sep = "/";
|
389
|
+
name = "text()";
|
390
|
+
next = cur.getParentNode();
|
391
|
+
|
392
|
+
/*
|
393
|
+
* Thumbler index computation.
|
394
|
+
*/
|
395
|
+
|
396
|
+
tmp = cur.getPreviousSibling();
|
397
|
+
while (tmp != null) {
|
398
|
+
if (tmp.getNodeType() == Node.TEXT_NODE ||
|
399
|
+
tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
|
400
|
+
occur++;
|
401
|
+
}
|
402
|
+
tmp = tmp.getPreviousSibling();
|
578
403
|
}
|
579
|
-
return array;
|
580
|
-
}
|
581
404
|
|
582
|
-
|
583
|
-
|
584
|
-
for (int i = 0; i < result.length; i++) {
|
585
|
-
result[i] = NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes.get(i));
|
586
|
-
}
|
587
|
-
return result;
|
588
|
-
}
|
405
|
+
if (occur == 0) {
|
406
|
+
tmp = cur.getNextSibling();
|
589
407
|
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
408
|
+
while (tmp != null && occur == 0) {
|
409
|
+
if (tmp.getNodeType() == Node.TEXT_NODE ||
|
410
|
+
tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
|
411
|
+
occur++;
|
412
|
+
}
|
413
|
+
tmp = tmp.getNextSibling();
|
414
|
+
}
|
415
|
+
} else {
|
416
|
+
occur++;
|
594
417
|
}
|
595
|
-
return n;
|
596
|
-
}
|
597
|
-
|
598
|
-
public static String getValidEncodingOrNull(IRubyObject encoding) {
|
599
|
-
if (encoding.isNil()) return null; // charsetNames does not like contains(null)
|
600
|
-
String enc = rubyStringToString(encoding.convertToString());
|
601
|
-
if (CharsetNames.contains(enc)) return enc;
|
602
|
-
return null;
|
603
|
-
}
|
604
418
|
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
}
|
419
|
+
} else if (cur.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
|
420
|
+
sep = "/";
|
421
|
+
name = "processing-instruction('" + cur.getLocalName() + "')";
|
422
|
+
next = cur.getParentNode();
|
610
423
|
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
if (systemId == null) return systemId;
|
615
|
-
File file = new File(systemId);
|
616
|
-
if (file.isAbsolute()) return systemId;
|
617
|
-
String path = resolveSystemId(baseURI, systemId);
|
618
|
-
if (path != null) return path;
|
619
|
-
path = resolveSystemId(currentDir, systemId);
|
620
|
-
if (path != null) return path;
|
621
|
-
return resolveSystemId(scriptFileName, systemId);
|
622
|
-
}
|
424
|
+
/*
|
425
|
+
* Thumbler index computation.
|
426
|
+
*/
|
623
427
|
|
624
|
-
|
625
|
-
if (baseName == null || baseName.length() < 1) return null;
|
626
|
-
String parentName;
|
627
|
-
baseName = baseName.replace("%20", " ");
|
628
|
-
File base = new File(baseName);
|
629
|
-
if (base.isDirectory()) parentName = baseName;
|
630
|
-
else parentName = base.getParent();
|
631
|
-
if (parentName == null) return null;
|
632
|
-
if (parentName.toLowerCase().startsWith("file:")) parentName = parentName.substring("file:".length());
|
633
|
-
File dtdFile = new File(parentName + "/" + systemId);
|
634
|
-
if (dtdFile.exists()) return dtdFile.getPath();
|
635
|
-
return null;
|
636
|
-
}
|
428
|
+
tmp = cur.getParentNode();
|
637
429
|
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
430
|
+
while (tmp != null) {
|
431
|
+
if (tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
|
432
|
+
tmp.getLocalName().equals(cur.getLocalName())) {
|
433
|
+
occur++;
|
434
|
+
}
|
435
|
+
tmp = tmp.getPreviousSibling();
|
436
|
+
}
|
642
437
|
|
643
|
-
if (
|
644
|
-
|
645
|
-
}
|
438
|
+
if (occur == 0) {
|
439
|
+
tmp = cur.getNextSibling();
|
646
440
|
|
647
|
-
|
648
|
-
|
649
|
-
|
441
|
+
while (tmp != null && occur == 0) {
|
442
|
+
if (tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
|
443
|
+
tmp.getLocalName().equals(cur.getLocalName())) {
|
444
|
+
occur++;
|
445
|
+
}
|
446
|
+
tmp = tmp.getNextSibling();
|
447
|
+
}
|
650
448
|
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
if (parsed_encoding == null) return str;
|
655
|
-
String ruby_encoding = rubyStringToString(doc.getEncoding());
|
656
|
-
if (ruby_encoding == null) return str;
|
657
|
-
Charset encoding = Charset.forName(ruby_encoding);
|
658
|
-
if (Charset.forName(parsed_encoding).compareTo(encoding) == 0) return str;
|
659
|
-
if (str.length() == 0) return str; // no need to convert
|
660
|
-
return NokogiriHelpers.nkf(context, encoding, str);
|
661
|
-
}
|
449
|
+
if (occur != 0) {
|
450
|
+
occur = 1;
|
451
|
+
}
|
662
452
|
|
663
|
-
|
664
|
-
|
665
|
-
private static final ByteList _Ew = new ByteList(new byte[] { '-','E','w' }, false);
|
666
|
-
private static final ByteList _Ww = new ByteList(new byte[] { '-','W','w' }, false);
|
667
|
-
|
668
|
-
// This method is used from HTML documents. HTML meta tag with encoding specification
|
669
|
-
// might appear after non-ascii characters are used. For example, a title tag before
|
670
|
-
// a meta tag. In such a case, Xerces encodes characters in UTF-8 without seeing meta tag.
|
671
|
-
// Nokogiri uses NKF library to convert characters correct encoding. This means the method
|
672
|
-
// works only for JIS/Shift_JIS/EUC-JP.
|
673
|
-
private static CharSequence nkf(ThreadContext context, Charset encoding, CharSequence str) {
|
674
|
-
final Ruby runtime = context.getRuntime();
|
675
|
-
final ByteList opt;
|
676
|
-
if (NokogiriHelpers.Shift_JIS.compareTo(encoding) == 0) opt = _Sw;
|
677
|
-
else if (NokogiriHelpers.ISO_2022_JP.compareTo(encoding) == 0) opt = _Jw;
|
678
|
-
else if (NokogiriHelpers.EUC_JP.compareTo(encoding) == 0) opt = _Ew;
|
679
|
-
else opt = _Ww; // should not come here. should be treated before this method.
|
680
|
-
|
681
|
-
Class nkfClass;
|
682
|
-
try {
|
683
|
-
// JRuby 1.7 and later
|
684
|
-
nkfClass = runtime.getClassLoader().loadClass("org.jruby.ext.nkf.RubyNKF");
|
685
|
-
} catch (ClassNotFoundException e1) {
|
686
|
-
return str;
|
687
|
-
}
|
688
|
-
Method nkf_method;
|
689
|
-
try {
|
690
|
-
nkf_method = nkfClass.getMethod("nkf", ThreadContext.class, IRubyObject.class, IRubyObject.class, IRubyObject.class);
|
691
|
-
RubyString r_str =
|
692
|
-
(RubyString)nkf_method.invoke(null, context, null, runtime.newString(opt), runtime.newString(str.toString()));
|
693
|
-
return NokogiriHelpers.rubyStringToString(r_str);
|
694
|
-
} catch (SecurityException e) {
|
695
|
-
return str;
|
696
|
-
} catch (NoSuchMethodException e) {
|
697
|
-
return str;
|
698
|
-
} catch (IllegalArgumentException e) {
|
699
|
-
return str;
|
700
|
-
} catch (IllegalAccessException e) {
|
701
|
-
return str;
|
702
|
-
} catch (InvocationTargetException e) {
|
703
|
-
return str;
|
453
|
+
} else {
|
454
|
+
occur++;
|
704
455
|
}
|
705
|
-
}
|
706
|
-
|
707
|
-
private static final Charset Shift_JIS = Charset.forName("Shift_JIS");
|
708
|
-
private static final Charset ISO_2022_JP = Charset.forName("ISO-2022-JP"); // JIS
|
709
|
-
private static final Charset EUC_JP = Charset.forName("EUC-JP");
|
710
456
|
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
}
|
457
|
+
} else if (cur.getNodeType() == Node.ATTRIBUTE_NODE) {
|
458
|
+
sep = "/@";
|
459
|
+
name = cur.getLocalName();
|
715
460
|
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
XmlDocument xmlDoc = (XmlDocument) getCachedNode(node.getOwnerDocument());
|
722
|
-
return xmlDoc.getNamespaceCache();
|
723
|
-
}
|
461
|
+
if (cur.getNamespaceURI() != null) {
|
462
|
+
if (cur.getPrefix() != null) {
|
463
|
+
name = cur.getPrefix() + ":" + name;
|
464
|
+
}
|
465
|
+
}
|
724
466
|
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
467
|
+
next = ((Attr) cur).getOwnerElement();
|
468
|
+
|
469
|
+
} else {
|
470
|
+
next = cur.getParentNode();
|
471
|
+
}
|
472
|
+
|
473
|
+
if (occur == 0) {
|
474
|
+
buffer = sep + name + buffer;
|
475
|
+
} else {
|
476
|
+
buffer = sep + name + "[" + occur + "]" + buffer;
|
477
|
+
}
|
478
|
+
|
479
|
+
cur = next;
|
480
|
+
|
481
|
+
} while (cur != null);
|
482
|
+
|
483
|
+
return buffer;
|
484
|
+
}
|
485
|
+
|
486
|
+
static boolean
|
487
|
+
compareTwoNodes(Node m, Node n)
|
488
|
+
{
|
489
|
+
return nodesAreEqual(m.getLocalName(), n.getLocalName()) &&
|
490
|
+
nodesAreEqual(m.getPrefix(), n.getPrefix());
|
491
|
+
}
|
492
|
+
|
493
|
+
private static boolean
|
494
|
+
nodesAreEqual(Object a, Object b)
|
495
|
+
{
|
496
|
+
return (((a == null) && (b == null)) ||
|
497
|
+
((a != null) && (b != null) && (b.equals(a))));
|
498
|
+
}
|
499
|
+
|
500
|
+
private static boolean
|
501
|
+
fullNamesMatch(Node a, Node b)
|
502
|
+
{
|
503
|
+
return a.getNodeName().equals(b.getNodeName());
|
504
|
+
}
|
505
|
+
|
506
|
+
private static final Pattern encoded_pattern = Pattern.compile("&amp;|&gt;|&lt;|&#13;");
|
507
|
+
private static final String[] encoded = {"&amp;", "&gt;", "&lt;", "&#13;"};
|
508
|
+
private static final Pattern decoded_pattern = Pattern.compile("&|>|<|\r");
|
509
|
+
private static final String[] decoded = {"&", ">", "<", "\r"};
|
510
|
+
|
511
|
+
private static StringBuffer
|
512
|
+
convert(Pattern ptn, CharSequence input, String[] oldChars, String[] newChars)
|
513
|
+
{
|
514
|
+
Matcher matcher = ptn.matcher(input);
|
515
|
+
boolean result = matcher.find();
|
516
|
+
StringBuffer sb = new StringBuffer(input.length() + 8);
|
517
|
+
while (result) {
|
518
|
+
String matched = matcher.group();
|
519
|
+
String replacement = "";
|
520
|
+
for (int i = 0; i < oldChars.length; i++) {
|
521
|
+
if (matched.contains(oldChars[i])) {
|
522
|
+
replacement = matched.replace(oldChars[i], newChars[i]);
|
523
|
+
break;
|
731
524
|
}
|
732
|
-
|
733
|
-
|
525
|
+
}
|
526
|
+
matcher.appendReplacement(sb, replacement);
|
527
|
+
result = matcher.find();
|
528
|
+
}
|
529
|
+
matcher.appendTail(sb);
|
530
|
+
return sb;
|
531
|
+
}
|
532
|
+
|
533
|
+
public static CharSequence
|
534
|
+
encodeJavaString(CharSequence str)
|
535
|
+
{
|
536
|
+
return convert(decoded_pattern, str, decoded, encoded);
|
537
|
+
}
|
538
|
+
|
539
|
+
public static CharSequence
|
540
|
+
decodeJavaString(CharSequence str)
|
541
|
+
{
|
542
|
+
return convert(encoded_pattern, str, encoded, decoded);
|
543
|
+
}
|
544
|
+
|
545
|
+
public static final String XMLNS_URI = "http://www.w3.org/2000/xmlns/";
|
546
|
+
public static boolean
|
547
|
+
isNamespace(Node node)
|
548
|
+
{
|
549
|
+
return (XMLNS_URI.equals(node.getNamespaceURI()) || isNamespace(node.getNodeName()));
|
550
|
+
}
|
551
|
+
|
552
|
+
public static boolean
|
553
|
+
isNamespace(String nodeName)
|
554
|
+
{
|
555
|
+
return (nodeName.startsWith("xmlns"));
|
556
|
+
}
|
557
|
+
|
558
|
+
public static boolean
|
559
|
+
isNonDefaultNamespace(Node node)
|
560
|
+
{
|
561
|
+
return (isNamespace(node) && ! "xmlns".equals(node.getNodeName()));
|
562
|
+
}
|
563
|
+
|
564
|
+
public static boolean
|
565
|
+
isXmlBase(String attrName)
|
566
|
+
{
|
567
|
+
return "xml:base".equals(attrName) || "xlink:href".equals(attrName);
|
568
|
+
}
|
569
|
+
|
570
|
+
public static boolean
|
571
|
+
isBlank(IRubyObject obj)
|
572
|
+
{
|
573
|
+
if (!(obj instanceof XmlText)) { return false; }
|
574
|
+
|
575
|
+
CharSequence content = ((XmlNode) obj).getContentImpl();
|
576
|
+
return content == null || isBlank(content);
|
577
|
+
}
|
578
|
+
|
579
|
+
public static boolean
|
580
|
+
isBlank(CharSequence str)
|
581
|
+
{
|
582
|
+
int len = str.length();
|
583
|
+
int beg = 0;
|
584
|
+
while ((beg < len) && (str.charAt(beg) <= ' ')) { beg++; }
|
585
|
+
return beg == len;
|
586
|
+
}
|
587
|
+
|
588
|
+
public static boolean
|
589
|
+
isBlank(String str)
|
590
|
+
{
|
591
|
+
return str.isEmpty() || isBlank((CharSequence) str);
|
592
|
+
}
|
593
|
+
|
594
|
+
public static boolean
|
595
|
+
isNullOrEmpty(String str)
|
596
|
+
{
|
597
|
+
return str == null || str.isEmpty();
|
598
|
+
}
|
599
|
+
|
600
|
+
public static CharSequence
|
601
|
+
canonicalizeWhitespace(CharSequence str)
|
602
|
+
{
|
603
|
+
final int len = str.length();
|
604
|
+
StringBuilder sb = new StringBuilder(len);
|
605
|
+
boolean newline_added = false;
|
606
|
+
for (int i = 0; i < len; i++) {
|
607
|
+
char c = str.charAt(i);
|
608
|
+
if (c == '\n') {
|
609
|
+
if (! newline_added) {
|
610
|
+
sb.append(c);
|
611
|
+
newline_added = true;
|
612
|
+
}
|
613
|
+
} else {
|
614
|
+
sb.append(c);
|
615
|
+
}
|
616
|
+
}
|
617
|
+
return sb;
|
618
|
+
}
|
619
|
+
|
620
|
+
public static String
|
621
|
+
newQName(String newPrefix, Node node)
|
622
|
+
{
|
623
|
+
String tagName = getLocalPart(node.getNodeName());
|
624
|
+
if (newPrefix == null) { return tagName; }
|
625
|
+
return newPrefix + ':' + tagName;
|
626
|
+
}
|
627
|
+
|
628
|
+
public static IRubyObject[]
|
629
|
+
nodeListToRubyArray(Ruby runtime, NodeList nodes)
|
630
|
+
{
|
631
|
+
IRubyObject[] array = new IRubyObject[nodes.getLength()];
|
632
|
+
for (int i = 0; i < nodes.getLength(); i++) {
|
633
|
+
array[i] = NokogiriHelpers.getCachedNodeOrCreate(runtime, nodes.item(i));
|
634
|
+
}
|
635
|
+
return array;
|
636
|
+
}
|
637
|
+
|
638
|
+
public static IRubyObject[]
|
639
|
+
nodeListToArray(Ruby ruby, List<Node> nodes)
|
640
|
+
{
|
641
|
+
IRubyObject[] result = new IRubyObject[nodes.size()];
|
642
|
+
for (int i = 0; i < result.length; i++) {
|
643
|
+
result[i] = NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes.get(i));
|
644
|
+
}
|
645
|
+
return result;
|
646
|
+
}
|
647
|
+
|
648
|
+
public static RubyArray
|
649
|
+
nodeArrayToRubyArray(Ruby ruby, Node[] nodes)
|
650
|
+
{
|
651
|
+
RubyArray n = RubyArray.newArray(ruby, nodes.length);
|
652
|
+
for (int i = 0; i < nodes.length; i++) {
|
653
|
+
n.append(NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes[i]));
|
654
|
+
}
|
655
|
+
return n;
|
656
|
+
}
|
657
|
+
|
658
|
+
public static String
|
659
|
+
getValidEncodingOrNull(IRubyObject encoding)
|
660
|
+
{
|
661
|
+
if (encoding.isNil()) { return null; } // charsetNames does not like contains(null)
|
662
|
+
String enc = rubyStringToString(encoding.convertToString());
|
663
|
+
if (CharsetNames.contains(enc)) { return enc; }
|
664
|
+
return null;
|
665
|
+
}
|
666
|
+
|
667
|
+
public static String
|
668
|
+
getValidEncoding(IRubyObject encoding)
|
669
|
+
{
|
670
|
+
String validEncoding = getValidEncodingOrNull(encoding);
|
671
|
+
if (validEncoding != null) { return validEncoding; }
|
672
|
+
return Charset.defaultCharset().name();
|
673
|
+
}
|
674
|
+
|
675
|
+
private static final Set<String> CharsetNames = Charset.availableCharsets().keySet();
|
676
|
+
|
677
|
+
public static String
|
678
|
+
adjustSystemIdIfNecessary(String currentDir, String scriptFileName, String baseURI, String systemId)
|
679
|
+
{
|
680
|
+
if (systemId == null) { return systemId; }
|
681
|
+
File file = new File(systemId);
|
682
|
+
if (file.isAbsolute()) { return systemId; }
|
683
|
+
String path = resolveSystemId(baseURI, systemId);
|
684
|
+
if (path != null) { return path; }
|
685
|
+
path = resolveSystemId(currentDir, systemId);
|
686
|
+
if (path != null) { return path; }
|
687
|
+
return resolveSystemId(scriptFileName, systemId);
|
688
|
+
}
|
689
|
+
|
690
|
+
private static String
|
691
|
+
resolveSystemId(String baseName, String systemId)
|
692
|
+
{
|
693
|
+
if (baseName == null || baseName.length() < 1) { return null; }
|
694
|
+
String parentName;
|
695
|
+
baseName = baseName.replace("%20", " ");
|
696
|
+
File base = new File(baseName);
|
697
|
+
if (base.isDirectory()) { parentName = baseName; }
|
698
|
+
else { parentName = base.getParent(); }
|
699
|
+
if (parentName == null) { return null; }
|
700
|
+
if (parentName.toLowerCase().startsWith("file:")) { parentName = parentName.substring("file:".length()); }
|
701
|
+
File dtdFile = new File(parentName + "/" + systemId);
|
702
|
+
if (dtdFile.exists()) { return dtdFile.getPath(); }
|
703
|
+
return null;
|
704
|
+
}
|
705
|
+
|
706
|
+
private static final Charset UTF8 = Charset.forName("UTF-8");
|
707
|
+
|
708
|
+
public static boolean
|
709
|
+
isUTF8(String encoding)
|
710
|
+
{
|
711
|
+
if (encoding == null) { return true; } // no need to convert encoding
|
712
|
+
|
713
|
+
if ("UTF-8".equals(encoding)) { return true; }
|
714
|
+
return UTF8.aliases().contains(encoding);
|
715
|
+
}
|
716
|
+
|
717
|
+
public static ByteBuffer
|
718
|
+
convertEncoding(Charset output_charset, CharSequence input_string)
|
719
|
+
{
|
720
|
+
return output_charset.encode(CharBuffer.wrap(input_string)); // does replace implicitly on un-mappable characters
|
721
|
+
}
|
722
|
+
|
723
|
+
public static CharSequence
|
724
|
+
convertEncodingByNKFIfNecessary(ThreadContext context, XmlDocument doc, CharSequence str)
|
725
|
+
{
|
726
|
+
if (!(doc instanceof HtmlDocument)) { return str; }
|
727
|
+
String parsed_encoding = ((HtmlDocument)doc).getPraedEncoding();
|
728
|
+
if (parsed_encoding == null) { return str; }
|
729
|
+
String ruby_encoding = rubyStringToString(doc.getEncoding());
|
730
|
+
if (ruby_encoding == null) { return str; }
|
731
|
+
Charset encoding = Charset.forName(ruby_encoding);
|
732
|
+
if (Charset.forName(parsed_encoding).compareTo(encoding) == 0) { return str; }
|
733
|
+
if (str.length() == 0) { return str; } // no need to convert
|
734
|
+
return NokogiriHelpers.nkf(context, encoding, str);
|
735
|
+
}
|
736
|
+
|
737
|
+
private static final ByteList _Sw = new ByteList(new byte[] { '-', 'S', 'w' }, false);
|
738
|
+
private static final ByteList _Jw = new ByteList(new byte[] { '-', 'J', 'w' }, false);
|
739
|
+
private static final ByteList _Ew = new ByteList(new byte[] { '-', 'E', 'w' }, false);
|
740
|
+
private static final ByteList _Ww = new ByteList(new byte[] { '-', 'W', 'w' }, false);
|
741
|
+
|
742
|
+
// This method is used from HTML documents. HTML meta tag with encoding specification
|
743
|
+
// might appear after non-ascii characters are used. For example, a title tag before
|
744
|
+
// a meta tag. In such a case, Xerces encodes characters in UTF-8 without seeing meta tag.
|
745
|
+
// Nokogiri uses NKF library to convert characters correct encoding. This means the method
|
746
|
+
// works only for JIS/Shift_JIS/EUC-JP.
|
747
|
+
private static CharSequence
|
748
|
+
nkf(ThreadContext context, Charset encoding, CharSequence str)
|
749
|
+
{
|
750
|
+
final Ruby runtime = context.getRuntime();
|
751
|
+
final ByteList opt;
|
752
|
+
if (NokogiriHelpers.Shift_JIS.compareTo(encoding) == 0) { opt = _Sw; }
|
753
|
+
else if (NokogiriHelpers.ISO_2022_JP.compareTo(encoding) == 0) { opt = _Jw; }
|
754
|
+
else if (NokogiriHelpers.EUC_JP.compareTo(encoding) == 0) { opt = _Ew; }
|
755
|
+
else { opt = _Ww; } // should not come here. should be treated before this method.
|
756
|
+
|
757
|
+
Class nkfClass;
|
758
|
+
try {
|
759
|
+
// JRuby 1.7 and later
|
760
|
+
nkfClass = runtime.getClassLoader().loadClass("org.jruby.ext.nkf.RubyNKF");
|
761
|
+
} catch (ClassNotFoundException e1) {
|
762
|
+
return str;
|
763
|
+
}
|
764
|
+
Method nkf_method;
|
765
|
+
try {
|
766
|
+
nkf_method = nkfClass.getMethod("nkf", ThreadContext.class, IRubyObject.class, IRubyObject.class, IRubyObject.class);
|
767
|
+
RubyString r_str =
|
768
|
+
(RubyString)nkf_method.invoke(null, context, null, runtime.newString(opt), runtime.newString(str.toString()));
|
769
|
+
return NokogiriHelpers.rubyStringToString(r_str);
|
770
|
+
} catch (SecurityException e) {
|
771
|
+
return str;
|
772
|
+
} catch (NoSuchMethodException e) {
|
773
|
+
return str;
|
774
|
+
} catch (IllegalArgumentException e) {
|
775
|
+
return str;
|
776
|
+
} catch (IllegalAccessException e) {
|
777
|
+
return str;
|
778
|
+
} catch (InvocationTargetException e) {
|
779
|
+
return str;
|
780
|
+
}
|
781
|
+
}
|
782
|
+
|
783
|
+
private static final Charset Shift_JIS = Charset.forName("Shift_JIS");
|
784
|
+
private static final Charset ISO_2022_JP = Charset.forName("ISO-2022-JP"); // JIS
|
785
|
+
private static final Charset EUC_JP = Charset.forName("EUC-JP");
|
786
|
+
|
787
|
+
public static boolean
|
788
|
+
shouldEncode(Node text)
|
789
|
+
{
|
790
|
+
final Boolean encoded = (Boolean) text.getUserData(NokogiriHelpers.ENCODED_STRING);
|
791
|
+
return encoded == null || ! encoded;
|
792
|
+
}
|
793
|
+
|
794
|
+
public static boolean
|
795
|
+
shouldDecode(Node text)
|
796
|
+
{
|
797
|
+
return !shouldEncode(text);
|
798
|
+
}
|
799
|
+
|
800
|
+
public static NokogiriNamespaceCache
|
801
|
+
getNamespaceCache(Node node)
|
802
|
+
{
|
803
|
+
XmlDocument xmlDoc = (XmlDocument) getCachedNode(node.getOwnerDocument());
|
804
|
+
return xmlDoc.getNamespaceCache();
|
805
|
+
}
|
806
|
+
|
807
|
+
public static Node
|
808
|
+
renameNode(Node node, String namespaceURI, String qualifiedName) throws DOMException
|
809
|
+
{
|
810
|
+
Document doc = node.getOwnerDocument();
|
811
|
+
NokogiriNamespaceCache nsCache = getNamespaceCache(node);
|
812
|
+
Node result = doc.renameNode(node, namespaceURI, qualifiedName);
|
813
|
+
if (result != node) {
|
814
|
+
nsCache.replaceNode(node, result);
|
815
|
+
}
|
816
|
+
return result;
|
817
|
+
}
|
734
818
|
}
|