nokogiri 1.10.10-java → 1.11.0-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/LICENSE-DEPENDENCIES.md +1015 -947
- data/README.md +165 -91
- data/ext/java/nokogiri/HtmlDocument.java +34 -46
- data/ext/java/nokogiri/HtmlSaxParserContext.java +88 -58
- data/ext/java/nokogiri/HtmlSaxPushParser.java +1 -1
- data/ext/java/nokogiri/NokogiriService.java +1 -1
- data/ext/java/nokogiri/XmlAttr.java +13 -20
- data/ext/java/nokogiri/XmlAttributeDecl.java +11 -12
- data/ext/java/nokogiri/XmlCdata.java +3 -4
- data/ext/java/nokogiri/XmlComment.java +1 -1
- data/ext/java/nokogiri/XmlDocument.java +148 -175
- data/ext/java/nokogiri/XmlDocumentFragment.java +13 -31
- data/ext/java/nokogiri/XmlDtd.java +5 -8
- data/ext/java/nokogiri/XmlElement.java +1 -20
- data/ext/java/nokogiri/XmlElementDecl.java +23 -28
- data/ext/java/nokogiri/XmlEntityDecl.java +23 -27
- data/ext/java/nokogiri/XmlEntityReference.java +2 -2
- data/ext/java/nokogiri/XmlNamespace.java +72 -89
- data/ext/java/nokogiri/XmlNode.java +303 -406
- data/ext/java/nokogiri/XmlNodeSet.java +70 -76
- data/ext/java/nokogiri/XmlReader.java +12 -13
- data/ext/java/nokogiri/XmlRelaxng.java +10 -3
- data/ext/java/nokogiri/XmlSaxParserContext.java +15 -10
- data/ext/java/nokogiri/XmlSchema.java +87 -27
- data/ext/java/nokogiri/XmlSyntaxError.java +2 -6
- data/ext/java/nokogiri/XmlText.java +12 -9
- data/ext/java/nokogiri/XmlXpathContext.java +55 -25
- data/ext/java/nokogiri/XsltStylesheet.java +7 -15
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +52 -46
- data/ext/java/nokogiri/internals/NokogiriHandler.java +1 -1
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +71 -135
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +90 -58
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +9 -2
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +67 -10
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +4 -2
- data/ext/java/nokogiri/internals/ParserContext.java +27 -73
- data/ext/java/nokogiri/internals/ReaderNode.java +2 -4
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +18 -33
- data/ext/nokogiri/depend +476 -357
- data/ext/nokogiri/extconf.rb +507 -357
- data/ext/nokogiri/html_document.c +79 -78
- data/ext/nokogiri/html_sax_parser_context.c +2 -2
- data/ext/nokogiri/nokogiri.c +34 -40
- data/ext/nokogiri/xml_document.c +18 -4
- data/ext/nokogiri/xml_io.c +8 -6
- data/ext/nokogiri/xml_node.c +21 -1
- data/ext/nokogiri/xml_node_set.c +1 -1
- data/ext/nokogiri/xml_reader.c +6 -17
- data/ext/nokogiri/xml_relax_ng.c +29 -11
- data/ext/nokogiri/xml_sax_parser.c +2 -7
- data/ext/nokogiri/xml_sax_parser_context.c +2 -2
- data/ext/nokogiri/xml_schema.c +55 -13
- data/ext/nokogiri/xml_xpath_context.c +80 -4
- data/ext/nokogiri/xslt_stylesheet.c +1 -8
- data/lib/nokogiri.rb +22 -22
- data/lib/nokogiri/css.rb +1 -0
- data/lib/nokogiri/css/node.rb +1 -0
- data/lib/nokogiri/css/parser.rb +63 -62
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +39 -36
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +1 -0
- data/lib/nokogiri/css/xpath_visitor.rb +73 -43
- data/lib/nokogiri/decorators/slop.rb +1 -0
- data/lib/nokogiri/html.rb +1 -0
- data/lib/nokogiri/html/builder.rb +1 -0
- data/lib/nokogiri/html/document.rb +13 -26
- data/lib/nokogiri/html/document_fragment.rb +1 -0
- data/lib/nokogiri/html/element_description.rb +1 -0
- data/lib/nokogiri/html/element_description_defaults.rb +1 -0
- data/lib/nokogiri/html/entity_lookup.rb +1 -0
- data/lib/nokogiri/html/sax/parser.rb +1 -0
- data/lib/nokogiri/html/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html/sax/push_parser.rb +1 -0
- data/lib/nokogiri/jruby/dependencies.rb +20 -0
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version.rb +3 -109
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +182 -0
- data/lib/nokogiri/xml.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +1 -0
- data/lib/nokogiri/xml/attribute_decl.rb +1 -0
- data/lib/nokogiri/xml/builder.rb +3 -2
- data/lib/nokogiri/xml/cdata.rb +1 -0
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +20 -15
- data/lib/nokogiri/xml/document_fragment.rb +5 -6
- data/lib/nokogiri/xml/dtd.rb +1 -0
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +1 -0
- data/lib/nokogiri/xml/node.rb +587 -249
- data/lib/nokogiri/xml/node/save_options.rb +1 -0
- data/lib/nokogiri/xml/node_set.rb +1 -0
- data/lib/nokogiri/xml/notation.rb +1 -0
- data/lib/nokogiri/xml/parse_options.rb +10 -3
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/pp/character_data.rb +1 -0
- data/lib/nokogiri/xml/pp/node.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +1 -0
- data/lib/nokogiri/xml/reader.rb +7 -3
- data/lib/nokogiri/xml/relax_ng.rb +7 -2
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +1 -0
- data/lib/nokogiri/xml/sax/parser.rb +1 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +13 -4
- data/lib/nokogiri/xml/searchable.rb +25 -16
- data/lib/nokogiri/xml/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/xpath_context.rb +1 -0
- data/lib/nokogiri/xslt.rb +1 -0
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/xsd/xmlparser/nokogiri.rb +1 -0
- metadata +86 -159
- data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +0 -107
- data/ext/java/nokogiri/internals/UncloseableInputStream.java +0 -102
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/nokogiri.h +0 -121
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -35,9 +35,11 @@ package nokogiri.internals;
|
|
35
35
|
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
|
36
36
|
import static nokogiri.internals.NokogiriHelpers.isNamespace;
|
37
37
|
import static nokogiri.internals.NokogiriHelpers.stringOrNil;
|
38
|
+
|
38
39
|
import nokogiri.HtmlDocument;
|
39
40
|
import nokogiri.NokogiriService;
|
40
41
|
import nokogiri.XmlDocument;
|
42
|
+
import nokogiri.XmlSyntaxError;
|
41
43
|
|
42
44
|
import org.apache.xerces.xni.Augmentations;
|
43
45
|
import org.apache.xerces.xni.QName;
|
@@ -65,21 +67,13 @@ import org.w3c.dom.NodeList;
|
|
65
67
|
*/
|
66
68
|
public class HtmlDomParserContext extends XmlDomParserContext {
|
67
69
|
|
68
|
-
|
69
|
-
|
70
|
+
public HtmlDomParserContext(Ruby runtime, IRubyObject options) {
|
71
|
+
this(runtime, runtime.getNil(), options);
|
70
72
|
}
|
71
|
-
|
73
|
+
|
72
74
|
public HtmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options) {
|
73
75
|
super(runtime, encoding, options);
|
74
|
-
|
75
|
-
|
76
|
-
@Override
|
77
|
-
protected void initErrorHandler() {
|
78
|
-
if (options.strict) {
|
79
|
-
errorHandler = new NokogiriStrictErrorHandler(options.noError, options.noWarning);
|
80
|
-
} else {
|
81
|
-
errorHandler = new NokogiriNonStrictErrorHandler4NekoHtml(options.noError, options.noWarning);
|
82
|
-
}
|
76
|
+
java_encoding = NokogiriHelpers.getValidEncoding(encoding);
|
83
77
|
}
|
84
78
|
|
85
79
|
@Override
|
@@ -105,7 +99,7 @@ public class HtmlDomParserContext extends XmlDomParserContext {
|
|
105
99
|
|
106
100
|
@Override
|
107
101
|
public void setEncoding(String encoding) {
|
108
|
-
|
102
|
+
super.setEncoding(encoding);
|
109
103
|
}
|
110
104
|
|
111
105
|
/**
|
@@ -118,15 +112,27 @@ public class HtmlDomParserContext extends XmlDomParserContext {
|
|
118
112
|
}
|
119
113
|
|
120
114
|
@Override
|
121
|
-
|
122
|
-
|
123
|
-
|
115
|
+
public XmlDocument parse(ThreadContext context, RubyClass klass, IRubyObject url) {
|
116
|
+
XmlDocument xmlDoc = super.parse(context, klass, url);
|
117
|
+
|
118
|
+
// let's be consistent in how we handle RECOVER and NORECOVER (a.k.a. STRICT)
|
119
|
+
// https://github.com/sparklemotion/nokogiri/issues/2130
|
120
|
+
if (!options.recover && errorHandler.getErrors().size() > 0) {
|
121
|
+
XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
|
122
|
+
String exceptionMsg = String.format("%s: '%s'",
|
123
|
+
"Parser without recover option encountered error or warning",
|
124
|
+
errorHandler.getErrors().get(0));
|
125
|
+
xmlSyntaxError.setException(new Exception(exceptionMsg));
|
126
|
+
throw xmlSyntaxError.toThrowable();
|
127
|
+
}
|
128
|
+
|
129
|
+
return xmlDoc;
|
124
130
|
}
|
125
131
|
|
126
132
|
@Override
|
127
|
-
protected XmlDocument wrapDocument(ThreadContext context, RubyClass
|
128
|
-
HtmlDocument htmlDocument =
|
129
|
-
htmlDocument.setDocumentNode(context, document);
|
133
|
+
protected XmlDocument wrapDocument(ThreadContext context, RubyClass klass, Document document) {
|
134
|
+
HtmlDocument htmlDocument = new HtmlDocument(context.runtime, klass, document);
|
135
|
+
htmlDocument.setDocumentNode(context.runtime, document);
|
130
136
|
if (ruby_encoding.isNil()) {
|
131
137
|
// ruby_encoding might have been detected by HtmlDocument::EncodingReader
|
132
138
|
if (detected_encoding != null && !detected_encoding.isNil()) {
|
@@ -134,7 +140,7 @@ public class HtmlDomParserContext extends XmlDomParserContext {
|
|
134
140
|
} else {
|
135
141
|
// no encoding given & no encoding detected, then try to get it
|
136
142
|
String charset = tryGetCharsetFromHtml5MetaTag(document);
|
137
|
-
ruby_encoding = stringOrNil(context.
|
143
|
+
ruby_encoding = stringOrNil(context.runtime, charset);
|
138
144
|
}
|
139
145
|
}
|
140
146
|
htmlDocument.setEncoding(ruby_encoding);
|
@@ -195,32 +201,32 @@ public class HtmlDomParserContext extends XmlDomParserContext {
|
|
195
201
|
|
196
202
|
// element names from xhtml1-strict.dtd
|
197
203
|
private static String[][] element_names = {
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
204
|
+
{"a", "abbr", "acronym", "address", "area"},
|
205
|
+
{"b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button"},
|
206
|
+
{"caption", "cite", "code", "col", "colgroup"},
|
207
|
+
{"dd", "del", "dfn", "div", "dl", "dt"},
|
208
|
+
{"em"},
|
209
|
+
{"fieldset", "font", "form", "frame", "frameset"},
|
210
|
+
{}, // g
|
211
|
+
{"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html"},
|
212
|
+
{"i", "iframe", "img", "input", "ins"},
|
213
|
+
{}, // j
|
214
|
+
{"kbd"},
|
215
|
+
{"label", "legend", "li", "link"},
|
216
|
+
{"map", "meta"},
|
217
|
+
{"noframes", "noscript"},
|
218
|
+
{"object", "ol", "optgroup", "option"},
|
219
|
+
{"p", "param", "pre"},
|
220
|
+
{"q"},
|
221
|
+
{}, // r
|
222
|
+
{"s", "samp", "script", "select", "small", "span", "strike", "strong", "style", "sub", "sup"},
|
223
|
+
{"table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title", "tr", "tt"},
|
224
|
+
{"u", "ul"},
|
225
|
+
{"var"},
|
226
|
+
{}, // w
|
227
|
+
{}, // x
|
228
|
+
{}, // y
|
229
|
+
{} // z
|
224
230
|
};
|
225
231
|
|
226
232
|
private static boolean isValid(final String name) {
|
@@ -249,7 +249,7 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler {
|
|
249
249
|
try {
|
250
250
|
final String msg = ex.getMessage();
|
251
251
|
call("error", runtime.newString(msg == null ? "" : msg));
|
252
|
-
addError(
|
252
|
+
addError(XmlSyntaxError.createError(runtime, ex).toThrowable());
|
253
253
|
} catch( RaiseException e) {
|
254
254
|
addError(e);
|
255
255
|
throw e;
|
@@ -39,6 +39,7 @@ import java.lang.reflect.Method;
|
|
39
39
|
import java.nio.ByteBuffer;
|
40
40
|
import java.nio.CharBuffer;
|
41
41
|
import java.nio.charset.Charset;
|
42
|
+
import java.util.List;
|
42
43
|
import java.util.Set;
|
43
44
|
import java.util.regex.Matcher;
|
44
45
|
import java.util.regex.Pattern;
|
@@ -53,7 +54,6 @@ import org.jruby.util.ByteList;
|
|
53
54
|
import org.w3c.dom.Attr;
|
54
55
|
import org.w3c.dom.DOMException;
|
55
56
|
import org.w3c.dom.Document;
|
56
|
-
import org.w3c.dom.NamedNodeMap;
|
57
57
|
import org.w3c.dom.Node;
|
58
58
|
import org.w3c.dom.NodeList;
|
59
59
|
|
@@ -81,7 +81,7 @@ import nokogiri.XmlXpathContext;
|
|
81
81
|
*/
|
82
82
|
public class NokogiriHelpers {
|
83
83
|
public static final String CACHED_NODE = "NOKOGIRI_CACHED_NODE";
|
84
|
-
public static final String
|
84
|
+
public static final String ROOT_NODE_INVALID = "NOKOGIRI_ROOT_NODE_INVALID";
|
85
85
|
public static final String ENCODED_STRING = "NOKOGIRI_ENCODED_STRING";
|
86
86
|
|
87
87
|
public static XmlNode getCachedNode(Node node) {
|
@@ -108,22 +108,21 @@ public class NokogiriHelpers {
|
|
108
108
|
* or XmlNamespace wrapping <code>node</code> if there is no cached
|
109
109
|
* value.
|
110
110
|
*/
|
111
|
-
public static IRubyObject getCachedNodeOrCreate(Ruby
|
112
|
-
if(node == null) return
|
111
|
+
public static IRubyObject getCachedNodeOrCreate(Ruby runtime, Node node) {
|
112
|
+
if (node == null) return runtime.getNil();
|
113
113
|
if (node.getNodeType() == Node.ATTRIBUTE_NODE && isNamespace(node.getNodeName())) {
|
114
|
-
XmlDocument xmlDocument = (XmlDocument)node.getOwnerDocument().getUserData(CACHED_NODE);
|
114
|
+
XmlDocument xmlDocument = (XmlDocument) node.getOwnerDocument().getUserData(CACHED_NODE);
|
115
115
|
if (!(xmlDocument instanceof HtmlDocument)) {
|
116
|
-
String prefix = getLocalNameForNamespace(((Attr)node).getName());
|
117
|
-
|
118
|
-
String href = ((Attr)node).getValue();
|
116
|
+
String prefix = getLocalNameForNamespace(((Attr) node).getName(), null);
|
117
|
+
String href = ((Attr) node).getValue();
|
119
118
|
XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefix, href);
|
120
119
|
if (xmlNamespace != null) return xmlNamespace;
|
121
|
-
|
120
|
+
return XmlNamespace.createFromAttr(runtime, (Attr) node);
|
122
121
|
}
|
123
122
|
}
|
124
123
|
XmlNode xmlNode = getCachedNode(node);
|
125
|
-
if(xmlNode == null) {
|
126
|
-
xmlNode = (XmlNode)constructNode(
|
124
|
+
if (xmlNode == null) {
|
125
|
+
xmlNode = (XmlNode) constructNode(runtime, node);
|
127
126
|
node.setUserData(CACHED_NODE, xmlNode, null);
|
128
127
|
}
|
129
128
|
return xmlNode;
|
@@ -140,37 +139,37 @@ public class NokogiriHelpers {
|
|
140
139
|
switch (node.getNodeType()) {
|
141
140
|
case Node.ELEMENT_NODE:
|
142
141
|
XmlElement xmlElement = (XmlElement) NokogiriService.XML_ELEMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Element"));
|
143
|
-
xmlElement.setNode(runtime
|
142
|
+
xmlElement.setNode(runtime, node);
|
144
143
|
return xmlElement;
|
145
144
|
case Node.ATTRIBUTE_NODE:
|
146
145
|
XmlAttr xmlAttr = (XmlAttr) NokogiriService.XML_ATTR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Attr"));
|
147
|
-
xmlAttr.setNode(runtime
|
146
|
+
xmlAttr.setNode(runtime, node);
|
148
147
|
return xmlAttr;
|
149
148
|
case Node.TEXT_NODE:
|
150
149
|
XmlText xmlText = (XmlText) NokogiriService.XML_TEXT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Text"));
|
151
|
-
xmlText.setNode(runtime
|
150
|
+
xmlText.setNode(runtime, node);
|
152
151
|
return xmlText;
|
153
152
|
case Node.COMMENT_NODE:
|
154
153
|
XmlComment xmlComment = (XmlComment) NokogiriService.XML_COMMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Comment"));
|
155
|
-
xmlComment.setNode(runtime
|
154
|
+
xmlComment.setNode(runtime, node);
|
156
155
|
return xmlComment;
|
157
156
|
case Node.ENTITY_NODE:
|
158
157
|
return new XmlNode(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityDecl"), node);
|
159
158
|
case Node.ENTITY_REFERENCE_NODE:
|
160
159
|
XmlEntityReference xmlEntityRef = (XmlEntityReference) NokogiriService.XML_ENTITY_REFERENCE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityReference"));
|
161
|
-
xmlEntityRef.setNode(runtime
|
160
|
+
xmlEntityRef.setNode(runtime, node);
|
162
161
|
return xmlEntityRef;
|
163
162
|
case Node.PROCESSING_INSTRUCTION_NODE:
|
164
163
|
XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction) NokogiriService.XML_PROCESSING_INSTRUCTION_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::ProcessingInstruction"));
|
165
|
-
xmlProcessingInstruction.setNode(runtime
|
164
|
+
xmlProcessingInstruction.setNode(runtime, node);
|
166
165
|
return xmlProcessingInstruction;
|
167
166
|
case Node.CDATA_SECTION_NODE:
|
168
167
|
XmlCdata xmlCdata = (XmlCdata) NokogiriService.XML_CDATA_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::CDATA"));
|
169
|
-
xmlCdata.setNode(runtime
|
168
|
+
xmlCdata.setNode(runtime, node);
|
170
169
|
return xmlCdata;
|
171
170
|
case Node.DOCUMENT_NODE:
|
172
171
|
XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Document"));
|
173
|
-
xmlDocument.setDocumentNode(runtime
|
172
|
+
xmlDocument.setDocumentNode(runtime, (Document) node);
|
174
173
|
return xmlDocument;
|
175
174
|
case Node.DOCUMENT_TYPE_NODE:
|
176
175
|
XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD"));
|
@@ -178,7 +177,7 @@ public class NokogiriHelpers {
|
|
178
177
|
return xmlDtd;
|
179
178
|
default:
|
180
179
|
XmlNode xmlNode = (XmlNode) NokogiriService.XML_NODE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Node"));
|
181
|
-
xmlNode.setNode(runtime
|
180
|
+
xmlNode.setNode(runtime, node);
|
182
181
|
return xmlNode;
|
183
182
|
}
|
184
183
|
}
|
@@ -242,47 +241,18 @@ public class NokogiriHelpers {
|
|
242
241
|
return pos > 0 ? qName.substring(pos + 1) : qName;
|
243
242
|
}
|
244
243
|
|
245
|
-
public static String getLocalNameForNamespace(String name) {
|
244
|
+
public static String getLocalNameForNamespace(String name, String defValue) {
|
246
245
|
String localName = getLocalPart(name);
|
247
|
-
return ("xmlns".equals(localName)) ?
|
246
|
+
return ("xmlns".equals(localName)) ? defValue : localName;
|
248
247
|
}
|
249
248
|
|
250
|
-
private static final Charset UTF8 = Charset.forName("UTF-8");
|
251
|
-
|
252
|
-
/**
|
253
|
-
* Converts a RubyString in to a Java String. Assumes the
|
254
|
-
* RubyString is encoded as UTF-8. This is generally the case for
|
255
|
-
* RubyStrings created with getRuntime().newString("java string").
|
256
|
-
* It also seems to be the case for strings created within Ruby
|
257
|
-
* where $KCODE has not been set.
|
258
|
-
*
|
259
|
-
* Note that RubyString#toString() decodes the string data as
|
260
|
-
* ISO-8859-1 (See org.jruby.util.ByteList.java). This is not
|
261
|
-
* what you want if you have any multibyte characters in your
|
262
|
-
* UTF-8 string.
|
263
|
-
*
|
264
|
-
* FIXME: This really needs to be more robust in terms of
|
265
|
-
* detecting the encoding and properly converting to a Java
|
266
|
-
* String. It's unfortunate that RubyString#toString() doesn't do
|
267
|
-
* this for us.
|
268
|
-
*/
|
269
249
|
public static String rubyStringToString(IRubyObject str) {
|
270
250
|
if (str.isNil()) return null;
|
271
|
-
|
272
|
-
return toJavaString(str.convertToString());
|
273
|
-
}
|
274
|
-
|
275
|
-
private static String toJavaString(RubyString str) {
|
276
|
-
return str.decodeString(); // toString()
|
251
|
+
return str.convertToString().decodeString();
|
277
252
|
}
|
278
253
|
|
279
254
|
public static String rubyStringToString(RubyString str) {
|
280
|
-
|
281
|
-
byte[] data = byteList.unsafeBytes();
|
282
|
-
int offset = byteList.begin();
|
283
|
-
int len = byteList.length();
|
284
|
-
ByteBuffer buf = ByteBuffer.wrap(data, offset, len);
|
285
|
-
return UTF8.decode(buf).toString();
|
255
|
+
return str.decodeString(); // if encoding UTF-8 will decode UTF-8
|
286
256
|
}
|
287
257
|
|
288
258
|
public static ByteArrayInputStream stringBytesToStream(final IRubyObject str) {
|
@@ -297,7 +267,6 @@ public class NokogiriHelpers {
|
|
297
267
|
|
298
268
|
Node cur, tmp, next;
|
299
269
|
|
300
|
-
// TODO: Rename buffer to path.
|
301
270
|
String buffer = "";
|
302
271
|
|
303
272
|
cur = node;
|
@@ -494,33 +463,20 @@ public class NokogiriHelpers {
|
|
494
463
|
return buffer;
|
495
464
|
}
|
496
465
|
|
497
|
-
|
466
|
+
static boolean compareTwoNodes(Node m, Node n) {
|
498
467
|
return nodesAreEqual(m.getLocalName(), n.getLocalName()) &&
|
499
468
|
nodesAreEqual(m.getPrefix(), n.getPrefix());
|
500
469
|
}
|
501
470
|
|
502
|
-
protected static boolean fullNamesMatch(Node a, Node b) {
|
503
|
-
return a.getNodeName().equals(b.getNodeName());
|
504
|
-
}
|
505
|
-
|
506
|
-
protected static String getFullName(Node n) {
|
507
|
-
String lname = n.getLocalName();
|
508
|
-
String prefix = n.getPrefix();
|
509
|
-
if (lname != null) {
|
510
|
-
if (prefix != null)
|
511
|
-
return prefix + ":" + lname;
|
512
|
-
else
|
513
|
-
return lname;
|
514
|
-
} else {
|
515
|
-
return n.getNodeName();
|
516
|
-
}
|
517
|
-
}
|
518
|
-
|
519
471
|
private static boolean nodesAreEqual(Object a, Object b) {
|
520
472
|
return (((a == null) && (b == null)) ||
|
521
473
|
((a != null) && (b != null) && (b.equals(a))));
|
522
474
|
}
|
523
475
|
|
476
|
+
private static boolean fullNamesMatch(Node a, Node b) {
|
477
|
+
return a.getNodeName().equals(b.getNodeName());
|
478
|
+
}
|
479
|
+
|
524
480
|
private static final Pattern encoded_pattern = Pattern.compile("&amp;|&gt;|&lt;|&#13;");
|
525
481
|
private static final String[] encoded = {"&amp;", "&gt;", "&lt;", "&#13;"};
|
526
482
|
private static final Pattern decoded_pattern = Pattern.compile("&|>|<|\r");
|
@@ -554,20 +510,6 @@ public class NokogiriHelpers {
|
|
554
510
|
return convert(encoded_pattern, str, encoded, decoded);
|
555
511
|
}
|
556
512
|
|
557
|
-
public static String getNodeName(Node node) {
|
558
|
-
if(node == null) { System.out.println("node is null"); return ""; }
|
559
|
-
String name = node.getNodeName();
|
560
|
-
if(name == null) { System.out.println("name is null"); return ""; }
|
561
|
-
if(name.equals("#document")) {
|
562
|
-
return "document";
|
563
|
-
} else if(name.equals("#text")) {
|
564
|
-
return "text";
|
565
|
-
} else {
|
566
|
-
name = getLocalPart(name);
|
567
|
-
return (name == null) ? "" : name;
|
568
|
-
}
|
569
|
-
}
|
570
|
-
|
571
513
|
public static final String XMLNS_URI = "http://www.w3.org/2000/xmlns/";
|
572
514
|
public static boolean isNamespace(Node node) {
|
573
515
|
return (XMLNS_URI.equals(node.getNamespaceURI()) || isNamespace(node.getNodeName()));
|
@@ -602,6 +544,10 @@ public class NokogiriHelpers {
|
|
602
544
|
return str.isEmpty() || isBlank((CharSequence) str);
|
603
545
|
}
|
604
546
|
|
547
|
+
public static boolean isNullOrEmpty(String str) {
|
548
|
+
return str == null || str.isEmpty();
|
549
|
+
}
|
550
|
+
|
605
551
|
public static CharSequence canonicalizeWhitespace(CharSequence str) {
|
606
552
|
final int len = str.length();
|
607
553
|
StringBuilder sb = new StringBuilder(len);
|
@@ -625,59 +571,44 @@ public class NokogiriHelpers {
|
|
625
571
|
return newPrefix + ':' + tagName;
|
626
572
|
}
|
627
573
|
|
628
|
-
public static IRubyObject[] nodeListToRubyArray(Ruby
|
574
|
+
public static IRubyObject[] nodeListToRubyArray(Ruby runtime, NodeList nodes) {
|
629
575
|
IRubyObject[] array = new IRubyObject[nodes.getLength()];
|
630
576
|
for (int i = 0; i < nodes.getLength(); i++) {
|
631
|
-
|
577
|
+
array[i] = NokogiriHelpers.getCachedNodeOrCreate(runtime, nodes.item(i));
|
632
578
|
}
|
633
579
|
return array;
|
634
580
|
}
|
635
581
|
|
636
|
-
public static IRubyObject[]
|
637
|
-
IRubyObject[] result = new IRubyObject[nodes.
|
638
|
-
for(int i = 0; i <
|
639
|
-
result[i] = NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes
|
582
|
+
public static IRubyObject[] nodeListToArray(Ruby ruby, List<Node> nodes) {
|
583
|
+
IRubyObject[] result = new IRubyObject[nodes.size()];
|
584
|
+
for (int i = 0; i < result.length; i++) {
|
585
|
+
result[i] = NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes.get(i));
|
640
586
|
}
|
641
587
|
return result;
|
642
588
|
}
|
643
589
|
|
644
590
|
public static RubyArray nodeArrayToRubyArray(Ruby ruby, Node[] nodes) {
|
645
591
|
RubyArray n = RubyArray.newArray(ruby, nodes.length);
|
646
|
-
for(int i = 0; i < nodes.length; i++) {
|
592
|
+
for (int i = 0; i < nodes.length; i++) {
|
647
593
|
n.append(NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes[i]));
|
648
594
|
}
|
649
595
|
return n;
|
650
596
|
}
|
651
597
|
|
652
|
-
public static
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
return n;
|
658
|
-
}
|
659
|
-
|
660
|
-
public static String getValidEncoding(Ruby runtime, IRubyObject encoding) {
|
661
|
-
if (encoding.isNil()) {
|
662
|
-
return guessEncoding();
|
663
|
-
} else {
|
664
|
-
return ignoreInvalidEncoding(runtime, encoding);
|
665
|
-
}
|
598
|
+
public static String getValidEncodingOrNull(IRubyObject encoding) {
|
599
|
+
if (encoding.isNil()) return null; // charsetNames does not like contains(null)
|
600
|
+
String enc = rubyStringToString(encoding.convertToString());
|
601
|
+
if (CharsetNames.contains(enc)) return enc;
|
602
|
+
return null;
|
666
603
|
}
|
667
604
|
|
668
|
-
|
669
|
-
String
|
670
|
-
if (
|
671
|
-
return name;
|
605
|
+
public static String getValidEncoding(IRubyObject encoding) {
|
606
|
+
String validEncoding = getValidEncodingOrNull(encoding);
|
607
|
+
if (validEncoding != null) return validEncoding;
|
608
|
+
return Charset.defaultCharset().name();
|
672
609
|
}
|
673
610
|
|
674
|
-
private static Set<String>
|
675
|
-
|
676
|
-
private static String ignoreInvalidEncoding(Ruby runtime, IRubyObject encoding) {
|
677
|
-
String givenEncoding = rubyStringToString(encoding);
|
678
|
-
if (charsetNames.contains(givenEncoding)) return givenEncoding;
|
679
|
-
else return guessEncoding();
|
680
|
-
}
|
611
|
+
private static final Set<String> CharsetNames = Charset.availableCharsets().keySet();
|
681
612
|
|
682
613
|
public static String adjustSystemIdIfNecessary(String currentDir, String scriptFileName, String baseURI, String systemId) {
|
683
614
|
if (systemId == null) return systemId;
|
@@ -704,9 +635,13 @@ public class NokogiriHelpers {
|
|
704
635
|
return null;
|
705
636
|
}
|
706
637
|
|
638
|
+
private static final Charset UTF8 = Charset.forName("UTF-8");
|
639
|
+
|
707
640
|
public static boolean isUTF8(String encoding) {
|
708
641
|
if (encoding == null) return true; // no need to convert encoding
|
709
|
-
|
642
|
+
|
643
|
+
if ("UTF-8".equals(encoding)) return true;
|
644
|
+
return UTF8.aliases().contains(encoding);
|
710
645
|
}
|
711
646
|
|
712
647
|
public static ByteBuffer convertEncoding(Charset output_charset, CharSequence input_string) {
|
@@ -738,15 +673,16 @@ public class NokogiriHelpers {
|
|
738
673
|
private static CharSequence nkf(ThreadContext context, Charset encoding, CharSequence str) {
|
739
674
|
final Ruby runtime = context.getRuntime();
|
740
675
|
final ByteList opt;
|
741
|
-
if (NokogiriHelpers.
|
742
|
-
else if (NokogiriHelpers.
|
743
|
-
else if (NokogiriHelpers.
|
676
|
+
if (NokogiriHelpers.Shift_JIS.compareTo(encoding) == 0) opt = _Sw;
|
677
|
+
else if (NokogiriHelpers.ISO_2022_JP.compareTo(encoding) == 0) opt = _Jw;
|
678
|
+
else if (NokogiriHelpers.EUC_JP.compareTo(encoding) == 0) opt = _Ew;
|
744
679
|
else opt = _Ww; // should not come here. should be treated before this method.
|
745
680
|
|
746
681
|
Class nkfClass;
|
747
682
|
try {
|
748
|
-
|
749
|
-
|
683
|
+
// JRuby 1.7 and later
|
684
|
+
nkfClass = runtime.getClassLoader().loadClass("org.jruby.ext.nkf.RubyNKF");
|
685
|
+
} catch (ClassNotFoundException e1) {
|
750
686
|
return str;
|
751
687
|
}
|
752
688
|
Method nkf_method;
|
@@ -768,9 +704,9 @@ public class NokogiriHelpers {
|
|
768
704
|
}
|
769
705
|
}
|
770
706
|
|
771
|
-
private static final Charset
|
772
|
-
private static final Charset
|
773
|
-
private static final Charset
|
707
|
+
private static final Charset Shift_JIS = Charset.forName("Shift_JIS");
|
708
|
+
private static final Charset ISO_2022_JP = Charset.forName("ISO-2022-JP"); // JIS
|
709
|
+
private static final Charset EUC_JP = Charset.forName("EUC-JP");
|
774
710
|
|
775
711
|
public static boolean shouldEncode(Node text) {
|
776
712
|
final Boolean encoded = (Boolean) text.getUserData(NokogiriHelpers.ENCODED_STRING);
|
@@ -781,17 +717,17 @@ public class NokogiriHelpers {
|
|
781
717
|
return !shouldEncode(text);
|
782
718
|
}
|
783
719
|
|
784
|
-
public static NokogiriNamespaceCache
|
785
|
-
XmlDocument xmlDoc = (XmlDocument)getCachedNode(
|
720
|
+
public static NokogiriNamespaceCache getNamespaceCache(Node node) {
|
721
|
+
XmlDocument xmlDoc = (XmlDocument) getCachedNode(node.getOwnerDocument());
|
786
722
|
return xmlDoc.getNamespaceCache();
|
787
723
|
}
|
788
724
|
|
789
|
-
public static Node renameNode(Node
|
790
|
-
Document doc =
|
791
|
-
NokogiriNamespaceCache nsCache =
|
792
|
-
Node result = doc.renameNode(
|
793
|
-
if (result !=
|
794
|
-
nsCache.replaceNode(
|
725
|
+
public static Node renameNode(Node node, String namespaceURI, String qualifiedName) throws DOMException {
|
726
|
+
Document doc = node.getOwnerDocument();
|
727
|
+
NokogiriNamespaceCache nsCache = getNamespaceCache(node);
|
728
|
+
Node result = doc.renameNode(node, namespaceURI, qualifiedName);
|
729
|
+
if (result != node) {
|
730
|
+
nsCache.replaceNode(node, result);
|
795
731
|
}
|
796
732
|
return result;
|
797
733
|
}
|