nokogiri 1.10.8-java → 1.11.0.rc3-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/README.md +24 -22
- data/ext/java/nokogiri/HtmlDocument.java +34 -46
- data/ext/java/nokogiri/HtmlSaxParserContext.java +87 -57
- data/ext/java/nokogiri/NokogiriService.java +1 -1
- data/ext/java/nokogiri/XmlAttr.java +13 -20
- data/ext/java/nokogiri/XmlAttributeDecl.java +11 -12
- data/ext/java/nokogiri/XmlCdata.java +3 -4
- data/ext/java/nokogiri/XmlComment.java +1 -1
- data/ext/java/nokogiri/XmlDocument.java +148 -175
- data/ext/java/nokogiri/XmlDocumentFragment.java +13 -31
- data/ext/java/nokogiri/XmlDtd.java +5 -8
- data/ext/java/nokogiri/XmlElement.java +1 -20
- data/ext/java/nokogiri/XmlElementDecl.java +23 -28
- data/ext/java/nokogiri/XmlEntityDecl.java +23 -27
- data/ext/java/nokogiri/XmlEntityReference.java +2 -2
- data/ext/java/nokogiri/XmlNamespace.java +72 -89
- data/ext/java/nokogiri/XmlNode.java +300 -401
- data/ext/java/nokogiri/XmlNodeSet.java +72 -77
- data/ext/java/nokogiri/XmlReader.java +10 -11
- data/ext/java/nokogiri/XmlSaxParserContext.java +7 -7
- data/ext/java/nokogiri/XmlSchema.java +3 -3
- data/ext/java/nokogiri/XmlText.java +12 -9
- data/ext/java/nokogiri/XmlXpathContext.java +7 -7
- data/ext/java/nokogiri/XsltStylesheet.java +7 -15
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +4 -10
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +71 -135
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +90 -58
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +5 -4
- data/ext/java/nokogiri/internals/ParserContext.java +27 -73
- data/ext/java/nokogiri/internals/ReaderNode.java +2 -4
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +17 -32
- data/ext/nokogiri/extconf.rb +50 -37
- data/ext/nokogiri/nokogiri.c +12 -6
- data/ext/nokogiri/nokogiri.h +13 -0
- data/ext/nokogiri/xml_document.c +16 -2
- data/ext/nokogiri/xml_io.c +8 -6
- data/ext/nokogiri/xml_node.c +20 -0
- data/ext/nokogiri/xml_reader.c +6 -17
- data/ext/nokogiri/xml_schema.c +29 -0
- data/ext/nokogiri/xslt_stylesheet.c +0 -4
- data/lib/nokogiri.rb +3 -20
- data/lib/nokogiri/css.rb +1 -0
- data/lib/nokogiri/css/node.rb +1 -0
- data/lib/nokogiri/css/parser.rb +61 -60
- data/lib/nokogiri/css/parser_extras.rb +39 -36
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +1 -0
- data/lib/nokogiri/css/xpath_visitor.rb +3 -1
- data/lib/nokogiri/decorators/slop.rb +1 -0
- data/lib/nokogiri/html.rb +1 -0
- data/lib/nokogiri/html/builder.rb +1 -0
- data/lib/nokogiri/html/document.rb +1 -0
- data/lib/nokogiri/html/document_fragment.rb +1 -0
- data/lib/nokogiri/html/element_description.rb +1 -0
- data/lib/nokogiri/html/element_description_defaults.rb +1 -0
- data/lib/nokogiri/html/entity_lookup.rb +1 -0
- data/lib/nokogiri/html/sax/parser.rb +1 -0
- data/lib/nokogiri/html/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html/sax/push_parser.rb +1 -0
- data/lib/nokogiri/jruby/dependencies.rb +20 -0
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version.rb +86 -45
- data/lib/nokogiri/xml.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +1 -0
- data/lib/nokogiri/xml/attribute_decl.rb +1 -0
- data/lib/nokogiri/xml/builder.rb +3 -2
- data/lib/nokogiri/xml/cdata.rb +1 -0
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +3 -8
- data/lib/nokogiri/xml/document_fragment.rb +1 -0
- data/lib/nokogiri/xml/dtd.rb +1 -0
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +1 -0
- data/lib/nokogiri/xml/node.rb +539 -224
- data/lib/nokogiri/xml/node/save_options.rb +1 -0
- data/lib/nokogiri/xml/node_set.rb +1 -0
- data/lib/nokogiri/xml/notation.rb +1 -0
- data/lib/nokogiri/xml/parse_options.rb +4 -3
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/pp/character_data.rb +1 -0
- data/lib/nokogiri/xml/pp/node.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +1 -0
- data/lib/nokogiri/xml/reader.rb +7 -3
- data/lib/nokogiri/xml/relax_ng.rb +1 -0
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +1 -0
- data/lib/nokogiri/xml/sax/parser.rb +1 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +1 -0
- data/lib/nokogiri/xml/searchable.rb +22 -15
- data/lib/nokogiri/xml/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/xpath_context.rb +1 -0
- data/lib/nokogiri/xslt.rb +1 -0
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/xsd/xmlparser/nokogiri.rb +1 -0
- metadata +53 -34
- data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +0 -107
- data/ext/java/nokogiri/internals/UncloseableInputStream.java +0 -102
@@ -46,49 +46,43 @@ import org.w3c.dom.NamedNodeMap;
|
|
46
46
|
import org.w3c.dom.Node;
|
47
47
|
|
48
48
|
/**
|
49
|
-
* Cache of
|
49
|
+
* Cache of namespaces of each node. XmlDocument has one cache of this class.
|
50
50
|
*
|
51
51
|
* @author sergio
|
52
52
|
* @author Yoko Harada <yokolet@gmail.com>
|
53
53
|
*/
|
54
54
|
public class NokogiriNamespaceCache {
|
55
55
|
|
56
|
-
private
|
57
|
-
private Map<String[], CacheEntry> cache; // pair of the index of a given key and entry
|
56
|
+
private final Map<CacheKey, CacheEntry> cache; // pair of the index of a given key and entry
|
58
57
|
private XmlNamespace defaultNamespace = null;
|
59
58
|
|
60
59
|
public NokogiriNamespaceCache() {
|
61
|
-
|
62
|
-
cache = new LinkedHashMap<String[], CacheEntry>();
|
60
|
+
this.cache = new LinkedHashMap<CacheKey, CacheEntry>(4);
|
63
61
|
}
|
64
62
|
|
65
|
-
public
|
66
|
-
|
63
|
+
public NokogiriNamespaceCache(NokogiriNamespaceCache cache) {
|
64
|
+
this.cache = new LinkedHashMap<CacheKey, CacheEntry>(cache.size() + 2);
|
65
|
+
this.cache.putAll(cache.cache);
|
67
66
|
}
|
68
67
|
|
69
|
-
|
70
|
-
|
71
|
-
if (key[0].equals(prefix) && key[1].equals(href)) return key;
|
72
|
-
}
|
73
|
-
return null;
|
68
|
+
public XmlNamespace getDefault() {
|
69
|
+
return defaultNamespace;
|
74
70
|
}
|
75
71
|
|
76
72
|
public XmlNamespace get(String prefix, String href) {
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
if (key != null) {
|
82
|
-
return cache.get(key).namespace;
|
83
|
-
}
|
84
|
-
return null;
|
73
|
+
if (href == null) return null;
|
74
|
+
|
75
|
+
CacheEntry value = cache.get(new CacheKey(prefix, href));
|
76
|
+
return value == null ? null : value.namespace;
|
85
77
|
}
|
86
78
|
|
87
79
|
public XmlNamespace get(Node node, String prefix) {
|
88
80
|
if (prefix == null) return defaultNamespace;
|
89
|
-
for (
|
90
|
-
if (
|
91
|
-
|
81
|
+
for (Map.Entry<CacheKey, CacheEntry> entry : cache.entrySet()) {
|
82
|
+
if (entry.getKey().prefix.equals(prefix)) {
|
83
|
+
if (entry.getValue().isOwner(node)) {
|
84
|
+
return entry.getValue().namespace;
|
85
|
+
}
|
92
86
|
}
|
93
87
|
}
|
94
88
|
return null;
|
@@ -100,9 +94,9 @@ public class NokogiriNamespaceCache {
|
|
100
94
|
namespaces.add(defaultNamespace);
|
101
95
|
return namespaces;
|
102
96
|
}
|
103
|
-
for (
|
104
|
-
if (
|
105
|
-
namespaces.add(
|
97
|
+
for (Map.Entry<CacheKey, CacheEntry> entry : cache.entrySet()) {
|
98
|
+
if (entry.getKey().prefix.equals(prefix)) {
|
99
|
+
namespaces.add(entry.getValue().namespace);
|
106
100
|
}
|
107
101
|
}
|
108
102
|
return namespaces;
|
@@ -110,63 +104,96 @@ public class NokogiriNamespaceCache {
|
|
110
104
|
|
111
105
|
public List<XmlNamespace> get(Node node) {
|
112
106
|
List<XmlNamespace> namespaces = new ArrayList<XmlNamespace>();
|
113
|
-
for (
|
114
|
-
|
115
|
-
|
116
|
-
namespaces.add(entry.namespace);
|
107
|
+
for (Map.Entry<CacheKey, CacheEntry> entry : cache.entrySet()) {
|
108
|
+
if (entry.getValue().isOwner(node)) {
|
109
|
+
namespaces.add(entry.getValue().namespace);
|
117
110
|
}
|
118
111
|
}
|
119
112
|
return namespaces;
|
120
113
|
}
|
121
114
|
|
122
115
|
public void put(XmlNamespace namespace, Node ownerNode) {
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
cache.put(key, entry);
|
132
|
-
if ("".equals(prefixString)) defaultNamespace = namespace;
|
116
|
+
String prefix = namespace.getPrefix();
|
117
|
+
String href = namespace.getHref();
|
118
|
+
if (href == null) return;
|
119
|
+
|
120
|
+
CacheKey key = new CacheKey(prefix, href);
|
121
|
+
if (cache.get(key) != null) return;
|
122
|
+
cache.put(key, new CacheEntry(namespace, ownerNode));
|
123
|
+
if ("".equals(prefix)) defaultNamespace = namespace;
|
133
124
|
}
|
134
125
|
|
135
|
-
public void remove(
|
136
|
-
String
|
137
|
-
|
138
|
-
|
139
|
-
|
126
|
+
public void remove(Node ownerNode) {
|
127
|
+
String prefix = ownerNode.getPrefix();
|
128
|
+
String href = ownerNode.getNamespaceURI();
|
129
|
+
if (href == null) return;
|
130
|
+
|
131
|
+
cache.remove(new CacheKey(prefix, href));
|
132
|
+
}
|
133
|
+
|
134
|
+
public int size() {
|
135
|
+
return cache.size();
|
140
136
|
}
|
141
137
|
|
142
138
|
public void clear() {
|
143
139
|
// removes namespace declarations from node
|
144
|
-
for (
|
145
|
-
CacheEntry entry = cache.get(key);
|
140
|
+
for (CacheEntry entry : cache.values()) {
|
146
141
|
NamedNodeMap attributes = entry.ownerNode.getAttributes();
|
147
142
|
for (int j=0; j<attributes.getLength(); j++) {
|
148
|
-
String name = ((Attr)attributes.item(j)).getName();
|
143
|
+
String name = ((Attr) attributes.item(j)).getName();
|
149
144
|
if (isNamespace(name)) {
|
150
145
|
attributes.removeNamedItem(name);
|
151
146
|
}
|
152
147
|
}
|
153
148
|
}
|
154
|
-
keys.clear();
|
155
149
|
cache.clear();
|
156
150
|
defaultNamespace = null;
|
157
151
|
}
|
158
152
|
|
159
153
|
public void replaceNode(Node oldNode, Node newNode) {
|
160
|
-
for (
|
161
|
-
|
162
|
-
|
163
|
-
|
154
|
+
for (Map.Entry<CacheKey, CacheEntry> entry : cache.entrySet()) {
|
155
|
+
if (entry.getValue().isOwner(oldNode)) {
|
156
|
+
entry.getValue().replaceOwner(newNode);
|
157
|
+
}
|
158
|
+
}
|
159
|
+
}
|
160
|
+
|
161
|
+
@Override
|
162
|
+
public String toString() {
|
163
|
+
return getClass().getName() + '@' + Integer.toHexString(hashCode()) + '(' + cache + "default=" + defaultNamespace + ')';
|
164
|
+
}
|
165
|
+
|
166
|
+
private static class CacheKey {
|
167
|
+
final String prefix;
|
168
|
+
final String href;
|
169
|
+
|
170
|
+
CacheKey(String prefix, String href) {
|
171
|
+
this.prefix = prefix;
|
172
|
+
this.href = href;
|
173
|
+
}
|
174
|
+
|
175
|
+
@Override
|
176
|
+
public boolean equals(final Object obj) {
|
177
|
+
if (obj instanceof CacheKey) {
|
178
|
+
CacheKey that = (CacheKey) obj;
|
179
|
+
return prefix == null ? that.prefix == null : prefix.equals(that.prefix) && href.equals(that.href);
|
164
180
|
}
|
181
|
+
return false;
|
182
|
+
}
|
183
|
+
|
184
|
+
@Override
|
185
|
+
public int hashCode() {
|
186
|
+
return (prefix == null ? 0 : prefix.hashCode()) + 37 * href.hashCode();
|
187
|
+
}
|
188
|
+
|
189
|
+
@Override
|
190
|
+
public String toString() {
|
191
|
+
return '[' + prefix + ']' + href;
|
165
192
|
}
|
166
193
|
}
|
167
194
|
|
168
|
-
private class CacheEntry {
|
169
|
-
|
195
|
+
private static class CacheEntry {
|
196
|
+
final XmlNamespace namespace;
|
170
197
|
private Node ownerNode;
|
171
198
|
|
172
199
|
CacheEntry(XmlNamespace namespace, Node ownerNode) {
|
@@ -174,12 +201,17 @@ public class NokogiriNamespaceCache {
|
|
174
201
|
this.ownerNode = ownerNode;
|
175
202
|
}
|
176
203
|
|
177
|
-
|
178
|
-
return ownerNode.isSameNode(
|
204
|
+
boolean isOwner(Node node) {
|
205
|
+
return ownerNode.isSameNode(node);
|
179
206
|
}
|
180
207
|
|
181
|
-
|
208
|
+
void replaceOwner(Node newNode) {
|
182
209
|
this.ownerNode = newNode;
|
183
210
|
}
|
211
|
+
|
212
|
+
@Override
|
213
|
+
public String toString() {
|
214
|
+
return namespace.toString();
|
215
|
+
}
|
184
216
|
}
|
185
217
|
}
|
@@ -53,6 +53,8 @@ import org.w3c.dom.NodeList;
|
|
53
53
|
import nokogiri.XmlNode;
|
54
54
|
import nokogiri.XmlNodeSet;
|
55
55
|
|
56
|
+
import static nokogiri.internals.NokogiriHelpers.nodeListToRubyArray;
|
57
|
+
|
56
58
|
/**
|
57
59
|
* Xpath function handler.
|
58
60
|
*
|
@@ -99,9 +101,8 @@ public class NokogiriXPathFunction implements XPathFunction {
|
|
99
101
|
private static IRubyObject fromObjectToRuby(final Ruby runtime, Object obj) {
|
100
102
|
// argument object type is one of NodeList, String, Boolean, or Double.
|
101
103
|
if (obj instanceof NodeList) {
|
102
|
-
|
103
|
-
|
104
|
-
return xmlNodeSet;
|
104
|
+
IRubyObject[] nodes = nodeListToRubyArray(runtime, (NodeList) obj);
|
105
|
+
return XmlNodeSet.newNodeSet(runtime, nodes);
|
105
106
|
}
|
106
107
|
return JavaUtil.convertJavaToUsableRubyObject(runtime, obj);
|
107
108
|
}
|
@@ -116,7 +117,7 @@ public class NokogiriXPathFunction implements XPathFunction {
|
|
116
117
|
}
|
117
118
|
if (obj instanceof XmlNodeSet) return obj;
|
118
119
|
if (obj instanceof RubyArray) {
|
119
|
-
return XmlNodeSet.
|
120
|
+
return XmlNodeSet.newNodeSet(runtime, ((RubyArray) obj).toJavaArray());
|
120
121
|
}
|
121
122
|
/*if (o instanceof XmlNode)*/ return ((XmlNode) obj).getNode();
|
122
123
|
}
|
@@ -33,27 +33,22 @@
|
|
33
33
|
package nokogiri.internals;
|
34
34
|
|
35
35
|
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
|
36
|
-
import static org.jruby.runtime.Helpers.invoke;
|
37
36
|
|
38
37
|
import java.io.ByteArrayInputStream;
|
39
38
|
import java.io.File;
|
40
39
|
import java.io.IOException;
|
41
40
|
import java.io.InputStream;
|
42
|
-
import java.io.StringReader;
|
43
41
|
import java.net.URI;
|
44
|
-
import java.nio.charset.Charset;
|
45
|
-
import java.nio.charset.UnsupportedCharsetException;
|
46
42
|
import java.util.concurrent.Callable;
|
47
43
|
|
48
44
|
import org.jruby.Ruby;
|
49
45
|
import org.jruby.RubyClass;
|
50
|
-
import org.jruby.RubyIO;
|
51
46
|
import org.jruby.RubyObject;
|
52
47
|
import org.jruby.RubyString;
|
53
48
|
import org.jruby.runtime.ThreadContext;
|
54
49
|
import org.jruby.runtime.builtin.IRubyObject;
|
55
50
|
import org.jruby.util.ByteList;
|
56
|
-
import org.jruby.util.
|
51
|
+
import org.jruby.util.IOInputStream;
|
57
52
|
import org.xml.sax.InputSource;
|
58
53
|
|
59
54
|
/**
|
@@ -67,6 +62,7 @@ public abstract class ParserContext extends RubyObject {
|
|
67
62
|
protected InputSource source = null;
|
68
63
|
protected IRubyObject detected_encoding = null;
|
69
64
|
protected int stringDataSize = -1;
|
65
|
+
protected String java_encoding;
|
70
66
|
|
71
67
|
public ParserContext(Ruby runtime) {
|
72
68
|
// default to class 'Object' because this class isn't exposed to Ruby
|
@@ -81,68 +77,42 @@ public abstract class ParserContext extends RubyObject {
|
|
81
77
|
return source;
|
82
78
|
}
|
83
79
|
|
84
|
-
|
85
|
-
* Set the InputSource from <code>url</code> or <code>data</code>,
|
86
|
-
* which may be an IO object, a String, or a StringIO.
|
87
|
-
*/
|
88
|
-
public void setInputSource(ThreadContext context, IRubyObject data, IRubyObject url) {
|
80
|
+
public void setIOInputSource(ThreadContext context, IRubyObject data, IRubyObject url) {
|
89
81
|
source = new InputSource();
|
90
|
-
|
91
|
-
Ruby ruby = context.getRuntime();
|
92
|
-
|
93
82
|
ParserContext.setUrl(context, source, url);
|
94
83
|
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
RubyString stringData = null;
|
101
|
-
if (invoke(context, data, "respond_to?", ruby.newSymbol("to_io")).isTrue()) {
|
102
|
-
RubyIO io =
|
103
|
-
(RubyIO) TypeConverter.convertToType(data,
|
104
|
-
ruby.getIO(),
|
105
|
-
"to_io");
|
106
|
-
// use unclosedable input stream to fix #495
|
107
|
-
source.setByteStream(new UncloseableInputStream(io.getInStream()));
|
108
|
-
|
109
|
-
} else if (invoke(context, data, "respond_to?", ruby.newSymbol("read")).isTrue()) {
|
110
|
-
stringData = invoke(context, data, "read").convertToString();
|
84
|
+
source.setByteStream(new IOInputStream(data));
|
85
|
+
if (java_encoding != null) {
|
86
|
+
source.setEncoding(java_encoding);
|
87
|
+
}
|
88
|
+
}
|
111
89
|
|
112
|
-
|
113
|
-
|
90
|
+
public void setStringInputSource(ThreadContext context, IRubyObject data, IRubyObject url) {
|
91
|
+
source = new InputSource();
|
92
|
+
ParserContext.setUrl(context, source, url);
|
114
93
|
|
115
|
-
|
116
|
-
stringData = (RubyString) data;
|
94
|
+
Ruby ruby = context.getRuntime();
|
117
95
|
|
118
|
-
|
119
|
-
throw ruby.newArgumentError("must be kind_of String
|
96
|
+
if (!(data instanceof RubyString)) {
|
97
|
+
throw ruby.newArgumentError("must be kind_of String");
|
120
98
|
}
|
121
99
|
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
Charset charset = null;
|
100
|
+
RubyString stringData = (RubyString) data;
|
101
|
+
|
102
|
+
if (stringData.encoding(context) != null) {
|
103
|
+
RubyString stringEncoding = stringData.encoding(context).asString();
|
104
|
+
String encName = NokogiriHelpers.getValidEncodingOrNull(stringEncoding);
|
128
105
|
if (encName != null) {
|
129
|
-
|
130
|
-
charset = Charset.forName(encName);
|
131
|
-
} catch (UnsupportedCharsetException e) {
|
132
|
-
// do nothing;
|
133
|
-
}
|
134
|
-
}
|
135
|
-
ByteList bytes = stringData.getByteList();
|
136
|
-
if (charset != null) {
|
137
|
-
StringReader reader = new StringReader(new String(bytes.unsafeBytes(), bytes.begin(), bytes.length(), charset));
|
138
|
-
source.setCharacterStream(reader);
|
139
|
-
source.setEncoding(charset.name());
|
140
|
-
} else {
|
141
|
-
stringDataSize = bytes.length() - bytes.begin();
|
142
|
-
ByteArrayInputStream stream = new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length());
|
143
|
-
source.setByteStream(stream);
|
106
|
+
java_encoding = encName;
|
144
107
|
}
|
145
108
|
}
|
109
|
+
|
110
|
+
ByteList bytes = stringData.getByteList();
|
111
|
+
|
112
|
+
stringDataSize = bytes.length() - bytes.begin();
|
113
|
+
ByteArrayInputStream stream = new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length());
|
114
|
+
source.setByteStream(stream);
|
115
|
+
source.setEncoding(java_encoding);
|
146
116
|
}
|
147
117
|
|
148
118
|
public static void setUrl(ThreadContext context, InputSource source, IRubyObject url) {
|
@@ -171,22 +141,6 @@ public abstract class ParserContext extends RubyObject {
|
|
171
141
|
}
|
172
142
|
}
|
173
143
|
|
174
|
-
private boolean setEncoding(ThreadContext context, IRubyObject data) {
|
175
|
-
if (data.getType().respondsTo("detect_encoding")) {
|
176
|
-
// in case of EncodingReader is used
|
177
|
-
// since EncodingReader won't respond to :to_io
|
178
|
-
NokogiriEncodingReaderWrapper reader = new NokogiriEncodingReaderWrapper(context, (RubyObject) data);
|
179
|
-
source.setByteStream(reader);
|
180
|
-
// data is EnocodingReader
|
181
|
-
if(reader.detectEncoding()) {
|
182
|
-
detected_encoding = reader.getEncoding();
|
183
|
-
source.setEncoding(detected_encoding.asJavaString());
|
184
|
-
}
|
185
|
-
return true;
|
186
|
-
}
|
187
|
-
return false;
|
188
|
-
}
|
189
|
-
|
190
144
|
protected void setEncoding(String encoding) {
|
191
145
|
source.setEncoding(encoding);
|
192
146
|
}
|
@@ -113,15 +113,13 @@ public abstract class ReaderNode {
|
|
113
113
|
RubyArray array = RubyArray.newArray(ruby);
|
114
114
|
if (attributeList != null && attributeList.length > 0) {
|
115
115
|
if (document == null) {
|
116
|
-
|
117
|
-
document = doc.getDocument();
|
116
|
+
document = XmlDocument.createNewDocument(ruby);
|
118
117
|
}
|
119
118
|
for (int i=0; i<attributeList.length; i++) {
|
120
119
|
if (!isNamespace(attributeList.names.get(i))) {
|
121
120
|
Attr attr = document.createAttributeNS(attributeList.namespaces.get(i), attributeList.names.get(i));
|
122
121
|
attr.setValue(attributeList.values.get(i));
|
123
|
-
XmlAttr xmlAttr =
|
124
|
-
xmlAttr.setNode(ruby.getCurrentContext(), attr);
|
122
|
+
XmlAttr xmlAttr = new XmlAttr(ruby, attr);
|
125
123
|
array.append(xmlAttr);
|
126
124
|
}
|
127
125
|
}
|
@@ -39,11 +39,6 @@ import java.io.IOException;
|
|
39
39
|
import java.util.ArrayList;
|
40
40
|
import java.util.List;
|
41
41
|
|
42
|
-
import nokogiri.NokogiriService;
|
43
|
-
import nokogiri.XmlDocument;
|
44
|
-
import nokogiri.XmlDtd;
|
45
|
-
import nokogiri.XmlSyntaxError;
|
46
|
-
|
47
42
|
import org.apache.xerces.parsers.DOMParser;
|
48
43
|
import org.jruby.Ruby;
|
49
44
|
import org.jruby.RubyArray;
|
@@ -57,6 +52,11 @@ import org.w3c.dom.Node;
|
|
57
52
|
import org.w3c.dom.NodeList;
|
58
53
|
import org.xml.sax.SAXException;
|
59
54
|
|
55
|
+
import nokogiri.NokogiriService;
|
56
|
+
import nokogiri.XmlDocument;
|
57
|
+
import nokogiri.XmlDtd;
|
58
|
+
import nokogiri.XmlSyntaxError;
|
59
|
+
|
60
60
|
/**
|
61
61
|
* Parser class for XML DOM processing. This class actually parses XML document
|
62
62
|
* and creates DOM tree in Java side. However, DOM tree in Ruby side is not since
|
@@ -84,7 +84,6 @@ public class XmlDomParserContext extends ParserContext {
|
|
84
84
|
protected ParserContext.Options options;
|
85
85
|
protected DOMParser parser;
|
86
86
|
protected NokogiriErrorHandler errorHandler;
|
87
|
-
protected String java_encoding;
|
88
87
|
protected IRubyObject ruby_encoding;
|
89
88
|
|
90
89
|
public XmlDomParserContext(Ruby runtime, IRubyObject options) {
|
@@ -94,7 +93,7 @@ public class XmlDomParserContext extends ParserContext {
|
|
94
93
|
public XmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options) {
|
95
94
|
super(runtime);
|
96
95
|
this.options = new ParserContext.Options(RubyFixnum.fix2long(options));
|
97
|
-
java_encoding = NokogiriHelpers.getValidEncoding(
|
96
|
+
java_encoding = NokogiriHelpers.getValidEncoding(encoding);
|
98
97
|
ruby_encoding = encoding;
|
99
98
|
initErrorHandler();
|
100
99
|
initParser(runtime);
|
@@ -201,37 +200,25 @@ public class XmlDomParserContext extends ParserContext {
|
|
201
200
|
}
|
202
201
|
}
|
203
202
|
|
204
|
-
private XmlDocument getInterruptedOrNewXmlDocument(ThreadContext context, RubyClass
|
203
|
+
private XmlDocument getInterruptedOrNewXmlDocument(ThreadContext context, RubyClass klass) {
|
205
204
|
Document document = parser.getDocument();
|
206
|
-
XmlDocument xmlDocument =
|
207
|
-
if (document != null) {
|
208
|
-
xmlDocument.setDocumentNode(context, document);
|
209
|
-
}
|
205
|
+
XmlDocument xmlDocument = new XmlDocument(context.runtime, klass, document);
|
210
206
|
xmlDocument.setEncoding(ruby_encoding);
|
211
207
|
return xmlDocument;
|
212
208
|
}
|
213
209
|
|
214
|
-
protected XmlDocument getNewEmptyDocument(ThreadContext context) {
|
215
|
-
IRubyObject[] args = new IRubyObject[0];
|
216
|
-
return (XmlDocument) XmlDocument.rbNew(context, getNokogiriClass(context.getRuntime(), "Nokogiri::XML::Document"), args);
|
217
|
-
}
|
218
|
-
|
219
210
|
/**
|
220
211
|
* This method is broken out so that HtmlDomParserContext can
|
221
212
|
* override it.
|
222
213
|
*/
|
223
|
-
protected XmlDocument wrapDocument(ThreadContext context,
|
224
|
-
|
225
|
-
Document doc) {
|
226
|
-
XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), klazz);
|
227
|
-
xmlDocument.setDocumentNode(context, doc);
|
214
|
+
protected XmlDocument wrapDocument(ThreadContext context, RubyClass klass, Document doc) {
|
215
|
+
XmlDocument xmlDocument = new XmlDocument(context.runtime, klass, doc);
|
228
216
|
xmlDocument.setEncoding(ruby_encoding);
|
229
217
|
|
230
218
|
if (options.dtdLoad) {
|
231
|
-
IRubyObject
|
232
|
-
if (!
|
233
|
-
|
234
|
-
doc.setUserData(XmlDocument.DTD_EXTERNAL_SUBSET, xmlDtd, null);
|
219
|
+
IRubyObject dtd = XmlDtd.newFromExternalSubset(context.runtime, doc);
|
220
|
+
if (!dtd.isNil()) {
|
221
|
+
doc.setUserData(XmlDocument.DTD_EXTERNAL_SUBSET, (XmlDtd) dtd, null);
|
235
222
|
}
|
236
223
|
}
|
237
224
|
return xmlDocument;
|
@@ -240,20 +227,18 @@ public class XmlDomParserContext extends ParserContext {
|
|
240
227
|
/**
|
241
228
|
* Must call setInputSource() before this method.
|
242
229
|
*/
|
243
|
-
public XmlDocument parse(ThreadContext context,
|
244
|
-
IRubyObject klazz,
|
245
|
-
IRubyObject url) {
|
230
|
+
public XmlDocument parse(ThreadContext context, RubyClass klass, IRubyObject url) {
|
246
231
|
XmlDocument xmlDoc;
|
247
232
|
try {
|
248
233
|
Document doc = do_parse();
|
249
|
-
xmlDoc = wrapDocument(context,
|
234
|
+
xmlDoc = wrapDocument(context, klass, doc);
|
250
235
|
xmlDoc.setUrl(url);
|
251
236
|
addErrorsIfNecessary(context, xmlDoc);
|
252
237
|
return xmlDoc;
|
253
238
|
} catch (SAXException e) {
|
254
|
-
return getDocumentWithErrorsOrRaiseException(context,
|
239
|
+
return getDocumentWithErrorsOrRaiseException(context, klass, e);
|
255
240
|
} catch (IOException e) {
|
256
|
-
return getDocumentWithErrorsOrRaiseException(context,
|
241
|
+
return getDocumentWithErrorsOrRaiseException(context, klass, e);
|
257
242
|
}
|
258
243
|
}
|
259
244
|
|