nokogiri 1.10.10-java → 1.11.0-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/LICENSE-DEPENDENCIES.md +1015 -947
- data/README.md +165 -91
- data/ext/java/nokogiri/HtmlDocument.java +34 -46
- data/ext/java/nokogiri/HtmlSaxParserContext.java +88 -58
- data/ext/java/nokogiri/HtmlSaxPushParser.java +1 -1
- data/ext/java/nokogiri/NokogiriService.java +1 -1
- data/ext/java/nokogiri/XmlAttr.java +13 -20
- data/ext/java/nokogiri/XmlAttributeDecl.java +11 -12
- data/ext/java/nokogiri/XmlCdata.java +3 -4
- data/ext/java/nokogiri/XmlComment.java +1 -1
- data/ext/java/nokogiri/XmlDocument.java +148 -175
- data/ext/java/nokogiri/XmlDocumentFragment.java +13 -31
- data/ext/java/nokogiri/XmlDtd.java +5 -8
- data/ext/java/nokogiri/XmlElement.java +1 -20
- data/ext/java/nokogiri/XmlElementDecl.java +23 -28
- data/ext/java/nokogiri/XmlEntityDecl.java +23 -27
- data/ext/java/nokogiri/XmlEntityReference.java +2 -2
- data/ext/java/nokogiri/XmlNamespace.java +72 -89
- data/ext/java/nokogiri/XmlNode.java +303 -406
- data/ext/java/nokogiri/XmlNodeSet.java +70 -76
- data/ext/java/nokogiri/XmlReader.java +12 -13
- data/ext/java/nokogiri/XmlRelaxng.java +10 -3
- data/ext/java/nokogiri/XmlSaxParserContext.java +15 -10
- data/ext/java/nokogiri/XmlSchema.java +87 -27
- data/ext/java/nokogiri/XmlSyntaxError.java +2 -6
- data/ext/java/nokogiri/XmlText.java +12 -9
- data/ext/java/nokogiri/XmlXpathContext.java +55 -25
- data/ext/java/nokogiri/XsltStylesheet.java +7 -15
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +52 -46
- data/ext/java/nokogiri/internals/NokogiriHandler.java +1 -1
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +71 -135
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +90 -58
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +9 -2
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +67 -10
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +4 -2
- data/ext/java/nokogiri/internals/ParserContext.java +27 -73
- data/ext/java/nokogiri/internals/ReaderNode.java +2 -4
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +18 -33
- data/ext/nokogiri/depend +476 -357
- data/ext/nokogiri/extconf.rb +507 -357
- data/ext/nokogiri/html_document.c +79 -78
- data/ext/nokogiri/html_sax_parser_context.c +2 -2
- data/ext/nokogiri/nokogiri.c +34 -40
- data/ext/nokogiri/xml_document.c +18 -4
- data/ext/nokogiri/xml_io.c +8 -6
- data/ext/nokogiri/xml_node.c +21 -1
- data/ext/nokogiri/xml_node_set.c +1 -1
- data/ext/nokogiri/xml_reader.c +6 -17
- data/ext/nokogiri/xml_relax_ng.c +29 -11
- data/ext/nokogiri/xml_sax_parser.c +2 -7
- data/ext/nokogiri/xml_sax_parser_context.c +2 -2
- data/ext/nokogiri/xml_schema.c +55 -13
- data/ext/nokogiri/xml_xpath_context.c +80 -4
- data/ext/nokogiri/xslt_stylesheet.c +1 -8
- data/lib/nokogiri.rb +22 -22
- data/lib/nokogiri/css.rb +1 -0
- data/lib/nokogiri/css/node.rb +1 -0
- data/lib/nokogiri/css/parser.rb +63 -62
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +39 -36
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +1 -0
- data/lib/nokogiri/css/xpath_visitor.rb +73 -43
- data/lib/nokogiri/decorators/slop.rb +1 -0
- data/lib/nokogiri/html.rb +1 -0
- data/lib/nokogiri/html/builder.rb +1 -0
- data/lib/nokogiri/html/document.rb +13 -26
- data/lib/nokogiri/html/document_fragment.rb +1 -0
- data/lib/nokogiri/html/element_description.rb +1 -0
- data/lib/nokogiri/html/element_description_defaults.rb +1 -0
- data/lib/nokogiri/html/entity_lookup.rb +1 -0
- data/lib/nokogiri/html/sax/parser.rb +1 -0
- data/lib/nokogiri/html/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html/sax/push_parser.rb +1 -0
- data/lib/nokogiri/jruby/dependencies.rb +20 -0
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version.rb +3 -109
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +182 -0
- data/lib/nokogiri/xml.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +1 -0
- data/lib/nokogiri/xml/attribute_decl.rb +1 -0
- data/lib/nokogiri/xml/builder.rb +3 -2
- data/lib/nokogiri/xml/cdata.rb +1 -0
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +20 -15
- data/lib/nokogiri/xml/document_fragment.rb +5 -6
- data/lib/nokogiri/xml/dtd.rb +1 -0
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +1 -0
- data/lib/nokogiri/xml/node.rb +587 -249
- data/lib/nokogiri/xml/node/save_options.rb +1 -0
- data/lib/nokogiri/xml/node_set.rb +1 -0
- data/lib/nokogiri/xml/notation.rb +1 -0
- data/lib/nokogiri/xml/parse_options.rb +10 -3
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/pp/character_data.rb +1 -0
- data/lib/nokogiri/xml/pp/node.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +1 -0
- data/lib/nokogiri/xml/reader.rb +7 -3
- data/lib/nokogiri/xml/relax_ng.rb +7 -2
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +1 -0
- data/lib/nokogiri/xml/sax/parser.rb +1 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +13 -4
- data/lib/nokogiri/xml/searchable.rb +25 -16
- data/lib/nokogiri/xml/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/xpath_context.rb +1 -0
- data/lib/nokogiri/xslt.rb +1 -0
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/xsd/xmlparser/nokogiri.rb +1 -0
- metadata +86 -159
- data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +0 -107
- data/ext/java/nokogiri/internals/UncloseableInputStream.java +0 -102
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/nokogiri.h +0 -121
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -32,28 +32,29 @@
|
|
32
32
|
|
33
33
|
package nokogiri;
|
34
34
|
|
35
|
-
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
|
36
|
-
|
37
35
|
import java.io.ByteArrayInputStream;
|
38
36
|
import java.io.InputStream;
|
39
37
|
import java.nio.charset.Charset;
|
40
38
|
import java.nio.charset.IllegalCharsetNameException;
|
41
39
|
import java.nio.charset.UnsupportedCharsetException;
|
42
|
-
import java.util.EnumSet;
|
43
40
|
import java.util.regex.Matcher;
|
44
41
|
import java.util.regex.Pattern;
|
45
42
|
|
46
|
-
import nokogiri.internals.NokogiriHandler;
|
47
|
-
|
48
43
|
import org.apache.xerces.parsers.AbstractSAXParser;
|
49
44
|
import org.cyberneko.html.parsers.SAXParser;
|
50
|
-
import org.jruby
|
45
|
+
import org.jruby.Ruby;
|
46
|
+
import org.jruby.RubyClass;
|
47
|
+
import org.jruby.RubyFixnum;
|
48
|
+
import org.jruby.RubyString;
|
51
49
|
import org.jruby.anno.JRubyClass;
|
52
50
|
import org.jruby.anno.JRubyMethod;
|
53
51
|
import org.jruby.runtime.ThreadContext;
|
54
52
|
import org.jruby.runtime.builtin.IRubyObject;
|
55
53
|
import org.xml.sax.SAXException;
|
56
54
|
|
55
|
+
import nokogiri.internals.NokogiriHandler;
|
56
|
+
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
|
57
|
+
|
57
58
|
/**
|
58
59
|
* Class for Nokogiri::HTML::SAX::ParserContext.
|
59
60
|
*
|
@@ -65,10 +66,16 @@ import org.xml.sax.SAXException;
|
|
65
66
|
@JRubyClass(name="Nokogiri::HTML::SAX::ParserContext", parent="Nokogiri::XML::SAX::ParserContext")
|
66
67
|
public class HtmlSaxParserContext extends XmlSaxParserContext {
|
67
68
|
|
69
|
+
static HtmlSaxParserContext newInstance(final Ruby runtime, final RubyClass klazz) {
|
70
|
+
HtmlSaxParserContext instance = new HtmlSaxParserContext(runtime, klazz);
|
71
|
+
instance.initialize(runtime);
|
72
|
+
return instance;
|
73
|
+
}
|
74
|
+
|
68
75
|
public HtmlSaxParserContext(Ruby ruby, RubyClass rubyClass) {
|
69
76
|
super(ruby, rubyClass);
|
70
77
|
}
|
71
|
-
|
78
|
+
|
72
79
|
@Override
|
73
80
|
protected AbstractSAXParser createParser() throws SAXException {
|
74
81
|
SAXParser parser = new SAXParser();
|
@@ -78,6 +85,11 @@ public class HtmlSaxParserContext extends XmlSaxParserContext {
|
|
78
85
|
"http://cyberneko.org/html/properties/names/elems", "lower");
|
79
86
|
parser.setProperty(
|
80
87
|
"http://cyberneko.org/html/properties/names/attrs", "lower");
|
88
|
+
|
89
|
+
// NekoHTML should not try to guess the encoding based on the meta
|
90
|
+
// tags or other information in the document. This is already
|
91
|
+
// handled by the EncodingReader.
|
92
|
+
parser.setFeature("http://cyberneko.org/html/features/scanner/ignore-specified-charset", true);
|
81
93
|
return parser;
|
82
94
|
} catch(SAXException ex) {
|
83
95
|
throw new SAXException(
|
@@ -90,18 +102,17 @@ public class HtmlSaxParserContext extends XmlSaxParserContext {
|
|
90
102
|
IRubyObject klazz,
|
91
103
|
IRubyObject data,
|
92
104
|
IRubyObject encoding) {
|
93
|
-
HtmlSaxParserContext ctx =
|
94
|
-
|
95
|
-
String javaEncoding = findEncoding(context, encoding);
|
105
|
+
HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klazz);
|
106
|
+
String javaEncoding = findEncodingName(context, encoding);
|
96
107
|
if (javaEncoding != null) {
|
97
|
-
|
98
|
-
ByteArrayInputStream istream = new ByteArrayInputStream(input.getBytes());
|
108
|
+
CharSequence input = applyEncoding(rubyStringToString(data.convertToString()), javaEncoding);
|
109
|
+
ByteArrayInputStream istream = new ByteArrayInputStream(input.toString().getBytes());
|
99
110
|
ctx.setInputSource(istream);
|
100
111
|
ctx.getInputSource().setEncoding(javaEncoding);
|
101
112
|
}
|
102
113
|
return ctx;
|
103
114
|
}
|
104
|
-
|
115
|
+
|
105
116
|
public enum EncodingType {
|
106
117
|
NONE(0, "NONE"),
|
107
118
|
UTF_8(1, "UTF-8"),
|
@@ -142,23 +153,38 @@ public class HtmlSaxParserContext extends XmlSaxParserContext {
|
|
142
153
|
public String toString() {
|
143
154
|
return name;
|
144
155
|
}
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
156
|
+
|
157
|
+
private static transient EncodingType[] values;
|
158
|
+
|
159
|
+
// NOTE: assuming ordinal == value
|
160
|
+
static EncodingType get(final int ordinal) {
|
161
|
+
EncodingType[] values = EncodingType.values;
|
162
|
+
if (values == null) {
|
163
|
+
values = EncodingType.values();
|
164
|
+
EncodingType.values = values;
|
165
|
+
}
|
166
|
+
if (ordinal >= 0 && ordinal < values.length) {
|
167
|
+
return values[ordinal];
|
168
|
+
}
|
169
|
+
return null;
|
150
170
|
}
|
151
|
-
|
171
|
+
|
152
172
|
}
|
153
|
-
|
154
|
-
private static String
|
173
|
+
|
174
|
+
private static String findEncodingName(final int value) {
|
175
|
+
EncodingType type = EncodingType.get(value);
|
176
|
+
if (type == null) return null;
|
177
|
+
assert type.value == value;
|
178
|
+
return type.name;
|
179
|
+
}
|
180
|
+
|
181
|
+
private static String findEncodingName(ThreadContext context, IRubyObject encoding) {
|
155
182
|
String rubyEncoding = null;
|
156
183
|
if (encoding instanceof RubyString) {
|
157
|
-
rubyEncoding = rubyStringToString(encoding);
|
184
|
+
rubyEncoding = rubyStringToString((RubyString) encoding);
|
158
185
|
}
|
159
186
|
else if (encoding instanceof RubyFixnum) {
|
160
|
-
|
161
|
-
rubyEncoding = findName(value);
|
187
|
+
rubyEncoding = findEncodingName(RubyFixnum.fix2int((RubyFixnum) encoding));
|
162
188
|
}
|
163
189
|
if (rubyEncoding == null) return null;
|
164
190
|
try {
|
@@ -168,39 +194,56 @@ public class HtmlSaxParserContext extends XmlSaxParserContext {
|
|
168
194
|
throw context.getRuntime().newEncodingCompatibilityError(rubyEncoding + "is not supported");
|
169
195
|
}
|
170
196
|
catch (IllegalCharsetNameException e) {
|
171
|
-
throw context.getRuntime().
|
197
|
+
throw context.getRuntime().newEncodingError(e.getMessage());
|
172
198
|
}
|
173
199
|
}
|
174
200
|
|
175
|
-
private static final Pattern CHARSET_PATTERN = Pattern.compile("charset(()|\\s)=(()|\\s)([a-z]|-|_|\\d)+");
|
201
|
+
private static final Pattern CHARSET_PATTERN = Pattern.compile("charset(()|\\s)=(()|\\s)([a-z]|-|_|\\d)+", Pattern.CASE_INSENSITIVE);
|
176
202
|
|
177
|
-
private static
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
if (input.contains("meta") && input.contains("charset")) {
|
182
|
-
Matcher m = CHARSET_PATTERN.matcher(str);
|
203
|
+
private static CharSequence applyEncoding(final String input, final String enc) {
|
204
|
+
int start_pos = 0; int end_pos = 0;
|
205
|
+
if (containsIgnoreCase(input, "charset")) {
|
206
|
+
Matcher m = CHARSET_PATTERN.matcher(input);
|
183
207
|
while (m.find()) {
|
184
208
|
start_pos = m.start();
|
185
209
|
end_pos = m.end();
|
186
210
|
}
|
187
211
|
}
|
188
212
|
if (start_pos != end_pos) {
|
189
|
-
|
190
|
-
input = input.replace(substr, "charset=" + enc);
|
213
|
+
return new StringBuilder(input).replace(start_pos, end_pos, "charset=" + enc);
|
191
214
|
}
|
192
215
|
return input;
|
193
216
|
}
|
194
217
|
|
218
|
+
private static boolean containsIgnoreCase(final String str, final String sub) {
|
219
|
+
final int len = sub.length();
|
220
|
+
final int max = str.length() - len;
|
221
|
+
|
222
|
+
if (len == 0) return true;
|
223
|
+
final char c0Lower = Character.toLowerCase(sub.charAt(0));
|
224
|
+
final char c0Upper = Character.toUpperCase(sub.charAt(0));
|
225
|
+
|
226
|
+
for (int i = 0; i <= max; i++) {
|
227
|
+
final char ch = str.charAt(i);
|
228
|
+
if (ch != c0Lower && Character.toLowerCase(ch) != c0Lower && Character.toUpperCase(ch) != c0Upper) {
|
229
|
+
continue; // first char doesn't match
|
230
|
+
}
|
231
|
+
|
232
|
+
if (str.regionMatches(true, i + 1, sub, 0 + 1, len - 1)) {
|
233
|
+
return true;
|
234
|
+
}
|
235
|
+
}
|
236
|
+
return false;
|
237
|
+
}
|
238
|
+
|
195
239
|
@JRubyMethod(name="file", meta=true)
|
196
240
|
public static IRubyObject parse_file(ThreadContext context,
|
197
|
-
IRubyObject
|
241
|
+
IRubyObject klass,
|
198
242
|
IRubyObject data,
|
199
243
|
IRubyObject encoding) {
|
200
|
-
HtmlSaxParserContext ctx =
|
201
|
-
ctx.initialize(context.getRuntime());
|
244
|
+
HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
|
202
245
|
ctx.setInputSourceFile(context, data);
|
203
|
-
String javaEncoding =
|
246
|
+
String javaEncoding = findEncodingName(context, encoding);
|
204
247
|
if (javaEncoding != null) {
|
205
248
|
ctx.getInputSource().setEncoding(javaEncoding);
|
206
249
|
}
|
@@ -209,13 +252,12 @@ public class HtmlSaxParserContext extends XmlSaxParserContext {
|
|
209
252
|
|
210
253
|
@JRubyMethod(name="io", meta=true)
|
211
254
|
public static IRubyObject parse_io(ThreadContext context,
|
212
|
-
IRubyObject
|
255
|
+
IRubyObject klass,
|
213
256
|
IRubyObject data,
|
214
257
|
IRubyObject encoding) {
|
215
|
-
HtmlSaxParserContext ctx =
|
216
|
-
ctx.
|
217
|
-
|
218
|
-
String javaEncoding = findEncoding(context, encoding);
|
258
|
+
HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
|
259
|
+
ctx.setIOInputSource(context, data, context.nil);
|
260
|
+
String javaEncoding = findEncodingName(context, encoding);
|
219
261
|
if (javaEncoding != null) {
|
220
262
|
ctx.getInputSource().setEncoding(javaEncoding);
|
221
263
|
}
|
@@ -226,27 +268,15 @@ public class HtmlSaxParserContext extends XmlSaxParserContext {
|
|
226
268
|
* Create a new parser context that will read from a raw input stream.
|
227
269
|
* Meant to be run in a separate thread by HtmlSaxPushParser.
|
228
270
|
*/
|
229
|
-
static HtmlSaxParserContext parse_stream(final Ruby runtime, RubyClass
|
230
|
-
HtmlSaxParserContext ctx =
|
231
|
-
ctx.initialize(runtime);
|
271
|
+
static HtmlSaxParserContext parse_stream(final Ruby runtime, RubyClass klass, InputStream stream) {
|
272
|
+
HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(runtime, klass);
|
232
273
|
ctx.setInputSource(stream);
|
233
274
|
return ctx;
|
234
275
|
}
|
235
276
|
|
236
277
|
@Override
|
237
278
|
protected void preParse(final Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler) {
|
238
|
-
//
|
239
|
-
// final String docFrag =
|
240
|
-
// "http://cyberneko.org/html/features/balance-tags/document-fragment";
|
241
|
-
// RubyObjectAdapter adapter = JavaEmbedUtils.newObjectAdapter();
|
242
|
-
// IRubyObject doc = adapter.getInstanceVariable(handlerRuby, "@document");
|
243
|
-
// RubyModule mod = runtime.getClassFromPath(path);
|
244
|
-
// try {
|
245
|
-
// if (doc != null && !doc.isNil() && adapter.isKindOf(doc, mod))
|
246
|
-
// parser.setFeature(docFrag, true);
|
247
|
-
// } catch (Exception e) {
|
248
|
-
// // ignore
|
249
|
-
// }
|
279
|
+
// this function is meant to be empty. It overrides the one in XmlSaxParserContext
|
250
280
|
}
|
251
281
|
|
252
282
|
}
|
@@ -129,7 +129,7 @@ public class HtmlSaxPushParser extends RubyObject {
|
|
129
129
|
final ByteArrayInputStream data = NokogiriHelpers.stringBytesToStream(chunk);
|
130
130
|
if (data == null) {
|
131
131
|
terminateTask(context.runtime);
|
132
|
-
throw
|
132
|
+
throw XmlSyntaxError.createHTMLSyntaxError(context.runtime).toThrowable(); // Nokogiri::HTML::SyntaxError
|
133
133
|
}
|
134
134
|
|
135
135
|
int errorCount0 = parserTask.getErrorCount();
|
@@ -258,7 +258,7 @@ public class NokogiriService implements BasicLibraryService {
|
|
258
258
|
}
|
259
259
|
};
|
260
260
|
|
261
|
-
|
261
|
+
private static final ObjectAllocator HTML_SAXPARSER_CONTEXT_ALLOCATOR = new ObjectAllocator() {
|
262
262
|
private HtmlSaxParserContext htmlSaxParserContext = null;
|
263
263
|
public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
|
264
264
|
if (htmlSaxParserContext == null) htmlSaxParserContext = new HtmlSaxParserContext(runtime, klazz);
|
@@ -79,22 +79,20 @@ public class XmlAttr extends XmlNode {
|
|
79
79
|
@Override
|
80
80
|
protected void init(ThreadContext context, IRubyObject[] args) {
|
81
81
|
if (args.length < 2) {
|
82
|
-
throw
|
82
|
+
throw context.runtime.newArgumentError(args.length, 2);
|
83
83
|
}
|
84
84
|
|
85
85
|
IRubyObject doc = args[0];
|
86
86
|
IRubyObject content = args[1];
|
87
87
|
|
88
|
-
if(!(doc instanceof XmlDocument)) {
|
89
|
-
|
90
|
-
"document must be an instance of Nokogiri::XML::Document";
|
91
|
-
throw getRuntime().newArgumentError(msg);
|
88
|
+
if (!(doc instanceof XmlDocument)) {
|
89
|
+
throw context.runtime.newArgumentError("document must be an instance of Nokogiri::XML::Document");
|
92
90
|
}
|
93
91
|
|
94
92
|
XmlDocument xmlDoc = (XmlDocument)doc;
|
95
93
|
String str = rubyStringToString(content);
|
96
94
|
Node attr = xmlDoc.getDocument().createAttribute(str);
|
97
|
-
setNode(context, attr);
|
95
|
+
setNode(context.runtime, attr);
|
98
96
|
}
|
99
97
|
|
100
98
|
|
@@ -103,20 +101,10 @@ public class XmlAttr extends XmlNode {
|
|
103
101
|
// the default namespace should be registered for this attribute
|
104
102
|
void setNamespaceIfNecessary(Ruby runtime) {
|
105
103
|
if ("xml".equals(node.getPrefix())) {
|
106
|
-
|
104
|
+
XmlNamespace.createDefaultNamespace(runtime, node);
|
107
105
|
}
|
108
106
|
}
|
109
107
|
|
110
|
-
private boolean isHtmlBooleanAttr() {
|
111
|
-
String name = node.getNodeName().toLowerCase();
|
112
|
-
|
113
|
-
for(String s : HTML_BOOLEAN_ATTRS) {
|
114
|
-
if(s.equals(name)) return true;
|
115
|
-
}
|
116
|
-
|
117
|
-
return false;
|
118
|
-
}
|
119
|
-
|
120
108
|
@Override
|
121
109
|
@JRubyMethod(name = {"content", "value", "to_s"})
|
122
110
|
public IRubyObject content(ThreadContext context) {
|
@@ -140,11 +128,16 @@ public class XmlAttr extends XmlNode {
|
|
140
128
|
@Override
|
141
129
|
protected IRubyObject getNodeName(ThreadContext context) {
|
142
130
|
if (name != null) return name;
|
143
|
-
|
144
|
-
|
131
|
+
|
132
|
+
String attrName = ((Attr) node).getName();
|
133
|
+
if (attrName == null) return context.nil;
|
134
|
+
|
135
|
+
if (node.getNamespaceURI() != null && !(document(context.runtime) instanceof HtmlDocument)) {
|
145
136
|
attrName = NokogiriHelpers.getLocalPart(attrName);
|
137
|
+
if (attrName == null) return context.nil;
|
146
138
|
}
|
147
|
-
|
139
|
+
|
140
|
+
return name = RubyString.newString(context.runtime, attrName);
|
148
141
|
}
|
149
142
|
|
150
143
|
@Override
|
@@ -67,12 +67,11 @@ public class XmlAttributeDecl extends XmlNode {
|
|
67
67
|
super(ruby, klass, attrDeclNode);
|
68
68
|
}
|
69
69
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
return self;
|
70
|
+
static XmlAttributeDecl create(ThreadContext context, Node attrDeclNode) {
|
71
|
+
return new XmlAttributeDecl(context.runtime,
|
72
|
+
getNokogiriClass(context.runtime, "Nokogiri::XML::AttributeDecl"),
|
73
|
+
attrDeclNode
|
74
|
+
);
|
76
75
|
}
|
77
76
|
|
78
77
|
@Override
|
@@ -84,8 +83,7 @@ public class XmlAttributeDecl extends XmlNode {
|
|
84
83
|
@Override
|
85
84
|
@JRubyMethod(name = "node_name=")
|
86
85
|
public IRubyObject node_name_set(ThreadContext context, IRubyObject name) {
|
87
|
-
throw context.
|
88
|
-
.newRuntimeError("cannot change name of DTD decl");
|
86
|
+
throw context.runtime.newRuntimeError("cannot change name of DTD decl");
|
89
87
|
}
|
90
88
|
|
91
89
|
public IRubyObject element_name(ThreadContext context) {
|
@@ -112,19 +110,20 @@ public class XmlAttributeDecl extends XmlNode {
|
|
112
110
|
*/
|
113
111
|
@JRubyMethod
|
114
112
|
public IRubyObject enumeration(ThreadContext context) {
|
115
|
-
|
116
|
-
String atype = ((Element)node).getAttribute("atype");
|
113
|
+
final String atype = ((Element) node).getAttribute("atype");
|
117
114
|
|
118
115
|
if (atype != null && atype.length() != 0 && atype.charAt(0) == '(') {
|
119
116
|
// removed enclosing parens
|
120
117
|
String valueStr = atype.substring(1, atype.length() - 1);
|
121
118
|
String[] values = valueStr.split("\\|");
|
119
|
+
RubyArray enumVals = RubyArray.newArray(context.runtime, values.length);
|
122
120
|
for (int i = 0; i < values.length; i++) {
|
123
|
-
enumVals.append(context.
|
121
|
+
enumVals.append(context.runtime.newString(values[i]));
|
124
122
|
}
|
123
|
+
return enumVals;
|
125
124
|
}
|
126
125
|
|
127
|
-
return
|
126
|
+
return context.runtime.newEmptyArray();
|
128
127
|
}
|
129
128
|
|
130
129
|
}
|
@@ -69,10 +69,9 @@ public class XmlCdata extends XmlText {
|
|
69
69
|
}
|
70
70
|
IRubyObject doc = args[0];
|
71
71
|
content = args[1];
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
setNode(context, node);
|
72
|
+
Document document = ((XmlNode) doc).getOwnerDocument();
|
73
|
+
Node node = document.createCDATASection(rubyStringToString(content));
|
74
|
+
setNode(context.runtime, node);
|
76
75
|
}
|
77
76
|
|
78
77
|
@Override
|
@@ -39,25 +39,28 @@ import static nokogiri.internals.NokogiriHelpers.isNamespace;
|
|
39
39
|
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
|
40
40
|
import static nokogiri.internals.NokogiriHelpers.stringOrNil;
|
41
41
|
|
42
|
-
import java.io.UnsupportedEncodingException;
|
43
42
|
import java.util.List;
|
44
43
|
|
45
44
|
import javax.xml.parsers.DocumentBuilderFactory;
|
46
45
|
import javax.xml.parsers.ParserConfigurationException;
|
47
46
|
|
47
|
+
import org.jcodings.specific.USASCIIEncoding;
|
48
|
+
import org.jcodings.specific.UTF8Encoding;
|
48
49
|
import org.jruby.Ruby;
|
49
50
|
import org.jruby.RubyArray;
|
50
51
|
import org.jruby.RubyClass;
|
51
52
|
import org.jruby.RubyFixnum;
|
52
|
-
import org.jruby.
|
53
|
+
import org.jruby.RubyString;
|
53
54
|
import org.jruby.anno.JRubyClass;
|
54
55
|
import org.jruby.anno.JRubyMethod;
|
56
|
+
import org.jruby.exceptions.RaiseException;
|
55
57
|
import org.jruby.javasupport.JavaUtil;
|
56
|
-
import org.jruby.runtime.Arity;
|
57
58
|
import org.jruby.runtime.Block;
|
58
59
|
import org.jruby.runtime.Helpers;
|
59
60
|
import org.jruby.runtime.ThreadContext;
|
61
|
+
import org.jruby.runtime.Visibility;
|
60
62
|
import org.jruby.runtime.builtin.IRubyObject;
|
63
|
+
import org.jruby.util.ByteList;
|
61
64
|
import org.w3c.dom.Attr;
|
62
65
|
import org.w3c.dom.Document;
|
63
66
|
import org.w3c.dom.DocumentType;
|
@@ -95,41 +98,43 @@ public class XmlDocument extends XmlNode {
|
|
95
98
|
*/
|
96
99
|
private static final String DOCUMENTBUILDERFACTORY_IMPLE_NAME = "org.apache.xerces.jaxp.DocumentBuilderFactoryImpl";
|
97
100
|
|
101
|
+
private static final ByteList DOCUMENT = ByteList.create("document");
|
102
|
+
static { DOCUMENT.setEncoding(USASCIIEncoding.INSTANCE); }
|
103
|
+
|
98
104
|
private static boolean substituteEntities = false;
|
99
105
|
private static boolean loadExternalSubset = false; // TODO: Verify this.
|
100
106
|
|
101
107
|
/** cache variables */
|
102
|
-
protected IRubyObject encoding
|
103
|
-
protected IRubyObject url
|
108
|
+
protected IRubyObject encoding;
|
109
|
+
protected IRubyObject url;
|
104
110
|
|
105
|
-
public XmlDocument(Ruby
|
106
|
-
super(
|
111
|
+
public XmlDocument(Ruby runtime, RubyClass klazz) {
|
112
|
+
super(runtime, klazz, createNewDocument(runtime));
|
107
113
|
}
|
108
114
|
|
109
|
-
public XmlDocument(Ruby
|
110
|
-
this(
|
115
|
+
public XmlDocument(Ruby runtime, Document document) {
|
116
|
+
this(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Document"), document);
|
111
117
|
}
|
112
118
|
|
113
|
-
public XmlDocument(Ruby
|
114
|
-
super(
|
115
|
-
|
116
|
-
createAndCacheNamespaces(ruby, document.getDocumentElement());
|
117
|
-
stabilizeTextContent(document);
|
118
|
-
setInstanceVariable("@decorators", ruby.getNil());
|
119
|
+
public XmlDocument(Ruby runtime, RubyClass klass, Document document) {
|
120
|
+
super(runtime, klass, document);
|
121
|
+
init(runtime, document);
|
119
122
|
}
|
120
123
|
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
Ruby runtime = context.getRuntime();
|
125
|
-
if (node != null) {
|
126
|
-
Document document = (Document)node;
|
127
|
-
stabilizeTextContent(document);
|
124
|
+
void init(Ruby runtime, Document document) {
|
125
|
+
stabilizeTextContent(document);
|
126
|
+
if (document.getDocumentElement() != null) {
|
128
127
|
createAndCacheNamespaces(runtime, document.getDocumentElement());
|
129
128
|
}
|
130
129
|
setInstanceVariable("@decorators", runtime.getNil());
|
131
130
|
}
|
132
131
|
|
132
|
+
public final void setDocumentNode(Ruby runtime, Document node) {
|
133
|
+
super.setNode(runtime, node);
|
134
|
+
if (node != null) init(runtime, node);
|
135
|
+
else setInstanceVariable("@decorators", runtime.getNil());
|
136
|
+
}
|
137
|
+
|
133
138
|
public void setEncoding(IRubyObject encoding) {
|
134
139
|
this.encoding = encoding;
|
135
140
|
}
|
@@ -141,72 +146,64 @@ public class XmlDocument extends XmlNode {
|
|
141
146
|
// not sure, but like attribute values, text value will be lost
|
142
147
|
// unless it is referred once before this document is used.
|
143
148
|
// this seems to happen only when the fragment is parsed from Node#in_context.
|
144
|
-
protected void stabilizeTextContent(Document document) {
|
149
|
+
protected static void stabilizeTextContent(Document document) {
|
145
150
|
if (document.getDocumentElement() != null) document.getDocumentElement().getTextContent();
|
146
151
|
}
|
147
152
|
|
148
|
-
private void createAndCacheNamespaces(Ruby
|
149
|
-
if (node == null) return;
|
153
|
+
private static void createAndCacheNamespaces(Ruby runtime, Node node) {
|
150
154
|
if (node.hasAttributes()) {
|
151
155
|
NamedNodeMap nodeMap = node.getAttributes();
|
152
156
|
for (int i=0; i<nodeMap.getLength(); i++) {
|
153
157
|
Node n = nodeMap.item(i);
|
154
158
|
if (n instanceof Attr) {
|
155
|
-
Attr attr = (Attr)n;
|
156
|
-
|
157
|
-
|
158
|
-
// or lose attribute value
|
159
|
-
String attrValue = attr.getValue(); // don't delete this line
|
160
|
-
if (isNamespace(attrName)) {
|
159
|
+
Attr attr = (Attr) n;
|
160
|
+
stabilizeAttr(attr);
|
161
|
+
if (isNamespace(attr.getName())) {
|
161
162
|
// create and cache
|
162
|
-
XmlNamespace.createFromAttr(
|
163
|
+
XmlNamespace.createFromAttr(runtime, attr);
|
163
164
|
}
|
164
165
|
}
|
165
166
|
}
|
166
167
|
}
|
167
168
|
NodeList children = node.getChildNodes();
|
168
169
|
for (int i=0; i<children.getLength(); i++) {
|
169
|
-
createAndCacheNamespaces(
|
170
|
+
createAndCacheNamespaces(runtime, children.item(i));
|
170
171
|
}
|
171
172
|
}
|
172
173
|
|
174
|
+
static void stabilizeAttr(final Attr attr) {
|
175
|
+
// TODO not sure, but need to get value always before document is referred or lose attribute value
|
176
|
+
attr.getValue(); // don't delete this line
|
177
|
+
}
|
178
|
+
|
173
179
|
// When a document is created from fragment with a context (reference) document,
|
174
180
|
// namespace should be resolved based on the context document.
|
175
181
|
public XmlDocument(Ruby ruby, RubyClass klass, Document document, XmlDocument contextDoc) {
|
176
182
|
super(ruby, klass, document);
|
177
183
|
nsCache = contextDoc.getNamespaceCache();
|
178
|
-
|
179
|
-
|
180
|
-
resolveNamespaceIfNecessary(ruby.getCurrentContext(), document.getDocumentElement(), default_href);
|
184
|
+
String default_href = nsCache.getDefault().getHref();
|
185
|
+
resolveNamespaceIfNecessary(document.getDocumentElement(), default_href);
|
181
186
|
}
|
182
187
|
|
183
|
-
private void resolveNamespaceIfNecessary(
|
188
|
+
private void resolveNamespaceIfNecessary(Node node, String default_href) {
|
184
189
|
if (node == null) return;
|
185
190
|
String nodePrefix = node.getPrefix();
|
186
191
|
if (nodePrefix == null) { // default namespace
|
187
192
|
NokogiriHelpers.renameNode(node, default_href, node.getNodeName());
|
188
193
|
} else {
|
189
|
-
|
190
|
-
String href = rubyStringToString(xmlNamespace.href(context));
|
194
|
+
String href = getNamespaceCache().get(node, nodePrefix).getHref();
|
191
195
|
NokogiriHelpers.renameNode(node, href, node.getNodeName());
|
192
196
|
}
|
193
|
-
resolveNamespaceIfNecessary(
|
197
|
+
resolveNamespaceIfNecessary(node.getNextSibling(), default_href);
|
194
198
|
NodeList children = node.getChildNodes();
|
195
199
|
for (int i=0; i<children.getLength(); i++) {
|
196
|
-
resolveNamespaceIfNecessary(
|
200
|
+
resolveNamespaceIfNecessary(children.item(i), default_href);
|
197
201
|
}
|
198
202
|
}
|
199
203
|
|
200
204
|
public NokogiriNamespaceCache getNamespaceCache() {
|
201
|
-
return nsCache;
|
202
|
-
}
|
203
|
-
|
204
|
-
public void initializeNamespaceCacheIfNecessary() {
|
205
205
|
if (nsCache == null) nsCache = new NokogiriNamespaceCache();
|
206
|
-
|
207
|
-
|
208
|
-
public void setNamespaceCache(NokogiriNamespaceCache nsCache) {
|
209
|
-
this.nsCache = nsCache;
|
206
|
+
return nsCache;
|
210
207
|
}
|
211
208
|
|
212
209
|
public Document getDocument() {
|
@@ -215,7 +212,7 @@ public class XmlDocument extends XmlNode {
|
|
215
212
|
|
216
213
|
@Override
|
217
214
|
protected IRubyObject getNodeName(ThreadContext context) {
|
218
|
-
if (name == null) name =
|
215
|
+
if (name == null) name = RubyString.newStringShared(context.runtime, DOCUMENT);
|
219
216
|
return name;
|
220
217
|
}
|
221
218
|
|
@@ -232,15 +229,31 @@ public class XmlDocument extends XmlNode {
|
|
232
229
|
return getUrl();
|
233
230
|
}
|
234
231
|
|
235
|
-
public static Document createNewDocument() {
|
232
|
+
public static Document createNewDocument(final Ruby runtime) {
|
236
233
|
try {
|
237
|
-
|
238
|
-
return factory.newDocumentBuilder().newDocument();
|
234
|
+
return DocumentBuilderFactoryHolder.INSTANCE.newDocumentBuilder().newDocument();
|
239
235
|
} catch (ParserConfigurationException e) {
|
240
|
-
|
236
|
+
throw asRuntimeError(runtime, null, e);
|
241
237
|
}
|
242
238
|
}
|
243
239
|
|
240
|
+
private static class DocumentBuilderFactoryHolder {
|
241
|
+
static final DocumentBuilderFactory INSTANCE;
|
242
|
+
static {
|
243
|
+
INSTANCE = DocumentBuilderFactory.newInstance(DOCUMENTBUILDERFACTORY_IMPLE_NAME, NokogiriService.class.getClassLoader());
|
244
|
+
}
|
245
|
+
}
|
246
|
+
|
247
|
+
static RaiseException asRuntimeError(Ruby runtime, String message, Exception cause) {
|
248
|
+
if (cause instanceof RaiseException) return (RaiseException) cause;
|
249
|
+
|
250
|
+
if (message == null) message = cause.toString();
|
251
|
+
else message = message + '(' + cause.toString() + ')';
|
252
|
+
RaiseException ex = runtime.newRuntimeError(message);
|
253
|
+
ex.initCause(cause);
|
254
|
+
return ex;
|
255
|
+
}
|
256
|
+
|
244
257
|
/*
|
245
258
|
* call-seq:
|
246
259
|
* new(version = default)
|
@@ -249,19 +262,17 @@ public class XmlDocument extends XmlNode {
|
|
249
262
|
*/
|
250
263
|
@JRubyMethod(name="new", meta = true, rest = true, required=0)
|
251
264
|
public static IRubyObject rbNew(ThreadContext context, IRubyObject klazz, IRubyObject[] args) {
|
265
|
+
final Ruby runtime = context.runtime;
|
252
266
|
XmlDocument xmlDocument;
|
253
267
|
try {
|
254
|
-
Document docNode = createNewDocument();
|
268
|
+
Document docNode = createNewDocument(runtime);
|
255
269
|
if ("Nokogiri::HTML::Document".equals(((RubyClass)klazz).getName())) {
|
256
|
-
xmlDocument =
|
257
|
-
xmlDocument.setDocumentNode(context, docNode);
|
270
|
+
xmlDocument = new HtmlDocument(context.runtime, (RubyClass) klazz, docNode);
|
258
271
|
} else {
|
259
|
-
|
260
|
-
xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), (RubyClass) klazz);
|
261
|
-
xmlDocument.setDocumentNode(context, docNode);
|
272
|
+
xmlDocument = new XmlDocument(context.runtime, (RubyClass) klazz, docNode);
|
262
273
|
}
|
263
274
|
} catch (Exception ex) {
|
264
|
-
throw
|
275
|
+
throw asRuntimeError(runtime, "couldn't create document: ", ex);
|
265
276
|
}
|
266
277
|
|
267
278
|
Helpers.invoke(context, xmlDocument, "initialize", args);
|
@@ -274,19 +285,19 @@ public class XmlDocument extends XmlNode {
|
|
274
285
|
// FIXME: Entity node should be create by some right way.
|
275
286
|
// this impl passes tests, but entity doesn't exists in DTD, which
|
276
287
|
// would cause validation failure.
|
277
|
-
if (argv.length == 0) throw context.
|
288
|
+
if (argv.length == 0) throw context.runtime.newRuntimeError("Could not create entity");
|
278
289
|
String tagName = rubyStringToString(argv[0]);
|
279
|
-
Node
|
280
|
-
return XmlEntityDecl.create(context,
|
290
|
+
Node node = getOwnerDocument().createElement(tagName);
|
291
|
+
return XmlEntityDecl.create(context, node, argv);
|
281
292
|
}
|
282
293
|
|
283
294
|
@Override
|
284
|
-
|
295
|
+
XmlDocument document(Ruby runtime) {
|
285
296
|
return this;
|
286
297
|
}
|
287
298
|
|
288
299
|
@JRubyMethod(name="encoding=")
|
289
|
-
public IRubyObject encoding_set(
|
300
|
+
public IRubyObject encoding_set(IRubyObject encoding) {
|
290
301
|
this.encoding = encoding;
|
291
302
|
return this;
|
292
303
|
}
|
@@ -294,10 +305,11 @@ public class XmlDocument extends XmlNode {
|
|
294
305
|
@JRubyMethod
|
295
306
|
public IRubyObject encoding(ThreadContext context) {
|
296
307
|
if (this.encoding == null || this.encoding.isNil()) {
|
297
|
-
|
298
|
-
|
308
|
+
final String enc = getDocument().getXmlEncoding();
|
309
|
+
if (enc == null) {
|
310
|
+
this.encoding = context.nil;
|
299
311
|
} else {
|
300
|
-
this.encoding = context.
|
312
|
+
this.encoding = context.runtime.newString(enc);
|
301
313
|
}
|
302
314
|
}
|
303
315
|
|
@@ -307,59 +319,32 @@ public class XmlDocument extends XmlNode {
|
|
307
319
|
@JRubyMethod(meta = true)
|
308
320
|
public static IRubyObject load_external_subsets_set(ThreadContext context, IRubyObject cls, IRubyObject value) {
|
309
321
|
XmlDocument.loadExternalSubset = value.isTrue();
|
310
|
-
return context.
|
311
|
-
}
|
312
|
-
|
313
|
-
/**
|
314
|
-
* TODO: handle encoding?
|
315
|
-
*
|
316
|
-
* @param args[0] a Ruby IO or StringIO
|
317
|
-
* @param args[1] url or nil
|
318
|
-
* @param args[2] encoding
|
319
|
-
* @param args[3] bitset of parser options
|
320
|
-
*/
|
321
|
-
public static IRubyObject newFromData(ThreadContext context,
|
322
|
-
IRubyObject klass,
|
323
|
-
IRubyObject[] args) {
|
324
|
-
Ruby ruby = context.getRuntime();
|
325
|
-
Arity.checkArgumentCount(ruby, args, 4, 4);
|
326
|
-
XmlDomParserContext ctx =
|
327
|
-
new XmlDomParserContext(ruby, args[2], args[3]);
|
328
|
-
ctx.setInputSource(context, args[0], args[1]);
|
329
|
-
return ctx.parse(context, klass, args[1]);
|
330
|
-
}
|
331
|
-
|
332
|
-
@JRubyMethod(meta = true, rest = true)
|
333
|
-
public static IRubyObject read_io(ThreadContext context,
|
334
|
-
IRubyObject klass,
|
335
|
-
IRubyObject[] args) {
|
336
|
-
return newFromData(context, klass, args);
|
322
|
+
return context.nil;
|
337
323
|
}
|
338
324
|
|
339
|
-
@JRubyMethod(meta = true,
|
340
|
-
public static IRubyObject
|
341
|
-
|
342
|
-
|
343
|
-
return
|
325
|
+
@JRubyMethod(meta = true, required = 4)
|
326
|
+
public static IRubyObject read_io(ThreadContext context, IRubyObject klass, IRubyObject[] args) {
|
327
|
+
XmlDomParserContext ctx = new XmlDomParserContext(context.runtime, args[2], args[3]);
|
328
|
+
ctx.setIOInputSource(context, args[0], args[1]);
|
329
|
+
return ctx.parse(context, (RubyClass) klass, args[1]);
|
344
330
|
}
|
345
331
|
|
346
|
-
|
347
|
-
public static IRubyObject read_memory(ThreadContext context,
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
args);
|
332
|
+
@JRubyMethod(meta = true, required = 4)
|
333
|
+
public static IRubyObject read_memory(ThreadContext context, IRubyObject klass, IRubyObject[] args) {
|
334
|
+
XmlDomParserContext ctx = new XmlDomParserContext(context.runtime, args[2], args[3]);
|
335
|
+
ctx.setStringInputSource(context, args[0], args[1]);
|
336
|
+
return ctx.parse(context, (RubyClass) klass, args[1]);
|
352
337
|
}
|
353
338
|
|
354
339
|
@JRubyMethod(name="remove_namespaces!")
|
355
340
|
public IRubyObject remove_namespaces(ThreadContext context) {
|
356
|
-
|
357
|
-
nsCache.clear();
|
341
|
+
removeNamespaceRecursively(this);
|
342
|
+
if (nsCache != null) nsCache.clear();
|
358
343
|
clearXpathContext(getNode());
|
359
344
|
return this;
|
360
345
|
}
|
361
346
|
|
362
|
-
private void
|
347
|
+
private void removeNamespaceRecursively(XmlNode xmlNode) {
|
363
348
|
Node node = xmlNode.node;
|
364
349
|
if (node.getNodeType() == Node.ELEMENT_NODE) {
|
365
350
|
node.setPrefix(null);
|
@@ -368,33 +353,29 @@ public class XmlDocument extends XmlNode {
|
|
368
353
|
for (int i=0; i<attrs.getLength(); i++) {
|
369
354
|
Attr attr = (Attr) attrs.item(i);
|
370
355
|
if (isNamespace(attr.getNodeName())) {
|
371
|
-
((org.w3c.dom.Element)node).removeAttributeNode(attr);
|
356
|
+
((org.w3c.dom.Element) node).removeAttributeNode(attr);
|
372
357
|
} else {
|
373
358
|
attr.setPrefix(null);
|
374
359
|
NokogiriHelpers.renameNode(attr, null, attr.getLocalName());
|
375
360
|
}
|
376
361
|
}
|
377
362
|
}
|
378
|
-
|
379
|
-
for (
|
380
|
-
XmlNode childNode = (XmlNode)
|
381
|
-
|
363
|
+
IRubyObject[] nodes = xmlNode.getChildren();
|
364
|
+
for (int i=0; i < nodes.length; i++) {
|
365
|
+
XmlNode childNode = (XmlNode) nodes[i];
|
366
|
+
removeNamespaceRecursively(childNode);
|
382
367
|
}
|
383
368
|
}
|
384
369
|
|
385
370
|
@JRubyMethod
|
386
371
|
public IRubyObject root(ThreadContext context) {
|
387
372
|
Node rootNode = getDocument().getDocumentElement();
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
if (rootNode == null)
|
395
|
-
return context.getRuntime().getNil();
|
396
|
-
else
|
397
|
-
return getCachedNodeOrCreate(context.getRuntime(), rootNode);
|
373
|
+
if (rootNode == null) return context.nil;
|
374
|
+
|
375
|
+
Object invalid = rootNode.getUserData(NokogiriHelpers.ROOT_NODE_INVALID);
|
376
|
+
if (invalid != null && ((Boolean) invalid)) return context.nil;
|
377
|
+
|
378
|
+
return getCachedNodeOrCreate(context.runtime, rootNode);
|
398
379
|
}
|
399
380
|
|
400
381
|
protected IRubyObject dup_implementation(Ruby runtime, boolean deep) {
|
@@ -412,17 +393,17 @@ public class XmlDocument extends XmlNode {
|
|
412
393
|
}
|
413
394
|
|
414
395
|
@JRubyMethod(name="root=")
|
415
|
-
public IRubyObject root_set(ThreadContext context, IRubyObject
|
396
|
+
public IRubyObject root_set(ThreadContext context, IRubyObject new_root) {
|
416
397
|
// in case of document fragment, temporary root node should be deleted.
|
417
398
|
|
418
399
|
// Java can't have a root whose value is null. Instead of setting null,
|
419
400
|
// the method sets user data so that other methods are able to know the root
|
420
401
|
// should be nil.
|
421
|
-
if (
|
422
|
-
getDocument().getDocumentElement().setUserData(NokogiriHelpers.
|
423
|
-
return
|
402
|
+
if (new_root == context.nil) {
|
403
|
+
getDocument().getDocumentElement().setUserData(NokogiriHelpers.ROOT_NODE_INVALID, Boolean.TRUE, null);
|
404
|
+
return new_root;
|
424
405
|
}
|
425
|
-
XmlNode newRoot = asXmlNode(context,
|
406
|
+
XmlNode newRoot = asXmlNode(context, new_root);
|
426
407
|
|
427
408
|
IRubyObject root = root(context);
|
428
409
|
if (root.isNil()) {
|
@@ -434,10 +415,10 @@ public class XmlDocument extends XmlNode {
|
|
434
415
|
// with different owner document.
|
435
416
|
newRootNode = getDocument().importNode(newRoot.node, true);
|
436
417
|
}
|
437
|
-
add_child_node(context, getCachedNodeOrCreate(context.
|
418
|
+
add_child_node(context, getCachedNodeOrCreate(context.runtime, newRootNode));
|
438
419
|
} else {
|
439
420
|
Node rootNode = asXmlNode(context, root).node;
|
440
|
-
((XmlNode)getCachedNodeOrCreate(context.
|
421
|
+
((XmlNode) getCachedNodeOrCreate(context.runtime, rootNode)).replace_node(context, newRoot);
|
441
422
|
}
|
442
423
|
|
443
424
|
return newRoot;
|
@@ -445,13 +426,13 @@ public class XmlDocument extends XmlNode {
|
|
445
426
|
|
446
427
|
@JRubyMethod
|
447
428
|
public IRubyObject version(ThreadContext context) {
|
448
|
-
return stringOrNil(context.
|
429
|
+
return stringOrNil(context.runtime, getDocument().getXmlVersion());
|
449
430
|
}
|
450
431
|
|
451
432
|
@JRubyMethod(meta = true)
|
452
433
|
public static IRubyObject substitute_entities_set(ThreadContext context, IRubyObject cls, IRubyObject value) {
|
453
434
|
XmlDocument.substituteEntities = value.isTrue();
|
454
|
-
return context.
|
435
|
+
return context.nil;
|
455
436
|
}
|
456
437
|
|
457
438
|
public IRubyObject getInternalSubset(ThreadContext context) {
|
@@ -460,27 +441,23 @@ public class XmlDocument extends XmlNode {
|
|
460
441
|
if (dtd == null) {
|
461
442
|
Document document = getDocument();
|
462
443
|
if (document.getUserData(XmlDocument.DTD_RAW_DOCUMENT) != null) {
|
463
|
-
dtd = XmlDtd.newFromInternalSubset(context.
|
444
|
+
dtd = XmlDtd.newFromInternalSubset(context.runtime, document);
|
464
445
|
} else if (document.getDoctype() != null) {
|
465
446
|
DocumentType docType = document.getDoctype();
|
466
447
|
IRubyObject name, publicId, systemId;
|
467
|
-
name = publicId = systemId = context.
|
448
|
+
name = publicId = systemId = context.nil;
|
468
449
|
if (docType.getName() != null) {
|
469
|
-
name = context.
|
450
|
+
name = context.runtime.newString(docType.getName());
|
470
451
|
}
|
471
452
|
if (docType.getPublicId() != null) {
|
472
|
-
publicId = context.
|
453
|
+
publicId = context.runtime.newString(docType.getPublicId());
|
473
454
|
}
|
474
455
|
if (docType.getSystemId() != null) {
|
475
|
-
systemId = context.
|
456
|
+
systemId = context.runtime.newString(docType.getSystemId());
|
476
457
|
}
|
477
|
-
dtd = XmlDtd.newEmpty(context.
|
478
|
-
document,
|
479
|
-
name,
|
480
|
-
publicId,
|
481
|
-
systemId);
|
458
|
+
dtd = XmlDtd.newEmpty(context.runtime, document, name, publicId, systemId);
|
482
459
|
} else {
|
483
|
-
dtd = context.
|
460
|
+
dtd = context.nil;
|
484
461
|
}
|
485
462
|
|
486
463
|
setInternalSubset(dtd);
|
@@ -497,9 +474,7 @@ public class XmlDocument extends XmlNode {
|
|
497
474
|
IRubyObject name,
|
498
475
|
IRubyObject external_id,
|
499
476
|
IRubyObject system_id) {
|
500
|
-
XmlDtd dtd = XmlDtd.newEmpty(context.
|
501
|
-
this.getDocument(),
|
502
|
-
name, external_id, system_id);
|
477
|
+
XmlDtd dtd = XmlDtd.newEmpty(context.runtime, getDocument(), name, external_id, system_id);
|
503
478
|
setInternalSubset(dtd);
|
504
479
|
return dtd;
|
505
480
|
}
|
@@ -511,7 +486,7 @@ public class XmlDocument extends XmlNode {
|
|
511
486
|
public IRubyObject getExternalSubset(ThreadContext context) {
|
512
487
|
IRubyObject dtd = (IRubyObject) node.getUserData(DTD_EXTERNAL_SUBSET);
|
513
488
|
|
514
|
-
if (dtd == null) return context.
|
489
|
+
if (dtd == null) return context.nil;
|
515
490
|
return dtd;
|
516
491
|
}
|
517
492
|
|
@@ -523,9 +498,7 @@ public class XmlDocument extends XmlNode {
|
|
523
498
|
IRubyObject name,
|
524
499
|
IRubyObject external_id,
|
525
500
|
IRubyObject system_id) {
|
526
|
-
XmlDtd dtd = XmlDtd.newEmpty(context.
|
527
|
-
this.getDocument(),
|
528
|
-
name, external_id, system_id);
|
501
|
+
XmlDtd dtd = XmlDtd.newEmpty(context.runtime, getDocument(), name, external_id, system_id);
|
529
502
|
setExternalSubset(dtd);
|
530
503
|
return dtd;
|
531
504
|
}
|
@@ -543,35 +516,40 @@ public class XmlDocument extends XmlNode {
|
|
543
516
|
Node child = children.item(i);
|
544
517
|
short type = child.getNodeType();
|
545
518
|
if (type == Node.COMMENT_NODE) {
|
546
|
-
XmlComment xmlComment = (XmlComment) getCachedNodeOrCreate(context.
|
519
|
+
XmlComment xmlComment = (XmlComment) getCachedNodeOrCreate(context.runtime, child);
|
547
520
|
xmlComment.accept(context, visitor);
|
548
521
|
} else if (type == Node.DOCUMENT_TYPE_NODE) {
|
549
|
-
XmlDtd xmlDtd = (XmlDtd) getCachedNodeOrCreate(context.
|
522
|
+
XmlDtd xmlDtd = (XmlDtd) getCachedNodeOrCreate(context.runtime, child);
|
550
523
|
xmlDtd.accept(context, visitor);
|
551
524
|
} else if (type == Node.PROCESSING_INSTRUCTION_NODE) {
|
552
|
-
XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction) getCachedNodeOrCreate(context.
|
525
|
+
XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction) getCachedNodeOrCreate(context.runtime, child);
|
553
526
|
xmlProcessingInstruction.accept(context, visitor);
|
554
527
|
} else if (type == Node.TEXT_NODE) {
|
555
|
-
XmlText xmlText = (XmlText) getCachedNodeOrCreate(context.
|
528
|
+
XmlText xmlText = (XmlText) getCachedNodeOrCreate(context.runtime, child);
|
556
529
|
xmlText.accept(context, visitor);
|
557
530
|
} else if (type == Node.ELEMENT_NODE) {
|
558
|
-
XmlElement xmlElement = (XmlElement) getCachedNodeOrCreate(context.
|
531
|
+
XmlElement xmlElement = (XmlElement) getCachedNodeOrCreate(context.runtime, child);
|
559
532
|
xmlElement.accept(context, visitor);
|
560
533
|
}
|
561
534
|
}
|
562
535
|
visitor.leave(document);
|
563
536
|
}
|
564
537
|
|
565
|
-
@JRubyMethod(meta=true)
|
566
|
-
public static IRubyObject
|
567
|
-
XmlDocument xmlDocument =
|
538
|
+
@JRubyMethod(meta = true)
|
539
|
+
public static IRubyObject wrap(ThreadContext context, IRubyObject klass, IRubyObject arg) {
|
540
|
+
XmlDocument xmlDocument = new XmlDocument(context.runtime, (RubyClass) klass, (Document) arg.toJava(Document.class));
|
568
541
|
Helpers.invoke(context, xmlDocument, "initialize");
|
569
|
-
Document document = (Document)arg.toJava(Document.class);
|
570
|
-
xmlDocument.setDocumentNode(context, document);
|
571
542
|
return xmlDocument;
|
572
543
|
}
|
573
544
|
|
574
|
-
@
|
545
|
+
@Deprecated
|
546
|
+
@JRubyMethod(meta = true, visibility = Visibility.PRIVATE)
|
547
|
+
public static IRubyObject wrapJavaDocument(ThreadContext context, IRubyObject klass, IRubyObject arg) {
|
548
|
+
return wrap(context, klass, arg);
|
549
|
+
}
|
550
|
+
|
551
|
+
@Deprecated // default to_java works (due inherited from XmlNode#toJava)
|
552
|
+
@JRubyMethod(visibility = Visibility.PRIVATE)
|
575
553
|
public IRubyObject toJavaDocument(ThreadContext context) {
|
576
554
|
return JavaUtil.convertJavaToUsableRubyObject(context.getRuntime(), node);
|
577
555
|
}
|
@@ -596,11 +574,11 @@ public class XmlDocument extends XmlNode {
|
|
596
574
|
}
|
597
575
|
if (args.length > 1 ) {
|
598
576
|
if (!args[1].isNil() && !(args[1] instanceof List)) {
|
599
|
-
throw context.
|
577
|
+
throw context.runtime.newTypeError("Expected array");
|
600
578
|
}
|
601
579
|
if (!args[1].isNil()) {
|
602
580
|
inclusive_namespace = ((RubyArray)args[1])
|
603
|
-
.join(context, context.
|
581
|
+
.join(context, context.runtime.newString(" "))
|
604
582
|
.asString()
|
605
583
|
.asJavaString(); // OMG I wish I knew JRuby better, this is ugly
|
606
584
|
}
|
@@ -632,29 +610,24 @@ public class XmlDocument extends XmlNode {
|
|
632
610
|
} else {
|
633
611
|
result = canonicalizer.canonicalizeSubtree(startingNode.getNode(), inclusive_namespace, filter);
|
634
612
|
}
|
635
|
-
|
636
|
-
return stringOrNil(context.getRuntime(), resultString);
|
613
|
+
return RubyString.newString(context.runtime, new ByteList(result, UTF8Encoding.INSTANCE));
|
637
614
|
} catch (CanonicalizationException e) {
|
638
615
|
// TODO Auto-generated catch block
|
639
616
|
e.printStackTrace();
|
640
|
-
} catch (UnsupportedEncodingException e) {
|
641
|
-
// TODO Auto-generated catch block
|
642
|
-
e.printStackTrace();
|
643
617
|
}
|
644
|
-
return context.
|
618
|
+
return context.nil;
|
645
619
|
}
|
646
620
|
|
647
621
|
private XmlNode getStartingNode(Block block) {
|
648
622
|
if (block.isGiven()) {
|
649
|
-
|
650
|
-
|
651
|
-
}
|
623
|
+
IRubyObject boundSelf = block.getBinding().getSelf();
|
624
|
+
if (boundSelf instanceof XmlNode) return (XmlNode) boundSelf;
|
652
625
|
}
|
653
626
|
return this;
|
654
627
|
}
|
655
628
|
|
656
629
|
public void resetNamespaceCache(ThreadContext context) {
|
657
630
|
nsCache = new NokogiriNamespaceCache();
|
658
|
-
createAndCacheNamespaces(context.
|
631
|
+
createAndCacheNamespaces(context.runtime, node);
|
659
632
|
}
|
660
633
|
}
|