nokogiri 1.5.0-java → 1.5.1-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +56 -12
- data/CHANGELOG.rdoc +45 -0
- data/C_CODING_STYLE.rdoc +27 -0
- data/Manifest.txt +4 -0
- data/README.rdoc +11 -7
- data/Rakefile +44 -26
- data/bin/nokogiri +10 -2
- data/ext/java/nokogiri/HtmlDocument.java +37 -2
- data/ext/java/nokogiri/NokogiriService.java +10 -2
- data/ext/java/nokogiri/XmlAttr.java +1 -1
- data/ext/java/nokogiri/XmlDocument.java +68 -11
- data/ext/java/nokogiri/XmlDocumentFragment.java +16 -5
- data/ext/java/nokogiri/XmlElement.java +0 -40
- data/ext/java/nokogiri/XmlNamespace.java +8 -1
- data/ext/java/nokogiri/XmlNode.java +131 -27
- data/ext/java/nokogiri/XmlNodeSet.java +4 -1
- data/ext/java/nokogiri/XmlSaxParserContext.java +2 -13
- data/ext/java/nokogiri/XmlXpathContext.java +4 -1
- data/ext/java/nokogiri/XsltStylesheet.java +198 -37
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +40 -2
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +82 -9
- data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +4 -3
- data/ext/java/nokogiri/internals/ParserContext.java +33 -3
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +203 -12
- data/ext/java/nokogiri/internals/XmlDomParser.java +33 -2
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +32 -12
- data/ext/nokogiri/extconf.rb +11 -3
- data/ext/nokogiri/html_document.c +16 -0
- data/ext/nokogiri/html_sax_parser_context.c +59 -37
- data/ext/nokogiri/html_sax_push_parser.c +87 -0
- data/ext/nokogiri/html_sax_push_parser.h +9 -0
- data/ext/nokogiri/nokogiri.c +6 -8
- data/ext/nokogiri/nokogiri.h +3 -0
- data/ext/nokogiri/xml_document.c +101 -3
- data/ext/nokogiri/xml_document.h +3 -3
- data/ext/nokogiri/xml_node.c +150 -58
- data/ext/nokogiri/xml_node_set.c +169 -120
- data/ext/nokogiri/xml_node_set.h +5 -0
- data/ext/nokogiri/xml_sax_parser_context.c +64 -41
- data/ext/nokogiri/xml_text.c +2 -0
- data/ext/nokogiri/xml_xpath_context.c +30 -24
- data/ext/nokogiri/xslt_stylesheet.c +62 -16
- data/ext/nokogiri/xslt_stylesheet.h +5 -0
- data/lib/nokogiri/css/parser.rb +163 -157
- data/lib/nokogiri/css/parser.y +6 -3
- data/lib/nokogiri/css/tokenizer.rb +1 -1
- data/lib/nokogiri/css/tokenizer.rex +1 -1
- data/lib/nokogiri/html.rb +1 -0
- data/lib/nokogiri/html/document.rb +82 -42
- data/lib/nokogiri/html/sax/push_parser.rb +16 -0
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml.rb +6 -0
- data/lib/nokogiri/xml/builder.rb +7 -1
- data/lib/nokogiri/xml/document.rb +32 -17
- data/lib/nokogiri/xml/document_fragment.rb +6 -1
- data/lib/nokogiri/xml/node.rb +40 -9
- data/lib/nokogiri/xslt.rb +5 -1
- data/tasks/cross_compile.rb +1 -0
- data/tasks/nokogiri.org.rb +6 -0
- data/tasks/test.rb +1 -0
- data/test/css/test_xpath_visitor.rb +6 -0
- data/test/helper.rb +1 -0
- data/test/html/test_document.rb +26 -0
- data/test/html/test_document_fragment.rb +1 -2
- data/test/test_memory_leak.rb +81 -1
- data/test/test_xslt_transforms.rb +152 -123
- data/test/xml/test_builder.rb +24 -2
- data/test/xml/test_c14n.rb +151 -0
- data/test/xml/test_document.rb +48 -0
- data/test/xml/test_namespace.rb +5 -0
- data/test/xml/test_node.rb +82 -1
- data/test/xml/test_node_attributes.rb +19 -0
- data/test/xml/test_node_inheritance.rb +32 -0
- data/test/xml/test_node_reparenting.rb +32 -0
- data/test/xml/test_node_set.rb +16 -8
- data/test/xml/test_reader_encoding.rb +16 -0
- data/test/xml/test_unparented_node.rb +32 -0
- data/test/xml/test_xinclude.rb +83 -0
- data/test/xml/test_xpath.rb +22 -0
- metadata +147 -123
@@ -1,7 +1,7 @@
|
|
1
1
|
/**
|
2
2
|
* (The MIT License)
|
3
3
|
*
|
4
|
-
* Copyright (c) 2008 -
|
4
|
+
* Copyright (c) 2008 - 2012:
|
5
5
|
*
|
6
6
|
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
7
7
|
* * {Mike Dalessio}[http://mike.daless.io]
|
@@ -132,6 +132,9 @@ public class XmlNodeSet extends RubyObject implements NodeList {
|
|
132
132
|
@JRubyMethod
|
133
133
|
public IRubyObject delete(ThreadContext context, IRubyObject node_or_namespace){
|
134
134
|
if (nodes == null) return context.getRuntime().getNil();
|
135
|
+
if (node_or_namespace instanceof XmlNamespace) {
|
136
|
+
((XmlNamespace)node_or_namespace).deleteHref();
|
137
|
+
}
|
135
138
|
return nodes.delete(context, asXmlNodeOrNamespace(context, node_or_namespace), Block.NULL_BLOCK);
|
136
139
|
}
|
137
140
|
|
@@ -32,7 +32,7 @@
|
|
32
32
|
|
33
33
|
package nokogiri;
|
34
34
|
|
35
|
-
import static nokogiri.internals.NokogiriHelpers.
|
35
|
+
import static nokogiri.internals.NokogiriHelpers.isWhitespaceText;
|
36
36
|
import static org.jruby.javasupport.util.RuntimeHelpers.invoke;
|
37
37
|
|
38
38
|
import java.io.IOException;
|
@@ -248,7 +248,7 @@ public class XmlSaxParserContext extends ParserContext {
|
|
248
248
|
// doesn't distinguish between empty and bad whereas
|
249
249
|
// Nokogiri does.
|
250
250
|
String message = spe.getMessage();
|
251
|
-
if ("Premature end of file.".matches(message)) {
|
251
|
+
if ("Premature end of file.".matches(message) && stringDataSize < 1) {
|
252
252
|
throw ruby.newRuntimeError(
|
253
253
|
"couldn't parse document: " + message);
|
254
254
|
} else {
|
@@ -340,17 +340,6 @@ public class XmlSaxParserContext extends ParserContext {
|
|
340
340
|
((XmlNode) doc).normalize();
|
341
341
|
}
|
342
342
|
|
343
|
-
protected boolean isWhitespaceText(ThreadContext context, IRubyObject obj) {
|
344
|
-
if (obj == null || obj.isNil()) return false;
|
345
|
-
|
346
|
-
XmlNode node = (XmlNode) obj;
|
347
|
-
if (!(node instanceof XmlText))
|
348
|
-
return false;
|
349
|
-
|
350
|
-
String content = rubyStringToString(node.content(context));
|
351
|
-
return content.trim().length() == 0;
|
352
|
-
}
|
353
|
-
|
354
343
|
@JRubyMethod(name="column")
|
355
344
|
public IRubyObject column(ThreadContext context) {
|
356
345
|
Integer number = handler.getColumn();
|
@@ -52,6 +52,7 @@ import org.jruby.Ruby;
|
|
52
52
|
import org.jruby.RubyBoolean;
|
53
53
|
import org.jruby.RubyClass;
|
54
54
|
import org.jruby.RubyException;
|
55
|
+
import org.jruby.RubyFloat;
|
55
56
|
import org.jruby.RubyNumeric;
|
56
57
|
import org.jruby.RubyObject;
|
57
58
|
import org.jruby.RubyString;
|
@@ -150,7 +151,9 @@ public class XmlXpathContext extends RubyObject {
|
|
150
151
|
private IRubyObject tryGetOpaqueValue(XPathExpression xpathExpression) throws XPathExpressionException {
|
151
152
|
String string = (String)xpathExpression.evaluate(context.node, XPathConstants.STRING);
|
152
153
|
Double value = null;
|
153
|
-
if ((value = getDoubleValue(string)) != null)
|
154
|
+
if ((value = getDoubleValue(string)) != null) {
|
155
|
+
return new RubyFloat(getRuntime(), value);
|
156
|
+
}
|
154
157
|
if (doesMatch(boolean_pattern, string.toLowerCase())) return RubyBoolean.newBoolean(getRuntime(), Boolean.parseBoolean(string));
|
155
158
|
return RubyString.newString(getRuntime(), string);
|
156
159
|
}
|
@@ -33,13 +33,19 @@
|
|
33
33
|
package nokogiri;
|
34
34
|
|
35
35
|
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
|
36
|
+
import static nokogiri.internals.NokogiriHelpers.stringOrBlank;
|
36
37
|
|
38
|
+
import java.io.IOException;
|
39
|
+
import java.io.PipedReader;
|
40
|
+
import java.io.PipedWriter;
|
41
|
+
import java.io.StringReader;
|
42
|
+
import java.nio.CharBuffer;
|
37
43
|
import java.util.HashMap;
|
38
44
|
import java.util.Map;
|
45
|
+
import java.util.Set;
|
39
46
|
import java.util.regex.Matcher;
|
40
47
|
import java.util.regex.Pattern;
|
41
48
|
|
42
|
-
import javax.xml.transform.ErrorListener;
|
43
49
|
import javax.xml.transform.Templates;
|
44
50
|
import javax.xml.transform.Transformer;
|
45
51
|
import javax.xml.transform.TransformerConfigurationException;
|
@@ -47,13 +53,17 @@ import javax.xml.transform.TransformerException;
|
|
47
53
|
import javax.xml.transform.TransformerFactory;
|
48
54
|
import javax.xml.transform.dom.DOMResult;
|
49
55
|
import javax.xml.transform.dom.DOMSource;
|
56
|
+
import javax.xml.transform.stream.StreamResult;
|
57
|
+
import javax.xml.transform.stream.StreamSource;
|
50
58
|
|
51
59
|
import nokogiri.internals.NokogiriXsltErrorListener;
|
52
60
|
|
53
61
|
import org.jruby.Ruby;
|
54
62
|
import org.jruby.RubyArray;
|
55
63
|
import org.jruby.RubyClass;
|
64
|
+
import org.jruby.RubyHash;
|
56
65
|
import org.jruby.RubyObject;
|
66
|
+
import org.jruby.RubyString;
|
57
67
|
import org.jruby.anno.JRubyClass;
|
58
68
|
import org.jruby.anno.JRubyMethod;
|
59
69
|
import org.jruby.javasupport.util.RuntimeHelpers;
|
@@ -70,8 +80,10 @@ import org.w3c.dom.Document;
|
|
70
80
|
@JRubyClass(name="Nokogiri::XSLT::Stylesheet")
|
71
81
|
public class XsltStylesheet extends RubyObject {
|
72
82
|
private static Map<String, Object> registry = new HashMap<String, Object>();
|
73
|
-
private
|
74
|
-
private Templates sheet;
|
83
|
+
private TransformerFactory factory = null;
|
84
|
+
private Templates sheet = null;
|
85
|
+
private IRubyObject stylesheet = null;
|
86
|
+
private boolean htmlish = false;
|
75
87
|
|
76
88
|
public static Map<String, Object> getRegistry() {
|
77
89
|
return registry;
|
@@ -80,17 +92,45 @@ public class XsltStylesheet extends RubyObject {
|
|
80
92
|
public XsltStylesheet(Ruby ruby, RubyClass rubyClass) {
|
81
93
|
super(ruby, rubyClass);
|
82
94
|
}
|
95
|
+
|
96
|
+
/**
|
97
|
+
* Create and return a copy of this object.
|
98
|
+
*
|
99
|
+
* @return a clone of this object
|
100
|
+
*/
|
101
|
+
@Override
|
102
|
+
public Object clone() throws CloneNotSupportedException {
|
103
|
+
return super.clone();
|
104
|
+
}
|
83
105
|
|
84
106
|
private void addParametersToTransformer(ThreadContext context, Transformer transf, IRubyObject parameters) {
|
85
|
-
Ruby
|
86
|
-
|
107
|
+
Ruby runtime = context.getRuntime();
|
108
|
+
|
109
|
+
if (parameters instanceof RubyHash) {
|
110
|
+
setHashParameters(transf, (RubyHash)parameters);
|
111
|
+
} else if (parameters instanceof RubyArray) {
|
112
|
+
setArrayParameters(transf, runtime, (RubyArray)parameters);
|
113
|
+
} else {
|
114
|
+
throw runtime.newTypeError("parameters should be given either Array or Hash");
|
115
|
+
}
|
116
|
+
}
|
117
|
+
|
118
|
+
private void setHashParameters(Transformer transformer, RubyHash hash) {
|
119
|
+
Set<String> keys = hash.keySet();
|
120
|
+
for (String key : keys) {
|
121
|
+
String value = (String)hash.get(key);
|
122
|
+
transformer.setParameter(key, unparseValue(value));
|
123
|
+
}
|
124
|
+
}
|
125
|
+
|
126
|
+
private void setArrayParameters(Transformer transformer, Ruby runtime, RubyArray params) {
|
87
127
|
int limit = params.getLength();
|
88
128
|
if(limit % 2 == 1) limit--;
|
89
129
|
|
90
130
|
for(int i = 0; i < limit; i+=2) {
|
91
|
-
String name = params.aref(
|
92
|
-
String value = params.aref(
|
93
|
-
|
131
|
+
String name = params.aref(runtime.newFixnum(i)).asJavaString();
|
132
|
+
String value = params.aref(runtime.newFixnum(i+1)).asJavaString();
|
133
|
+
transformer.setParameter(name, unparseValue(value));
|
94
134
|
}
|
95
135
|
}
|
96
136
|
|
@@ -105,36 +145,51 @@ public class XsltStylesheet extends RubyObject {
|
|
105
145
|
return orig;
|
106
146
|
}
|
107
147
|
|
108
|
-
@JRubyMethod(meta = true)
|
109
|
-
public static IRubyObject parse_stylesheet_doc(ThreadContext context, IRubyObject
|
148
|
+
@JRubyMethod(meta = true, rest = true)
|
149
|
+
public static IRubyObject parse_stylesheet_doc(ThreadContext context, IRubyObject klazz, IRubyObject[] args) {
|
110
150
|
|
111
|
-
Ruby
|
112
|
-
|
113
|
-
if(!(document instanceof XmlDocument)) {
|
114
|
-
throw ruby.newArgumentError("doc must be a Nokogiri::XML::Document instance");
|
115
|
-
}
|
116
|
-
|
117
|
-
XmlDocument xmlDoc = (XmlDocument) document;
|
151
|
+
Ruby runtime = context.getRuntime();
|
118
152
|
|
119
|
-
|
153
|
+
ensureFirstArgIsDocument(runtime, args[0]);
|
120
154
|
|
121
|
-
|
122
|
-
|
123
|
-
}
|
155
|
+
XmlDocument xmlDoc = (XmlDocument) args[0];
|
156
|
+
ensureDocumentHasNoError(context, xmlDoc);
|
124
157
|
|
125
158
|
Document doc = ((XmlDocument) xmlDoc.dup_implementation(context, true)).getDocument();
|
126
159
|
|
127
|
-
XsltStylesheet xslt =
|
160
|
+
XsltStylesheet xslt =
|
161
|
+
(XsltStylesheet) NokogiriService.XSLT_STYLESHEET_ALLOCATOR.allocate(runtime, (RubyClass)klazz);
|
128
162
|
|
129
163
|
try {
|
130
|
-
|
131
|
-
xslt.sheet = factory.newTemplates(new DOMSource(doc));
|
164
|
+
xslt.init(args[1], doc);
|
132
165
|
} catch (TransformerConfigurationException ex) {
|
133
|
-
|
166
|
+
runtime.newRuntimeError("could not parse xslt stylesheet");
|
134
167
|
}
|
135
168
|
|
136
169
|
return xslt;
|
137
170
|
}
|
171
|
+
|
172
|
+
private void init(IRubyObject stylesheet, Document document) throws TransformerConfigurationException {
|
173
|
+
this.stylesheet = stylesheet; // either RubyString or RubyFile
|
174
|
+
if (factory == null) factory = TransformerFactory.newInstance();
|
175
|
+
sheet = factory.newTemplates(new DOMSource(document));
|
176
|
+
}
|
177
|
+
|
178
|
+
private static void ensureFirstArgIsDocument(Ruby runtime, IRubyObject arg) {
|
179
|
+
if (arg instanceof XmlDocument) {
|
180
|
+
return;
|
181
|
+
} else {
|
182
|
+
throw runtime.newArgumentError("doc must be a Nokogiri::XML::Document instance");
|
183
|
+
}
|
184
|
+
}
|
185
|
+
|
186
|
+
private static void ensureDocumentHasNoError(ThreadContext context, XmlDocument xmlDoc) {
|
187
|
+
Ruby runtime = context.getRuntime();
|
188
|
+
RubyArray errors_of_xmlDoc = (RubyArray) xmlDoc.getInstanceVariable("@errors");
|
189
|
+
if (!errors_of_xmlDoc.isEmpty()) {
|
190
|
+
throw runtime.newRuntimeError(errors_of_xmlDoc.first().asJavaString());
|
191
|
+
}
|
192
|
+
}
|
138
193
|
|
139
194
|
@JRubyMethod
|
140
195
|
public IRubyObject serialize(ThreadContext context, IRubyObject doc) {
|
@@ -147,21 +202,23 @@ public class XsltStylesheet extends RubyObject {
|
|
147
202
|
public IRubyObject transform(ThreadContext context, IRubyObject[] args) {
|
148
203
|
Ruby runtime = context.getRuntime();
|
149
204
|
|
150
|
-
|
151
|
-
DOMResult result = new DOMResult();
|
205
|
+
argumentTypeCheck(runtime, args[0]);
|
152
206
|
|
153
207
|
NokogiriXsltErrorListener elistener = new NokogiriXsltErrorListener();
|
208
|
+
DOMSource domSource = new DOMSource(((XmlDocument) args[0]).getDocument());
|
209
|
+
DOMResult result = null;
|
210
|
+
String stringResult = null;
|
154
211
|
try{
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
addParametersToTransformer(context, transf, args[1]);
|
212
|
+
result = tryXsltTransformation(context, args, domSource, elistener); // DOMResult
|
213
|
+
if (result.getNode().getFirstChild() == null) {
|
214
|
+
stringResult = retryXsltTransformation(context, args, domSource, elistener); // StreamResult
|
159
215
|
}
|
160
|
-
transf.transform(docSource, result);
|
161
216
|
} catch(TransformerConfigurationException ex) {
|
162
|
-
|
217
|
+
throw runtime.newRuntimeError(ex.getMessage());
|
163
218
|
} catch(TransformerException ex) {
|
164
|
-
|
219
|
+
throw runtime.newRuntimeError(ex.getMessage());
|
220
|
+
} catch (IOException ex) {
|
221
|
+
throw runtime.newRuntimeError(ex.getMessage());
|
165
222
|
}
|
166
223
|
|
167
224
|
switch (elistener.getErrorType()) {
|
@@ -172,18 +229,122 @@ public class XsltStylesheet extends RubyObject {
|
|
172
229
|
default:
|
173
230
|
// no-op
|
174
231
|
}
|
232
|
+
|
233
|
+
if (stringResult == null) {
|
234
|
+
return createDocumentFromDomResult(context, runtime, result);
|
235
|
+
} else {
|
236
|
+
return createDocumentFromString(context, runtime, stringResult);
|
237
|
+
}
|
238
|
+
}
|
239
|
+
|
240
|
+
private DOMResult tryXsltTransformation(ThreadContext context, IRubyObject[] args, DOMSource domSource, NokogiriXsltErrorListener elistener) throws TransformerException {
|
241
|
+
Transformer transf = sheet.newTransformer();
|
242
|
+
transf.reset();
|
243
|
+
transf.setErrorListener(elistener);
|
244
|
+
if (args.length > 1) {
|
245
|
+
addParametersToTransformer(context, transf, args[1]);
|
246
|
+
}
|
247
|
+
|
248
|
+
DOMResult result = new DOMResult();
|
249
|
+
transf.transform(domSource, result);
|
250
|
+
return result;
|
251
|
+
}
|
252
|
+
|
253
|
+
private String retryXsltTransformation(ThreadContext context,
|
254
|
+
IRubyObject[] args,
|
255
|
+
DOMSource domSource,
|
256
|
+
NokogiriXsltErrorListener elistener)
|
257
|
+
throws TransformerException, IOException {
|
258
|
+
Templates templates = getTemplatesFromStreamSource();
|
259
|
+
Transformer transf = templates.newTransformer();
|
260
|
+
transf.setErrorListener(elistener);
|
261
|
+
if (args.length > 1) {
|
262
|
+
addParametersToTransformer(context, transf, args[1]);
|
263
|
+
}
|
264
|
+
PipedWriter pwriter = new PipedWriter();
|
265
|
+
PipedReader preader = new PipedReader();
|
266
|
+
pwriter.connect(preader);
|
267
|
+
StreamResult result = new StreamResult(pwriter);
|
268
|
+
transf.transform(domSource, result);
|
269
|
+
char[] cbuf = new char[1024];
|
270
|
+
int len = preader.read(cbuf, 0, 1024);
|
271
|
+
StringBuilder builder = new StringBuilder();
|
272
|
+
builder.append(CharBuffer.wrap(cbuf, 0, len));
|
273
|
+
htmlish = isHtml(builder.toString()); // judge from the first chunk
|
175
274
|
|
176
|
-
|
275
|
+
while (len == 1024) {
|
276
|
+
len = preader.read(cbuf, 0, 1024);
|
277
|
+
if (len > 0) {
|
278
|
+
builder.append(CharBuffer.wrap(cbuf, 0, len));
|
279
|
+
}
|
280
|
+
}
|
281
|
+
|
282
|
+
preader.close();
|
283
|
+
pwriter.close();
|
284
|
+
|
285
|
+
return builder.toString();
|
286
|
+
}
|
287
|
+
|
288
|
+
private IRubyObject createDocumentFromDomResult(ThreadContext context, Ruby runtime, DOMResult domResult) {
|
289
|
+
if ("html".equals(domResult.getNode().getFirstChild().getNodeName())) {
|
177
290
|
HtmlDocument htmlDocument = (HtmlDocument) getNokogiriClass(runtime, "Nokogiri::HTML::Document").allocate();
|
178
|
-
htmlDocument.
|
291
|
+
htmlDocument.setDocumentNode(context, (Document) domResult.getNode());
|
179
292
|
return htmlDocument;
|
180
293
|
} else {
|
181
294
|
XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Document"));
|
182
|
-
xmlDocument.
|
295
|
+
xmlDocument.setDocumentNode(context, (Document) domResult.getNode());
|
296
|
+
return xmlDocument;
|
297
|
+
}
|
298
|
+
}
|
299
|
+
|
300
|
+
private Templates getTemplatesFromStreamSource() throws TransformerConfigurationException {
|
301
|
+
if (stylesheet instanceof RubyString) {
|
302
|
+
StringReader reader = new StringReader((String)stylesheet.toJava(String.class));
|
303
|
+
StreamSource xsltStreamSource = new StreamSource(reader);
|
304
|
+
return factory.newTemplates(xsltStreamSource);
|
305
|
+
}
|
306
|
+
return null;
|
307
|
+
}
|
308
|
+
|
309
|
+
private static Pattern html_tag =
|
310
|
+
Pattern.compile("<(%s)*html", Pattern.CASE_INSENSITIVE);
|
311
|
+
|
312
|
+
private boolean isHtml(String chunk) {
|
313
|
+
Matcher m = XsltStylesheet.html_tag.matcher(chunk);
|
314
|
+
if (m.find()) return true;
|
315
|
+
else return false;
|
316
|
+
}
|
317
|
+
|
318
|
+
private IRubyObject createDocumentFromString(ThreadContext context, Ruby runtime, String stringResult) {
|
319
|
+
IRubyObject[] args = new IRubyObject[4];
|
320
|
+
args[0] = stringOrBlank(runtime, stringResult);
|
321
|
+
args[1] = runtime.getNil(); // url
|
322
|
+
args[2] = runtime.getNil(); // encoding
|
323
|
+
RubyClass parse_options = (RubyClass)runtime.getClassFromPath("Nokogiri::XML::ParseOptions");
|
324
|
+
if (htmlish) {
|
325
|
+
args[3] = parse_options.getConstant("DEFAULT_HTML");
|
326
|
+
RubyClass htmlDocumentClass = getNokogiriClass(runtime, "Nokogiri::HTML::Document");
|
327
|
+
return RuntimeHelpers.invoke(context, htmlDocumentClass, "parse", args);
|
328
|
+
} else {
|
329
|
+
args[3] = parse_options.getConstant("DEFAULT_XML");
|
330
|
+
RubyClass xmlDocumentClass = getNokogiriClass(runtime, "Nokogiri::XML::Document");
|
331
|
+
XmlDocument xmlDocument = (XmlDocument) RuntimeHelpers.invoke(context, xmlDocumentClass, "parse", args);
|
332
|
+
if (((Document)xmlDocument.getNode()).getDocumentElement() == null) {
|
333
|
+
RubyArray errors = (RubyArray) xmlDocument.getInstanceVariable("@errors");
|
334
|
+
RuntimeHelpers.invoke(context, errors, "<<", args[0]);
|
335
|
+
}
|
183
336
|
return xmlDocument;
|
184
337
|
}
|
185
338
|
}
|
186
339
|
|
340
|
+
private void argumentTypeCheck(Ruby runtime, IRubyObject arg) {
|
341
|
+
if (arg instanceof XmlDocument) {
|
342
|
+
return;
|
343
|
+
} else {
|
344
|
+
throw runtime.newArgumentError("argument must be a Nokogiri::XML::Document");
|
345
|
+
}
|
346
|
+
}
|
347
|
+
|
187
348
|
@JRubyMethod(name = {"registr", "register"}, meta = true)
|
188
349
|
public static IRubyObject register(ThreadContext context, IRubyObject cls, IRubyObject uri, IRubyObject receiver) {
|
189
350
|
throw context.getRuntime().newNotImplementedError("Nokogiri::XSLT.register method is not implemented");
|
@@ -34,6 +34,7 @@ package nokogiri.internals;
|
|
34
34
|
|
35
35
|
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
|
36
36
|
import static nokogiri.internals.NokogiriHelpers.isNamespace;
|
37
|
+
import static nokogiri.internals.NokogiriHelpers.stringOrNil;
|
37
38
|
import nokogiri.HtmlDocument;
|
38
39
|
import nokogiri.NokogiriService;
|
39
40
|
import nokogiri.XmlDocument;
|
@@ -52,6 +53,8 @@ import org.jruby.RubyClass;
|
|
52
53
|
import org.jruby.runtime.ThreadContext;
|
53
54
|
import org.jruby.runtime.builtin.IRubyObject;
|
54
55
|
import org.w3c.dom.Document;
|
56
|
+
import org.w3c.dom.NamedNodeMap;
|
57
|
+
import org.w3c.dom.NodeList;
|
55
58
|
|
56
59
|
/**
|
57
60
|
* Parser for HtmlDocument. This class actually parses HtmlDocument using NekoHtml.
|
@@ -84,7 +87,8 @@ public class HtmlDomParserContext extends XmlDomParserContext {
|
|
84
87
|
XMLParserConfiguration config = new HTMLConfiguration();
|
85
88
|
XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter();
|
86
89
|
XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler);
|
87
|
-
XMLDocumentFilter[] filters = { removeNSAttrsFilter, elementValidityCheckFilter};
|
90
|
+
//XMLDocumentFilter[] filters = { removeNSAttrsFilter, elementValidityCheckFilter};
|
91
|
+
XMLDocumentFilter[] filters = { elementValidityCheckFilter};
|
88
92
|
|
89
93
|
config.setErrorHandler(this.errorHandler);
|
90
94
|
parser = new DOMParser(config);
|
@@ -119,10 +123,44 @@ public class HtmlDomParserContext extends XmlDomParserContext {
|
|
119
123
|
RubyClass klazz,
|
120
124
|
Document document) {
|
121
125
|
HtmlDocument htmlDocument = (HtmlDocument) NokogiriService.HTML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), klazz);
|
122
|
-
htmlDocument.
|
126
|
+
htmlDocument.setDocumentNode(context, document);
|
127
|
+
if (ruby_encoding.isNil()) {
|
128
|
+
// ruby_encoding might have been detected by HtmlDocument::EncodingReader
|
129
|
+
if (detected_encoding != null && !detected_encoding.isNil()) {
|
130
|
+
ruby_encoding = detected_encoding;
|
131
|
+
} else {
|
132
|
+
// no encoding given & no encoding detected, then try to get it
|
133
|
+
String charset = tryGetCharsetFromHtml5MetaTag(document);
|
134
|
+
ruby_encoding = stringOrNil(context.getRuntime(), charset);
|
135
|
+
}
|
136
|
+
}
|
123
137
|
htmlDocument.setEncoding(ruby_encoding);
|
124
138
|
return htmlDocument;
|
125
139
|
}
|
140
|
+
|
141
|
+
// NekoHtml doesn't understand the HTML5 meta tag format, so it fails to detect the charset
|
142
|
+
// from an HTML5 style meta tag. Luckily, the meta tag and charset exist in the DOM tree,
|
143
|
+
// so this method attempts to find the charset there.
|
144
|
+
private String tryGetCharsetFromHtml5MetaTag(Document document) {
|
145
|
+
if (!"html".equalsIgnoreCase(document.getDocumentElement().getNodeName())) return null;
|
146
|
+
NodeList list = document.getDocumentElement().getChildNodes();
|
147
|
+
for (int i = 0; i < list.getLength(); i++) {
|
148
|
+
if ("head".equalsIgnoreCase(list.item(i).getNodeName())) {
|
149
|
+
NodeList headers = list.item(i).getChildNodes();
|
150
|
+
for (int j = 0; j < headers.getLength(); j++) {
|
151
|
+
if ("meta".equalsIgnoreCase(headers.item(j).getNodeName())) {
|
152
|
+
NamedNodeMap nodeMap = headers.item(j).getAttributes();
|
153
|
+
for (int k = 0; k < nodeMap.getLength(); k++) {
|
154
|
+
if ("charset".equalsIgnoreCase(nodeMap.item(k).getNodeName())) {
|
155
|
+
return nodeMap.item(k).getNodeValue();
|
156
|
+
}
|
157
|
+
}
|
158
|
+
}
|
159
|
+
}
|
160
|
+
}
|
161
|
+
}
|
162
|
+
return null;
|
163
|
+
}
|
126
164
|
|
127
165
|
/**
|
128
166
|
* Filter to strip out attributes that pertain to XML namespaces.
|