nokogiri 1.5.0.beta.1 → 1.5.0.beta.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +28 -8
- data/CHANGELOG.rdoc +23 -0
- data/Manifest.txt +63 -1
- data/README.ja.rdoc +1 -1
- data/README.rdoc +22 -4
- data/Rakefile +6 -2
- data/ext/java/nokogiri/EncodingHandler.java +92 -0
- data/ext/java/nokogiri/HtmlDocument.java +116 -0
- data/ext/java/nokogiri/HtmlElementDescription.java +111 -0
- data/ext/java/nokogiri/HtmlEntityLookup.java +45 -0
- data/ext/java/nokogiri/HtmlSaxParserContext.java +218 -0
- data/ext/java/nokogiri/NokogiriService.java +370 -0
- data/ext/java/nokogiri/XmlAttr.java +147 -0
- data/ext/java/nokogiri/XmlAttributeDecl.java +98 -0
- data/ext/java/nokogiri/XmlCdata.java +50 -0
- data/ext/java/nokogiri/XmlComment.java +47 -0
- data/ext/java/nokogiri/XmlDocument.java +463 -0
- data/ext/java/nokogiri/XmlDocumentFragment.java +207 -0
- data/ext/java/nokogiri/XmlDtd.java +427 -0
- data/ext/java/nokogiri/XmlElement.java +172 -0
- data/ext/java/nokogiri/XmlElementContent.java +350 -0
- data/ext/java/nokogiri/XmlElementDecl.java +115 -0
- data/ext/java/nokogiri/XmlEntityDecl.java +129 -0
- data/ext/java/nokogiri/XmlEntityReference.java +42 -0
- data/ext/java/nokogiri/XmlNamespace.java +77 -0
- data/ext/java/nokogiri/XmlNode.java +1399 -0
- data/ext/java/nokogiri/XmlNodeSet.java +248 -0
- data/ext/java/nokogiri/XmlProcessingInstruction.java +70 -0
- data/ext/java/nokogiri/XmlReader.java +373 -0
- data/ext/java/nokogiri/XmlRelaxng.java +166 -0
- data/ext/java/nokogiri/XmlSaxParserContext.java +308 -0
- data/ext/java/nokogiri/XmlSaxPushParser.java +146 -0
- data/ext/java/nokogiri/XmlSchema.java +142 -0
- data/ext/java/nokogiri/XmlSyntaxError.java +84 -0
- data/ext/java/nokogiri/XmlText.java +96 -0
- data/ext/java/nokogiri/XmlXpathContext.java +130 -0
- data/ext/java/nokogiri/XsltStylesheet.java +126 -0
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +181 -0
- data/ext/java/nokogiri/internals/NokogiriDocumentCache.java +39 -0
- data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +42 -0
- data/ext/java/nokogiri/internals/NokogiriHandler.java +251 -0
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +526 -0
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +136 -0
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +80 -0
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +37 -0
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +54 -0
- data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +49 -0
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +88 -0
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +23 -0
- data/ext/java/nokogiri/internals/ParserContext.java +235 -0
- data/ext/java/nokogiri/internals/PushInputStream.java +381 -0
- data/ext/java/nokogiri/internals/ReaderNode.java +431 -0
- data/ext/java/nokogiri/internals/SaveContext.java +249 -0
- data/ext/java/nokogiri/internals/SchemaErrorHandler.java +35 -0
- data/ext/java/nokogiri/internals/XmlDeclHandler.java +10 -0
- data/ext/java/nokogiri/internals/XmlDomParser.java +45 -0
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +201 -0
- data/ext/java/nokogiri/internals/XmlSaxParser.java +33 -0
- data/ext/nokogiri/depend +32 -0
- data/ext/nokogiri/extconf.rb +61 -32
- data/ext/nokogiri/nokogiri.c +0 -5
- data/ext/nokogiri/nokogiri.h +2 -2
- data/ext/nokogiri/xml_document.c +5 -0
- data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
- data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
- data/ext/nokogiri/xml_node.c +56 -16
- data/ext/nokogiri/xml_node_set.c +7 -7
- data/ext/nokogiri/xml_reader.c +20 -1
- data/ext/nokogiri/xml_relax_ng.c +0 -7
- data/ext/nokogiri/xml_xpath_context.c +2 -0
- data/lib/isorelax.jar +0 -0
- data/lib/jing.jar +0 -0
- data/lib/nekodtd.jar +0 -0
- data/lib/nekohtml.jar +0 -0
- data/lib/nokogiri.rb +1 -2
- data/lib/nokogiri/css/generated_parser.rb +155 -148
- data/lib/nokogiri/css/generated_tokenizer.rb +2 -1
- data/lib/nokogiri/css/parser.y +3 -0
- data/lib/nokogiri/css/xpath_visitor.rb +1 -7
- data/lib/nokogiri/html.rb +2 -2
- data/lib/nokogiri/html/document_fragment.rb +7 -4
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +3 -6
- data/lib/nokogiri/xml/builder.rb +1 -1
- data/lib/nokogiri/xml/document.rb +1 -2
- data/lib/nokogiri/xml/document_fragment.rb +7 -0
- data/lib/nokogiri/xml/node.rb +5 -3
- data/lib/nokogiri/xml/node_set.rb +25 -0
- data/lib/nokogiri/xml/reader.rb +2 -0
- data/lib/nokogiri/xml/sax/document.rb +3 -1
- data/lib/xercesImpl.jar +0 -0
- data/spec/helper.rb +3 -0
- data/spec/xml/reader_spec.rb +307 -0
- data/tasks/test.rb +1 -1
- data/test/css/test_parser.rb +11 -1
- data/test/html/sax/test_parser_context.rb +2 -2
- data/test/html/test_document.rb +2 -2
- data/test/html/test_document_fragment.rb +34 -6
- data/test/test_memory_leak.rb +2 -2
- data/test/test_reader.rb +28 -6
- data/test/test_xslt_transforms.rb +2 -3
- data/test/xml/test_attr.rb +31 -4
- data/test/xml/test_builder.rb +5 -5
- data/test/xml/test_cdata.rb +3 -3
- data/test/xml/test_document.rb +8 -8
- data/test/xml/test_document_fragment.rb +4 -12
- data/test/xml/test_node.rb +1 -1
- data/test/xml/test_node_reparenting.rb +26 -11
- data/test/xml/test_node_set.rb +38 -2
- data/test/xml/test_text.rb +11 -2
- data/test/xml/test_unparented_node.rb +1 -1
- data/test/xml/test_xpath.rb +11 -7
- metadata +68 -5
- data/lib/nokogiri/version_warning.rb +0 -14
@@ -0,0 +1,248 @@
|
|
1
|
+
package nokogiri;
|
2
|
+
|
3
|
+
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
|
4
|
+
import static nokogiri.internals.NokogiriHelpers.nodeListToRubyArray;
|
5
|
+
|
6
|
+
import java.util.List;
|
7
|
+
|
8
|
+
import org.jruby.Ruby;
|
9
|
+
import org.jruby.RubyArray;
|
10
|
+
import org.jruby.RubyClass;
|
11
|
+
import org.jruby.RubyObject;
|
12
|
+
import org.jruby.anno.JRubyClass;
|
13
|
+
import org.jruby.anno.JRubyMethod;
|
14
|
+
import org.jruby.javasupport.util.RuntimeHelpers;
|
15
|
+
import org.jruby.runtime.Block;
|
16
|
+
import org.jruby.runtime.ThreadContext;
|
17
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
18
|
+
import org.w3c.dom.Node;
|
19
|
+
import org.w3c.dom.NodeList;
|
20
|
+
|
21
|
+
@JRubyClass(name="Nokogiri::XML::NodeSet")
|
22
|
+
public class XmlNodeSet extends RubyObject {
|
23
|
+
private NodeList nodeList = null;
|
24
|
+
private RubyArray nodes;
|
25
|
+
private IRubyObject doc;
|
26
|
+
|
27
|
+
public XmlNodeSet(Ruby ruby, NodeList nodes) {
|
28
|
+
this(ruby, getNokogiriClass(ruby, "Nokogiri::XML::NodeSet"), nodes);
|
29
|
+
}
|
30
|
+
|
31
|
+
public XmlNodeSet(Ruby ruby, RubyArray nodes) {
|
32
|
+
this(ruby, getNokogiriClass(ruby, "Nokogiri::XML::NodeSet"), nodes);
|
33
|
+
}
|
34
|
+
|
35
|
+
public XmlNodeSet(Ruby ruby, RubyClass rubyClass, NodeList nodes) {
|
36
|
+
this(ruby, rubyClass, nodeListToRubyArray(ruby, nodes));
|
37
|
+
nodeList = nodes;
|
38
|
+
}
|
39
|
+
|
40
|
+
public XmlNodeSet(Ruby ruby, RubyClass rubyClass, RubyArray nodes){
|
41
|
+
super(ruby, rubyClass);
|
42
|
+
this.nodes = nodes;
|
43
|
+
|
44
|
+
IRubyObject first = nodes.first();
|
45
|
+
initialize(ruby, first);
|
46
|
+
}
|
47
|
+
|
48
|
+
public XmlNodeSet(Ruby ruby, XmlNodeSet reference){
|
49
|
+
super(ruby, getNokogiriClass(ruby, "Nokogiri::XML::NodeSet"));
|
50
|
+
this.nodes = null;
|
51
|
+
|
52
|
+
IRubyObject first = reference.nodes.first();
|
53
|
+
initialize(ruby, first);
|
54
|
+
}
|
55
|
+
|
56
|
+
void setNodes(RubyArray nodes) {
|
57
|
+
this.nodes = nodes;
|
58
|
+
nodeList = null;
|
59
|
+
}
|
60
|
+
|
61
|
+
private void initialize(Ruby ruby, IRubyObject refNode) {
|
62
|
+
if (refNode instanceof XmlNode) {
|
63
|
+
XmlNode n = (XmlNode)refNode;
|
64
|
+
doc = n.document(ruby.getCurrentContext());
|
65
|
+
setInstanceVariable("@document", doc);
|
66
|
+
if (doc != null) {
|
67
|
+
RuntimeHelpers.invoke(ruby.getCurrentContext(), doc, "decorate", this);
|
68
|
+
}
|
69
|
+
}
|
70
|
+
}
|
71
|
+
|
72
|
+
public static IRubyObject newEmptyNodeSet(ThreadContext context) {
|
73
|
+
Ruby ruby = context.getRuntime();
|
74
|
+
return new XmlNodeSet(ruby,
|
75
|
+
getNokogiriClass(ruby, "Nokogiri::XML::NodeSet"),
|
76
|
+
ruby.newEmptyArray());
|
77
|
+
}
|
78
|
+
|
79
|
+
public boolean isEmpty() {
|
80
|
+
return nodes.isEmpty();
|
81
|
+
}
|
82
|
+
|
83
|
+
public long length() {
|
84
|
+
return nodes.length().getLongValue();
|
85
|
+
}
|
86
|
+
|
87
|
+
public void relink_namespace(ThreadContext context) {
|
88
|
+
nodeList = null;
|
89
|
+
List<?> n = nodes.getList();
|
90
|
+
|
91
|
+
for (int i = 0; i < n.size(); i++) {
|
92
|
+
if (n.get(i) instanceof XmlNode) {
|
93
|
+
((XmlNode) n.get(i)).relink_namespace(context);
|
94
|
+
}
|
95
|
+
}
|
96
|
+
}
|
97
|
+
|
98
|
+
public void setDocument(IRubyObject document) {
|
99
|
+
setInstanceVariable("@document", document);
|
100
|
+
this.doc = document;
|
101
|
+
}
|
102
|
+
|
103
|
+
public NodeList toNodeList(Ruby ruby) {
|
104
|
+
if (nodeList != null) return nodeList;
|
105
|
+
return new NokogiriNodeList(ruby, this.nodes);
|
106
|
+
}
|
107
|
+
|
108
|
+
@JRubyMethod(name="&")
|
109
|
+
public IRubyObject and(ThreadContext context, IRubyObject nodeSet){
|
110
|
+
nodeList = null;
|
111
|
+
return newXmlNodeSet(context, (RubyArray) nodes.op_and(asXmlNodeSet(context, nodeSet).nodes));
|
112
|
+
}
|
113
|
+
|
114
|
+
@JRubyMethod
|
115
|
+
public IRubyObject delete(ThreadContext context, IRubyObject node_or_namespace){
|
116
|
+
nodeList = null;
|
117
|
+
return nodes.delete(context, asXmlNodeOrNamespace(context, node_or_namespace), Block.NULL_BLOCK);
|
118
|
+
}
|
119
|
+
|
120
|
+
@JRubyMethod
|
121
|
+
public IRubyObject dup(ThreadContext context){
|
122
|
+
return newXmlNodeSet(context, nodes.aryDup());
|
123
|
+
}
|
124
|
+
|
125
|
+
@JRubyMethod(name = "include?")
|
126
|
+
public IRubyObject include_p(ThreadContext context, IRubyObject node_or_namespace){
|
127
|
+
return nodes.include_p(context, asXmlNodeOrNamespace(context, node_or_namespace));
|
128
|
+
}
|
129
|
+
|
130
|
+
@JRubyMethod(name = {"length", "size"})
|
131
|
+
public IRubyObject length(ThreadContext context) {
|
132
|
+
return nodes.length();
|
133
|
+
}
|
134
|
+
|
135
|
+
@JRubyMethod(name="-")
|
136
|
+
public IRubyObject op_diff(ThreadContext context, IRubyObject nodeSet){
|
137
|
+
nodeList = null;
|
138
|
+
XmlNodeSet xmlNodeSet = newXmlNodeSet(context, this);
|
139
|
+
xmlNodeSet.setNodes((RubyArray) nodes.op_diff(asXmlNodeSet(context, nodeSet).nodes));
|
140
|
+
return xmlNodeSet;
|
141
|
+
}
|
142
|
+
|
143
|
+
@JRubyMethod(name={"|", "+"})
|
144
|
+
public IRubyObject op_or(ThreadContext context, IRubyObject nodeSet){
|
145
|
+
nodeList = null;
|
146
|
+
return newXmlNodeSet(context, (RubyArray) nodes.op_or(asXmlNodeSet(context, nodeSet).nodes));
|
147
|
+
}
|
148
|
+
|
149
|
+
@JRubyMethod(name = {"push", "<<"})
|
150
|
+
public IRubyObject push(ThreadContext context, IRubyObject node_or_namespace) {
|
151
|
+
nodeList = null;
|
152
|
+
nodes.append(asXmlNodeOrNamespace(context, node_or_namespace));
|
153
|
+
return this;
|
154
|
+
}
|
155
|
+
|
156
|
+
@JRubyMethod(name={"[]", "slice"})
|
157
|
+
public IRubyObject slice(ThreadContext context, IRubyObject indexOrRange){
|
158
|
+
IRubyObject result;
|
159
|
+
if (context.getRuntime().is1_9()) {
|
160
|
+
result = nodes.aref19(indexOrRange);
|
161
|
+
} else {
|
162
|
+
result = nodes.aref(indexOrRange);
|
163
|
+
}
|
164
|
+
if (result instanceof RubyArray) {
|
165
|
+
return newXmlNodeSet(context, (RubyArray)result);
|
166
|
+
} else {
|
167
|
+
return result;
|
168
|
+
}
|
169
|
+
}
|
170
|
+
|
171
|
+
@JRubyMethod(name={"[]", "slice"})
|
172
|
+
public IRubyObject slice(ThreadContext context, IRubyObject start, IRubyObject length){
|
173
|
+
IRubyObject result;
|
174
|
+
if (context.getRuntime().is1_9()) {
|
175
|
+
result = nodes.aref19(start, length);
|
176
|
+
} else {
|
177
|
+
result = nodes.aref(start, length);
|
178
|
+
}
|
179
|
+
if (result instanceof RubyArray) return newXmlNodeSet(context, (RubyArray)result);
|
180
|
+
else return context.getRuntime().getNil();
|
181
|
+
}
|
182
|
+
|
183
|
+
@JRubyMethod(name = {"to_a", "to_ary"})
|
184
|
+
public IRubyObject to_a(ThreadContext context){
|
185
|
+
return nodes;
|
186
|
+
}
|
187
|
+
|
188
|
+
@JRubyMethod(name = {"unlink", "remove"})
|
189
|
+
public IRubyObject unlink(ThreadContext context){
|
190
|
+
nodeList = null;
|
191
|
+
IRubyObject[] arr = this.nodes.toJavaArrayUnsafe();
|
192
|
+
long length = arr.length;
|
193
|
+
for (int i = 0; i < length; i++) {
|
194
|
+
if (arr[i] instanceof XmlNode) {
|
195
|
+
((XmlNode) arr[i] ).unlink(context);
|
196
|
+
}
|
197
|
+
}
|
198
|
+
return this;
|
199
|
+
}
|
200
|
+
|
201
|
+
private XmlNodeSet newXmlNodeSet(ThreadContext context, RubyArray array) {
|
202
|
+
XmlNodeSet result = new XmlNodeSet(context.getRuntime(), getNokogiriClass(context.getRuntime(), "Nokogiri::XML::NodeSet"), array);
|
203
|
+
return result;
|
204
|
+
}
|
205
|
+
|
206
|
+
private XmlNodeSet newXmlNodeSet(ThreadContext context, XmlNodeSet reference) {
|
207
|
+
XmlNodeSet result = new XmlNodeSet(context.getRuntime(), reference);
|
208
|
+
return result;
|
209
|
+
}
|
210
|
+
|
211
|
+
private IRubyObject asXmlNodeOrNamespace(ThreadContext context, IRubyObject possibleNode) {
|
212
|
+
if (possibleNode instanceof XmlNode || possibleNode instanceof XmlNamespace) {
|
213
|
+
return possibleNode;
|
214
|
+
} else {
|
215
|
+
throw context.getRuntime().newArgumentError("node must be a Nokogiri::XML::Node or Nokogiri::XML::Namespace");
|
216
|
+
}
|
217
|
+
}
|
218
|
+
|
219
|
+
private XmlNodeSet asXmlNodeSet(ThreadContext context, IRubyObject possibleNodeSet) {
|
220
|
+
// if(!(possibleNodeSet instanceof XmlNodeSet)) {
|
221
|
+
if(!RuntimeHelpers.invoke(context, possibleNodeSet, "is_a?",
|
222
|
+
getNokogiriClass(context.getRuntime(), "Nokogiri::XML::NodeSet")).isTrue()) {
|
223
|
+
throw context.getRuntime().newArgumentError("node must be a Nokogiri::XML::NodeSet");
|
224
|
+
}
|
225
|
+
return (XmlNodeSet) possibleNodeSet;
|
226
|
+
}
|
227
|
+
|
228
|
+
class NokogiriNodeList implements NodeList{
|
229
|
+
|
230
|
+
private final RubyArray nodes;
|
231
|
+
private final Ruby ruby;
|
232
|
+
|
233
|
+
public NokogiriNodeList(Ruby ruby, RubyArray nodes) {
|
234
|
+
this.nodes = nodes;
|
235
|
+
this.ruby = ruby;
|
236
|
+
}
|
237
|
+
|
238
|
+
public Node item(int i) {
|
239
|
+
return XmlNode.getNodeFromXmlNode(ruby.getCurrentContext(),
|
240
|
+
this.nodes.aref(ruby.newFixnum(i)));
|
241
|
+
}
|
242
|
+
|
243
|
+
public int getLength() {
|
244
|
+
return this.nodes.getLength();
|
245
|
+
}
|
246
|
+
|
247
|
+
}
|
248
|
+
}
|
@@ -0,0 +1,70 @@
|
|
1
|
+
package nokogiri;
|
2
|
+
|
3
|
+
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
|
4
|
+
import nokogiri.internals.SaveContext;
|
5
|
+
|
6
|
+
import org.jruby.Ruby;
|
7
|
+
import org.jruby.RubyClass;
|
8
|
+
import org.jruby.anno.JRubyClass;
|
9
|
+
import org.jruby.anno.JRubyMethod;
|
10
|
+
import org.jruby.javasupport.util.RuntimeHelpers;
|
11
|
+
import org.jruby.runtime.ThreadContext;
|
12
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
13
|
+
import org.w3c.dom.Document;
|
14
|
+
import org.w3c.dom.Node;
|
15
|
+
|
16
|
+
/**
|
17
|
+
*
|
18
|
+
* @author sergio
|
19
|
+
*/
|
20
|
+
@JRubyClass(name="Nokogiri::XML::ProcessingInstruction", parent="Nokogiri::XML::Node")
|
21
|
+
public class XmlProcessingInstruction extends XmlNode {
|
22
|
+
|
23
|
+
public XmlProcessingInstruction(Ruby ruby, RubyClass klass, Node node) {
|
24
|
+
super(ruby, klass, node);
|
25
|
+
}
|
26
|
+
|
27
|
+
@JRubyMethod(name="new", meta=true, rest=true, required=3)
|
28
|
+
public static IRubyObject rbNew(ThreadContext context,
|
29
|
+
IRubyObject klass,
|
30
|
+
IRubyObject[] args) {
|
31
|
+
|
32
|
+
IRubyObject doc = args[0];
|
33
|
+
IRubyObject target = args[1];
|
34
|
+
IRubyObject data = args[2];
|
35
|
+
|
36
|
+
Document document = ((XmlNode) doc).getOwnerDocument();
|
37
|
+
Node node =
|
38
|
+
document.createProcessingInstruction(rubyStringToString(target),
|
39
|
+
rubyStringToString(data));
|
40
|
+
XmlProcessingInstruction self =
|
41
|
+
new XmlProcessingInstruction(context.getRuntime(),
|
42
|
+
(RubyClass) klass,
|
43
|
+
node);
|
44
|
+
|
45
|
+
RuntimeHelpers.invoke(context, self, "initialize", args);
|
46
|
+
|
47
|
+
// TODO: if_block_given.
|
48
|
+
|
49
|
+
return self;
|
50
|
+
}
|
51
|
+
|
52
|
+
@Override
|
53
|
+
public boolean isProcessingInstruction() { return true; }
|
54
|
+
|
55
|
+
@Override
|
56
|
+
public void saveContent(ThreadContext context, SaveContext ctx) {
|
57
|
+
ctx.append("<?");
|
58
|
+
ctx.append(node_name(context).convertToString().asJavaString());
|
59
|
+
IRubyObject content = content(context);
|
60
|
+
if(!content.isNil()) {
|
61
|
+
if (ctx.asHtml()) ctx.append(" ");
|
62
|
+
ctx.append(content.convertToString().asJavaString());
|
63
|
+
}
|
64
|
+
if (ctx.asHtml())
|
65
|
+
ctx.append(">");
|
66
|
+
else
|
67
|
+
ctx.append("?>");
|
68
|
+
}
|
69
|
+
|
70
|
+
}
|
@@ -0,0 +1,373 @@
|
|
1
|
+
package nokogiri;
|
2
|
+
|
3
|
+
import static nokogiri.internals.NokogiriHelpers.stringOrBlank;
|
4
|
+
|
5
|
+
import java.io.ByteArrayInputStream;
|
6
|
+
import java.io.IOException;
|
7
|
+
import java.util.ArrayDeque;
|
8
|
+
import java.util.Stack;
|
9
|
+
|
10
|
+
import nokogiri.internals.ReaderNode;
|
11
|
+
import nokogiri.internals.ReaderNode.ElementNode;
|
12
|
+
|
13
|
+
import org.jruby.Ruby;
|
14
|
+
import org.jruby.RubyArray;
|
15
|
+
import org.jruby.RubyBoolean;
|
16
|
+
import org.jruby.RubyClass;
|
17
|
+
import org.jruby.RubyFixnum;
|
18
|
+
import org.jruby.RubyModule;
|
19
|
+
import org.jruby.RubyObject;
|
20
|
+
import org.jruby.RubyString;
|
21
|
+
import org.jruby.anno.JRubyClass;
|
22
|
+
import org.jruby.anno.JRubyMethod;
|
23
|
+
import org.jruby.exceptions.RaiseException;
|
24
|
+
import org.jruby.javasupport.util.RuntimeHelpers;
|
25
|
+
import org.jruby.runtime.Block;
|
26
|
+
import org.jruby.runtime.ThreadContext;
|
27
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
28
|
+
import org.jruby.util.ByteList;
|
29
|
+
import org.xml.sax.Attributes;
|
30
|
+
import org.xml.sax.InputSource;
|
31
|
+
import org.xml.sax.SAXException;
|
32
|
+
import org.xml.sax.SAXParseException;
|
33
|
+
import org.xml.sax.XMLReader;
|
34
|
+
import org.xml.sax.ext.DefaultHandler2;
|
35
|
+
import org.xml.sax.helpers.XMLReaderFactory;
|
36
|
+
|
37
|
+
@JRubyClass(name="Nokogiri::XML::Reader")
|
38
|
+
public class XmlReader extends RubyObject {
|
39
|
+
|
40
|
+
private static final int XML_TEXTREADER_MODE_INITIAL = 0;
|
41
|
+
private static final int XML_TEXTREADER_MODE_INTERACTIVE = 1;
|
42
|
+
private static final int XML_TEXTREADER_MODE_ERROR = 2;
|
43
|
+
private static final int XML_TEXTREADER_MODE_EOF = 3;
|
44
|
+
private static final int XML_TEXTREADER_MODE_CLOSED = 4;
|
45
|
+
private static final int XML_TEXTREADER_MODE_READING = 5;
|
46
|
+
|
47
|
+
final ArrayDeque<ReaderNode> nodeQueue = new ArrayDeque<ReaderNode>();
|
48
|
+
private int state;
|
49
|
+
|
50
|
+
public XmlReader(Ruby ruby, RubyClass rubyClass) {
|
51
|
+
super(ruby, rubyClass);
|
52
|
+
nodeQueue.add(new ReaderNode.EmptyNode(ruby));
|
53
|
+
}
|
54
|
+
|
55
|
+
private static IRubyObject[] getArgs(IRubyObject[] args) {
|
56
|
+
int size = Math.min(args.length, 3);
|
57
|
+
IRubyObject[] newArgs = new IRubyObject[size];
|
58
|
+
for(int i = 0; i < size; i++)
|
59
|
+
newArgs[i] = args[i];
|
60
|
+
return newArgs;
|
61
|
+
}
|
62
|
+
|
63
|
+
private void parseRubyString(ThreadContext context, RubyString content){
|
64
|
+
Ruby ruby = context.getRuntime();
|
65
|
+
try {
|
66
|
+
this.setState(XML_TEXTREADER_MODE_READING);
|
67
|
+
XMLReader reader = this.createReader(ruby);
|
68
|
+
ByteList byteList = content.getByteList();
|
69
|
+
ByteArrayInputStream bais = new ByteArrayInputStream(byteList.unsafeBytes(), byteList.begin(), byteList.length());
|
70
|
+
reader.parse(new InputSource(bais));
|
71
|
+
this.setState(XML_TEXTREADER_MODE_CLOSED);
|
72
|
+
} catch (SAXParseException spe) {
|
73
|
+
this.setState(XML_TEXTREADER_MODE_ERROR);
|
74
|
+
this.nodeQueue.add(new ReaderNode.ExceptionNode(ruby, spe));
|
75
|
+
} catch (IOException ioe) {
|
76
|
+
throw RaiseException.createNativeRaiseException(ruby, ioe);
|
77
|
+
} catch (SAXException saxe) {
|
78
|
+
throw RaiseException.createNativeRaiseException(ruby, saxe);
|
79
|
+
}
|
80
|
+
}
|
81
|
+
|
82
|
+
private void setSource(IRubyObject source){
|
83
|
+
this.setInstanceVariable("@source", source);
|
84
|
+
}
|
85
|
+
|
86
|
+
private void setState(int state) { this.state = state; }
|
87
|
+
|
88
|
+
@JRubyMethod
|
89
|
+
public IRubyObject attribute(ThreadContext context, IRubyObject name) {
|
90
|
+
return nodeQueue.peek().getAttributeByName(name);
|
91
|
+
}
|
92
|
+
|
93
|
+
@JRubyMethod
|
94
|
+
public IRubyObject attribute_at(ThreadContext context, IRubyObject index) {
|
95
|
+
return nodeQueue.peek().getAttributeByIndex(index);
|
96
|
+
}
|
97
|
+
|
98
|
+
@JRubyMethod
|
99
|
+
public IRubyObject attribute_count(ThreadContext context) {
|
100
|
+
return nodeQueue.peek().getAttributeCount();
|
101
|
+
}
|
102
|
+
|
103
|
+
@JRubyMethod
|
104
|
+
public IRubyObject attribute_nodes(ThreadContext context) {
|
105
|
+
return nodeQueue.peek().getAttributesNodes();
|
106
|
+
}
|
107
|
+
|
108
|
+
@JRubyMethod
|
109
|
+
public IRubyObject attr_nodes(ThreadContext context) {
|
110
|
+
return nodeQueue.peek().getAttributesNodes();
|
111
|
+
}
|
112
|
+
|
113
|
+
@JRubyMethod(name = "attributes?")
|
114
|
+
public IRubyObject attributes_p(ThreadContext context) {
|
115
|
+
return nodeQueue.peek().hasAttributes();
|
116
|
+
}
|
117
|
+
|
118
|
+
@JRubyMethod
|
119
|
+
public IRubyObject base_uri(ThreadContext context) {
|
120
|
+
return nodeQueue.peek().getXmlBase();
|
121
|
+
}
|
122
|
+
|
123
|
+
@JRubyMethod(name="default?")
|
124
|
+
public IRubyObject default_p(ThreadContext context){
|
125
|
+
return nodeQueue.peek().isDefault();
|
126
|
+
}
|
127
|
+
|
128
|
+
@JRubyMethod
|
129
|
+
public IRubyObject depth(ThreadContext context) {
|
130
|
+
return nodeQueue.peek().getDepth();
|
131
|
+
}
|
132
|
+
|
133
|
+
@JRubyMethod(name = {"empty_element?", "self_closing?"})
|
134
|
+
public IRubyObject empty_element_p(ThreadContext context) {
|
135
|
+
ReaderNode readerNode = nodeQueue.peek();
|
136
|
+
if (readerNode == null) return context.getRuntime().getNil();
|
137
|
+
if (!(readerNode instanceof ElementNode)) context.getRuntime().getFalse();
|
138
|
+
return RubyBoolean.newBoolean(context.getRuntime(), !readerNode.hasChildren);
|
139
|
+
}
|
140
|
+
|
141
|
+
@JRubyMethod(meta = true, rest = true)
|
142
|
+
public static IRubyObject from_io(ThreadContext context, IRubyObject cls, IRubyObject args[]) {
|
143
|
+
|
144
|
+
// Only to pass the source test.
|
145
|
+
Ruby ruby = context.getRuntime();
|
146
|
+
|
147
|
+
// Not nil allowed!
|
148
|
+
if(args[0].isNil()) throw ruby.newArgumentError("io cannot be nil");
|
149
|
+
|
150
|
+
XmlReader r = new XmlReader(ruby, ((RubyModule) ruby.getModule("Nokogiri").getConstant("XML")).getClass("Reader"));
|
151
|
+
|
152
|
+
r.callInit(getArgs(args), Block.NULL_BLOCK);
|
153
|
+
|
154
|
+
r.setSource(args[0]);
|
155
|
+
|
156
|
+
RubyString content = RuntimeHelpers.invoke(context, args[0], "read").convertToString();
|
157
|
+
|
158
|
+
r.parseRubyString(context, content);
|
159
|
+
return r;
|
160
|
+
}
|
161
|
+
|
162
|
+
@JRubyMethod(meta = true, rest = true)
|
163
|
+
public static IRubyObject from_memory(ThreadContext context, IRubyObject cls, IRubyObject args[]) {
|
164
|
+
Ruby ruby = context.getRuntime();
|
165
|
+
|
166
|
+
// Not nil allowed!
|
167
|
+
if(args[0].isNil()) throw ruby.newArgumentError("string cannot be nil");
|
168
|
+
|
169
|
+
XmlReader r = new XmlReader(ruby, ((RubyModule) ruby.getModule("Nokogiri").getConstant("XML")).getClass("Reader"));
|
170
|
+
|
171
|
+
r.callInit(getArgs(args), Block.NULL_BLOCK);
|
172
|
+
|
173
|
+
r.setSource(args[0]);
|
174
|
+
|
175
|
+
r.parseRubyString(context, args[0].convertToString());
|
176
|
+
|
177
|
+
return r;
|
178
|
+
}
|
179
|
+
|
180
|
+
@JRubyMethod
|
181
|
+
public IRubyObject node_type(ThreadContext context) {
|
182
|
+
IRubyObject node_type = nodeQueue.peek().getNodeType();
|
183
|
+
return node_type == null ? RubyFixnum.zero(context.getRuntime()) : node_type;
|
184
|
+
}
|
185
|
+
|
186
|
+
@JRubyMethod
|
187
|
+
public IRubyObject inner_xml(ThreadContext context) {
|
188
|
+
return stringOrBlank(context.getRuntime(), getInnerXml(nodeQueue, nodeQueue.peek()));
|
189
|
+
}
|
190
|
+
|
191
|
+
private String getInnerXml(ArrayDeque<ReaderNode> nodeQueue, ReaderNode current) {
|
192
|
+
if (current.depth < 0) return null;
|
193
|
+
if (!current.hasChildren) return null;
|
194
|
+
StringBuffer sb = new StringBuffer();
|
195
|
+
int currentDepth = (Integer)current.depth;
|
196
|
+
for (ReaderNode node : nodeQueue) {
|
197
|
+
if (((Integer)node.depth) > currentDepth) sb.append(node.getString());
|
198
|
+
}
|
199
|
+
return new String(sb);
|
200
|
+
}
|
201
|
+
|
202
|
+
@JRubyMethod
|
203
|
+
public IRubyObject outer_xml(ThreadContext context) {
|
204
|
+
return stringOrBlank(context.getRuntime(), getOuterXml(nodeQueue, nodeQueue.peek()));
|
205
|
+
}
|
206
|
+
|
207
|
+
private String getOuterXml(ArrayDeque<ReaderNode> nodeQueue, ReaderNode current) {
|
208
|
+
if (current.depth < 0) return null;
|
209
|
+
StringBuffer sb = new StringBuffer();
|
210
|
+
int initialDepth = (Integer)current.depth - 1;
|
211
|
+
for (ReaderNode node : nodeQueue) {
|
212
|
+
if (((Integer)node.depth) > initialDepth) sb.append(node.getString());
|
213
|
+
}
|
214
|
+
return new String(sb);
|
215
|
+
}
|
216
|
+
|
217
|
+
@JRubyMethod
|
218
|
+
public IRubyObject lang(ThreadContext context) {
|
219
|
+
return nodeQueue.peek().getLang();
|
220
|
+
}
|
221
|
+
|
222
|
+
@JRubyMethod
|
223
|
+
public IRubyObject local_name(ThreadContext context) {
|
224
|
+
return nodeQueue.peek().getLocalName();
|
225
|
+
}
|
226
|
+
|
227
|
+
@JRubyMethod
|
228
|
+
public IRubyObject name(ThreadContext context) {
|
229
|
+
return nodeQueue.peek().getName();
|
230
|
+
}
|
231
|
+
|
232
|
+
@JRubyMethod
|
233
|
+
public IRubyObject namespace_uri(ThreadContext context) {
|
234
|
+
return nodeQueue.peek().getUri();
|
235
|
+
}
|
236
|
+
|
237
|
+
@JRubyMethod
|
238
|
+
public IRubyObject namespaces(ThreadContext context) {
|
239
|
+
return nodeQueue.peek().getNamespaces(context);
|
240
|
+
}
|
241
|
+
|
242
|
+
@JRubyMethod
|
243
|
+
public IRubyObject prefix(ThreadContext context) {
|
244
|
+
return nodeQueue.peek().getPrefix();
|
245
|
+
}
|
246
|
+
|
247
|
+
@JRubyMethod
|
248
|
+
public IRubyObject read(ThreadContext context) {
|
249
|
+
this.nodeQueue.poll();
|
250
|
+
if(nodeQueue.peek() == null) {
|
251
|
+
return context.getRuntime().getNil();
|
252
|
+
} else if(nodeQueue.peek().isError()) {
|
253
|
+
RubyArray errors = (RubyArray) this.getInstanceVariable("@errors");
|
254
|
+
errors.append(nodeQueue.peek().toSyntaxError());
|
255
|
+
|
256
|
+
this.setInstanceVariable("@errors", errors);
|
257
|
+
|
258
|
+
throw new RaiseException((XmlSyntaxError) nodeQueue.peek().toSyntaxError());
|
259
|
+
} else {
|
260
|
+
return this;
|
261
|
+
}
|
262
|
+
}
|
263
|
+
|
264
|
+
@JRubyMethod
|
265
|
+
public IRubyObject state(ThreadContext context) {
|
266
|
+
return context.getRuntime().newFixnum(this.state);
|
267
|
+
}
|
268
|
+
|
269
|
+
@JRubyMethod
|
270
|
+
public IRubyObject value(ThreadContext context) {
|
271
|
+
return nodeQueue.peek().getValue();
|
272
|
+
}
|
273
|
+
|
274
|
+
@JRubyMethod(name = "value?")
|
275
|
+
public IRubyObject value_p(ThreadContext context) {
|
276
|
+
return nodeQueue.peek().hasValue();
|
277
|
+
}
|
278
|
+
|
279
|
+
@JRubyMethod
|
280
|
+
public IRubyObject xml_version(ThreadContext context) {
|
281
|
+
return nodeQueue.peek().getXmlVersion();
|
282
|
+
}
|
283
|
+
|
284
|
+
protected XMLReader createReader(final Ruby ruby) {
|
285
|
+
DefaultHandler2 handler = new DefaultHandler2() {
|
286
|
+
|
287
|
+
Stack<String> langStack;
|
288
|
+
int depth;
|
289
|
+
Stack<String> xmlBaseStack;
|
290
|
+
Stack<ReaderNode.ElementNode> elementStack;
|
291
|
+
|
292
|
+
@Override
|
293
|
+
public void characters(char[] chars, int start, int length) {
|
294
|
+
ReaderNode.TextNode node = new ReaderNode.TextNode(ruby, new String(chars, start, length), depth, langStack, xmlBaseStack);
|
295
|
+
nodeQueue.add(node);
|
296
|
+
}
|
297
|
+
|
298
|
+
@Override
|
299
|
+
public void endDocument() throws SAXException {
|
300
|
+
langStack = null;
|
301
|
+
xmlBaseStack = null;
|
302
|
+
elementStack = null;
|
303
|
+
}
|
304
|
+
|
305
|
+
@Override
|
306
|
+
public void endElement(String uri, String localName, String qName) {
|
307
|
+
depth--;
|
308
|
+
ReaderNode previous = nodeQueue.getLast();
|
309
|
+
ElementNode startElementNode = elementStack.pop();
|
310
|
+
if (previous instanceof ReaderNode.ElementNode && qName.equals(previous.name)) {
|
311
|
+
previous.hasChildren = false;
|
312
|
+
} else {
|
313
|
+
ReaderNode node = new ReaderNode.ClosingNode(ruby, uri, localName, qName, depth, langStack, xmlBaseStack);
|
314
|
+
if (startElementNode != null) {
|
315
|
+
node.attributeList = startElementNode.attributeList;
|
316
|
+
node.namespaces = startElementNode.namespaces;
|
317
|
+
}
|
318
|
+
nodeQueue.add(node);
|
319
|
+
}
|
320
|
+
if (!langStack.isEmpty()) langStack.pop();
|
321
|
+
if (!xmlBaseStack.isEmpty()) xmlBaseStack.pop();
|
322
|
+
}
|
323
|
+
|
324
|
+
@Override
|
325
|
+
public void error(SAXParseException ex) throws SAXParseException {
|
326
|
+
nodeQueue.add(new ReaderNode.ExceptionNode(ruby, ex));
|
327
|
+
throw ex;
|
328
|
+
}
|
329
|
+
|
330
|
+
@Override
|
331
|
+
public void fatalError(SAXParseException ex) throws SAXParseException {
|
332
|
+
nodeQueue.add(new ReaderNode.ExceptionNode(ruby, ex));
|
333
|
+
throw ex;
|
334
|
+
}
|
335
|
+
|
336
|
+
@Override
|
337
|
+
public void startDocument() {
|
338
|
+
depth = 0;
|
339
|
+
langStack = new Stack<String>();
|
340
|
+
xmlBaseStack = new Stack<String>();
|
341
|
+
elementStack = new Stack<ReaderNode.ElementNode>();
|
342
|
+
}
|
343
|
+
|
344
|
+
@Override
|
345
|
+
public void startElement(String uri, String localName, String qName, Attributes attrs) {
|
346
|
+
ReaderNode readerNode = new ReaderNode.ElementNode(ruby, uri, localName, qName, attrs, depth, langStack, xmlBaseStack);
|
347
|
+
nodeQueue.add(readerNode);
|
348
|
+
depth++;
|
349
|
+
if (readerNode.lang != null) langStack.push(readerNode.lang);
|
350
|
+
if (readerNode.xmlBase != null) xmlBaseStack.push(readerNode.xmlBase);
|
351
|
+
elementStack.push((ReaderNode.ElementNode)readerNode);
|
352
|
+
}
|
353
|
+
|
354
|
+
@Override
|
355
|
+
public void warning(SAXParseException ex) throws SAXParseException {
|
356
|
+
nodeQueue.add(new ReaderNode.ExceptionNode(ruby, ex));
|
357
|
+
throw ex;
|
358
|
+
}
|
359
|
+
};
|
360
|
+
try {
|
361
|
+
XMLReader reader = XMLReaderFactory.createXMLReader();
|
362
|
+
reader.setContentHandler(handler);
|
363
|
+
reader.setDTDHandler(handler);
|
364
|
+
reader.setErrorHandler(handler);
|
365
|
+
reader.setFeature("http://xml.org/sax/features/xmlns-uris", true);
|
366
|
+
reader.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
|
367
|
+
reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
|
368
|
+
return reader;
|
369
|
+
} catch (SAXException saxe) {
|
370
|
+
throw RaiseException.createNativeRaiseException(ruby, saxe);
|
371
|
+
}
|
372
|
+
}
|
373
|
+
}
|