nokogiri 1.5.0.beta.1 → 1.5.0.beta.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +28 -8
- data/CHANGELOG.rdoc +23 -0
- data/Manifest.txt +63 -1
- data/README.ja.rdoc +1 -1
- data/README.rdoc +22 -4
- data/Rakefile +6 -2
- data/ext/java/nokogiri/EncodingHandler.java +92 -0
- data/ext/java/nokogiri/HtmlDocument.java +116 -0
- data/ext/java/nokogiri/HtmlElementDescription.java +111 -0
- data/ext/java/nokogiri/HtmlEntityLookup.java +45 -0
- data/ext/java/nokogiri/HtmlSaxParserContext.java +218 -0
- data/ext/java/nokogiri/NokogiriService.java +370 -0
- data/ext/java/nokogiri/XmlAttr.java +147 -0
- data/ext/java/nokogiri/XmlAttributeDecl.java +98 -0
- data/ext/java/nokogiri/XmlCdata.java +50 -0
- data/ext/java/nokogiri/XmlComment.java +47 -0
- data/ext/java/nokogiri/XmlDocument.java +463 -0
- data/ext/java/nokogiri/XmlDocumentFragment.java +207 -0
- data/ext/java/nokogiri/XmlDtd.java +427 -0
- data/ext/java/nokogiri/XmlElement.java +172 -0
- data/ext/java/nokogiri/XmlElementContent.java +350 -0
- data/ext/java/nokogiri/XmlElementDecl.java +115 -0
- data/ext/java/nokogiri/XmlEntityDecl.java +129 -0
- data/ext/java/nokogiri/XmlEntityReference.java +42 -0
- data/ext/java/nokogiri/XmlNamespace.java +77 -0
- data/ext/java/nokogiri/XmlNode.java +1399 -0
- data/ext/java/nokogiri/XmlNodeSet.java +248 -0
- data/ext/java/nokogiri/XmlProcessingInstruction.java +70 -0
- data/ext/java/nokogiri/XmlReader.java +373 -0
- data/ext/java/nokogiri/XmlRelaxng.java +166 -0
- data/ext/java/nokogiri/XmlSaxParserContext.java +308 -0
- data/ext/java/nokogiri/XmlSaxPushParser.java +146 -0
- data/ext/java/nokogiri/XmlSchema.java +142 -0
- data/ext/java/nokogiri/XmlSyntaxError.java +84 -0
- data/ext/java/nokogiri/XmlText.java +96 -0
- data/ext/java/nokogiri/XmlXpathContext.java +130 -0
- data/ext/java/nokogiri/XsltStylesheet.java +126 -0
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +181 -0
- data/ext/java/nokogiri/internals/NokogiriDocumentCache.java +39 -0
- data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +42 -0
- data/ext/java/nokogiri/internals/NokogiriHandler.java +251 -0
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +526 -0
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +136 -0
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +80 -0
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +37 -0
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +54 -0
- data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +49 -0
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +88 -0
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +23 -0
- data/ext/java/nokogiri/internals/ParserContext.java +235 -0
- data/ext/java/nokogiri/internals/PushInputStream.java +381 -0
- data/ext/java/nokogiri/internals/ReaderNode.java +431 -0
- data/ext/java/nokogiri/internals/SaveContext.java +249 -0
- data/ext/java/nokogiri/internals/SchemaErrorHandler.java +35 -0
- data/ext/java/nokogiri/internals/XmlDeclHandler.java +10 -0
- data/ext/java/nokogiri/internals/XmlDomParser.java +45 -0
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +201 -0
- data/ext/java/nokogiri/internals/XmlSaxParser.java +33 -0
- data/ext/nokogiri/depend +32 -0
- data/ext/nokogiri/extconf.rb +61 -32
- data/ext/nokogiri/nokogiri.c +0 -5
- data/ext/nokogiri/nokogiri.h +2 -2
- data/ext/nokogiri/xml_document.c +5 -0
- data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
- data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
- data/ext/nokogiri/xml_node.c +56 -16
- data/ext/nokogiri/xml_node_set.c +7 -7
- data/ext/nokogiri/xml_reader.c +20 -1
- data/ext/nokogiri/xml_relax_ng.c +0 -7
- data/ext/nokogiri/xml_xpath_context.c +2 -0
- data/lib/isorelax.jar +0 -0
- data/lib/jing.jar +0 -0
- data/lib/nekodtd.jar +0 -0
- data/lib/nekohtml.jar +0 -0
- data/lib/nokogiri.rb +1 -2
- data/lib/nokogiri/css/generated_parser.rb +155 -148
- data/lib/nokogiri/css/generated_tokenizer.rb +2 -1
- data/lib/nokogiri/css/parser.y +3 -0
- data/lib/nokogiri/css/xpath_visitor.rb +1 -7
- data/lib/nokogiri/html.rb +2 -2
- data/lib/nokogiri/html/document_fragment.rb +7 -4
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +3 -6
- data/lib/nokogiri/xml/builder.rb +1 -1
- data/lib/nokogiri/xml/document.rb +1 -2
- data/lib/nokogiri/xml/document_fragment.rb +7 -0
- data/lib/nokogiri/xml/node.rb +5 -3
- data/lib/nokogiri/xml/node_set.rb +25 -0
- data/lib/nokogiri/xml/reader.rb +2 -0
- data/lib/nokogiri/xml/sax/document.rb +3 -1
- data/lib/xercesImpl.jar +0 -0
- data/spec/helper.rb +3 -0
- data/spec/xml/reader_spec.rb +307 -0
- data/tasks/test.rb +1 -1
- data/test/css/test_parser.rb +11 -1
- data/test/html/sax/test_parser_context.rb +2 -2
- data/test/html/test_document.rb +2 -2
- data/test/html/test_document_fragment.rb +34 -6
- data/test/test_memory_leak.rb +2 -2
- data/test/test_reader.rb +28 -6
- data/test/test_xslt_transforms.rb +2 -3
- data/test/xml/test_attr.rb +31 -4
- data/test/xml/test_builder.rb +5 -5
- data/test/xml/test_cdata.rb +3 -3
- data/test/xml/test_document.rb +8 -8
- data/test/xml/test_document_fragment.rb +4 -12
- data/test/xml/test_node.rb +1 -1
- data/test/xml/test_node_reparenting.rb +26 -11
- data/test/xml/test_node_set.rb +38 -2
- data/test/xml/test_text.rb +11 -2
- data/test/xml/test_unparented_node.rb +1 -1
- data/test/xml/test_xpath.rb +11 -7
- metadata +68 -5
- data/lib/nokogiri/version_warning.rb +0 -14
@@ -0,0 +1,207 @@
|
|
1
|
+
package nokogiri;
|
2
|
+
|
3
|
+
import static nokogiri.internals.NokogiriHelpers.getLocalNameForNamespace;
|
4
|
+
import static nokogiri.internals.NokogiriHelpers.getLocalPart;
|
5
|
+
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
|
6
|
+
import static nokogiri.internals.NokogiriHelpers.getPrefix;
|
7
|
+
import static nokogiri.internals.NokogiriHelpers.isNamespace;
|
8
|
+
|
9
|
+
import java.util.HashMap;
|
10
|
+
import java.util.Map;
|
11
|
+
import java.util.Set;
|
12
|
+
import java.util.regex.Matcher;
|
13
|
+
import java.util.regex.Pattern;
|
14
|
+
|
15
|
+
import nokogiri.internals.NokogiriNamespaceContext;
|
16
|
+
import nokogiri.internals.SaveContext;
|
17
|
+
|
18
|
+
import org.jruby.Ruby;
|
19
|
+
import org.jruby.RubyArray;
|
20
|
+
import org.jruby.RubyBoolean;
|
21
|
+
import org.jruby.RubyClass;
|
22
|
+
import org.jruby.RubyString;
|
23
|
+
import org.jruby.anno.JRubyClass;
|
24
|
+
import org.jruby.anno.JRubyMethod;
|
25
|
+
import org.jruby.javasupport.JavaUtil;
|
26
|
+
import org.jruby.javasupport.util.RuntimeHelpers;
|
27
|
+
import org.jruby.runtime.ThreadContext;
|
28
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
29
|
+
import org.w3c.dom.Attr;
|
30
|
+
import org.w3c.dom.NamedNodeMap;
|
31
|
+
|
32
|
+
/**
|
33
|
+
*
|
34
|
+
* @author sergio
|
35
|
+
*/
|
36
|
+
@JRubyClass(name="Nokogiri::XML::DocumentFragment", parent="Nokogiri::XML::Node")
|
37
|
+
public class XmlDocumentFragment extends XmlNode {
|
38
|
+
private XmlElement fragmentContext = null;
|
39
|
+
|
40
|
+
/**
 * Creates a fragment whose Ruby class is resolved by name
 * ("Nokogiri::XML::DocumentFragment") and delegates to the
 * two-argument constructor.
 *
 * @param ruby runtime used to look up the class and build the object
 */
public XmlDocumentFragment(Ruby ruby) {
    this(ruby, getNokogiriClass(ruby, "Nokogiri::XML::DocumentFragment"));
}
|
43
|
+
|
44
|
+
/**
 * Creates a fragment for an explicitly supplied Ruby class; all work is
 * done by the superclass constructor.
 *
 * @param ruby  runtime passed through to the superclass
 * @param klazz Ruby class passed through to the superclass
 */
public XmlDocumentFragment(Ruby ruby, RubyClass klazz) {
    super(ruby, klazz);
}
|
47
|
+
|
48
|
+
// @JRubyMethod(name="new", meta = true)
|
49
|
+
// public static IRubyObject rbNew(ThreadContext context, IRubyObject cls, IRubyObject doc) {
|
50
|
+
// IRubyObject[] argc = new IRubyObject[1];
|
51
|
+
// argc[0] = doc;
|
52
|
+
// return rbNew(context, cls, argc);
|
53
|
+
// }
|
54
|
+
|
55
|
+
@JRubyMethod(name="new", meta = true, required=1, optional=2)
|
56
|
+
public static IRubyObject rbNew(ThreadContext context, IRubyObject cls, IRubyObject[] argc) {
|
57
|
+
|
58
|
+
if(argc.length < 1) {
|
59
|
+
throw context.getRuntime().newArgumentError(argc.length, 1);
|
60
|
+
}
|
61
|
+
|
62
|
+
if(!(argc[0] instanceof XmlDocument)){
|
63
|
+
throw context.getRuntime().newArgumentError("first parameter must be a Nokogiri::XML::Document instance");
|
64
|
+
}
|
65
|
+
|
66
|
+
XmlDocument doc = (XmlDocument) argc[0];
|
67
|
+
|
68
|
+
// make wellformed fragment, ignore invalid namespace, or add appropriate namespace to parse
|
69
|
+
if (argc.length > 1 && argc[1] instanceof RubyString) {
|
70
|
+
argc[1] = JavaUtil.convertJavaToRuby(context.getRuntime(), addRootTagIfNeeded(context, doc, (String)argc[1].toJava(String.class)));
|
71
|
+
argc[1] = JavaUtil.convertJavaToRuby(context.getRuntime(), ignoreNamespaceIfNeeded(doc, (String)argc[1].toJava(String.class)));
|
72
|
+
argc[1] = JavaUtil.convertJavaToRuby(context.getRuntime(), addNamespaceDeclIfNeeded(doc, (String)argc[1].toJava(String.class)));
|
73
|
+
}
|
74
|
+
|
75
|
+
XmlDocumentFragment fragment = (XmlDocumentFragment) ((RubyClass)cls).allocate();
|
76
|
+
fragment.setDocument(doc);
|
77
|
+
fragment.setNode(doc.getDocument().createDocumentFragment());
|
78
|
+
|
79
|
+
//TODO: Get namespace definitions from doc.
|
80
|
+
if (argc.length == 3 && argc[2] != null && argc[2] instanceof XmlElement) {
|
81
|
+
fragment.fragmentContext = (XmlElement)argc[2];
|
82
|
+
}
|
83
|
+
RuntimeHelpers.invoke(context, fragment, "initialize", argc);
|
84
|
+
return fragment;
|
85
|
+
}
|
86
|
+
|
87
|
+
private static Pattern qname_pattern = Pattern.compile("[^</:>\\s]+:[^</:>=\\s]+");
|
88
|
+
private static Pattern starttag_pattern = Pattern.compile("<[^</>]+>");
|
89
|
+
// wellformed_pattern is also used in XmlNode#in_context() method.
|
90
|
+
public static Pattern wellformed_pattern = Pattern.compile("<(.*)>(()|[^>]*)</\\1>|<[^</>]+/>");
|
91
|
+
|
92
|
+
private static String addRootTagIfNeeded(ThreadContext context, XmlDocument doc, String tags) {
|
93
|
+
IRubyObject isHtml = RuntimeHelpers.invoke(context, doc, "html?");
|
94
|
+
if (isHtml instanceof RubyBoolean && ((RubyBoolean)isHtml).isTrue()) return tags;
|
95
|
+
Matcher matcher = wellformed_pattern.matcher(tags);
|
96
|
+
while(matcher.find()) {
|
97
|
+
if (matcher.start() == 0 && matcher.end() == tags.length()) return tags;
|
98
|
+
break;
|
99
|
+
}
|
100
|
+
tags = "<"+ NokogiriNamespaceContext.NOKOGIRI_TEMPORARY_ROOT_TAG + ">" + tags + "</" + NokogiriNamespaceContext.NOKOGIRI_TEMPORARY_ROOT_TAG + ">";
|
101
|
+
return tags;
|
102
|
+
}
|
103
|
+
|
104
|
+
private static String ignoreNamespaceIfNeeded(XmlDocument doc, String tags) {
|
105
|
+
if (doc.getDocument() == null) return tags;
|
106
|
+
if (doc.getDocument().getDocumentElement() == null) return tags;
|
107
|
+
Matcher matcher = qname_pattern.matcher(tags);
|
108
|
+
Map<String, String> rewriteTable = new HashMap<String, String>();
|
109
|
+
while(matcher.find()) {
|
110
|
+
String qName = matcher.group();
|
111
|
+
NamedNodeMap nodeMap = doc.getDocument().getDocumentElement().getAttributes();
|
112
|
+
if (!isNamespaceDefined(qName, nodeMap)) {
|
113
|
+
rewriteTable.put(qName, getLocalPart(qName));
|
114
|
+
}
|
115
|
+
}
|
116
|
+
Set<String> keys = rewriteTable.keySet();
|
117
|
+
for (String key : keys) {
|
118
|
+
tags = tags.replace(key, rewriteTable.get(key));
|
119
|
+
}
|
120
|
+
return tags;
|
121
|
+
}
|
122
|
+
|
123
|
+
private static boolean isNamespaceDefined(String qName, NamedNodeMap nodeMap) {
|
124
|
+
if (isNamespace(qName.intern())) return true;
|
125
|
+
for (int i=0; i < nodeMap.getLength(); i++) {
|
126
|
+
Attr attr = (Attr)nodeMap.item(i);
|
127
|
+
if (isNamespace(attr.getNodeName())) {
|
128
|
+
String localPart = getLocalNameForNamespace(attr.getNodeName());
|
129
|
+
if (getPrefix(qName).equals(localPart)) {
|
130
|
+
return true;
|
131
|
+
}
|
132
|
+
}
|
133
|
+
}
|
134
|
+
return false;
|
135
|
+
}
|
136
|
+
|
137
|
+
private static String addNamespaceDeclIfNeeded(XmlDocument doc, String tags) {
|
138
|
+
if (doc.getDocument() == null) return tags;
|
139
|
+
if (doc.getDocument().getDocumentElement() == null) return tags;
|
140
|
+
Matcher matcher = starttag_pattern.matcher(tags);
|
141
|
+
Map<String, String> rewriteTable = new HashMap<String, String>();
|
142
|
+
while(matcher.find()) {
|
143
|
+
String start_tag = matcher.group();
|
144
|
+
Matcher matcher2 = qname_pattern.matcher(start_tag);
|
145
|
+
while(matcher2.find()) {
|
146
|
+
String qName = matcher2.group();
|
147
|
+
NamedNodeMap nodeMap = doc.getDocument().getDocumentElement().getAttributes();
|
148
|
+
if (isNamespaceDefined(qName, nodeMap)) {
|
149
|
+
String namespaceDecl = getNamespceDecl(getPrefix(qName), nodeMap);
|
150
|
+
if (namespaceDecl != null) {
|
151
|
+
rewriteTable.put("<"+qName+">", "<"+qName + " " + namespaceDecl+">");
|
152
|
+
}
|
153
|
+
}
|
154
|
+
}
|
155
|
+
}
|
156
|
+
Set<String> keys = rewriteTable.keySet();
|
157
|
+
for (String key : keys) {
|
158
|
+
tags = tags.replace(key, rewriteTable.get(key));
|
159
|
+
}
|
160
|
+
|
161
|
+
return tags;
|
162
|
+
}
|
163
|
+
|
164
|
+
private static String getNamespceDecl(String prefix, NamedNodeMap nodeMap) {
|
165
|
+
for (int i=0; i < nodeMap.getLength(); i++) {
|
166
|
+
Attr attr = (Attr)nodeMap.item(i);
|
167
|
+
if (prefix.equals(attr.getLocalName())) {
|
168
|
+
return attr.getName() + "=\"" + attr.getValue() + "\"";
|
169
|
+
}
|
170
|
+
}
|
171
|
+
return null;
|
172
|
+
}
|
173
|
+
|
174
|
+
public XmlElement getFragmentContext() {
|
175
|
+
return fragmentContext;
|
176
|
+
}
|
177
|
+
|
178
|
+
//@Override
|
179
|
+
public void add_child(ThreadContext context, XmlNode child) {
|
180
|
+
// Some magic for DocumentFragment
|
181
|
+
|
182
|
+
Ruby ruby = context.getRuntime();
|
183
|
+
XmlNodeSet children = (XmlNodeSet) child.children(context);
|
184
|
+
|
185
|
+
long length = children.length();
|
186
|
+
|
187
|
+
RubyArray childrenArray = children.convertToArray();
|
188
|
+
|
189
|
+
if(length != 0) {
|
190
|
+
for(int i = 0; i < length; i++) {
|
191
|
+
XmlNode item = (XmlNode) ((XmlNode) childrenArray.aref(ruby.newFixnum(i))).dup_implementation(context, true);
|
192
|
+
add_child(context, item);
|
193
|
+
}
|
194
|
+
}
|
195
|
+
}
|
196
|
+
|
197
|
+
@Override
|
198
|
+
public void relink_namespace(ThreadContext context) {
|
199
|
+
((XmlNodeSet) children(context)).relink_namespace(context);
|
200
|
+
}
|
201
|
+
|
202
|
+
@Override
|
203
|
+
public void saveContent(ThreadContext context, SaveContext ctx) {
|
204
|
+
saveNodeListContent(context, (XmlNodeSet) children(context), ctx);
|
205
|
+
}
|
206
|
+
|
207
|
+
}
|
@@ -0,0 +1,427 @@
|
|
1
|
+
package nokogiri;
|
2
|
+
|
3
|
+
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
|
4
|
+
import static nokogiri.internals.NokogiriHelpers.nonEmptyStringOrNil;
|
5
|
+
import static nokogiri.internals.NokogiriHelpers.stringOrNil;
|
6
|
+
import static org.jruby.javasupport.util.RuntimeHelpers.invoke;
|
7
|
+
import nokogiri.internals.NokogiriHelpers;
|
8
|
+
import nokogiri.internals.SaveContext;
|
9
|
+
|
10
|
+
import org.apache.xerces.xni.QName;
|
11
|
+
import org.cyberneko.dtd.DTDConfiguration;
|
12
|
+
import org.jruby.Ruby;
|
13
|
+
import org.jruby.RubyArray;
|
14
|
+
import org.jruby.RubyClass;
|
15
|
+
import org.jruby.RubyHash;
|
16
|
+
import org.jruby.anno.JRubyClass;
|
17
|
+
import org.jruby.anno.JRubyMethod;
|
18
|
+
import org.jruby.runtime.ThreadContext;
|
19
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
20
|
+
import org.w3c.dom.Document;
|
21
|
+
import org.w3c.dom.DocumentType;
|
22
|
+
import org.w3c.dom.Element;
|
23
|
+
import org.w3c.dom.Node;
|
24
|
+
|
25
|
+
@JRubyClass(name="Nokogiri::XML::DTD", parent="Nokogiri::XML::Node")
|
26
|
+
public class XmlDtd extends XmlNode {
|
27
|
+
protected RubyArray allDecls = null;
|
28
|
+
|
29
|
+
/** cache of children, Nokogiri::XML::NodeSet */
|
30
|
+
protected IRubyObject children = null;
|
31
|
+
|
32
|
+
/** cache of name => XmlAttributeDecl */
|
33
|
+
protected RubyHash attributes = null;
|
34
|
+
|
35
|
+
/** cache of name => XmlElementDecl */
|
36
|
+
protected RubyHash elements = null;
|
37
|
+
|
38
|
+
/** cache of name => XmlEntityDecl */
|
39
|
+
protected RubyHash entities = null;
|
40
|
+
|
41
|
+
/** cache of name => Nokogiri::XML::Notation */
|
42
|
+
protected RubyHash notations = null;
|
43
|
+
protected RubyClass notationClass;
|
44
|
+
|
45
|
+
/** temporary store of content models before they are added to
|
46
|
+
* their XmlElementDecl. */
|
47
|
+
protected RubyHash contentModels;
|
48
|
+
|
49
|
+
/** node name */
|
50
|
+
protected IRubyObject name;
|
51
|
+
|
52
|
+
/** public ID (or external ID) */
|
53
|
+
protected IRubyObject pubId;
|
54
|
+
|
55
|
+
/** system ID */
|
56
|
+
protected IRubyObject sysId;
|
57
|
+
|
58
|
+
/**
 * Creates a DTD wrapper with no underlying node. Only the superclass
 * constructor runs here, so <code>notationClass</code>, <code>name</code>,
 * <code>pubId</code> and <code>sysId</code> are not assigned by this
 * constructor.
 *
 * @param ruby      runtime passed through to the superclass
 * @param rubyClass Ruby class passed through to the superclass
 */
public XmlDtd(Ruby ruby, RubyClass rubyClass) {
    super(ruby, rubyClass);
}
|
61
|
+
|
62
|
+
/**
 * Creates a DTD of class "Nokogiri::XML::DTD" with a null node.
 *
 * @param ruby runtime used to look up the class
 */
public XmlDtd(Ruby ruby) {
    this(ruby, getNokogiriClass(ruby, "Nokogiri::XML::DTD"), null);
}
|
65
|
+
|
66
|
+
/**
 * Creates a DTD of class "Nokogiri::XML::DTD" wrapping <code>dtd</code>.
 *
 * @param ruby runtime used to look up the class
 * @param dtd  node to wrap; may be null
 */
public XmlDtd(Ruby ruby, Node dtd) {
    this(ruby, getNokogiriClass(ruby, "Nokogiri::XML::DTD"), dtd);
}
|
69
|
+
|
70
|
+
public XmlDtd(Ruby ruby, RubyClass rubyClass, Node dtd) {
|
71
|
+
super(ruby, rubyClass, dtd);
|
72
|
+
notationClass = (RubyClass)
|
73
|
+
ruby.getClassFromPath("Nokogiri::XML::Notation");
|
74
|
+
|
75
|
+
name = pubId = sysId = ruby.getNil();
|
76
|
+
if (dtd == null) return;
|
77
|
+
|
78
|
+
// This is the dtd declaration stored in the document; it
|
79
|
+
// contains the DTD name (root element) and public and system
|
80
|
+
// ids. The actual declarations are in the NekoDTD 'dtd'
|
81
|
+
// variable. I don't know of a way to consolidate the two.
|
82
|
+
|
83
|
+
DocumentType otherDtd = dtd.getOwnerDocument().getDoctype();
|
84
|
+
if (otherDtd != null) {
|
85
|
+
name = stringOrNil(ruby, otherDtd.getNodeName());
|
86
|
+
pubId = nonEmptyStringOrNil(ruby, otherDtd.getPublicId());
|
87
|
+
sysId = nonEmptyStringOrNil(ruby, otherDtd.getSystemId());
|
88
|
+
}
|
89
|
+
}
|
90
|
+
|
91
|
+
public static XmlDtd newEmpty(Ruby ruby,
|
92
|
+
Document doc,
|
93
|
+
IRubyObject name,
|
94
|
+
IRubyObject external_id,
|
95
|
+
IRubyObject system_id) {
|
96
|
+
Element placeHolder = doc.createElement("dtd_placeholder");
|
97
|
+
XmlDtd dtd = new XmlDtd(ruby, placeHolder);
|
98
|
+
dtd.name = name;
|
99
|
+
dtd.pubId = external_id;
|
100
|
+
dtd.sysId = system_id;
|
101
|
+
return dtd;
|
102
|
+
}
|
103
|
+
|
104
|
+
|
105
|
+
/**
|
106
|
+
* Create an unparented element that contains DTD declarations
|
107
|
+
* parsed from the internal subset attached as user data to
|
108
|
+
* <code>doc</code>. The attached dtd must be the tree from
|
109
|
+
* NekoDTD. The owner document of the returned tree will be
|
110
|
+
* <code>doc</code>.
|
111
|
+
*
|
112
|
+
* NekoDTD parser returns a new document node containing elements
|
113
|
+
* representing the dtd declarations. The plan is to get the root
|
114
|
+
* element and adopt it into the correct document, stripping the
|
115
|
+
* Document provided by NekoDTD.
|
116
|
+
*
|
117
|
+
*/
|
118
|
+
public static XmlDtd newFromInternalSubset(Ruby ruby, Document doc) {
|
119
|
+
Object dtdTree_ = doc.getUserData(XmlDocument.DTD_RAW_DOCUMENT);
|
120
|
+
if (dtdTree_ == null)
|
121
|
+
return new XmlDtd(ruby);
|
122
|
+
|
123
|
+
Node dtdTree = (Node) dtdTree_;
|
124
|
+
Node dtd = getInternalSubset(dtdTree);
|
125
|
+
if (dtd == null) {
|
126
|
+
return new XmlDtd(ruby);
|
127
|
+
} else {
|
128
|
+
// Import the node into doc so it has the correct owner document.
|
129
|
+
dtd = doc.importNode(dtd, true);
|
130
|
+
return new XmlDtd(ruby, dtd);
|
131
|
+
}
|
132
|
+
}
|
133
|
+
|
134
|
+
public static IRubyObject newFromExternalSubset(Ruby ruby, Document doc) {
|
135
|
+
Object dtdTree_ = doc.getUserData(XmlDocument.DTD_RAW_DOCUMENT);
|
136
|
+
if (dtdTree_ == null) {
|
137
|
+
return ruby.getNil();
|
138
|
+
}
|
139
|
+
|
140
|
+
Node dtdTree = (Node) dtdTree_;
|
141
|
+
Node dtd = getExternalSubset(dtdTree);
|
142
|
+
if (dtd == null) {
|
143
|
+
return ruby.getNil();
|
144
|
+
} else if (!dtd.hasChildNodes()) {
|
145
|
+
return ruby.getNil();
|
146
|
+
} else {
|
147
|
+
// Import the node into doc so it has the correct owner document.
|
148
|
+
dtd = doc.importNode(dtd, true);
|
149
|
+
return new XmlDtd(ruby, dtd);
|
150
|
+
}
|
151
|
+
}
|
152
|
+
|
153
|
+
/*
|
154
|
+
* <code>dtd</code> is the document node of a NekoDTD tree.
|
155
|
+
* NekoDTD tree looks like this:
|
156
|
+
*
|
157
|
+
* <code><pre>
|
158
|
+
* [#document: null]
|
159
|
+
* [#comment: ...]
|
160
|
+
* [#comment: ...]
|
161
|
+
* [dtd: null] // a DocumentType; isDTD(node) => false
|
162
|
+
* [dtd: null] // root of dtd, an Element node; isDTD(node) => true
|
163
|
+
* ... decls, content models, etc. ...
|
164
|
+
* [externalSubset: null] pubid="the pubid" sysid="the sysid"
|
165
|
+
* ... external subset decls, etc. ...
|
166
|
+
* </pre></code>
|
167
|
+
*/
|
168
|
+
protected static Node getInternalSubset(Node dtdTree) {
|
169
|
+
Node root;
|
170
|
+
for (root = dtdTree.getFirstChild(); ; root = root.getNextSibling()) {
|
171
|
+
if (root == null)
|
172
|
+
return null;
|
173
|
+
else if (isDTD(root))
|
174
|
+
return root; // we have second dtd which is root
|
175
|
+
}
|
176
|
+
}
|
177
|
+
|
178
|
+
protected static Node getExternalSubset(Node dtdTree) {
|
179
|
+
Node dtd = getInternalSubset(dtdTree);
|
180
|
+
if (dtd == null) return null;
|
181
|
+
for (Node ext = dtd.getFirstChild(); ; ext = ext.getNextSibling()) {
|
182
|
+
if (ext == null)
|
183
|
+
return null;
|
184
|
+
else if (isExternalSubset(ext))
|
185
|
+
return ext;
|
186
|
+
}
|
187
|
+
}
|
188
|
+
|
189
|
+
/**
|
190
|
+
* This overrides the #attributes method defined in
|
191
|
+
* lib/nokogiri/xml/node.rb.
|
192
|
+
*/
|
193
|
+
@JRubyMethod
|
194
|
+
public IRubyObject attributes(ThreadContext context) {
|
195
|
+
if (attributes == null) extractDecls(context);
|
196
|
+
|
197
|
+
return attributes;
|
198
|
+
}
|
199
|
+
|
200
|
+
@JRubyMethod
|
201
|
+
public IRubyObject elements(ThreadContext context) {
|
202
|
+
if (elements == null) extractDecls(context);
|
203
|
+
|
204
|
+
return elements;
|
205
|
+
}
|
206
|
+
|
207
|
+
@JRubyMethod
|
208
|
+
public IRubyObject entities(ThreadContext context) {
|
209
|
+
if (entities == null) extractDecls(context);
|
210
|
+
|
211
|
+
return entities;
|
212
|
+
}
|
213
|
+
|
214
|
+
@JRubyMethod
|
215
|
+
public IRubyObject notations(ThreadContext context) {
|
216
|
+
if (notations == null) extractDecls(context);
|
217
|
+
|
218
|
+
return notations;
|
219
|
+
}
|
220
|
+
|
221
|
+
/**
|
222
|
+
* Our "node" object is as-returned by NekoDTD. The actual
|
223
|
+
* "children" that we're interested in (Attribute declarations,
|
224
|
+
* etc.) are a few layers deep.
|
225
|
+
*/
|
226
|
+
@Override
|
227
|
+
@JRubyMethod
|
228
|
+
public IRubyObject children(ThreadContext context) {
|
229
|
+
if (children == null) extractDecls(context);
|
230
|
+
|
231
|
+
return children;
|
232
|
+
}
|
233
|
+
|
234
|
+
/**
|
235
|
+
* Returns the name of the dtd.
|
236
|
+
*/
|
237
|
+
@Override
|
238
|
+
@JRubyMethod
|
239
|
+
public IRubyObject node_name(ThreadContext context) {
|
240
|
+
return name;
|
241
|
+
}
|
242
|
+
|
243
|
+
@Override
|
244
|
+
@JRubyMethod(name = "node_name=")
|
245
|
+
public IRubyObject node_name_set(ThreadContext context, IRubyObject name) {
|
246
|
+
throw context.getRuntime()
|
247
|
+
.newRuntimeError("cannot change name of DTD");
|
248
|
+
}
|
249
|
+
|
250
|
+
@JRubyMethod
|
251
|
+
public IRubyObject system_id(ThreadContext context) {
|
252
|
+
return sysId;
|
253
|
+
}
|
254
|
+
|
255
|
+
@JRubyMethod
|
256
|
+
public IRubyObject external_id(ThreadContext context) {
|
257
|
+
return pubId;
|
258
|
+
}
|
259
|
+
|
260
|
+
@JRubyMethod
|
261
|
+
public IRubyObject validate(ThreadContext context, IRubyObject doc) {
|
262
|
+
RubyArray errors = RubyArray.newArray(context.getRuntime());
|
263
|
+
if (doc instanceof XmlDocument) {
|
264
|
+
errors = (RubyArray) ((XmlDocument)doc).getInstanceVariable("@errors");
|
265
|
+
}
|
266
|
+
return errors;
|
267
|
+
}
|
268
|
+
|
269
|
+
public static boolean nameEquals(Node node, QName name) {
|
270
|
+
return name.localpart.equals(node.getNodeName());
|
271
|
+
}
|
272
|
+
|
273
|
+
public static boolean isExternalSubset(Node node) {
|
274
|
+
return nameEquals(node, DTDConfiguration.E_EXTERNAL_SUBSET);
|
275
|
+
}
|
276
|
+
|
277
|
+
/**
|
278
|
+
* Checks instanceof Element so we return false for a DocumentType
|
279
|
+
* node (NekoDTD uses Element for all its nodes).
|
280
|
+
*/
|
281
|
+
public static boolean isDTD(Node node) {
|
282
|
+
return (node instanceof Element &&
|
283
|
+
nameEquals(node, DTDConfiguration.E_DTD));
|
284
|
+
}
|
285
|
+
|
286
|
+
public static boolean isAttributeDecl(Node node) {
|
287
|
+
return nameEquals(node, DTDConfiguration.E_ATTRIBUTE_DECL);
|
288
|
+
}
|
289
|
+
|
290
|
+
public static boolean isElementDecl(Node node) {
|
291
|
+
return nameEquals(node, DTDConfiguration.E_ELEMENT_DECL);
|
292
|
+
}
|
293
|
+
|
294
|
+
public static boolean isEntityDecl(Node node) {
|
295
|
+
return (nameEquals(node, DTDConfiguration.E_INTERNAL_ENTITY_DECL) ||
|
296
|
+
nameEquals(node, DTDConfiguration.E_UNPARSED_ENTITY_DECL));
|
297
|
+
}
|
298
|
+
|
299
|
+
public static boolean isNotationDecl(Node node) {
|
300
|
+
return nameEquals(node, DTDConfiguration.E_NOTATION_DECL);
|
301
|
+
}
|
302
|
+
|
303
|
+
public static boolean isContentModel(Node node) {
|
304
|
+
return nameEquals(node, DTDConfiguration.E_CONTENT_MODEL);
|
305
|
+
}
|
306
|
+
|
307
|
+
/**
|
308
|
+
* Recursively extract various DTD declarations and store them in
|
309
|
+
* the various collections.
|
310
|
+
*/
|
311
|
+
protected void extractDecls(ThreadContext context) {
|
312
|
+
Ruby runtime = context.getRuntime();
|
313
|
+
|
314
|
+
// initialize data structures
|
315
|
+
allDecls = RubyArray.newArray(runtime);
|
316
|
+
attributes = RubyHash.newHash(runtime);
|
317
|
+
elements = RubyHash.newHash(runtime);
|
318
|
+
entities = RubyHash.newHash(runtime);
|
319
|
+
notations = RubyHash.newHash(runtime);
|
320
|
+
contentModels = RubyHash.newHash(runtime);
|
321
|
+
children = runtime.getNil();
|
322
|
+
|
323
|
+
// recursively extract decls
|
324
|
+
if (node == null) return; // leave all the decl hash's empty
|
325
|
+
extractDecls(context, node.getFirstChild());
|
326
|
+
|
327
|
+
// convert allDecls to a NodeSet
|
328
|
+
children =
|
329
|
+
new XmlNodeSet(runtime,
|
330
|
+
getNokogiriClass(runtime, "Nokogiri::XML::NodeSet"),
|
331
|
+
allDecls);
|
332
|
+
|
333
|
+
// add attribute decls as attributes to the matching element decl
|
334
|
+
RubyArray keys = attributes.keys();
|
335
|
+
for (int i = 0; i < keys.getLength(); ++i) {
|
336
|
+
IRubyObject akey = keys.entry(i);
|
337
|
+
IRubyObject val;
|
338
|
+
|
339
|
+
val = attributes.op_aref(context, akey);
|
340
|
+
if (val.isNil()) continue;
|
341
|
+
XmlAttributeDecl attrDecl = (XmlAttributeDecl) val;
|
342
|
+
IRubyObject ekey = attrDecl.element_name(context);
|
343
|
+
val = elements.op_aref(context, ekey);
|
344
|
+
if (val.isNil()) continue;
|
345
|
+
XmlElementDecl elemDecl = (XmlElementDecl) val;
|
346
|
+
|
347
|
+
elemDecl.appendAttrDecl(attrDecl);
|
348
|
+
}
|
349
|
+
|
350
|
+
// add content models to the matching element decl
|
351
|
+
keys = contentModels.keys();
|
352
|
+
for (int i = 0; i < keys.getLength(); ++i) {
|
353
|
+
IRubyObject key = keys.entry(i);
|
354
|
+
IRubyObject cm = contentModels.op_aref(context, key);
|
355
|
+
|
356
|
+
IRubyObject elem = elements.op_aref(context, key);
|
357
|
+
if (elem.isNil()) continue;
|
358
|
+
if (((XmlElementDecl)elem).isEmpty()) continue;
|
359
|
+
((XmlElementDecl) elem).setContentModel(cm);
|
360
|
+
}
|
361
|
+
}
|
362
|
+
|
363
|
+
/**
|
364
|
+
* The <code>node</code> is either the first child of the root dtd
|
365
|
+
* node (as returned by getInternalSubset()) or the first child of
|
366
|
+
* the external subset node (as returned by getExternalSubset()).
|
367
|
+
*
|
368
|
+
* This recursive function will not descend into an
|
369
|
+
* 'externalSubset' node, thus for an internal subset it only
|
370
|
+
* extracts nodes in the internal subset, and for an external
|
371
|
+
* subset it extracts everything and assumes <code>node</code>
|
372
|
+
* and all children are part of the external subset.
|
373
|
+
*/
|
374
|
+
protected void extractDecls(ThreadContext context, Node node) {
|
375
|
+
while (node != null) {
|
376
|
+
if (isExternalSubset(node)) {
|
377
|
+
return;
|
378
|
+
} else if (isAttributeDecl(node)) {
|
379
|
+
XmlAttributeDecl decl = (XmlAttributeDecl)
|
380
|
+
XmlAttributeDecl.create(context, node);
|
381
|
+
attributes.op_aset(context, decl.attribute_name(context), decl);
|
382
|
+
allDecls.append(decl);
|
383
|
+
} else if (isElementDecl(node)) {
|
384
|
+
XmlElementDecl decl = (XmlElementDecl)
|
385
|
+
XmlElementDecl.create(context, node);
|
386
|
+
elements.op_aset(context, decl.element_name(context), decl);
|
387
|
+
allDecls.append(decl);
|
388
|
+
} else if (isEntityDecl(node)) {
|
389
|
+
XmlEntityDecl decl = (XmlEntityDecl)
|
390
|
+
XmlEntityDecl.create(context, node);
|
391
|
+
entities.op_aset(context, decl.node_name(context), decl);
|
392
|
+
allDecls.append(decl);
|
393
|
+
} else if (isNotationDecl(node)) {
|
394
|
+
XmlNode tmp = (XmlNode)
|
395
|
+
NokogiriHelpers.constructNode(context.getRuntime(), node);
|
396
|
+
IRubyObject decl = invoke(context, notationClass, "new",
|
397
|
+
tmp.getAttribute(context, "name"),
|
398
|
+
tmp.getAttribute(context, "pubid"),
|
399
|
+
tmp.getAttribute(context, "sysid"));
|
400
|
+
notations.op_aset(context,
|
401
|
+
tmp.getAttribute(context, "name"), decl);
|
402
|
+
allDecls.append(decl);
|
403
|
+
} else if (isContentModel(node)) {
|
404
|
+
XmlElementContent cm =
|
405
|
+
new XmlElementContent(context.getRuntime(),
|
406
|
+
(XmlDocument) document(context),
|
407
|
+
node);
|
408
|
+
contentModels.op_aset(context, cm.element_name(context), cm);
|
409
|
+
} else {
|
410
|
+
// recurse
|
411
|
+
extractDecls(context, node.getFirstChild());
|
412
|
+
}
|
413
|
+
|
414
|
+
node = node.getNextSibling();
|
415
|
+
}
|
416
|
+
}
|
417
|
+
|
418
|
+
public void saveContent(ThreadContext context, SaveContext ctx) {
|
419
|
+
ctx.append("<!DOCTYPE " + name + " ");
|
420
|
+
if (pubId != null) {
|
421
|
+
ctx.append("PUBLIC \"" + pubId + "\" \"" + sysId + "\">");
|
422
|
+
} else if (sysId != null) {
|
423
|
+
ctx.append("SYSTEM " + sysId);
|
424
|
+
}
|
425
|
+
}
|
426
|
+
|
427
|
+
}
|