nokogiri 1.5.0.beta.4 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/.gemtest +0 -0
- data/CHANGELOG.ja.rdoc +34 -0
- data/CHANGELOG.rdoc +40 -1
- data/Manifest.txt +11 -2
- data/README.rdoc +1 -1
- data/Rakefile +100 -104
- data/bin/nokogiri +1 -2
- data/ext/nokogiri/nokogiri.c +24 -1
- data/ext/nokogiri/xml_io.c +32 -7
- data/ext/nokogiri/xml_node.c +14 -13
- data/ext/nokogiri/xml_sax_parser.c +9 -4
- data/ext/nokogiri/xslt_stylesheet.c +7 -1
- data/lib/nokogiri.rb +3 -22
- data/lib/nokogiri/css.rb +4 -0
- data/lib/nokogiri/html/document.rb +10 -14
- data/lib/nokogiri/version.rb +76 -23
- data/lib/nokogiri/xml/builder.rb +7 -0
- data/lib/nokogiri/xml/document.rb +17 -1
- data/lib/nokogiri/xml/document_fragment.rb +14 -0
- data/lib/nokogiri/xml/node.rb +36 -28
- data/lib/nokogiri/xml/node/save_options.rb +17 -1
- data/lib/nokogiri/xml/node_set.rb +7 -0
- data/lib/nokogiri/xml/parse_options.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +6 -6
- data/lib/nokogiri/xml/schema.rb +7 -1
- data/nokogiri_help_responses.md +40 -0
- data/tasks/cross_compile.rb +134 -159
- data/tasks/nokogiri.org.rb +18 -0
- data/tasks/test.rb +1 -1
- data/test/files/encoding.html +82 -0
- data/test/files/encoding.xhtml +84 -0
- data/test/files/metacharset.html +10 -0
- data/test/files/noencoding.html +47 -0
- data/test/helper.rb +2 -0
- data/test/html/test_document.rb +15 -0
- data/test/html/test_document_encoding.rb +13 -0
- data/test/test_memory_leak.rb +20 -0
- data/test/test_reader.rb +22 -0
- data/test/test_xslt_transforms.rb +6 -2
- data/test/xml/node/test_save_options.rb +10 -2
- data/test/xml/test_builder.rb +17 -0
- data/test/xml/test_document.rb +22 -0
- data/test/xml/test_node.rb +19 -1
- data/test/xml/test_node_reparenting.rb +16 -3
- data/test/xml/test_node_set.rb +34 -0
- data/test/xml/test_schema.rb +5 -0
- data/test/xslt/test_exception_handling.rb +37 -0
- metadata +185 -157
- data/deps.rip +0 -5
- data/ext/java/nokogiri/EncodingHandler.java +0 -124
- data/ext/java/nokogiri/HtmlDocument.java +0 -149
- data/ext/java/nokogiri/HtmlElementDescription.java +0 -145
- data/ext/java/nokogiri/HtmlEntityLookup.java +0 -79
- data/ext/java/nokogiri/HtmlSaxParserContext.java +0 -259
- data/ext/java/nokogiri/NokogiriService.java +0 -535
- data/ext/java/nokogiri/XmlAttr.java +0 -191
- data/ext/java/nokogiri/XmlAttributeDecl.java +0 -130
- data/ext/java/nokogiri/XmlCdata.java +0 -91
- data/ext/java/nokogiri/XmlComment.java +0 -86
- data/ext/java/nokogiri/XmlDocument.java +0 -529
- data/ext/java/nokogiri/XmlDocumentFragment.java +0 -217
- data/ext/java/nokogiri/XmlDtd.java +0 -467
- data/ext/java/nokogiri/XmlElement.java +0 -222
- data/ext/java/nokogiri/XmlElementContent.java +0 -382
- data/ext/java/nokogiri/XmlElementDecl.java +0 -148
- data/ext/java/nokogiri/XmlEntityDecl.java +0 -162
- data/ext/java/nokogiri/XmlEntityReference.java +0 -75
- data/ext/java/nokogiri/XmlNamespace.java +0 -128
- data/ext/java/nokogiri/XmlNode.java +0 -1399
- data/ext/java/nokogiri/XmlNodeSet.java +0 -311
- data/ext/java/nokogiri/XmlProcessingInstruction.java +0 -103
- data/ext/java/nokogiri/XmlReader.java +0 -411
- data/ext/java/nokogiri/XmlRelaxng.java +0 -144
- data/ext/java/nokogiri/XmlSaxParserContext.java +0 -367
- data/ext/java/nokogiri/XmlSaxPushParser.java +0 -184
- data/ext/java/nokogiri/XmlSchema.java +0 -319
- data/ext/java/nokogiri/XmlSyntaxError.java +0 -119
- data/ext/java/nokogiri/XmlText.java +0 -136
- data/ext/java/nokogiri/XmlXpathContext.java +0 -179
- data/ext/java/nokogiri/XsltStylesheet.java +0 -183
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +0 -206
- data/ext/java/nokogiri/internals/NokogiriDocumentCache.java +0 -73
- data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +0 -86
- data/ext/java/nokogiri/internals/NokogiriHandler.java +0 -327
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +0 -582
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +0 -171
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +0 -118
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +0 -74
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +0 -121
- data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +0 -79
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +0 -126
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +0 -56
- data/ext/java/nokogiri/internals/ParserContext.java +0 -278
- data/ext/java/nokogiri/internals/PushInputStream.java +0 -411
- data/ext/java/nokogiri/internals/ReaderNode.java +0 -474
- data/ext/java/nokogiri/internals/SaveContext.java +0 -288
- data/ext/java/nokogiri/internals/SchemaErrorHandler.java +0 -76
- data/ext/java/nokogiri/internals/XmlDeclHandler.java +0 -42
- data/ext/java/nokogiri/internals/XmlDomParser.java +0 -77
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +0 -238
- data/ext/java/nokogiri/internals/XmlSaxParser.java +0 -65
- data/ext/java/nokogiri/internals/XsltExtensionFunction.java +0 -72
- data/lib/isorelax.jar +0 -0
- data/lib/jing.jar +0 -0
- data/lib/nekodtd.jar +0 -0
- data/lib/nekohtml.jar +0 -0
- data/lib/xercesImpl.jar +0 -0
@@ -1,206 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* (The MIT License)
|
3
|
-
*
|
4
|
-
* Copyright (c) 2008 - 2011:
|
5
|
-
*
|
6
|
-
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
7
|
-
* * {Mike Dalessio}[http://mike.daless.io]
|
8
|
-
* * {Charles Nutter}[http://blog.headius.com]
|
9
|
-
* * {Sergio Arbeo}[http://www.serabe.com]
|
10
|
-
* * {Patrick Mahoney}[http://polycrystal.org]
|
11
|
-
* * {Yoko Harada}[http://yokolet.blogspot.com]
|
12
|
-
*
|
13
|
-
* Permission is hereby granted, free of charge, to any person obtaining
|
14
|
-
* a copy of this software and associated documentation files (the
|
15
|
-
* 'Software'), to deal in the Software without restriction, including
|
16
|
-
* without limitation the rights to use, copy, modify, merge, publish,
|
17
|
-
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
-
* permit persons to whom the Software is furnished to do so, subject to
|
19
|
-
* the following conditions:
|
20
|
-
*
|
21
|
-
* The above copyright notice and this permission notice shall be
|
22
|
-
* included in all copies or substantial portions of the Software.
|
23
|
-
*
|
24
|
-
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
|
-
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
-
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
27
|
-
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
28
|
-
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
29
|
-
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
30
|
-
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
-
*/
|
32
|
-
|
33
|
-
package nokogiri.internals;
|
34
|
-
|
35
|
-
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
|
36
|
-
import static nokogiri.internals.NokogiriHelpers.isNamespace;
|
37
|
-
import nokogiri.HtmlDocument;
|
38
|
-
import nokogiri.NokogiriService;
|
39
|
-
import nokogiri.XmlDocument;
|
40
|
-
|
41
|
-
import org.apache.xerces.parsers.DOMParser;
|
42
|
-
import org.apache.xerces.xni.Augmentations;
|
43
|
-
import org.apache.xerces.xni.QName;
|
44
|
-
import org.apache.xerces.xni.XMLAttributes;
|
45
|
-
import org.apache.xerces.xni.XNIException;
|
46
|
-
import org.apache.xerces.xni.parser.XMLDocumentFilter;
|
47
|
-
import org.apache.xerces.xni.parser.XMLParserConfiguration;
|
48
|
-
import org.cyberneko.html.HTMLConfiguration;
|
49
|
-
import org.cyberneko.html.filters.DefaultFilter;
|
50
|
-
import org.jruby.Ruby;
|
51
|
-
import org.jruby.RubyClass;
|
52
|
-
import org.jruby.runtime.ThreadContext;
|
53
|
-
import org.jruby.runtime.builtin.IRubyObject;
|
54
|
-
import org.w3c.dom.Document;
|
55
|
-
|
56
|
-
/**
|
57
|
-
* Parser for HtmlDocument. This class actually parses HtmlDocument using NekoHtml.
|
58
|
-
*
|
59
|
-
* @author sergio
|
60
|
-
* @author Yoko Harada <yokolet@gmail.com>
|
61
|
-
*/
|
62
|
-
public class HtmlDomParserContext extends XmlDomParserContext {
|
63
|
-
|
64
|
-
public HtmlDomParserContext(Ruby runtime, IRubyObject options) {
|
65
|
-
super(runtime, options);
|
66
|
-
}
|
67
|
-
|
68
|
-
public HtmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options) {
|
69
|
-
super(runtime, encoding, options);
|
70
|
-
}
|
71
|
-
|
72
|
-
@Override
|
73
|
-
protected void initErrorHandler() {
|
74
|
-
if (options.strict) {
|
75
|
-
errorHandler = new NokogiriStrictErrorHandler(options.noError, options.noWarning);
|
76
|
-
} else {
|
77
|
-
errorHandler = new NokogiriNonStrictErrorHandler4NekoHtml(options.noError, options.noWarning);
|
78
|
-
}
|
79
|
-
}
|
80
|
-
|
81
|
-
@Override
|
82
|
-
protected void initParser(Ruby runtime) {
|
83
|
-
XMLParserConfiguration config = new HTMLConfiguration();
|
84
|
-
XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter();
|
85
|
-
XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler);
|
86
|
-
XMLDocumentFilter[] filters = { removeNSAttrsFilter, elementValidityCheckFilter};
|
87
|
-
|
88
|
-
config.setErrorHandler(this.errorHandler);
|
89
|
-
parser = new DOMParser(config);
|
90
|
-
|
91
|
-
// see http://nekohtml.sourceforge.net/settings.html for details
|
92
|
-
setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
|
93
|
-
setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
|
94
|
-
setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
|
95
|
-
setFeature("http://cyberneko.org/html/features/report-errors", true);
|
96
|
-
setFeature("http://xml.org/sax/features/namespaces", false);
|
97
|
-
setProperty("http://cyberneko.org/html/properties/filters", filters);
|
98
|
-
setFeature("http://cyberneko.org/html/features/insert-doctype", true);
|
99
|
-
}
|
100
|
-
|
101
|
-
/**
|
102
|
-
* Enable NekoHTML feature for balancing tags in a document fragment.
|
103
|
-
*
|
104
|
-
* This method is used in XmlNode#in_context method.
|
105
|
-
*/
|
106
|
-
public void enableDocumentFragment() {
|
107
|
-
setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
|
108
|
-
}
|
109
|
-
|
110
|
-
@Override
|
111
|
-
protected XmlDocument getNewEmptyDocument(ThreadContext context) {
|
112
|
-
IRubyObject[] args = new IRubyObject[0];
|
113
|
-
return (XmlDocument) XmlDocument.rbNew(context, getNokogiriClass(context.getRuntime(), "Nokogiri::HTML::Document"), args);
|
114
|
-
}
|
115
|
-
|
116
|
-
@Override
|
117
|
-
protected XmlDocument wrapDocument(ThreadContext context,
|
118
|
-
RubyClass klazz,
|
119
|
-
Document document) {
|
120
|
-
HtmlDocument htmlDocument = (HtmlDocument) NokogiriService.HTML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), klazz);
|
121
|
-
htmlDocument.setNode(context, document);
|
122
|
-
htmlDocument.setEncoding(ruby_encoding);
|
123
|
-
return htmlDocument;
|
124
|
-
}
|
125
|
-
|
126
|
-
/**
|
127
|
-
* Filter to strip out attributes that pertain to XML namespaces.
|
128
|
-
*
|
129
|
-
* @author sergio
|
130
|
-
* @author Patrick Mahoney <pat@polycrystal.org>
|
131
|
-
*/
|
132
|
-
public static class RemoveNSAttrsFilter extends DefaultFilter {
|
133
|
-
@Override
|
134
|
-
public void startElement(QName element, XMLAttributes attrs,
|
135
|
-
Augmentations augs) throws XNIException {
|
136
|
-
int i;
|
137
|
-
for (i = 0; i < attrs.getLength(); ++i) {
|
138
|
-
if (isNamespace(attrs.getQName(i))) {
|
139
|
-
attrs.removeAttributeAt(i);
|
140
|
-
--i;
|
141
|
-
}
|
142
|
-
}
|
143
|
-
|
144
|
-
element.uri = null;
|
145
|
-
super.startElement(element, attrs, augs);
|
146
|
-
}
|
147
|
-
}
|
148
|
-
|
149
|
-
public static class ElementValidityCheckFilter extends DefaultFilter {
|
150
|
-
private NokogiriErrorHandler errorHandler;
|
151
|
-
|
152
|
-
private ElementValidityCheckFilter(NokogiriErrorHandler errorHandler) {
|
153
|
-
this.errorHandler = errorHandler;
|
154
|
-
}
|
155
|
-
|
156
|
-
// element names from xhtml1-strict.dtd
|
157
|
-
private static String[][] element_names = {
|
158
|
-
{"a", "abbr", "acronym", "address", "area"},
|
159
|
-
{"b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button"},
|
160
|
-
{"caption", "cite", "code", "col", "colgroup"},
|
161
|
-
{"dd", "del", "dfn", "div", "dl", "dt"},
|
162
|
-
{"em"},
|
163
|
-
{"fieldset", "font", "form", "frame", "frameset"},
|
164
|
-
{}, // g
|
165
|
-
{"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html"},
|
166
|
-
{"i", "iframe", "img", "input", "ins"},
|
167
|
-
{}, // j
|
168
|
-
{"kbd"},
|
169
|
-
{"label", "legend", "li", "link"},
|
170
|
-
{"map", "meta"},
|
171
|
-
{"noframes", "noscript"},
|
172
|
-
{"object", "ol", "optgroup", "option"},
|
173
|
-
{"p", "param", "pre"},
|
174
|
-
{"q"},
|
175
|
-
{}, // r
|
176
|
-
{"s", "samp", "script", "select", "small", "span", "strike", "strong", "style", "sub", "sup"},
|
177
|
-
{"table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title", "tr", "tt"},
|
178
|
-
{"u", "ul"},
|
179
|
-
{"var"},
|
180
|
-
{}, // w
|
181
|
-
{}, // x
|
182
|
-
{}, // y
|
183
|
-
{} // z
|
184
|
-
};
|
185
|
-
|
186
|
-
private boolean isValid(String testee) {
|
187
|
-
char[] c = testee.toCharArray();
|
188
|
-
int index = new Integer(c[0]) - 97;
|
189
|
-
if (index > 25) return false;
|
190
|
-
for (int i=0; i<element_names[index].length; i++) {
|
191
|
-
if (testee.equals(element_names[index][i])) {
|
192
|
-
return true;
|
193
|
-
}
|
194
|
-
}
|
195
|
-
return false;
|
196
|
-
}
|
197
|
-
|
198
|
-
@Override
|
199
|
-
public void startElement(QName name, XMLAttributes attrs, Augmentations augs) throws XNIException {
|
200
|
-
if (!isValid(name.rawname)) {
|
201
|
-
errorHandler.getErrors().add(new Exception("Tag " + name.rawname + " invalid"));
|
202
|
-
}
|
203
|
-
super.startElement(name, attrs, augs);
|
204
|
-
}
|
205
|
-
}
|
206
|
-
}
|
@@ -1,73 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* (The MIT License)
|
3
|
-
*
|
4
|
-
* Copyright (c) 2008 - 2011:
|
5
|
-
*
|
6
|
-
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
7
|
-
* * {Mike Dalessio}[http://mike.daless.io]
|
8
|
-
* * {Charles Nutter}[http://blog.headius.com]
|
9
|
-
* * {Sergio Arbeo}[http://www.serabe.com]
|
10
|
-
* * {Patrick Mahoney}[http://polycrystal.org]
|
11
|
-
* * {Yoko Harada}[http://yokolet.blogspot.com]
|
12
|
-
*
|
13
|
-
* Permission is hereby granted, free of charge, to any person obtaining
|
14
|
-
* a copy of this software and associated documentation files (the
|
15
|
-
* 'Software'), to deal in the Software without restriction, including
|
16
|
-
* without limitation the rights to use, copy, modify, merge, publish,
|
17
|
-
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
-
* permit persons to whom the Software is furnished to do so, subject to
|
19
|
-
* the following conditions:
|
20
|
-
*
|
21
|
-
* The above copyright notice and this permission notice shall be
|
22
|
-
* included in all copies or substantial portions of the Software.
|
23
|
-
*
|
24
|
-
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
|
-
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
-
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
27
|
-
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
28
|
-
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
29
|
-
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
30
|
-
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
-
*/
|
32
|
-
|
33
|
-
package nokogiri.internals;
|
34
|
-
|
35
|
-
import java.util.Hashtable;
|
36
|
-
import nokogiri.XmlDocument;
|
37
|
-
import org.w3c.dom.Document;
|
38
|
-
|
39
|
-
/**
|
40
|
-
* Currently, this class is not used anywhere.
|
41
|
-
* I'm not sure what for this class was written.(Yoko)
|
42
|
-
*
|
43
|
-
* @author sergio
|
44
|
-
*/
|
45
|
-
public class NokogiriDocumentCache {
|
46
|
-
|
47
|
-
private static NokogiriDocumentCache instance;
|
48
|
-
protected Hashtable<Document, XmlDocument> cache;
|
49
|
-
|
50
|
-
private NokogiriDocumentCache() {
|
51
|
-
this.cache = new Hashtable<Document, XmlDocument>();
|
52
|
-
}
|
53
|
-
|
54
|
-
public static NokogiriDocumentCache getInstance() {
|
55
|
-
if(instance == null) {
|
56
|
-
instance = new NokogiriDocumentCache();
|
57
|
-
}
|
58
|
-
return instance;
|
59
|
-
}
|
60
|
-
|
61
|
-
public XmlDocument getXmlDocument(Document doc) {
|
62
|
-
return this.cache.get(doc);
|
63
|
-
}
|
64
|
-
|
65
|
-
public void putDocument(Document doc, XmlDocument xmlDoc) {
|
66
|
-
this.cache.put(doc, xmlDoc);
|
67
|
-
}
|
68
|
-
|
69
|
-
public XmlDocument removeDocument(Document doc) {
|
70
|
-
return this.cache.remove(doc);
|
71
|
-
}
|
72
|
-
|
73
|
-
}
|
@@ -1,86 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* (The MIT License)
|
3
|
-
*
|
4
|
-
* Copyright (c) 2008 - 2011:
|
5
|
-
*
|
6
|
-
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
7
|
-
* * {Mike Dalessio}[http://mike.daless.io]
|
8
|
-
* * {Charles Nutter}[http://blog.headius.com]
|
9
|
-
* * {Sergio Arbeo}[http://www.serabe.com]
|
10
|
-
* * {Patrick Mahoney}[http://polycrystal.org]
|
11
|
-
* * {Yoko Harada}[http://yokolet.blogspot.com]
|
12
|
-
*
|
13
|
-
* Permission is hereby granted, free of charge, to any person obtaining
|
14
|
-
* a copy of this software and associated documentation files (the
|
15
|
-
* 'Software'), to deal in the Software without restriction, including
|
16
|
-
* without limitation the rights to use, copy, modify, merge, publish,
|
17
|
-
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
-
* permit persons to whom the Software is furnished to do so, subject to
|
19
|
-
* the following conditions:
|
20
|
-
*
|
21
|
-
* The above copyright notice and this permission notice shall be
|
22
|
-
* included in all copies or substantial portions of the Software.
|
23
|
-
*
|
24
|
-
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
|
-
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
-
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
27
|
-
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
28
|
-
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
29
|
-
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
30
|
-
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
-
*/
|
32
|
-
|
33
|
-
package nokogiri.internals;
|
34
|
-
|
35
|
-
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
|
36
|
-
|
37
|
-
import java.util.ArrayList;
|
38
|
-
import java.util.List;
|
39
|
-
|
40
|
-
import nokogiri.NokogiriService;
|
41
|
-
import nokogiri.XmlSyntaxError;
|
42
|
-
|
43
|
-
import org.apache.xerces.xni.parser.XMLErrorHandler;
|
44
|
-
import org.jruby.Ruby;
|
45
|
-
import org.jruby.runtime.ThreadContext;
|
46
|
-
import org.jruby.runtime.builtin.IRubyObject;
|
47
|
-
import org.xml.sax.ErrorHandler;
|
48
|
-
|
49
|
-
/**
|
50
|
-
* Super class of error handlers.
|
51
|
-
*
|
52
|
-
* XMLErrorHandler is used by nokogiri.internals.HtmlDomParserContext since NekoHtml
|
53
|
-
* uses this type of the error handler.
|
54
|
-
*
|
55
|
-
* @author sergio
|
56
|
-
* @author Yoko Harada <yokolet@gmail.com>
|
57
|
-
*/
|
58
|
-
public abstract class NokogiriErrorHandler implements ErrorHandler, XMLErrorHandler {
|
59
|
-
protected List<Exception> errors;
|
60
|
-
protected boolean noerror;
|
61
|
-
protected boolean nowarning;
|
62
|
-
|
63
|
-
public NokogiriErrorHandler(boolean noerror, boolean nowarning) {
|
64
|
-
errors = new ArrayList<Exception>();
|
65
|
-
this.noerror = noerror;
|
66
|
-
this.nowarning = nowarning;
|
67
|
-
}
|
68
|
-
|
69
|
-
public List<Exception> getErrors() { return errors; }
|
70
|
-
|
71
|
-
public List<IRubyObject> getErrorsReadyForRuby(ThreadContext context) {
|
72
|
-
Ruby runtime = context.getRuntime();
|
73
|
-
List<IRubyObject> res = new ArrayList<IRubyObject>();
|
74
|
-
for (int i = 0; i < errors.size(); i++) {
|
75
|
-
XmlSyntaxError xmlSyntaxError = (XmlSyntaxError) NokogiriService.XML_SYNTAXERROR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::SyntaxError"));
|
76
|
-
xmlSyntaxError.setException(errors.get(i));
|
77
|
-
res.add(xmlSyntaxError);
|
78
|
-
}
|
79
|
-
return res;
|
80
|
-
}
|
81
|
-
|
82
|
-
protected boolean usesNekoHtml(String domain) {
|
83
|
-
if ("http://cyberneko.org/html".equals(domain)) return true;
|
84
|
-
else return false;
|
85
|
-
}
|
86
|
-
}
|
@@ -1,327 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* (The MIT License)
|
3
|
-
*
|
4
|
-
* Copyright (c) 2008 - 2011:
|
5
|
-
*
|
6
|
-
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
7
|
-
* * {Mike Dalessio}[http://mike.daless.io]
|
8
|
-
* * {Charles Nutter}[http://blog.headius.com]
|
9
|
-
* * {Sergio Arbeo}[http://www.serabe.com]
|
10
|
-
* * {Patrick Mahoney}[http://polycrystal.org]
|
11
|
-
* * {Yoko Harada}[http://yokolet.blogspot.com]
|
12
|
-
*
|
13
|
-
* Permission is hereby granted, free of charge, to any person obtaining
|
14
|
-
* a copy of this software and associated documentation files (the
|
15
|
-
* 'Software'), to deal in the Software without restriction, including
|
16
|
-
* without limitation the rights to use, copy, modify, merge, publish,
|
17
|
-
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
-
* permit persons to whom the Software is furnished to do so, subject to
|
19
|
-
* the following conditions:
|
20
|
-
*
|
21
|
-
* The above copyright notice and this permission notice shall be
|
22
|
-
* included in all copies or substantial portions of the Software.
|
23
|
-
*
|
24
|
-
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
|
-
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
-
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
27
|
-
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
28
|
-
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
29
|
-
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
30
|
-
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
-
*/
|
32
|
-
|
33
|
-
package nokogiri.internals;
|
34
|
-
|
35
|
-
import static nokogiri.internals.NokogiriHelpers.getLocalPart;
|
36
|
-
import static nokogiri.internals.NokogiriHelpers.getPrefix;
|
37
|
-
import static nokogiri.internals.NokogiriHelpers.isNamespace;
|
38
|
-
import static nokogiri.internals.NokogiriHelpers.stringOrNil;
|
39
|
-
|
40
|
-
import java.util.ArrayDeque;
|
41
|
-
import java.util.LinkedList;
|
42
|
-
|
43
|
-
import nokogiri.XmlSyntaxError;
|
44
|
-
|
45
|
-
import org.jruby.Ruby;
|
46
|
-
import org.jruby.RubyArray;
|
47
|
-
import org.jruby.RubyClass;
|
48
|
-
import org.jruby.RubyObject;
|
49
|
-
import org.jruby.javasupport.util.RuntimeHelpers;
|
50
|
-
import org.jruby.runtime.ThreadContext;
|
51
|
-
import org.jruby.runtime.builtin.IRubyObject;
|
52
|
-
import org.xml.sax.Attributes;
|
53
|
-
import org.xml.sax.Locator;
|
54
|
-
import org.xml.sax.SAXException;
|
55
|
-
import org.xml.sax.SAXParseException;
|
56
|
-
import org.xml.sax.ext.DefaultHandler2;
|
57
|
-
|
58
|
-
/**
|
59
|
-
* A handler for SAX parsing.
|
60
|
-
*
|
61
|
-
* @author sergio
|
62
|
-
* @author Yoko Harada <yokolet@gmail.com>
|
63
|
-
*/
|
64
|
-
public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler {
|
65
|
-
|
66
|
-
boolean inCDATA = false;
|
67
|
-
|
68
|
-
private Ruby ruby;
|
69
|
-
private RubyClass attrClass;
|
70
|
-
private IRubyObject object;
|
71
|
-
|
72
|
-
/**
|
73
|
-
* Stores parse errors with the most-recent error last.
|
74
|
-
*
|
75
|
-
* TODO: should these be stored in the document 'errors' array?
|
76
|
-
* Currently only string messages are stored there.
|
77
|
-
*/
|
78
|
-
private LinkedList<XmlSyntaxError> errors = new LinkedList<XmlSyntaxError>();
|
79
|
-
|
80
|
-
private Locator locator;
|
81
|
-
private ArrayDeque<Integer> lines;
|
82
|
-
private ArrayDeque<Integer> columns;
|
83
|
-
private static String htmlParserName = "Nokogiri::HTML::SAX::Parser";
|
84
|
-
private boolean needEmptyAttrCheck = false;
|
85
|
-
|
86
|
-
public NokogiriHandler(Ruby runtime, IRubyObject object) {
|
87
|
-
this.ruby = runtime;
|
88
|
-
this.attrClass = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::SAX::Parser::Attribute");
|
89
|
-
this.object = object;
|
90
|
-
lines = new ArrayDeque<Integer>();
|
91
|
-
columns = new ArrayDeque<Integer>();
|
92
|
-
String objectName = object.getMetaClass().getName();
|
93
|
-
if (htmlParserName.equals(objectName)) needEmptyAttrCheck = true;
|
94
|
-
}
|
95
|
-
|
96
|
-
public void setDocumentLocator(Locator locator) {
|
97
|
-
this.locator = locator;
|
98
|
-
}
|
99
|
-
|
100
|
-
@Override
|
101
|
-
public void startDocument() throws SAXException {
|
102
|
-
call("start_document");
|
103
|
-
}
|
104
|
-
|
105
|
-
public void xmlDecl(String version, String encoding, String standalone) {
|
106
|
-
call("xmldecl", stringOrNil(ruby, version),
|
107
|
-
stringOrNil(ruby, encoding),
|
108
|
-
stringOrNil(ruby, standalone));
|
109
|
-
}
|
110
|
-
|
111
|
-
@Override
|
112
|
-
public void endDocument() throws SAXException {
|
113
|
-
call("end_document");
|
114
|
-
}
|
115
|
-
|
116
|
-
/*
|
117
|
-
* This has to call either "start_element" or
|
118
|
-
* "start_element_namespace" depending on whether there are any
|
119
|
-
* namespace attributes.
|
120
|
-
*
|
121
|
-
* Attributes that define namespaces are passed in a separate
|
122
|
-
* array of of <code>[:prefix, :uri]</code> arrays and are not
|
123
|
-
* passed with the other attributes.
|
124
|
-
*/
|
125
|
-
@Override
|
126
|
-
public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException {
|
127
|
-
// for attributes other than namespace attrs
|
128
|
-
RubyArray rubyAttr = RubyArray.newArray(ruby);
|
129
|
-
// for namespace defining attributes
|
130
|
-
RubyArray rubyNSAttr = RubyArray.newArray(ruby);
|
131
|
-
|
132
|
-
ThreadContext context = ruby.getCurrentContext();
|
133
|
-
boolean fromFragmentHandler = false; // isFromFragmentHandler();
|
134
|
-
|
135
|
-
lines.add(locator.getLineNumber());
|
136
|
-
columns.add(locator.getColumnNumber() - 1); // libxml counts from 0 while java does from 1
|
137
|
-
|
138
|
-
for (int i = 0; i < attrs.getLength(); i++) {
|
139
|
-
String u = attrs.getURI(i);
|
140
|
-
String qn = attrs.getQName(i);
|
141
|
-
String ln = attrs.getLocalName(i);
|
142
|
-
String val = attrs.getValue(i);
|
143
|
-
String pre;
|
144
|
-
|
145
|
-
pre = getPrefix(qn);
|
146
|
-
if (ln == null || ln.equals("")) ln = getLocalPart(qn);
|
147
|
-
|
148
|
-
if (isNamespace(qn) && !fromFragmentHandler) {
|
149
|
-
// I haven't figured the reason out yet, but, in somewhere,
|
150
|
-
// namespace is converted to array in array in array and cause
|
151
|
-
// TypeError at line 45 in fragment_handler.rb
|
152
|
-
RubyArray ns = RubyArray.newArray(ruby, 2);
|
153
|
-
if (ln.equals("xmlns")) ln = null;
|
154
|
-
ns.add(stringOrNil(ruby, ln));
|
155
|
-
ns.add(ruby.newString(val));
|
156
|
-
rubyNSAttr.add(ns);
|
157
|
-
} else {
|
158
|
-
IRubyObject[] args = null;
|
159
|
-
if (needEmptyAttrCheck) {
|
160
|
-
if (isEmptyAttr(ln)) {
|
161
|
-
args = new IRubyObject[3];
|
162
|
-
args[0] = stringOrNil(ruby, ln);
|
163
|
-
args[1] = stringOrNil(ruby, pre);
|
164
|
-
args[2] = stringOrNil(ruby, u);
|
165
|
-
}
|
166
|
-
}
|
167
|
-
if (args == null) {
|
168
|
-
args = new IRubyObject[4];
|
169
|
-
args[0] = stringOrNil(ruby, ln);
|
170
|
-
args[1] = stringOrNil(ruby, pre);
|
171
|
-
args[2] = stringOrNil(ruby, u);
|
172
|
-
args[3] = stringOrNil(ruby, val);
|
173
|
-
}
|
174
|
-
|
175
|
-
IRubyObject attr = RuntimeHelpers.invoke(context, attrClass, "new", args);
|
176
|
-
rubyAttr.add(attr);
|
177
|
-
}
|
178
|
-
}
|
179
|
-
|
180
|
-
if (localName == null || localName.equals("")) localName = getLocalPart(qName);
|
181
|
-
call("start_element_namespace",
|
182
|
-
stringOrNil(ruby, localName),
|
183
|
-
rubyAttr,
|
184
|
-
stringOrNil(ruby, getPrefix(qName)),
|
185
|
-
stringOrNil(ruby, uri),
|
186
|
-
rubyNSAttr);
|
187
|
-
}
|
188
|
-
|
189
|
-
private static String[] emptyAttrs =
|
190
|
-
{"checked", "compact", "declare", "defer", "disabled", "ismap", "multiple",
|
191
|
-
"noresize", "nohref", "noshade", "nowrap", "readonly", "selected"};
|
192
|
-
|
193
|
-
private boolean isEmptyAttr(String name) {
|
194
|
-
for (String emptyAttr : emptyAttrs) {
|
195
|
-
if (emptyAttr.equals(name)) return true;
|
196
|
-
}
|
197
|
-
return false;
|
198
|
-
}
|
199
|
-
|
200
|
-
public Integer getLine() {
|
201
|
-
return lines.pop();
|
202
|
-
}
|
203
|
-
|
204
|
-
public Integer getColumn() {
|
205
|
-
return columns.pop();
|
206
|
-
}
|
207
|
-
|
208
|
-
private boolean isFromFragmentHandler() {
|
209
|
-
if (object != null && object instanceof RubyObject) {
|
210
|
-
RubyObject rubyObj = (RubyObject)object;
|
211
|
-
IRubyObject document = rubyObj.getInstanceVariable("@document");
|
212
|
-
if (document != null) {
|
213
|
-
String name = document.getMetaClass().getName();
|
214
|
-
if ("Nokogiri::XML::FragmentHandler".equals(name)) {
|
215
|
-
return true;
|
216
|
-
}
|
217
|
-
}
|
218
|
-
}
|
219
|
-
return false;
|
220
|
-
}
|
221
|
-
|
222
|
-
@Override
|
223
|
-
public void endElement(String uri, String localName, String qName) throws SAXException {
|
224
|
-
call("end_element_namespace",
|
225
|
-
stringOrNil(ruby, localName),
|
226
|
-
stringOrNil(ruby, getPrefix(qName)),
|
227
|
-
stringOrNil(ruby, uri));
|
228
|
-
}
|
229
|
-
|
230
|
-
@Override
|
231
|
-
public void characters(char[] ch, int start, int length) throws SAXException {
|
232
|
-
String target = inCDATA ? "cdata_block" : "characters";
|
233
|
-
call(target, ruby.newString(new String(ch, start, length)));
|
234
|
-
}
|
235
|
-
|
236
|
-
@Override
|
237
|
-
public void comment(char[] ch, int start, int length) throws SAXException {
|
238
|
-
call("comment", ruby.newString(new String(ch, start, length)));
|
239
|
-
}
|
240
|
-
|
241
|
-
@Override
|
242
|
-
public void startCDATA() throws SAXException {
|
243
|
-
inCDATA = true;
|
244
|
-
}
|
245
|
-
|
246
|
-
@Override
|
247
|
-
public void endCDATA() throws SAXException {
|
248
|
-
inCDATA = false;
|
249
|
-
}
|
250
|
-
|
251
|
-
@Override
|
252
|
-
public void error(SAXParseException saxpe) {
|
253
|
-
addError(XmlSyntaxError.createError(ruby, saxpe));
|
254
|
-
call("error", ruby.newString(saxpe.getMessage()));
|
255
|
-
}
|
256
|
-
|
257
|
-
@Override
|
258
|
-
public void fatalError(SAXParseException saxpe) throws SAXException
|
259
|
-
{
|
260
|
-
addError(XmlSyntaxError.createFatalError(ruby, saxpe));
|
261
|
-
call("error", ruby.newString(saxpe.getMessage()));
|
262
|
-
}
|
263
|
-
|
264
|
-
@Override
|
265
|
-
public void warning(SAXParseException saxpe) {
|
266
|
-
//System.out.println("warning: " + saxpe);
|
267
|
-
call("warning", ruby.newString(saxpe.getMessage()));
|
268
|
-
}
|
269
|
-
|
270
|
-
protected synchronized void addError(XmlSyntaxError e) {
|
271
|
-
errors.add(e);
|
272
|
-
}
|
273
|
-
|
274
|
-
public synchronized int getErrorCount() {
|
275
|
-
return errors.size();
|
276
|
-
}
|
277
|
-
|
278
|
-
public synchronized IRubyObject getLastError() {
|
279
|
-
return errors.getLast();
|
280
|
-
}
|
281
|
-
|
282
|
-
private void call(String methodName) {
|
283
|
-
ThreadContext context = ruby.getCurrentContext();
|
284
|
-
RuntimeHelpers.invoke(context, document(context), methodName);
|
285
|
-
}
|
286
|
-
|
287
|
-
private void call(String methodName, IRubyObject argument) {
|
288
|
-
ThreadContext context = ruby.getCurrentContext();
|
289
|
-
RuntimeHelpers.invoke(context, document(context), methodName, argument);
|
290
|
-
}
|
291
|
-
|
292
|
-
private void call(String methodName, IRubyObject arg1, IRubyObject arg2) {
|
293
|
-
ThreadContext context = ruby.getCurrentContext();
|
294
|
-
RuntimeHelpers.invoke(context, document(context), methodName, arg1, arg2);
|
295
|
-
}
|
296
|
-
|
297
|
-
private void call(String methodName, IRubyObject arg1, IRubyObject arg2,
|
298
|
-
IRubyObject arg3) {
|
299
|
-
ThreadContext context = ruby.getCurrentContext();
|
300
|
-
RuntimeHelpers.invoke(context, document(context), methodName,
|
301
|
-
arg1, arg2, arg3);
|
302
|
-
}
|
303
|
-
|
304
|
-
private void call(String methodName,
|
305
|
-
IRubyObject arg0,
|
306
|
-
IRubyObject arg1,
|
307
|
-
IRubyObject arg2,
|
308
|
-
IRubyObject arg3,
|
309
|
-
IRubyObject arg4) {
|
310
|
-
IRubyObject[] args = new IRubyObject[5];
|
311
|
-
args[0] = arg0;
|
312
|
-
args[1] = arg1;
|
313
|
-
args[2] = arg2;
|
314
|
-
args[3] = arg3;
|
315
|
-
args[4] = arg4;
|
316
|
-
ThreadContext context = ruby.getCurrentContext();
|
317
|
-
RuntimeHelpers.invoke(context, document(context), methodName, args);
|
318
|
-
}
|
319
|
-
|
320
|
-
private IRubyObject document(ThreadContext context) {
|
321
|
-
if (object instanceof RubyObject) {
|
322
|
-
return ((RubyObject)object).fastGetInstanceVariable("@document");
|
323
|
-
}
|
324
|
-
return context.getRuntime().getNil();
|
325
|
-
}
|
326
|
-
|
327
|
-
}
|