nokogiri 1.5.0.beta.2 → 1.5.0.beta.3
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +63 -0
- data/CHANGELOG.rdoc +44 -1
- data/Manifest.txt +3 -3
- data/README.ja.rdoc +4 -4
- data/README.rdoc +4 -4
- data/Rakefile +3 -0
- data/bin/nokogiri +6 -1
- data/ext/java/nokogiri/EncodingHandler.java +32 -0
- data/ext/java/nokogiri/HtmlDocument.java +36 -0
- data/ext/java/nokogiri/HtmlElementDescription.java +34 -0
- data/ext/java/nokogiri/HtmlEntityLookup.java +34 -0
- data/ext/java/nokogiri/HtmlSaxParserContext.java +41 -3
- data/ext/java/nokogiri/NokogiriService.java +109 -13
- data/ext/java/nokogiri/XmlAttr.java +40 -4
- data/ext/java/nokogiri/XmlAttributeDecl.java +32 -0
- data/ext/java/nokogiri/XmlCdata.java +41 -2
- data/ext/java/nokogiri/XmlComment.java +38 -1
- data/ext/java/nokogiri/XmlDocument.java +56 -11
- data/ext/java/nokogiri/XmlDocumentFragment.java +39 -30
- data/ext/java/nokogiri/XmlDtd.java +37 -0
- data/ext/java/nokogiri/XmlElement.java +51 -2
- data/ext/java/nokogiri/XmlElementContent.java +32 -0
- data/ext/java/nokogiri/XmlElementDecl.java +32 -0
- data/ext/java/nokogiri/XmlEntityDecl.java +32 -0
- data/ext/java/nokogiri/XmlEntityReference.java +35 -2
- data/ext/java/nokogiri/XmlNamespace.java +55 -5
- data/ext/java/nokogiri/XmlNode.java +129 -136
- data/ext/java/nokogiri/XmlNodeSet.java +36 -0
- data/ext/java/nokogiri/XmlProcessingInstruction.java +34 -1
- data/ext/java/nokogiri/XmlReader.java +36 -0
- data/ext/java/nokogiri/XmlRelaxng.java +34 -1
- data/ext/java/nokogiri/XmlSaxParserContext.java +52 -7
- data/ext/java/nokogiri/XmlSaxPushParser.java +36 -0
- data/ext/java/nokogiri/XmlSchema.java +34 -1
- data/ext/java/nokogiri/XmlSyntaxError.java +48 -18
- data/ext/java/nokogiri/XmlText.java +45 -6
- data/ext/java/nokogiri/XmlXpathContext.java +45 -0
- data/ext/java/nokogiri/XsltStylesheet.java +58 -3
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +50 -26
- data/ext/java/nokogiri/internals/NokogiriDocumentCache.java +35 -1
- data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +51 -13
- data/ext/java/nokogiri/internals/NokogiriHandler.java +70 -21
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +95 -38
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +37 -3
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +39 -1
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +43 -7
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +77 -10
- data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +49 -20
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +34 -2
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -1
- data/ext/java/nokogiri/internals/ParserContext.java +32 -0
- data/ext/java/nokogiri/internals/PushInputStream.java +33 -3
- data/ext/java/nokogiri/internals/ReaderNode.java +50 -8
- data/ext/java/nokogiri/internals/SaveContext.java +35 -2
- data/ext/java/nokogiri/internals/SchemaErrorHandler.java +34 -1
- data/ext/java/nokogiri/internals/XmlDeclHandler.java +32 -0
- data/ext/java/nokogiri/internals/XmlDomParser.java +32 -0
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +43 -11
- data/ext/java/nokogiri/internals/XmlSaxParser.java +32 -0
- data/ext/java/nokogiri/internals/XsltExtensionFunction.java +72 -0
- data/ext/nokogiri/depend +358 -32
- data/ext/nokogiri/extconf.rb +1 -3
- data/ext/nokogiri/nokogiri.c +2 -0
- data/ext/nokogiri/nokogiri.h +7 -0
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_io.c +2 -2
- data/ext/nokogiri/xml_node.c +31 -6
- data/ext/nokogiri/xml_node_set.c +1 -1
- data/ext/nokogiri/xml_sax_parser.c +1 -1
- data/ext/nokogiri/xml_sax_parser_context.c +40 -0
- data/ext/nokogiri/xml_xpath_context.c +33 -2
- data/ext/nokogiri/xslt_stylesheet.c +122 -6
- data/lib/nokogiri.rb +12 -5
- data/lib/nokogiri/css/generated_tokenizer.rb +1 -2
- data/lib/nokogiri/css/xpath_visitor.rb +15 -7
- data/lib/nokogiri/decorators/slop.rb +5 -3
- data/lib/nokogiri/html/document.rb +3 -3
- data/lib/nokogiri/html/document_fragment.rb +19 -17
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml/document.rb +26 -1
- data/lib/nokogiri/xml/document_fragment.rb +2 -2
- data/lib/nokogiri/xml/dtd.rb +11 -0
- data/lib/nokogiri/xml/node.rb +156 -45
- data/lib/nokogiri/xml/node_set.rb +2 -2
- data/lib/nokogiri/xml/reader.rb +36 -0
- data/lib/nokogiri/xml/sax/document.rb +4 -2
- data/lib/nokogiri/xslt.rb +9 -5
- data/lib/nokogiri/xslt/stylesheet.rb +1 -1
- data/tasks/cross_compile.rb +27 -8
- data/test/css/test_parser.rb +29 -18
- data/test/decorators/test_slop.rb +16 -0
- data/test/html/test_document_fragment.rb +46 -3
- data/test/html/test_node.rb +9 -0
- data/test/xml/sax/test_parser.rb +11 -3
- data/test/xml/sax/test_parser_context.rb +50 -0
- data/test/xml/sax/test_push_parser.rb +18 -1
- data/test/xml/test_document_fragment.rb +15 -8
- data/test/xml/test_dtd.rb +15 -0
- data/test/xml/test_node.rb +31 -2
- data/test/xml/test_node_reparenting.rb +59 -31
- data/test/xml/test_node_set.rb +13 -0
- data/test/xml/test_xpath.rb +32 -0
- data/test/xslt/test_custom_functions.rb +94 -0
- metadata +83 -81
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/spec/helper.rb +0 -3
- data/spec/xml/reader_spec.rb +0 -307
@@ -1,6 +1,39 @@
|
|
1
|
+
/**
|
2
|
+
* (The MIT License)
|
3
|
+
*
|
4
|
+
* Copyright (c) 2008 - 2010:
|
5
|
+
*
|
6
|
+
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
7
|
+
* * {Mike Dalessio}[http://mike.daless.io]
|
8
|
+
* * {Charles Nutter}[http://blog.headius.com]
|
9
|
+
* * {Sergio Arbeo}[http://www.serabe.com]
|
10
|
+
* * {Patrick Mahoney}[http://polycrystal.org]
|
11
|
+
* * {Yoko Harada}[http://yokolet.blogspot.com]
|
12
|
+
*
|
13
|
+
* Permission is hereby granted, free of charge, to any person obtaining
|
14
|
+
* a copy of this software and associated documentation files (the
|
15
|
+
* 'Software'), to deal in the Software without restriction, including
|
16
|
+
* without limitation the rights to use, copy, modify, merge, publish,
|
17
|
+
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
+
* permit persons to whom the Software is furnished to do so, subject to
|
19
|
+
* the following conditions:
|
20
|
+
*
|
21
|
+
* The above copyright notice and this permission notice shall be
|
22
|
+
* included in all copies or substantial portions of the Software.
|
23
|
+
*
|
24
|
+
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
27
|
+
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
28
|
+
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
29
|
+
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
30
|
+
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
+
*/
|
32
|
+
|
1
33
|
package nokogiri;
|
2
34
|
|
3
35
|
import static nokogiri.internals.NokogiriHelpers.isXmlEscaped;
|
36
|
+
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
|
4
37
|
import static nokogiri.internals.NokogiriHelpers.stringOrNil;
|
5
38
|
import nokogiri.internals.NokogiriHelpers;
|
6
39
|
import nokogiri.internals.SaveContext;
|
@@ -15,10 +48,15 @@ import org.jruby.runtime.builtin.IRubyObject;
|
|
15
48
|
import org.w3c.dom.Document;
|
16
49
|
import org.w3c.dom.Node;
|
17
50
|
|
51
|
+
/**
|
52
|
+
* Class for Nokogiri::XML::Text
|
53
|
+
*
|
54
|
+
*/
|
18
55
|
@JRubyClass(name="Nokogiri::XML::Text", parent="Nokogiri::XML::CharacterData")
|
19
56
|
public class XmlText extends XmlNode {
|
20
|
-
|
21
|
-
|
57
|
+
|
58
|
+
public XmlText(Ruby runtime, RubyClass rubyClass, Node node) {
|
59
|
+
super(runtime, rubyClass, node);
|
22
60
|
}
|
23
61
|
|
24
62
|
public XmlText(Ruby runtime, RubyClass klass) {
|
@@ -40,13 +78,14 @@ public class XmlText extends XmlNode {
|
|
40
78
|
Document document = xmlDoc.getDocument();
|
41
79
|
// text node content should not be encoded when it is created by Text node.
|
42
80
|
// while content should be encoded when it is created by Element node.
|
43
|
-
Node node = document.createTextNode((
|
44
|
-
setNode(node);
|
81
|
+
Node node = document.createTextNode(rubyStringToString(content));
|
82
|
+
setNode(context, node);
|
45
83
|
}
|
46
84
|
|
47
85
|
@Override
|
48
86
|
protected IRubyObject getNodeName(ThreadContext context) {
|
49
|
-
|
87
|
+
if (name == null) name = context.getRuntime().newString("text");
|
88
|
+
return name;
|
50
89
|
}
|
51
90
|
|
52
91
|
@Override
|
@@ -78,7 +117,7 @@ public class XmlText extends XmlNode {
|
|
78
117
|
XmlDocument xmlDocument = (XmlDocument)document(context);
|
79
118
|
IRubyObject ruby_encoding = xmlDocument.encoding(context);
|
80
119
|
if (!ruby_encoding.isNil()) {
|
81
|
-
encoding = (
|
120
|
+
encoding = rubyStringToString(ruby_encoding);
|
82
121
|
}
|
83
122
|
return encoding;
|
84
123
|
}
|
@@ -1,3 +1,35 @@
|
|
1
|
+
/**
|
2
|
+
* (The MIT License)
|
3
|
+
*
|
4
|
+
* Copyright (c) 2008 - 2010:
|
5
|
+
*
|
6
|
+
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
7
|
+
* * {Mike Dalessio}[http://mike.daless.io]
|
8
|
+
* * {Charles Nutter}[http://blog.headius.com]
|
9
|
+
* * {Sergio Arbeo}[http://www.serabe.com]
|
10
|
+
* * {Patrick Mahoney}[http://polycrystal.org]
|
11
|
+
* * {Yoko Harada}[http://yokolet.blogspot.com]
|
12
|
+
*
|
13
|
+
* Permission is hereby granted, free of charge, to any person obtaining
|
14
|
+
* a copy of this software and associated documentation files (the
|
15
|
+
* 'Software'), to deal in the Software without restriction, including
|
16
|
+
* without limitation the rights to use, copy, modify, merge, publish,
|
17
|
+
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
+
* permit persons to whom the Software is furnished to do so, subject to
|
19
|
+
* the following conditions:
|
20
|
+
*
|
21
|
+
* The above copyright notice and this permission notice shall be
|
22
|
+
* included in all copies or substantial portions of the Software.
|
23
|
+
*
|
24
|
+
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
27
|
+
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
28
|
+
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
29
|
+
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
30
|
+
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
+
*/
|
32
|
+
|
1
33
|
package nokogiri;
|
2
34
|
|
3
35
|
import java.util.Set;
|
@@ -12,6 +44,7 @@ import javax.xml.xpath.XPathFactory;
|
|
12
44
|
|
13
45
|
import nokogiri.internals.NokogiriNamespaceContext;
|
14
46
|
import nokogiri.internals.NokogiriXPathFunctionResolver;
|
47
|
+
import nokogiri.internals.NokogiriXPathVariableResolver;
|
15
48
|
|
16
49
|
import org.jruby.Ruby;
|
17
50
|
import org.jruby.RubyBoolean;
|
@@ -27,6 +60,10 @@ import org.jruby.runtime.ThreadContext;
|
|
27
60
|
import org.jruby.runtime.builtin.IRubyObject;
|
28
61
|
import org.w3c.dom.NodeList;
|
29
62
|
|
63
|
+
/**
|
64
|
+
* Class for Nokogiri::XML::XpathContext
|
65
|
+
*
|
66
|
+
*/
|
30
67
|
@JRubyClass(name="Nokogiri::XML::XPathContext")
|
31
68
|
public class XmlXpathContext extends RubyObject {
|
32
69
|
private XmlNode context;
|
@@ -37,6 +74,7 @@ public class XmlXpathContext extends RubyObject {
|
|
37
74
|
this.context = context;
|
38
75
|
this.xpath = XPathFactory.newInstance().newXPath();
|
39
76
|
this.xpath.setNamespaceContext(new NokogiriNamespaceContext());
|
77
|
+
this.xpath.setXPathVariableResolver(new NokogiriXPathVariableResolver());
|
40
78
|
}
|
41
79
|
|
42
80
|
@JRubyMethod(name = "new", meta = true)
|
@@ -127,4 +165,11 @@ public class XmlXpathContext extends RubyObject {
|
|
127
165
|
((NokogiriNamespaceContext) this.xpath.getNamespaceContext()).registerNamespace(prefix.convertToString().asJavaString(), uri.convertToString().asJavaString());
|
128
166
|
return this;
|
129
167
|
}
|
168
|
+
|
169
|
+
@JRubyMethod
|
170
|
+
public IRubyObject register_variable(ThreadContext context, IRubyObject name, IRubyObject value) {
|
171
|
+
((NokogiriXPathVariableResolver) this.xpath.getXPathVariableResolver()).
|
172
|
+
registerVariable(name.convertToString().asJavaString(), value.convertToString().asJavaString());
|
173
|
+
return this;
|
174
|
+
}
|
130
175
|
}
|
@@ -1,7 +1,41 @@
|
|
1
|
+
/**
|
2
|
+
* (The MIT License)
|
3
|
+
*
|
4
|
+
* Copyright (c) 2008 - 2010:
|
5
|
+
*
|
6
|
+
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
7
|
+
* * {Mike Dalessio}[http://mike.daless.io]
|
8
|
+
* * {Charles Nutter}[http://blog.headius.com]
|
9
|
+
* * {Sergio Arbeo}[http://www.serabe.com]
|
10
|
+
* * {Patrick Mahoney}[http://polycrystal.org]
|
11
|
+
* * {Yoko Harada}[http://yokolet.blogspot.com]
|
12
|
+
*
|
13
|
+
* Permission is hereby granted, free of charge, to any person obtaining
|
14
|
+
* a copy of this software and associated documentation files (the
|
15
|
+
* 'Software'), to deal in the Software without restriction, including
|
16
|
+
* without limitation the rights to use, copy, modify, merge, publish,
|
17
|
+
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
+
* permit persons to whom the Software is furnished to do so, subject to
|
19
|
+
* the following conditions:
|
20
|
+
*
|
21
|
+
* The above copyright notice and this permission notice shall be
|
22
|
+
* included in all copies or substantial portions of the Software.
|
23
|
+
*
|
24
|
+
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
27
|
+
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
28
|
+
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
29
|
+
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
30
|
+
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
+
*/
|
32
|
+
|
1
33
|
package nokogiri;
|
2
34
|
|
3
35
|
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
|
4
36
|
|
37
|
+
import java.util.HashMap;
|
38
|
+
import java.util.Map;
|
5
39
|
import java.util.regex.Matcher;
|
6
40
|
import java.util.regex.Pattern;
|
7
41
|
|
@@ -24,11 +58,20 @@ import org.jruby.runtime.ThreadContext;
|
|
24
58
|
import org.jruby.runtime.builtin.IRubyObject;
|
25
59
|
import org.w3c.dom.Document;
|
26
60
|
|
61
|
+
/**
|
62
|
+
* Class for Nokogiri::XSLT::Stylesheet
|
63
|
+
*
|
64
|
+
*/
|
27
65
|
@JRubyClass(name="Nokogiri::XSLT::Stylesheet")
|
28
66
|
public class XsltStylesheet extends RubyObject {
|
29
|
-
|
67
|
+
private static Map<String, Object> registry = new HashMap<String, Object>();
|
68
|
+
private static TransformerFactory factory = null;
|
30
69
|
private Templates sheet;
|
31
70
|
|
71
|
+
public static Map<String, Object> getRegistry() {
|
72
|
+
return registry;
|
73
|
+
}
|
74
|
+
|
32
75
|
public XsltStylesheet(Ruby ruby, RubyClass rubyClass) {
|
33
76
|
super(ruby, rubyClass);
|
34
77
|
}
|
@@ -77,8 +120,10 @@ public class XsltStylesheet extends RubyObject {
|
|
77
120
|
Document doc = ((XmlDocument) xmlDoc.dup_implementation(context, true)).getDocument();
|
78
121
|
|
79
122
|
XsltStylesheet xslt = new XsltStylesheet(ruby, (RubyClass) cls);
|
123
|
+
|
80
124
|
try {
|
81
|
-
|
125
|
+
if (factory == null) factory = TransformerFactory.newInstance();
|
126
|
+
xslt.sheet = factory.newTemplates(new DOMSource(doc));
|
82
127
|
} catch (TransformerConfigurationException ex) {
|
83
128
|
ruby.newRuntimeError("could not parse xslt stylesheet");
|
84
129
|
}
|
@@ -88,7 +133,6 @@ public class XsltStylesheet extends RubyObject {
|
|
88
133
|
|
89
134
|
@JRubyMethod
|
90
135
|
public IRubyObject serialize(ThreadContext context, IRubyObject doc) {
|
91
|
-
System.out.println("Serialize called in stylesheet");
|
92
136
|
return RuntimeHelpers.invoke(context,
|
93
137
|
RuntimeHelpers.invoke(context, doc, "root"),
|
94
138
|
"to_s");
|
@@ -123,4 +167,15 @@ public class XsltStylesheet extends RubyObject {
|
|
123
167
|
(Document) result.getNode());
|
124
168
|
}
|
125
169
|
}
|
170
|
+
|
171
|
+
@JRubyMethod(name = {"registr", "register"}, meta = true)
|
172
|
+
public static IRubyObject register(ThreadContext context, IRubyObject cls, IRubyObject uri, IRubyObject receiver) {
|
173
|
+
throw context.getRuntime().newNotImplementedError("Nokogiri::XSLT.register method is not implemented");
|
174
|
+
/* When API conflict is solved, this method should be below:
|
175
|
+
// ThreadContext is used while executing xslt extension function
|
176
|
+
registry.put("context", context);
|
177
|
+
registry.put("receiver", receiver);
|
178
|
+
return context.getRuntime().getNil();
|
179
|
+
*/
|
180
|
+
}
|
126
181
|
}
|
@@ -1,3 +1,35 @@
|
|
1
|
+
/**
|
2
|
+
* (The MIT License)
|
3
|
+
*
|
4
|
+
* Copyright (c) 2008 - 2010:
|
5
|
+
*
|
6
|
+
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
7
|
+
* * {Mike Dalessio}[http://mike.daless.io]
|
8
|
+
* * {Charles Nutter}[http://blog.headius.com]
|
9
|
+
* * {Sergio Arbeo}[http://www.serabe.com]
|
10
|
+
* * {Patrick Mahoney}[http://polycrystal.org]
|
11
|
+
* * {Yoko Harada}[http://yokolet.blogspot.com]
|
12
|
+
*
|
13
|
+
* Permission is hereby granted, free of charge, to any person obtaining
|
14
|
+
* a copy of this software and associated documentation files (the
|
15
|
+
* 'Software'), to deal in the Software without restriction, including
|
16
|
+
* without limitation the rights to use, copy, modify, merge, publish,
|
17
|
+
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
+
* permit persons to whom the Software is furnished to do so, subject to
|
19
|
+
* the following conditions:
|
20
|
+
*
|
21
|
+
* The above copyright notice and this permission notice shall be
|
22
|
+
* included in all copies or substantial portions of the Software.
|
23
|
+
*
|
24
|
+
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
27
|
+
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
28
|
+
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
29
|
+
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
30
|
+
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
+
*/
|
32
|
+
|
1
33
|
package nokogiri.internals;
|
2
34
|
|
3
35
|
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
|
@@ -21,20 +53,11 @@ import org.jruby.runtime.builtin.IRubyObject;
|
|
21
53
|
import org.w3c.dom.Document;
|
22
54
|
|
23
55
|
/**
|
24
|
-
*
|
56
|
+
* Parser for HtmlDocument. This class actually parses HtmlDocument using NekoHtml.
|
57
|
+
*
|
25
58
|
* @author sergio
|
26
59
|
*/
|
27
60
|
public class HtmlDomParserContext extends XmlDomParserContext {
|
28
|
-
protected static final String PROPERTY_FILTERS =
|
29
|
-
"http://cyberneko.org/html/properties/filters";
|
30
|
-
protected static final String PROPERTY_ELEM_NAMES =
|
31
|
-
"http://cyberneko.org/html/properties/names/elems";
|
32
|
-
protected static final String PROPERTY_ATTRS_NAMES =
|
33
|
-
"http://cyberneko.org/html/properties/names/attrs";
|
34
|
-
protected static final String FEATURE_DOCUMENT_FRAGMENT =
|
35
|
-
"http://cyberneko.org/html/features/balance-tags/document-fragment";
|
36
|
-
protected static final String FEATURE_REPORT_ERRORS =
|
37
|
-
"http://cyberneko.org/html/features/report-errors";
|
38
61
|
|
39
62
|
public HtmlDomParserContext(Ruby runtime, IRubyObject options) {
|
40
63
|
super(runtime, options);
|
@@ -46,12 +69,10 @@ public class HtmlDomParserContext extends XmlDomParserContext {
|
|
46
69
|
|
47
70
|
@Override
|
48
71
|
protected void initErrorHandler() {
|
49
|
-
if (
|
50
|
-
errorHandler = new
|
51
|
-
} else if (options.noError) {
|
52
|
-
errorHandler = new NokogiriNonStrictErrorHandler4NekoHtml(options.noError);
|
72
|
+
if (options.strict) {
|
73
|
+
errorHandler = new NokogiriStrictErrorHandler(options.noError, options.noWarning);
|
53
74
|
} else {
|
54
|
-
errorHandler = new
|
75
|
+
errorHandler = new NokogiriNonStrictErrorHandler4NekoHtml(options.noError, options.noWarning);
|
55
76
|
}
|
56
77
|
}
|
57
78
|
|
@@ -60,33 +81,35 @@ public class HtmlDomParserContext extends XmlDomParserContext {
|
|
60
81
|
XMLParserConfiguration config = new HTMLConfiguration();
|
61
82
|
XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter();
|
62
83
|
XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler);
|
63
|
-
//XMLDocumentFilter[] filters = { removeNSAttrsFilter};
|
64
84
|
XMLDocumentFilter[] filters = { removeNSAttrsFilter, elementValidityCheckFilter};
|
65
85
|
|
66
86
|
config.setErrorHandler(this.errorHandler);
|
67
87
|
parser = new DOMParser(config);
|
68
88
|
|
89
|
+
// see http://nekohtml.sourceforge.net/settings.html for details
|
69
90
|
setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
|
70
|
-
setProperty(
|
71
|
-
setProperty(
|
72
|
-
setFeature(
|
91
|
+
setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
|
92
|
+
setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
|
93
|
+
setFeature("http://cyberneko.org/html/features/report-errors", true);
|
73
94
|
setFeature("http://xml.org/sax/features/namespaces", false);
|
74
|
-
setProperty(
|
95
|
+
setProperty("http://cyberneko.org/html/properties/filters", filters);
|
96
|
+
setFeature("http://cyberneko.org/html/features/insert-doctype", true);
|
75
97
|
}
|
76
98
|
|
77
99
|
/**
|
78
|
-
* Enable NekoHTML feature for balancing tags in a document
|
79
|
-
*
|
100
|
+
* Enable NekoHTML feature for balancing tags in a document fragment.
|
101
|
+
*
|
102
|
+
* This method is used in XmlNode#in_context method.
|
80
103
|
*/
|
81
104
|
public void enableDocumentFragment() {
|
82
|
-
setFeature(
|
105
|
+
setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
|
83
106
|
}
|
84
107
|
|
85
108
|
@Override
|
86
109
|
protected XmlDocument getNewEmptyDocument(ThreadContext context) {
|
87
110
|
IRubyObject[] args = new IRubyObject[0];
|
88
111
|
return (XmlDocument) XmlDocument.rbNew(context,
|
89
|
-
getNokogiriClass(context.getRuntime(), "Nokogiri::
|
112
|
+
getNokogiriClass(context.getRuntime(), "Nokogiri::HTML::Document"),
|
90
113
|
args);
|
91
114
|
}
|
92
115
|
|
@@ -162,6 +185,7 @@ public class HtmlDomParserContext extends XmlDomParserContext {
|
|
162
185
|
private boolean isValid(String testee) {
|
163
186
|
char[] c = testee.toCharArray();
|
164
187
|
int index = new Integer(c[0]) - 97;
|
188
|
+
if (index > 25) return false;
|
165
189
|
for (int i=0; i<element_names[index].length; i++) {
|
166
190
|
if (testee.equals(element_names[index][i])) {
|
167
191
|
return true;
|
@@ -173,7 +197,7 @@ public class HtmlDomParserContext extends XmlDomParserContext {
|
|
173
197
|
@Override
|
174
198
|
public void startElement(QName name, XMLAttributes attrs, Augmentations augs) throws XNIException {
|
175
199
|
if (!isValid(name.rawname)) {
|
176
|
-
errorHandler.
|
200
|
+
errorHandler.getErrors().add(new Exception("Tag " + name.rawname + " invalid"));
|
177
201
|
}
|
178
202
|
super.startElement(name, attrs, augs);
|
179
203
|
}
|
@@ -1,3 +1,35 @@
|
|
1
|
+
/**
|
2
|
+
* (The MIT License)
|
3
|
+
*
|
4
|
+
* Copyright (c) 2008 - 2010:
|
5
|
+
*
|
6
|
+
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
7
|
+
* * {Mike Dalessio}[http://mike.daless.io]
|
8
|
+
* * {Charles Nutter}[http://blog.headius.com]
|
9
|
+
* * {Sergio Arbeo}[http://www.serabe.com]
|
10
|
+
* * {Patrick Mahoney}[http://polycrystal.org]
|
11
|
+
* * {Yoko Harada}[http://yokolet.blogspot.com]
|
12
|
+
*
|
13
|
+
* Permission is hereby granted, free of charge, to any person obtaining
|
14
|
+
* a copy of this software and associated documentation files (the
|
15
|
+
* 'Software'), to deal in the Software without restriction, including
|
16
|
+
* without limitation the rights to use, copy, modify, merge, publish,
|
17
|
+
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
+
* permit persons to whom the Software is furnished to do so, subject to
|
19
|
+
* the following conditions:
|
20
|
+
*
|
21
|
+
* The above copyright notice and this permission notice shall be
|
22
|
+
* included in all copies or substantial portions of the Software.
|
23
|
+
*
|
24
|
+
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
27
|
+
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
28
|
+
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
29
|
+
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
30
|
+
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
+
*/
|
32
|
+
|
1
33
|
package nokogiri.internals;
|
2
34
|
|
3
35
|
import java.util.Hashtable;
|
@@ -5,7 +37,9 @@ import nokogiri.XmlDocument;
|
|
5
37
|
import org.w3c.dom.Document;
|
6
38
|
|
7
39
|
/**
|
8
|
-
*
|
40
|
+
* Currently, this class is not used anywhere.
|
41
|
+
* I'm not sure what for this class was written.(Yoko)
|
42
|
+
*
|
9
43
|
* @author sergio
|
10
44
|
*/
|
11
45
|
public class NokogiriDocumentCache {
|
@@ -1,3 +1,35 @@
|
|
1
|
+
/**
|
2
|
+
* (The MIT License)
|
3
|
+
*
|
4
|
+
* Copyright (c) 2008 - 2010:
|
5
|
+
*
|
6
|
+
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
7
|
+
* * {Mike Dalessio}[http://mike.daless.io]
|
8
|
+
* * {Charles Nutter}[http://blog.headius.com]
|
9
|
+
* * {Sergio Arbeo}[http://www.serabe.com]
|
10
|
+
* * {Patrick Mahoney}[http://polycrystal.org]
|
11
|
+
* * {Yoko Harada}[http://yokolet.blogspot.com]
|
12
|
+
*
|
13
|
+
* Permission is hereby granted, free of charge, to any person obtaining
|
14
|
+
* a copy of this software and associated documentation files (the
|
15
|
+
* 'Software'), to deal in the Software without restriction, including
|
16
|
+
* without limitation the rights to use, copy, modify, merge, publish,
|
17
|
+
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
+
* permit persons to whom the Software is furnished to do so, subject to
|
19
|
+
* the following conditions:
|
20
|
+
*
|
21
|
+
* The above copyright notice and this permission notice shall be
|
22
|
+
* included in all copies or substantial portions of the Software.
|
23
|
+
*
|
24
|
+
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
27
|
+
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
28
|
+
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
29
|
+
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
30
|
+
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
+
*/
|
32
|
+
|
1
33
|
package nokogiri.internals;
|
2
34
|
|
3
35
|
import java.util.ArrayList;
|
@@ -12,31 +44,37 @@ import org.jruby.runtime.builtin.IRubyObject;
|
|
12
44
|
import org.xml.sax.ErrorHandler;
|
13
45
|
|
14
46
|
/**
|
15
|
-
*
|
47
|
+
* Super class of error handlers.
|
48
|
+
*
|
49
|
+
* XMLErrorHandler is used by nokogiri.internals.HtmlDomParserContext since NekoHtml
|
50
|
+
* uses this type of the error handler.
|
51
|
+
*
|
16
52
|
* @author sergio
|
17
53
|
*/
|
18
|
-
public abstract class NokogiriErrorHandler
|
19
|
-
implements ErrorHandler, XMLErrorHandler {
|
20
|
-
|
54
|
+
public abstract class NokogiriErrorHandler implements ErrorHandler, XMLErrorHandler {
|
21
55
|
protected List<Exception> errors;
|
56
|
+
protected boolean noerror;
|
57
|
+
protected boolean nowarning;
|
22
58
|
|
23
|
-
public NokogiriErrorHandler() {
|
24
|
-
|
59
|
+
public NokogiriErrorHandler(boolean noerror, boolean nowarning) {
|
60
|
+
errors = new ArrayList<Exception>();
|
61
|
+
this.noerror = noerror;
|
62
|
+
this.nowarning = nowarning;
|
25
63
|
}
|
26
64
|
|
27
|
-
|
28
|
-
errors.add(e);
|
29
|
-
}
|
30
|
-
|
31
|
-
public List<Exception> getErrors() { return this.errors; }
|
65
|
+
public List<Exception> getErrors() { return errors; }
|
32
66
|
|
33
67
|
public List<IRubyObject> getErrorsReadyForRuby(ThreadContext context){
|
34
68
|
Ruby ruby = context.getRuntime();
|
35
69
|
List<IRubyObject> res = new ArrayList<IRubyObject>();
|
36
|
-
for(int i = 0; i <
|
37
|
-
res.add(new XmlSyntaxError(ruby,
|
70
|
+
for(int i = 0; i < errors.size(); i++) {
|
71
|
+
res.add(new XmlSyntaxError(ruby, errors.get(i)));
|
38
72
|
}
|
39
73
|
return res;
|
40
74
|
}
|
41
75
|
|
76
|
+
protected boolean usesNekoHtml(String domain) {
|
77
|
+
if ("http://cyberneko.org/html".equals(domain)) return true;
|
78
|
+
else return false;
|
79
|
+
}
|
42
80
|
}
|