nokogiri 1.11.0.rc4-java → 1.11.5-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/LICENSE-DEPENDENCIES.md +12 -12
- data/LICENSE.md +1 -1
- data/README.md +168 -91
- data/dependencies.yml +12 -12
- data/ext/java/nokogiri/EncodingHandler.java +76 -89
- data/ext/java/nokogiri/HtmlDocument.java +135 -144
- data/ext/java/nokogiri/HtmlElementDescription.java +102 -117
- data/ext/java/nokogiri/HtmlEntityLookup.java +33 -60
- data/ext/java/nokogiri/HtmlSaxParserContext.java +218 -222
- data/ext/java/nokogiri/HtmlSaxPushParser.java +162 -169
- data/ext/java/nokogiri/NokogiriService.java +595 -556
- data/ext/java/nokogiri/XmlAttr.java +118 -126
- data/ext/java/nokogiri/XmlAttributeDecl.java +95 -106
- data/ext/java/nokogiri/XmlCdata.java +35 -58
- data/ext/java/nokogiri/XmlComment.java +46 -67
- data/ext/java/nokogiri/XmlDocument.java +645 -572
- data/ext/java/nokogiri/XmlDocumentFragment.java +125 -137
- data/ext/java/nokogiri/XmlDtd.java +448 -414
- data/ext/java/nokogiri/XmlElement.java +23 -48
- data/ext/java/nokogiri/XmlElementContent.java +343 -316
- data/ext/java/nokogiri/XmlElementDecl.java +124 -125
- data/ext/java/nokogiri/XmlEntityDecl.java +119 -127
- data/ext/java/nokogiri/XmlEntityReference.java +49 -72
- data/ext/java/nokogiri/XmlNamespace.java +175 -175
- data/ext/java/nokogiri/XmlNode.java +1843 -1620
- data/ext/java/nokogiri/XmlNodeSet.java +361 -331
- data/ext/java/nokogiri/XmlProcessingInstruction.java +47 -69
- data/ext/java/nokogiri/XmlReader.java +513 -450
- data/ext/java/nokogiri/XmlRelaxng.java +85 -104
- data/ext/java/nokogiri/XmlSaxParserContext.java +328 -315
- data/ext/java/nokogiri/XmlSaxPushParser.java +227 -220
- data/ext/java/nokogiri/XmlSchema.java +328 -295
- data/ext/java/nokogiri/XmlSyntaxError.java +113 -115
- data/ext/java/nokogiri/XmlText.java +55 -76
- data/ext/java/nokogiri/XmlXpathContext.java +240 -238
- data/ext/java/nokogiri/XsltStylesheet.java +280 -269
- data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +201 -202
- data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
- data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
- data/ext/java/nokogiri/internals/NokogiriDomParser.java +63 -80
- data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
- data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +27 -52
- data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -286
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +736 -652
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +184 -173
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +81 -98
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +64 -79
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +84 -99
- data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +48 -65
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +116 -131
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -56
- data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +23 -46
- data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +55 -72
- data/ext/java/nokogiri/internals/ParserContext.java +206 -211
- data/ext/java/nokogiri/internals/ReaderNode.java +478 -403
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -739
- data/ext/java/nokogiri/internals/SchemaErrorHandler.java +31 -54
- data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
- data/ext/java/nokogiri/internals/XmlDeclHandler.java +3 -34
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +206 -207
- data/ext/java/nokogiri/internals/XmlSaxParser.java +22 -47
- data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
- data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
- data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
- data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
- data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
- data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
- data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
- data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
- data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
- data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
- data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
- data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
- data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
- data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
- data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
- data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
- data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
- data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -574
- data/ext/nokogiri/depend +34 -474
- data/ext/nokogiri/extconf.rb +270 -183
- data/ext/nokogiri/html_document.c +10 -15
- data/ext/nokogiri/html_element_description.c +84 -71
- data/ext/nokogiri/html_entity_lookup.c +21 -16
- data/ext/nokogiri/html_sax_parser_context.c +67 -64
- data/ext/nokogiri/html_sax_push_parser.c +42 -34
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +190 -60
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +246 -188
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +30 -27
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +17 -11
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +56 -49
- data/ext/nokogiri/xml_node.c +371 -320
- data/ext/nokogiri/xml_node_set.c +168 -156
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +191 -157
- data/ext/nokogiri/xml_relax_ng.c +29 -23
- data/ext/nokogiri/xml_sax_parser.c +117 -112
- data/ext/nokogiri/xml_sax_parser_context.c +101 -84
- data/ext/nokogiri/xml_sax_push_parser.c +36 -27
- data/ext/nokogiri/xml_schema.c +48 -42
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +134 -127
- data/ext/nokogiri/xslt_stylesheet.c +157 -157
- data/lib/nokogiri.rb +2 -6
- data/lib/nokogiri/css/parser.rb +1 -1
- data/lib/nokogiri/extension.rb +26 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -15
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +32 -8
- data/lib/nokogiri/xml/document.rb +74 -28
- data/lib/nokogiri/xml/node.rb +39 -42
- data/lib/nokogiri/xml/reader.rb +2 -9
- data/lib/nokogiri/xml/xpath.rb +1 -3
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
- metadata +62 -127
- data/ext/nokogiri/xml_io.c +0 -63
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
@@ -1,9 +1,12 @@
|
|
1
1
|
package nokogiri.internals;
|
2
2
|
|
3
3
|
@SuppressWarnings("serial")
|
4
|
-
public class ClosedStreamException extends Exception
|
4
|
+
public class ClosedStreamException extends Exception
|
5
|
+
{
|
5
6
|
|
6
|
-
public
|
7
|
+
public
|
8
|
+
ClosedStreamException(String message)
|
9
|
+
{
|
7
10
|
super(message);
|
8
11
|
}
|
9
12
|
|
@@ -1,35 +1,3 @@
|
|
1
|
-
/**
|
2
|
-
* (The MIT License)
|
3
|
-
*
|
4
|
-
* Copyright (c) 2008 - 2012:
|
5
|
-
*
|
6
|
-
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
7
|
-
* * {Mike Dalessio}[http://mike.daless.io]
|
8
|
-
* * {Charles Nutter}[http://blog.headius.com]
|
9
|
-
* * {Sergio Arbeo}[http://www.serabe.com]
|
10
|
-
* * {Patrick Mahoney}[http://polycrystal.org]
|
11
|
-
* * {Yoko Harada}[http://yokolet.blogspot.com]
|
12
|
-
*
|
13
|
-
* Permission is hereby granted, free of charge, to any person obtaining
|
14
|
-
* a copy of this software and associated documentation files (the
|
15
|
-
* 'Software'), to deal in the Software without restriction, including
|
16
|
-
* without limitation the rights to use, copy, modify, merge, publish,
|
17
|
-
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
-
* permit persons to whom the Software is furnished to do so, subject to
|
19
|
-
* the following conditions:
|
20
|
-
*
|
21
|
-
* The above copyright notice and this permission notice shall be
|
22
|
-
* included in all copies or substantial portions of the Software.
|
23
|
-
*
|
24
|
-
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
|
-
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
-
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
27
|
-
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
28
|
-
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
29
|
-
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
30
|
-
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
-
*/
|
32
|
-
|
33
1
|
package nokogiri.internals;
|
34
2
|
|
35
3
|
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
|
@@ -52,6 +20,7 @@ import org.cyberneko.html.filters.DefaultFilter;
|
|
52
20
|
import org.jruby.Ruby;
|
53
21
|
import org.jruby.RubyClass;
|
54
22
|
import org.jruby.runtime.ThreadContext;
|
23
|
+
import org.jruby.runtime.Helpers;
|
55
24
|
import org.jruby.runtime.builtin.IRubyObject;
|
56
25
|
import org.w3c.dom.Document;
|
57
26
|
import org.w3c.dom.NamedNodeMap;
|
@@ -60,193 +29,223 @@ import org.w3c.dom.NodeList;
|
|
60
29
|
|
61
30
|
/**
|
62
31
|
* Parser for HtmlDocument. This class actually parses HtmlDocument using NekoHtml.
|
63
|
-
*
|
32
|
+
*
|
64
33
|
* @author sergio
|
65
34
|
* @author Patrick Mahoney <pat@polycrystal.org>
|
66
35
|
* @author Yoko Harada <yokolet@gmail.com>
|
67
36
|
*/
|
68
|
-
public class HtmlDomParserContext extends XmlDomParserContext
|
69
|
-
|
70
|
-
|
71
|
-
|
37
|
+
public class HtmlDomParserContext extends XmlDomParserContext
|
38
|
+
{
|
39
|
+
|
40
|
+
public
|
41
|
+
HtmlDomParserContext(Ruby runtime, IRubyObject options)
|
42
|
+
{
|
43
|
+
this(runtime, runtime.getNil(), options);
|
44
|
+
}
|
45
|
+
|
46
|
+
public
|
47
|
+
HtmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options)
|
48
|
+
{
|
49
|
+
super(runtime, encoding, options);
|
50
|
+
java_encoding = NokogiriHelpers.getValidEncoding(encoding);
|
51
|
+
}
|
52
|
+
|
53
|
+
@Override
|
54
|
+
protected void
|
55
|
+
initParser(Ruby runtime)
|
56
|
+
{
|
57
|
+
XMLParserConfiguration config = new HTMLConfiguration();
|
58
|
+
//XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter();
|
59
|
+
XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler);
|
60
|
+
//XMLDocumentFilter[] filters = { removeNSAttrsFilter, elementValidityCheckFilter};
|
61
|
+
XMLDocumentFilter[] filters = { elementValidityCheckFilter};
|
62
|
+
|
63
|
+
config.setErrorHandler(this.errorHandler);
|
64
|
+
|
65
|
+
parser = new NokogiriDomParser(config);
|
66
|
+
|
67
|
+
// see http://nekohtml.sourceforge.net/settings.html for details
|
68
|
+
setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
|
69
|
+
setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
|
70
|
+
setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
|
71
|
+
setProperty("http://cyberneko.org/html/properties/filters", filters);
|
72
|
+
setFeature("http://cyberneko.org/html/features/report-errors", true);
|
73
|
+
setFeature("http://xml.org/sax/features/namespaces", false);
|
74
|
+
}
|
75
|
+
|
76
|
+
@Override
|
77
|
+
public void
|
78
|
+
setEncoding(String encoding)
|
79
|
+
{
|
80
|
+
super.setEncoding(encoding);
|
81
|
+
}
|
82
|
+
|
83
|
+
/**
|
84
|
+
* Enable NekoHTML feature for balancing tags in a document fragment.
|
85
|
+
*
|
86
|
+
* This method is used in XmlNode#in_context method.
|
87
|
+
*/
|
88
|
+
public void
|
89
|
+
enableDocumentFragment()
|
90
|
+
{
|
91
|
+
setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
|
92
|
+
}
|
93
|
+
|
94
|
+
@Override
|
95
|
+
public XmlDocument
|
96
|
+
parse(ThreadContext context, RubyClass klass, IRubyObject url)
|
97
|
+
{
|
98
|
+
XmlDocument xmlDoc = super.parse(context, klass, url);
|
99
|
+
|
100
|
+
// let's be consistent in how we handle RECOVER and NORECOVER (a.k.a. STRICT)
|
101
|
+
// https://github.com/sparklemotion/nokogiri/issues/2130
|
102
|
+
if (!options.recover && errorHandler.getErrors().size() > 0) {
|
103
|
+
XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
|
104
|
+
String exceptionMsg = String.format("%s: '%s'",
|
105
|
+
"Parser without recover option encountered error or warning",
|
106
|
+
errorHandler.getErrors().get(0));
|
107
|
+
xmlSyntaxError.setException(new Exception(exceptionMsg));
|
108
|
+
throw xmlSyntaxError.toThrowable();
|
72
109
|
}
|
73
110
|
|
74
|
-
|
75
|
-
|
76
|
-
|
111
|
+
return xmlDoc;
|
112
|
+
}
|
113
|
+
|
114
|
+
@Override
|
115
|
+
protected XmlDocument
|
116
|
+
wrapDocument(ThreadContext context, RubyClass klass, Document document)
|
117
|
+
{
|
118
|
+
HtmlDocument htmlDocument = new HtmlDocument(context.runtime, klass, document);
|
119
|
+
htmlDocument.setDocumentNode(context.runtime, document);
|
120
|
+
Helpers.invoke(context, htmlDocument, "initialize");
|
121
|
+
|
122
|
+
if (ruby_encoding.isNil()) {
|
123
|
+
// ruby_encoding might have detected by HtmlDocument::EncodingReader
|
124
|
+
if (detected_encoding != null && !detected_encoding.isNil()) {
|
125
|
+
ruby_encoding = detected_encoding;
|
126
|
+
} else {
|
127
|
+
// no encoding given & no encoding detected, then try to get it
|
128
|
+
String charset = tryGetCharsetFromHtml5MetaTag(document);
|
129
|
+
ruby_encoding = stringOrNil(context.runtime, charset);
|
130
|
+
}
|
77
131
|
}
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
132
|
+
htmlDocument.setEncoding(ruby_encoding);
|
133
|
+
htmlDocument.setParsedEncoding(java_encoding);
|
134
|
+
return htmlDocument;
|
135
|
+
}
|
136
|
+
|
137
|
+
// NekoHtml doesn't understand HTML5 meta tag format. This fails to detect charset
|
138
|
+
// from an HTML5 style meta tag. Luckily, the meta tag and charset exists in DOM tree
|
139
|
+
// so, this method attempts to find the charset.
|
140
|
+
private static String
|
141
|
+
tryGetCharsetFromHtml5MetaTag(Document document)
|
142
|
+
{
|
143
|
+
if (!"html".equalsIgnoreCase(document.getDocumentElement().getNodeName())) { return null; }
|
144
|
+
NodeList list = document.getDocumentElement().getChildNodes();
|
145
|
+
Node item;
|
146
|
+
for (int i = 0; i < list.getLength(); i++) {
|
147
|
+
if ("head".equalsIgnoreCase((item = list.item(i)).getNodeName())) {
|
148
|
+
NodeList headers = item.getChildNodes();
|
149
|
+
for (int j = 0; j < headers.getLength(); j++) {
|
150
|
+
if ("meta".equalsIgnoreCase((item = headers.item(j)).getNodeName())) {
|
151
|
+
NamedNodeMap nodeMap = item.getAttributes();
|
152
|
+
for (int k = 0; k < nodeMap.getLength(); k++) {
|
153
|
+
if ("charset".equalsIgnoreCase((item = nodeMap.item(k)).getNodeName())) {
|
154
|
+
return item.getNodeValue();
|
155
|
+
}
|
156
|
+
}
|
157
|
+
}
|
158
|
+
}
|
159
|
+
}
|
98
160
|
}
|
99
|
-
|
161
|
+
return null;
|
162
|
+
}
|
163
|
+
|
164
|
+
/**
|
165
|
+
* Filter to strip out attributes that pertain to XML namespaces.
|
166
|
+
*/
|
167
|
+
public static class RemoveNSAttrsFilter extends DefaultFilter
|
168
|
+
{
|
100
169
|
@Override
|
101
|
-
public void
|
102
|
-
|
103
|
-
|
170
|
+
public void
|
171
|
+
startElement(QName element, XMLAttributes attrs,
|
172
|
+
Augmentations augs) throws XNIException
|
173
|
+
{
|
174
|
+
int i;
|
175
|
+
for (i = 0; i < attrs.getLength(); ++i) {
|
176
|
+
if (isNamespace(attrs.getQName(i))) {
|
177
|
+
attrs.removeAttributeAt(i);
|
178
|
+
--i;
|
179
|
+
}
|
180
|
+
}
|
104
181
|
|
105
|
-
|
106
|
-
|
107
|
-
*
|
108
|
-
* This method is used in XmlNode#in_context method.
|
109
|
-
*/
|
110
|
-
public void enableDocumentFragment() {
|
111
|
-
setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
|
182
|
+
element.uri = null;
|
183
|
+
super.startElement(element, attrs, augs);
|
112
184
|
}
|
185
|
+
}
|
113
186
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
// let's be consistent in how we handle RECOVER and NORECOVER (a.k.a. STRICT)
|
119
|
-
// https://github.com/sparklemotion/nokogiri/issues/2130
|
120
|
-
if (!options.recover && errorHandler.getErrors().size() > 0) {
|
121
|
-
XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
|
122
|
-
String exceptionMsg = String.format("%s: '%s'",
|
123
|
-
"Parser without recover option encountered error or warning",
|
124
|
-
errorHandler.getErrors().get(0));
|
125
|
-
xmlSyntaxError.setException(new Exception(exceptionMsg));
|
126
|
-
throw xmlSyntaxError.toThrowable();
|
127
|
-
}
|
187
|
+
public static class ElementValidityCheckFilter extends DefaultFilter
|
188
|
+
{
|
189
|
+
private NokogiriErrorHandler errorHandler;
|
128
190
|
|
129
|
-
|
191
|
+
private
|
192
|
+
ElementValidityCheckFilter(NokogiriErrorHandler errorHandler)
|
193
|
+
{
|
194
|
+
this.errorHandler = errorHandler;
|
130
195
|
}
|
131
196
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
197
|
+
// element names from xhtml1-strict.dtd
|
198
|
+
private static String[][] element_names = {
|
199
|
+
{"a", "abbr", "acronym", "address", "area"},
|
200
|
+
{"b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button"},
|
201
|
+
{"caption", "cite", "code", "col", "colgroup"},
|
202
|
+
{"dd", "del", "dfn", "div", "dl", "dt"},
|
203
|
+
{"em"},
|
204
|
+
{"fieldset", "font", "form", "frame", "frameset"},
|
205
|
+
{}, // g
|
206
|
+
{"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html"},
|
207
|
+
{"i", "iframe", "img", "input", "ins"},
|
208
|
+
{}, // j
|
209
|
+
{"kbd"},
|
210
|
+
{"label", "legend", "li", "link"},
|
211
|
+
{"map", "meta"},
|
212
|
+
{"noframes", "noscript"},
|
213
|
+
{"object", "ol", "optgroup", "option"},
|
214
|
+
{"p", "param", "pre"},
|
215
|
+
{"q"},
|
216
|
+
{}, // r
|
217
|
+
{"s", "samp", "script", "select", "small", "span", "strike", "strong", "style", "sub", "sup"},
|
218
|
+
{"table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title", "tr", "tt"},
|
219
|
+
{"u", "ul"},
|
220
|
+
{"var"},
|
221
|
+
{}, // w
|
222
|
+
{}, // x
|
223
|
+
{}, // y
|
224
|
+
{} // z
|
225
|
+
};
|
226
|
+
|
227
|
+
private static boolean
|
228
|
+
isValid(final String name)
|
229
|
+
{
|
230
|
+
int index = name.charAt(0) - 97;
|
231
|
+
if (index >= element_names.length) { return false; }
|
232
|
+
String[] elementNames = element_names[index];
|
233
|
+
for (int i = 0; i < elementNames.length; i++) {
|
234
|
+
if (name.equals(elementNames[i])) {
|
235
|
+
return true;
|
171
236
|
}
|
172
|
-
|
237
|
+
}
|
238
|
+
return false;
|
173
239
|
}
|
174
240
|
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
for (i = 0; i < attrs.getLength(); ++i) {
|
184
|
-
if (isNamespace(attrs.getQName(i))) {
|
185
|
-
attrs.removeAttributeAt(i);
|
186
|
-
--i;
|
187
|
-
}
|
188
|
-
}
|
189
|
-
|
190
|
-
element.uri = null;
|
191
|
-
super.startElement(element, attrs, augs);
|
192
|
-
}
|
193
|
-
}
|
194
|
-
|
195
|
-
public static class ElementValidityCheckFilter extends DefaultFilter {
|
196
|
-
private NokogiriErrorHandler errorHandler;
|
197
|
-
|
198
|
-
private ElementValidityCheckFilter(NokogiriErrorHandler errorHandler) {
|
199
|
-
this.errorHandler = errorHandler;
|
200
|
-
}
|
201
|
-
|
202
|
-
// element names from xhtml1-strict.dtd
|
203
|
-
private static String[][] element_names = {
|
204
|
-
{"a", "abbr", "acronym", "address", "area"},
|
205
|
-
{"b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button"},
|
206
|
-
{"caption", "cite", "code", "col", "colgroup"},
|
207
|
-
{"dd", "del", "dfn", "div", "dl", "dt"},
|
208
|
-
{"em"},
|
209
|
-
{"fieldset", "font", "form", "frame", "frameset"},
|
210
|
-
{}, // g
|
211
|
-
{"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html"},
|
212
|
-
{"i", "iframe", "img", "input", "ins"},
|
213
|
-
{}, // j
|
214
|
-
{"kbd"},
|
215
|
-
{"label", "legend", "li", "link"},
|
216
|
-
{"map", "meta"},
|
217
|
-
{"noframes", "noscript"},
|
218
|
-
{"object", "ol", "optgroup", "option"},
|
219
|
-
{"p", "param", "pre"},
|
220
|
-
{"q"},
|
221
|
-
{}, // r
|
222
|
-
{"s", "samp", "script", "select", "small", "span", "strike", "strong", "style", "sub", "sup"},
|
223
|
-
{"table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title", "tr", "tt"},
|
224
|
-
{"u", "ul"},
|
225
|
-
{"var"},
|
226
|
-
{}, // w
|
227
|
-
{}, // x
|
228
|
-
{}, // y
|
229
|
-
{} // z
|
230
|
-
};
|
231
|
-
|
232
|
-
private static boolean isValid(final String name) {
|
233
|
-
int index = name.charAt(0) - 97;
|
234
|
-
if (index >= element_names.length) return false;
|
235
|
-
String[] elementNames = element_names[index];
|
236
|
-
for (int i=0; i<elementNames.length; i++) {
|
237
|
-
if (name.equals(elementNames[i])) {
|
238
|
-
return true;
|
239
|
-
}
|
240
|
-
}
|
241
|
-
return false;
|
242
|
-
}
|
243
|
-
|
244
|
-
@Override
|
245
|
-
public void startElement(QName name, XMLAttributes attrs, Augmentations augs) throws XNIException {
|
246
|
-
if (!isValid(name.rawname)) {
|
247
|
-
errorHandler.addError(new Exception("Tag " + name.rawname + " invalid"));
|
248
|
-
}
|
249
|
-
super.startElement(name, attrs, augs);
|
250
|
-
}
|
241
|
+
@Override
|
242
|
+
public void
|
243
|
+
startElement(QName name, XMLAttributes attrs, Augmentations augs) throws XNIException
|
244
|
+
{
|
245
|
+
if (!isValid(name.rawname)) {
|
246
|
+
errorHandler.addError(new Exception("Tag " + name.rawname + " invalid"));
|
247
|
+
}
|
248
|
+
super.startElement(name, attrs, augs);
|
251
249
|
}
|
250
|
+
}
|
252
251
|
}
|