nokogiri 1.11.1-java → 1.11.2-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +12 -12
- data/LICENSE.md +1 -1
- data/README.md +20 -15
- data/ext/java/nokogiri/EncodingHandler.java +78 -59
- data/ext/java/nokogiri/HtmlDocument.java +137 -114
- data/ext/java/nokogiri/HtmlElementDescription.java +104 -87
- data/ext/java/nokogiri/HtmlEntityLookup.java +31 -26
- data/ext/java/nokogiri/HtmlSaxParserContext.java +220 -192
- data/ext/java/nokogiri/HtmlSaxPushParser.java +164 -139
- data/ext/java/nokogiri/NokogiriService.java +597 -526
- data/ext/java/nokogiri/XmlAttr.java +120 -96
- data/ext/java/nokogiri/XmlAttributeDecl.java +97 -76
- data/ext/java/nokogiri/XmlCdata.java +35 -26
- data/ext/java/nokogiri/XmlComment.java +48 -37
- data/ext/java/nokogiri/XmlDocument.java +642 -540
- data/ext/java/nokogiri/XmlDocumentFragment.java +127 -107
- data/ext/java/nokogiri/XmlDtd.java +450 -384
- data/ext/java/nokogiri/XmlElement.java +25 -18
- data/ext/java/nokogiri/XmlElementContent.java +345 -286
- data/ext/java/nokogiri/XmlElementDecl.java +126 -95
- data/ext/java/nokogiri/XmlEntityDecl.java +121 -97
- data/ext/java/nokogiri/XmlEntityReference.java +51 -42
- data/ext/java/nokogiri/XmlNamespace.java +177 -145
- data/ext/java/nokogiri/XmlNode.java +1843 -1588
- data/ext/java/nokogiri/XmlNodeSet.java +361 -299
- data/ext/java/nokogiri/XmlProcessingInstruction.java +49 -39
- data/ext/java/nokogiri/XmlReader.java +513 -418
- data/ext/java/nokogiri/XmlRelaxng.java +91 -78
- data/ext/java/nokogiri/XmlSaxParserContext.java +330 -285
- data/ext/java/nokogiri/XmlSaxPushParser.java +229 -190
- data/ext/java/nokogiri/XmlSchema.java +328 -263
- data/ext/java/nokogiri/XmlSyntaxError.java +113 -83
- data/ext/java/nokogiri/XmlText.java +57 -46
- data/ext/java/nokogiri/XmlXpathContext.java +240 -206
- data/ext/java/nokogiri/XsltStylesheet.java +282 -239
- data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +199 -168
- data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
- data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
- data/ext/java/nokogiri/internals/NokogiriDomParser.java +65 -50
- data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
- data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +25 -18
- data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -254
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +738 -622
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +186 -143
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +83 -68
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +66 -49
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +86 -69
- data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +44 -29
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +118 -101
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -24
- data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +25 -17
- data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +57 -42
- data/ext/java/nokogiri/internals/ParserContext.java +206 -179
- data/ext/java/nokogiri/internals/ReaderNode.java +478 -371
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -707
- data/ext/java/nokogiri/internals/SchemaErrorHandler.java +28 -19
- data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
- data/ext/java/nokogiri/internals/XmlDeclHandler.java +5 -4
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +208 -177
- data/ext/java/nokogiri/internals/XmlSaxParser.java +24 -17
- data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
- data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
- data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
- data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
- data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
- data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
- data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
- data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
- data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
- data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
- data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
- data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
- data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
- data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
- data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
- data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
- data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
- data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -570
- data/ext/nokogiri/depend +34 -474
- data/ext/nokogiri/extconf.rb +253 -183
- data/ext/nokogiri/html_document.c +10 -15
- data/ext/nokogiri/html_element_description.c +84 -71
- data/ext/nokogiri/html_entity_lookup.c +21 -16
- data/ext/nokogiri/html_sax_parser_context.c +66 -65
- data/ext/nokogiri/html_sax_push_parser.c +29 -27
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +171 -63
- data/ext/nokogiri/test_global_handlers.c +3 -4
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +221 -164
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +30 -27
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +17 -11
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +56 -49
- data/ext/nokogiri/xml_node.c +338 -286
- data/ext/nokogiri/xml_node_set.c +168 -156
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +191 -157
- data/ext/nokogiri/xml_relax_ng.c +29 -23
- data/ext/nokogiri/xml_sax_parser.c +117 -112
- data/ext/nokogiri/xml_sax_parser_context.c +100 -85
- data/ext/nokogiri/xml_sax_push_parser.c +34 -27
- data/ext/nokogiri/xml_schema.c +48 -42
- data/ext/nokogiri/xml_syntax_error.c +21 -23
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +134 -127
- data/ext/nokogiri/xslt_stylesheet.c +157 -157
- data/lib/nokogiri.rb +1 -22
- data/lib/nokogiri/css/parser.rb +1 -1
- data/lib/nokogiri/extension.rb +26 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -15
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +31 -8
- data/lib/nokogiri/xml/document.rb +31 -11
- data/lib/nokogiri/xml/node.rb +38 -42
- data/lib/nokogiri/xml/reader.rb +2 -9
- data/lib/nokogiri/xml/xpath.rb +1 -3
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
- metadata +7 -8
- data/ext/nokogiri/xml_io.c +0 -63
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
@@ -1,9 +1,12 @@
|
|
1
1
|
package nokogiri.internals;
|
2
2
|
|
3
3
|
@SuppressWarnings("serial")
|
4
|
-
public class ClosedStreamException extends Exception
|
4
|
+
public class ClosedStreamException extends Exception
|
5
|
+
{
|
5
6
|
|
6
|
-
public
|
7
|
+
public
|
8
|
+
ClosedStreamException(String message)
|
9
|
+
{
|
7
10
|
super(message);
|
8
11
|
}
|
9
12
|
|
@@ -17,10 +17,10 @@
|
|
17
17
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
18
|
* permit persons to whom the Software is furnished to do so, subject to
|
19
19
|
* the following conditions:
|
20
|
-
*
|
20
|
+
*
|
21
21
|
* The above copyright notice and this permission notice shall be
|
22
22
|
* included in all copies or substantial portions of the Software.
|
23
|
-
*
|
23
|
+
*
|
24
24
|
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
25
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
26
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
@@ -52,6 +52,7 @@ import org.cyberneko.html.filters.DefaultFilter;
|
|
52
52
|
import org.jruby.Ruby;
|
53
53
|
import org.jruby.RubyClass;
|
54
54
|
import org.jruby.runtime.ThreadContext;
|
55
|
+
import org.jruby.runtime.Helpers;
|
55
56
|
import org.jruby.runtime.builtin.IRubyObject;
|
56
57
|
import org.w3c.dom.Document;
|
57
58
|
import org.w3c.dom.NamedNodeMap;
|
@@ -60,193 +61,223 @@ import org.w3c.dom.NodeList;
|
|
60
61
|
|
61
62
|
/**
|
62
63
|
* Parser for HtmlDocument. This class actually parses HtmlDocument using NekoHtml.
|
63
|
-
*
|
64
|
+
*
|
64
65
|
* @author sergio
|
65
66
|
* @author Patrick Mahoney <pat@polycrystal.org>
|
66
67
|
* @author Yoko Harada <yokolet@gmail.com>
|
67
68
|
*/
|
68
|
-
public class HtmlDomParserContext extends XmlDomParserContext
|
69
|
+
public class HtmlDomParserContext extends XmlDomParserContext
|
70
|
+
{
|
69
71
|
|
70
|
-
|
71
|
-
|
72
|
-
|
72
|
+
public
|
73
|
+
HtmlDomParserContext(Ruby runtime, IRubyObject options)
|
74
|
+
{
|
75
|
+
this(runtime, runtime.getNil(), options);
|
76
|
+
}
|
73
77
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
+
public
|
79
|
+
HtmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options)
|
80
|
+
{
|
81
|
+
super(runtime, encoding, options);
|
82
|
+
java_encoding = NokogiriHelpers.getValidEncoding(encoding);
|
83
|
+
}
|
78
84
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
parser = new NokogiriDomParser(config);
|
90
|
-
|
91
|
-
// see http://nekohtml.sourceforge.net/settings.html for details
|
92
|
-
setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
|
93
|
-
setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
|
94
|
-
setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
|
95
|
-
setProperty("http://cyberneko.org/html/properties/filters", filters);
|
96
|
-
setFeature("http://cyberneko.org/html/features/report-errors", true);
|
97
|
-
setFeature("http://xml.org/sax/features/namespaces", false);
|
98
|
-
}
|
99
|
-
|
100
|
-
@Override
|
101
|
-
public void setEncoding(String encoding) {
|
102
|
-
super.setEncoding(encoding);
|
103
|
-
}
|
85
|
+
@Override
|
86
|
+
protected void
|
87
|
+
initParser(Ruby runtime)
|
88
|
+
{
|
89
|
+
XMLParserConfiguration config = new HTMLConfiguration();
|
90
|
+
//XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter();
|
91
|
+
XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler);
|
92
|
+
//XMLDocumentFilter[] filters = { removeNSAttrsFilter, elementValidityCheckFilter};
|
93
|
+
XMLDocumentFilter[] filters = { elementValidityCheckFilter};
|
104
94
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
95
|
+
config.setErrorHandler(this.errorHandler);
|
96
|
+
|
97
|
+
parser = new NokogiriDomParser(config);
|
98
|
+
|
99
|
+
// see http://nekohtml.sourceforge.net/settings.html for details
|
100
|
+
setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
|
101
|
+
setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
|
102
|
+
setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
|
103
|
+
setProperty("http://cyberneko.org/html/properties/filters", filters);
|
104
|
+
setFeature("http://cyberneko.org/html/features/report-errors", true);
|
105
|
+
setFeature("http://xml.org/sax/features/namespaces", false);
|
106
|
+
}
|
107
|
+
|
108
|
+
@Override
|
109
|
+
public void
|
110
|
+
setEncoding(String encoding)
|
111
|
+
{
|
112
|
+
super.setEncoding(encoding);
|
113
|
+
}
|
114
|
+
|
115
|
+
/**
|
116
|
+
* Enable NekoHTML feature for balancing tags in a document fragment.
|
117
|
+
*
|
118
|
+
* This method is used in XmlNode#in_context method.
|
119
|
+
*/
|
120
|
+
public void
|
121
|
+
enableDocumentFragment()
|
122
|
+
{
|
123
|
+
setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
|
124
|
+
}
|
125
|
+
|
126
|
+
@Override
|
127
|
+
public XmlDocument
|
128
|
+
parse(ThreadContext context, RubyClass klass, IRubyObject url)
|
129
|
+
{
|
130
|
+
XmlDocument xmlDoc = super.parse(context, klass, url);
|
131
|
+
|
132
|
+
// let's be consistent in how we handle RECOVER and NORECOVER (a.k.a. STRICT)
|
133
|
+
// https://github.com/sparklemotion/nokogiri/issues/2130
|
134
|
+
if (!options.recover && errorHandler.getErrors().size() > 0) {
|
135
|
+
XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
|
136
|
+
String exceptionMsg = String.format("%s: '%s'",
|
137
|
+
"Parser without recover option encountered error or warning",
|
138
|
+
errorHandler.getErrors().get(0));
|
139
|
+
xmlSyntaxError.setException(new Exception(exceptionMsg));
|
140
|
+
throw xmlSyntaxError.toThrowable();
|
112
141
|
}
|
113
142
|
|
114
|
-
|
115
|
-
|
116
|
-
XmlDocument xmlDoc = super.parse(context, klass, url);
|
117
|
-
|
118
|
-
// let's be consistent in how we handle RECOVER and NORECOVER (a.k.a. STRICT)
|
119
|
-
// https://github.com/sparklemotion/nokogiri/issues/2130
|
120
|
-
if (!options.recover && errorHandler.getErrors().size() > 0) {
|
121
|
-
XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
|
122
|
-
String exceptionMsg = String.format("%s: '%s'",
|
123
|
-
"Parser without recover option encountered error or warning",
|
124
|
-
errorHandler.getErrors().get(0));
|
125
|
-
xmlSyntaxError.setException(new Exception(exceptionMsg));
|
126
|
-
throw xmlSyntaxError.toThrowable();
|
127
|
-
}
|
143
|
+
return xmlDoc;
|
144
|
+
}
|
128
145
|
|
129
|
-
|
146
|
+
@Override
|
147
|
+
protected XmlDocument
|
148
|
+
wrapDocument(ThreadContext context, RubyClass klass, Document document)
|
149
|
+
{
|
150
|
+
HtmlDocument htmlDocument = new HtmlDocument(context.runtime, klass, document);
|
151
|
+
htmlDocument.setDocumentNode(context.runtime, document);
|
152
|
+
Helpers.invoke(context, htmlDocument, "initialize");
|
153
|
+
|
154
|
+
if (ruby_encoding.isNil()) {
|
155
|
+
// ruby_encoding might have been detected by HtmlDocument::EncodingReader
|
156
|
+
if (detected_encoding != null && !detected_encoding.isNil()) {
|
157
|
+
ruby_encoding = detected_encoding;
|
158
|
+
} else {
|
159
|
+
// no encoding given and none detected, so try to get it from an HTML5 meta tag
|
160
|
+
String charset = tryGetCharsetFromHtml5MetaTag(document);
|
161
|
+
ruby_encoding = stringOrNil(context.runtime, charset);
|
162
|
+
}
|
130
163
|
}
|
164
|
+
htmlDocument.setEncoding(ruby_encoding);
|
165
|
+
htmlDocument.setParsedEncoding(java_encoding);
|
166
|
+
return htmlDocument;
|
167
|
+
}
|
131
168
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
169
|
+
// NekoHtml doesn't understand the HTML5 meta tag format, so it fails to detect the charset
|
170
|
+
// from an HTML5 style meta tag. Luckily, the meta tag and charset exist in the DOM tree
|
171
|
+
// so, this method attempts to find the charset.
|
172
|
+
private static String
|
173
|
+
tryGetCharsetFromHtml5MetaTag(Document document)
|
174
|
+
{
|
175
|
+
if (!"html".equalsIgnoreCase(document.getDocumentElement().getNodeName())) { return null; }
|
176
|
+
NodeList list = document.getDocumentElement().getChildNodes();
|
177
|
+
Node item;
|
178
|
+
for (int i = 0; i < list.getLength(); i++) {
|
179
|
+
if ("head".equalsIgnoreCase((item = list.item(i)).getNodeName())) {
|
180
|
+
NodeList headers = item.getChildNodes();
|
181
|
+
for (int j = 0; j < headers.getLength(); j++) {
|
182
|
+
if ("meta".equalsIgnoreCase((item = headers.item(j)).getNodeName())) {
|
183
|
+
NamedNodeMap nodeMap = item.getAttributes();
|
184
|
+
for (int k = 0; k < nodeMap.getLength(); k++) {
|
185
|
+
if ("charset".equalsIgnoreCase((item = nodeMap.item(k)).getNodeName())) {
|
186
|
+
return item.getNodeValue();
|
187
|
+
}
|
144
188
|
}
|
189
|
+
}
|
145
190
|
}
|
146
|
-
|
147
|
-
htmlDocument.setParsedEncoding(java_encoding);
|
148
|
-
return htmlDocument;
|
191
|
+
}
|
149
192
|
}
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
}
|
169
|
-
}
|
170
|
-
}
|
193
|
+
return null;
|
194
|
+
}
|
195
|
+
|
196
|
+
/**
|
197
|
+
* Filter to strip out attributes that pertain to XML namespaces.
|
198
|
+
*/
|
199
|
+
public static class RemoveNSAttrsFilter extends DefaultFilter
|
200
|
+
{
|
201
|
+
@Override
|
202
|
+
public void
|
203
|
+
startElement(QName element, XMLAttributes attrs,
|
204
|
+
Augmentations augs) throws XNIException
|
205
|
+
{
|
206
|
+
int i;
|
207
|
+
for (i = 0; i < attrs.getLength(); ++i) {
|
208
|
+
if (isNamespace(attrs.getQName(i))) {
|
209
|
+
attrs.removeAttributeAt(i);
|
210
|
+
--i;
|
171
211
|
}
|
172
|
-
|
212
|
+
}
|
213
|
+
|
214
|
+
element.uri = null;
|
215
|
+
super.startElement(element, attrs, augs);
|
173
216
|
}
|
217
|
+
}
|
174
218
|
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
public static class RemoveNSAttrsFilter extends DefaultFilter {
|
179
|
-
@Override
|
180
|
-
public void startElement(QName element, XMLAttributes attrs,
|
181
|
-
Augmentations augs) throws XNIException {
|
182
|
-
int i;
|
183
|
-
for (i = 0; i < attrs.getLength(); ++i) {
|
184
|
-
if (isNamespace(attrs.getQName(i))) {
|
185
|
-
attrs.removeAttributeAt(i);
|
186
|
-
--i;
|
187
|
-
}
|
188
|
-
}
|
219
|
+
public static class ElementValidityCheckFilter extends DefaultFilter
|
220
|
+
{
|
221
|
+
private NokogiriErrorHandler errorHandler;
|
189
222
|
|
190
|
-
|
191
|
-
|
192
|
-
|
223
|
+
private
|
224
|
+
ElementValidityCheckFilter(NokogiriErrorHandler errorHandler)
|
225
|
+
{
|
226
|
+
this.errorHandler = errorHandler;
|
193
227
|
}
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
if (index >= element_names.length) return false;
|
235
|
-
String[] elementNames = element_names[index];
|
236
|
-
for (int i=0; i<elementNames.length; i++) {
|
237
|
-
if (name.equals(elementNames[i])) {
|
238
|
-
return true;
|
239
|
-
}
|
240
|
-
}
|
241
|
-
return false;
|
242
|
-
}
|
243
|
-
|
244
|
-
@Override
|
245
|
-
public void startElement(QName name, XMLAttributes attrs, Augmentations augs) throws XNIException {
|
246
|
-
if (!isValid(name.rawname)) {
|
247
|
-
errorHandler.addError(new Exception("Tag " + name.rawname + " invalid"));
|
248
|
-
}
|
249
|
-
super.startElement(name, attrs, augs);
|
228
|
+
|
229
|
+
// element names from xhtml1-strict.dtd
|
230
|
+
private static String[][] element_names = {
|
231
|
+
{"a", "abbr", "acronym", "address", "area"},
|
232
|
+
{"b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button"},
|
233
|
+
{"caption", "cite", "code", "col", "colgroup"},
|
234
|
+
{"dd", "del", "dfn", "div", "dl", "dt"},
|
235
|
+
{"em"},
|
236
|
+
{"fieldset", "font", "form", "frame", "frameset"},
|
237
|
+
{}, // g
|
238
|
+
{"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html"},
|
239
|
+
{"i", "iframe", "img", "input", "ins"},
|
240
|
+
{}, // j
|
241
|
+
{"kbd"},
|
242
|
+
{"label", "legend", "li", "link"},
|
243
|
+
{"map", "meta"},
|
244
|
+
{"noframes", "noscript"},
|
245
|
+
{"object", "ol", "optgroup", "option"},
|
246
|
+
{"p", "param", "pre"},
|
247
|
+
{"q"},
|
248
|
+
{}, // r
|
249
|
+
{"s", "samp", "script", "select", "small", "span", "strike", "strong", "style", "sub", "sup"},
|
250
|
+
{"table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title", "tr", "tt"},
|
251
|
+
{"u", "ul"},
|
252
|
+
{"var"},
|
253
|
+
{}, // w
|
254
|
+
{}, // x
|
255
|
+
{}, // y
|
256
|
+
{} // z
|
257
|
+
};
|
258
|
+
|
259
|
+
private static boolean
|
260
|
+
isValid(final String name)
|
261
|
+
{
|
262
|
+
int index = name.charAt(0) - 97;
|
263
|
+
if (index >= element_names.length) { return false; }
|
264
|
+
String[] elementNames = element_names[index];
|
265
|
+
for (int i = 0; i < elementNames.length; i++) {
|
266
|
+
if (name.equals(elementNames[i])) {
|
267
|
+
return true;
|
250
268
|
}
|
269
|
+
}
|
270
|
+
return false;
|
271
|
+
}
|
272
|
+
|
273
|
+
@Override
|
274
|
+
public void
|
275
|
+
startElement(QName name, XMLAttributes attrs, Augmentations augs) throws XNIException
|
276
|
+
{
|
277
|
+
if (!isValid(name.rawname)) {
|
278
|
+
errorHandler.addError(new Exception("Tag " + name.rawname + " invalid"));
|
279
|
+
}
|
280
|
+
super.startElement(name, attrs, augs);
|
251
281
|
}
|
282
|
+
}
|
252
283
|
}
|
@@ -4,17 +4,24 @@ import org.xml.sax.ErrorHandler;
|
|
4
4
|
import org.xml.sax.SAXException;
|
5
5
|
import org.xml.sax.SAXParseException;
|
6
6
|
|
7
|
-
public class IgnoreSchemaErrorsErrorHandler implements ErrorHandler
|
7
|
+
public class IgnoreSchemaErrorsErrorHandler implements ErrorHandler
|
8
|
+
{
|
8
9
|
|
9
|
-
|
10
|
-
|
11
|
-
|
10
|
+
@Override
|
11
|
+
public void
|
12
|
+
warning(SAXParseException exception) throws SAXException
|
13
|
+
{
|
14
|
+
}
|
12
15
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
+
@Override
|
17
|
+
public void
|
18
|
+
error(SAXParseException exception) throws SAXException
|
19
|
+
{
|
20
|
+
}
|
16
21
|
|
17
|
-
|
18
|
-
|
19
|
-
|
22
|
+
@Override
|
23
|
+
public void
|
24
|
+
fatalError(SAXParseException exception) throws SAXException
|
25
|
+
{
|
26
|
+
}
|
20
27
|
}
|