nokogiri 1.11.1-java → 1.11.2-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE-DEPENDENCIES.md +12 -12
  3. data/LICENSE.md +1 -1
  4. data/README.md +20 -15
  5. data/ext/java/nokogiri/EncodingHandler.java +78 -59
  6. data/ext/java/nokogiri/HtmlDocument.java +137 -114
  7. data/ext/java/nokogiri/HtmlElementDescription.java +104 -87
  8. data/ext/java/nokogiri/HtmlEntityLookup.java +31 -26
  9. data/ext/java/nokogiri/HtmlSaxParserContext.java +220 -192
  10. data/ext/java/nokogiri/HtmlSaxPushParser.java +164 -139
  11. data/ext/java/nokogiri/NokogiriService.java +597 -526
  12. data/ext/java/nokogiri/XmlAttr.java +120 -96
  13. data/ext/java/nokogiri/XmlAttributeDecl.java +97 -76
  14. data/ext/java/nokogiri/XmlCdata.java +35 -26
  15. data/ext/java/nokogiri/XmlComment.java +48 -37
  16. data/ext/java/nokogiri/XmlDocument.java +642 -540
  17. data/ext/java/nokogiri/XmlDocumentFragment.java +127 -107
  18. data/ext/java/nokogiri/XmlDtd.java +450 -384
  19. data/ext/java/nokogiri/XmlElement.java +25 -18
  20. data/ext/java/nokogiri/XmlElementContent.java +345 -286
  21. data/ext/java/nokogiri/XmlElementDecl.java +126 -95
  22. data/ext/java/nokogiri/XmlEntityDecl.java +121 -97
  23. data/ext/java/nokogiri/XmlEntityReference.java +51 -42
  24. data/ext/java/nokogiri/XmlNamespace.java +177 -145
  25. data/ext/java/nokogiri/XmlNode.java +1843 -1588
  26. data/ext/java/nokogiri/XmlNodeSet.java +361 -299
  27. data/ext/java/nokogiri/XmlProcessingInstruction.java +49 -39
  28. data/ext/java/nokogiri/XmlReader.java +513 -418
  29. data/ext/java/nokogiri/XmlRelaxng.java +91 -78
  30. data/ext/java/nokogiri/XmlSaxParserContext.java +330 -285
  31. data/ext/java/nokogiri/XmlSaxPushParser.java +229 -190
  32. data/ext/java/nokogiri/XmlSchema.java +328 -263
  33. data/ext/java/nokogiri/XmlSyntaxError.java +113 -83
  34. data/ext/java/nokogiri/XmlText.java +57 -46
  35. data/ext/java/nokogiri/XmlXpathContext.java +240 -206
  36. data/ext/java/nokogiri/XsltStylesheet.java +282 -239
  37. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  38. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +199 -168
  39. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  40. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  41. data/ext/java/nokogiri/internals/NokogiriDomParser.java +65 -50
  42. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  43. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +25 -18
  44. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -254
  45. data/ext/java/nokogiri/internals/NokogiriHelpers.java +738 -622
  46. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +186 -143
  47. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +83 -68
  48. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +66 -49
  49. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +86 -69
  50. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +44 -29
  51. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +118 -101
  52. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -24
  53. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +25 -17
  54. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +57 -42
  55. data/ext/java/nokogiri/internals/ParserContext.java +206 -179
  56. data/ext/java/nokogiri/internals/ReaderNode.java +478 -371
  57. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -707
  58. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +28 -19
  59. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  60. data/ext/java/nokogiri/internals/XmlDeclHandler.java +5 -4
  61. data/ext/java/nokogiri/internals/XmlDomParserContext.java +208 -177
  62. data/ext/java/nokogiri/internals/XmlSaxParser.java +24 -17
  63. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  64. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  65. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  66. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  67. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  68. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  77. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  78. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  80. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  81. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  82. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  83. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  84. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  85. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  86. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  87. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  88. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  89. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  90. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  91. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -570
  92. data/ext/nokogiri/depend +34 -474
  93. data/ext/nokogiri/extconf.rb +253 -183
  94. data/ext/nokogiri/html_document.c +10 -15
  95. data/ext/nokogiri/html_element_description.c +84 -71
  96. data/ext/nokogiri/html_entity_lookup.c +21 -16
  97. data/ext/nokogiri/html_sax_parser_context.c +66 -65
  98. data/ext/nokogiri/html_sax_push_parser.c +29 -27
  99. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  100. data/ext/nokogiri/nokogiri.c +171 -63
  101. data/ext/nokogiri/test_global_handlers.c +3 -4
  102. data/ext/nokogiri/xml_attr.c +15 -15
  103. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  104. data/ext/nokogiri/xml_cdata.c +13 -18
  105. data/ext/nokogiri/xml_comment.c +19 -26
  106. data/ext/nokogiri/xml_document.c +221 -164
  107. data/ext/nokogiri/xml_document_fragment.c +13 -15
  108. data/ext/nokogiri/xml_dtd.c +54 -48
  109. data/ext/nokogiri/xml_element_content.c +30 -27
  110. data/ext/nokogiri/xml_element_decl.c +22 -22
  111. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  112. data/ext/nokogiri/xml_entity_decl.c +32 -30
  113. data/ext/nokogiri/xml_entity_reference.c +16 -18
  114. data/ext/nokogiri/xml_namespace.c +56 -49
  115. data/ext/nokogiri/xml_node.c +338 -286
  116. data/ext/nokogiri/xml_node_set.c +168 -156
  117. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  118. data/ext/nokogiri/xml_reader.c +191 -157
  119. data/ext/nokogiri/xml_relax_ng.c +29 -23
  120. data/ext/nokogiri/xml_sax_parser.c +117 -112
  121. data/ext/nokogiri/xml_sax_parser_context.c +100 -85
  122. data/ext/nokogiri/xml_sax_push_parser.c +34 -27
  123. data/ext/nokogiri/xml_schema.c +48 -42
  124. data/ext/nokogiri/xml_syntax_error.c +21 -23
  125. data/ext/nokogiri/xml_text.c +13 -17
  126. data/ext/nokogiri/xml_xpath_context.c +134 -127
  127. data/ext/nokogiri/xslt_stylesheet.c +157 -157
  128. data/lib/nokogiri.rb +1 -22
  129. data/lib/nokogiri/css/parser.rb +1 -1
  130. data/lib/nokogiri/extension.rb +26 -0
  131. data/lib/nokogiri/html/document_fragment.rb +15 -15
  132. data/lib/nokogiri/nokogiri.jar +0 -0
  133. data/lib/nokogiri/version/constant.rb +1 -1
  134. data/lib/nokogiri/version/info.rb +31 -8
  135. data/lib/nokogiri/xml/document.rb +31 -11
  136. data/lib/nokogiri/xml/node.rb +38 -42
  137. data/lib/nokogiri/xml/reader.rb +2 -9
  138. data/lib/nokogiri/xml/xpath.rb +1 -3
  139. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  140. metadata +7 -8
  141. data/ext/nokogiri/xml_io.c +0 -63
  142. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
@@ -1,9 +1,12 @@
1
1
  package nokogiri.internals;
2
2
 
3
3
  @SuppressWarnings("serial")
4
- public class ClosedStreamException extends Exception {
4
+ public class ClosedStreamException extends Exception
5
+ {
5
6
 
6
- public ClosedStreamException(String message) {
7
+ public
8
+ ClosedStreamException(String message)
9
+ {
7
10
  super(message);
8
11
  }
9
12
 
@@ -17,10 +17,10 @@
17
17
  * distribute, sublicense, and/or sell copies of the Software, and to
18
18
  * permit persons to whom the Software is furnished to do so, subject to
19
19
  * the following conditions:
20
- *
20
+ *
21
21
  * The above copyright notice and this permission notice shall be
22
22
  * included in all copies or substantial portions of the Software.
23
- *
23
+ *
24
24
  * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
25
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
26
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -52,6 +52,7 @@ import org.cyberneko.html.filters.DefaultFilter;
52
52
  import org.jruby.Ruby;
53
53
  import org.jruby.RubyClass;
54
54
  import org.jruby.runtime.ThreadContext;
55
+ import org.jruby.runtime.Helpers;
55
56
  import org.jruby.runtime.builtin.IRubyObject;
56
57
  import org.w3c.dom.Document;
57
58
  import org.w3c.dom.NamedNodeMap;
@@ -60,193 +61,223 @@ import org.w3c.dom.NodeList;
60
61
 
61
62
  /**
62
63
  * Parser for HtmlDocument. This class actually parses HtmlDocument using NekoHtml.
63
- *
64
+ *
64
65
  * @author sergio
65
66
  * @author Patrick Mahoney <pat@polycrystal.org>
66
67
  * @author Yoko Harada <yokolet@gmail.com>
67
68
  */
68
- public class HtmlDomParserContext extends XmlDomParserContext {
69
+ public class HtmlDomParserContext extends XmlDomParserContext
70
+ {
69
71
 
70
- public HtmlDomParserContext(Ruby runtime, IRubyObject options) {
71
- this(runtime, runtime.getNil(), options);
72
- }
72
+ public
73
+ HtmlDomParserContext(Ruby runtime, IRubyObject options)
74
+ {
75
+ this(runtime, runtime.getNil(), options);
76
+ }
73
77
 
74
- public HtmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options) {
75
- super(runtime, encoding, options);
76
- java_encoding = NokogiriHelpers.getValidEncoding(encoding);
77
- }
78
+ public
79
+ HtmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options)
80
+ {
81
+ super(runtime, encoding, options);
82
+ java_encoding = NokogiriHelpers.getValidEncoding(encoding);
83
+ }
78
84
 
79
- @Override
80
- protected void initParser(Ruby runtime) {
81
- XMLParserConfiguration config = new HTMLConfiguration();
82
- //XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter();
83
- XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler);
84
- //XMLDocumentFilter[] filters = { removeNSAttrsFilter, elementValidityCheckFilter};
85
- XMLDocumentFilter[] filters = { elementValidityCheckFilter};
86
-
87
- config.setErrorHandler(this.errorHandler);
88
-
89
- parser = new NokogiriDomParser(config);
90
-
91
- // see http://nekohtml.sourceforge.net/settings.html for details
92
- setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
93
- setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
94
- setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
95
- setProperty("http://cyberneko.org/html/properties/filters", filters);
96
- setFeature("http://cyberneko.org/html/features/report-errors", true);
97
- setFeature("http://xml.org/sax/features/namespaces", false);
98
- }
99
-
100
- @Override
101
- public void setEncoding(String encoding) {
102
- super.setEncoding(encoding);
103
- }
85
+ @Override
86
+ protected void
87
+ initParser(Ruby runtime)
88
+ {
89
+ XMLParserConfiguration config = new HTMLConfiguration();
90
+ //XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter();
91
+ XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler);
92
+ //XMLDocumentFilter[] filters = { removeNSAttrsFilter, elementValidityCheckFilter};
93
+ XMLDocumentFilter[] filters = { elementValidityCheckFilter};
104
94
 
105
- /**
106
- * Enable NekoHTML feature for balancing tags in a document fragment.
107
- *
108
- * This method is used in XmlNode#in_context method.
109
- */
110
- public void enableDocumentFragment() {
111
- setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
95
+ config.setErrorHandler(this.errorHandler);
96
+
97
+ parser = new NokogiriDomParser(config);
98
+
99
+ // see http://nekohtml.sourceforge.net/settings.html for details
100
+ setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
101
+ setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
102
+ setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
103
+ setProperty("http://cyberneko.org/html/properties/filters", filters);
104
+ setFeature("http://cyberneko.org/html/features/report-errors", true);
105
+ setFeature("http://xml.org/sax/features/namespaces", false);
106
+ }
107
+
108
+ @Override
109
+ public void
110
+ setEncoding(String encoding)
111
+ {
112
+ super.setEncoding(encoding);
113
+ }
114
+
115
+ /**
116
+ * Enable NekoHTML feature for balancing tags in a document fragment.
117
+ *
118
+ * This method is used by the XmlNode#in_context method.
119
+ */
120
+ public void
121
+ enableDocumentFragment()
122
+ {
123
+ setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
124
+ }
125
+
126
+ @Override
127
+ public XmlDocument
128
+ parse(ThreadContext context, RubyClass klass, IRubyObject url)
129
+ {
130
+ XmlDocument xmlDoc = super.parse(context, klass, url);
131
+
132
+ // let's be consistent in how we handle RECOVER and NORECOVER (a.k.a. STRICT)
133
+ // https://github.com/sparklemotion/nokogiri/issues/2130
134
+ if (!options.recover && errorHandler.getErrors().size() > 0) {
135
+ XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
136
+ String exceptionMsg = String.format("%s: '%s'",
137
+ "Parser without recover option encountered error or warning",
138
+ errorHandler.getErrors().get(0));
139
+ xmlSyntaxError.setException(new Exception(exceptionMsg));
140
+ throw xmlSyntaxError.toThrowable();
112
141
  }
113
142
 
114
- @Override
115
- public XmlDocument parse(ThreadContext context, RubyClass klass, IRubyObject url) {
116
- XmlDocument xmlDoc = super.parse(context, klass, url);
117
-
118
- // let's be consistent in how we handle RECOVER and NORECOVER (a.k.a. STRICT)
119
- // https://github.com/sparklemotion/nokogiri/issues/2130
120
- if (!options.recover && errorHandler.getErrors().size() > 0) {
121
- XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
122
- String exceptionMsg = String.format("%s: '%s'",
123
- "Parser without recover option encountered error or warning",
124
- errorHandler.getErrors().get(0));
125
- xmlSyntaxError.setException(new Exception(exceptionMsg));
126
- throw xmlSyntaxError.toThrowable();
127
- }
143
+ return xmlDoc;
144
+ }
128
145
 
129
- return xmlDoc;
146
+ @Override
147
+ protected XmlDocument
148
+ wrapDocument(ThreadContext context, RubyClass klass, Document document)
149
+ {
150
+ HtmlDocument htmlDocument = new HtmlDocument(context.runtime, klass, document);
151
+ htmlDocument.setDocumentNode(context.runtime, document);
152
+ Helpers.invoke(context, htmlDocument, "initialize");
153
+
154
+ if (ruby_encoding.isNil()) {
155
+ // ruby_encoding might have been detected by HtmlDocument::EncodingReader
156
+ if (detected_encoding != null && !detected_encoding.isNil()) {
157
+ ruby_encoding = detected_encoding;
158
+ } else {
159
+ // no encoding given and none detected, so try to get it from an HTML5 meta tag
160
+ String charset = tryGetCharsetFromHtml5MetaTag(document);
161
+ ruby_encoding = stringOrNil(context.runtime, charset);
162
+ }
130
163
  }
164
+ htmlDocument.setEncoding(ruby_encoding);
165
+ htmlDocument.setParsedEncoding(java_encoding);
166
+ return htmlDocument;
167
+ }
131
168
 
132
- @Override
133
- protected XmlDocument wrapDocument(ThreadContext context, RubyClass klass, Document document) {
134
- HtmlDocument htmlDocument = new HtmlDocument(context.runtime, klass, document);
135
- htmlDocument.setDocumentNode(context.runtime, document);
136
- if (ruby_encoding.isNil()) {
137
- // ruby_encoding might have detected by HtmlDocument::EncodingReader
138
- if (detected_encoding != null && !detected_encoding.isNil()) {
139
- ruby_encoding = detected_encoding;
140
- } else {
141
- // no encoding given & no encoding detected, then try to get it
142
- String charset = tryGetCharsetFromHtml5MetaTag(document);
143
- ruby_encoding = stringOrNil(context.runtime, charset);
169
+ // NekoHtml doesn't understand the HTML5 meta tag format, so it fails to detect the charset
170
+ // from an HTML5-style meta tag. Luckily, the meta tag and its charset exist in the DOM tree,
171
+ // so this method attempts to find the charset there.
172
+ private static String
173
+ tryGetCharsetFromHtml5MetaTag(Document document)
174
+ {
175
+ if (!"html".equalsIgnoreCase(document.getDocumentElement().getNodeName())) { return null; }
176
+ NodeList list = document.getDocumentElement().getChildNodes();
177
+ Node item;
178
+ for (int i = 0; i < list.getLength(); i++) {
179
+ if ("head".equalsIgnoreCase((item = list.item(i)).getNodeName())) {
180
+ NodeList headers = item.getChildNodes();
181
+ for (int j = 0; j < headers.getLength(); j++) {
182
+ if ("meta".equalsIgnoreCase((item = headers.item(j)).getNodeName())) {
183
+ NamedNodeMap nodeMap = item.getAttributes();
184
+ for (int k = 0; k < nodeMap.getLength(); k++) {
185
+ if ("charset".equalsIgnoreCase((item = nodeMap.item(k)).getNodeName())) {
186
+ return item.getNodeValue();
187
+ }
144
188
  }
189
+ }
145
190
  }
146
- htmlDocument.setEncoding(ruby_encoding);
147
- htmlDocument.setParsedEncoding(java_encoding);
148
- return htmlDocument;
191
+ }
149
192
  }
150
-
151
- // NekoHtml doesn't understand HTML5 meta tag format. This fails to detect charset
152
- // from an HTML5 style meta tag. Luckily, the meta tag and charset exists in DOM tree
153
- // so, this method attempts to find the charset.
154
- private static String tryGetCharsetFromHtml5MetaTag(Document document) {
155
- if (!"html".equalsIgnoreCase(document.getDocumentElement().getNodeName())) return null;
156
- NodeList list = document.getDocumentElement().getChildNodes(); Node item;
157
- for (int i = 0; i < list.getLength(); i++) {
158
- if ("head".equalsIgnoreCase((item = list.item(i)).getNodeName())) {
159
- NodeList headers = item.getChildNodes();
160
- for (int j = 0; j < headers.getLength(); j++) {
161
- if ("meta".equalsIgnoreCase((item = headers.item(j)).getNodeName())) {
162
- NamedNodeMap nodeMap = item.getAttributes();
163
- for (int k = 0; k < nodeMap.getLength(); k++) {
164
- if ("charset".equalsIgnoreCase((item = nodeMap.item(k)).getNodeName())) {
165
- return item.getNodeValue();
166
- }
167
- }
168
- }
169
- }
170
- }
193
+ return null;
194
+ }
195
+
196
+ /**
197
+ * Filter to strip out attributes that pertain to XML namespaces.
198
+ */
199
+ public static class RemoveNSAttrsFilter extends DefaultFilter
200
+ {
201
+ @Override
202
+ public void
203
+ startElement(QName element, XMLAttributes attrs,
204
+ Augmentations augs) throws XNIException
205
+ {
206
+ int i;
207
+ for (i = 0; i < attrs.getLength(); ++i) {
208
+ if (isNamespace(attrs.getQName(i))) {
209
+ attrs.removeAttributeAt(i);
210
+ --i;
171
211
  }
172
- return null;
212
+ }
213
+
214
+ element.uri = null;
215
+ super.startElement(element, attrs, augs);
173
216
  }
217
+ }
174
218
 
175
- /**
176
- * Filter to strip out attributes that pertain to XML namespaces.
177
- */
178
- public static class RemoveNSAttrsFilter extends DefaultFilter {
179
- @Override
180
- public void startElement(QName element, XMLAttributes attrs,
181
- Augmentations augs) throws XNIException {
182
- int i;
183
- for (i = 0; i < attrs.getLength(); ++i) {
184
- if (isNamespace(attrs.getQName(i))) {
185
- attrs.removeAttributeAt(i);
186
- --i;
187
- }
188
- }
219
+ public static class ElementValidityCheckFilter extends DefaultFilter
220
+ {
221
+ private NokogiriErrorHandler errorHandler;
189
222
 
190
- element.uri = null;
191
- super.startElement(element, attrs, augs);
192
- }
223
+ private
224
+ ElementValidityCheckFilter(NokogiriErrorHandler errorHandler)
225
+ {
226
+ this.errorHandler = errorHandler;
193
227
  }
194
-
195
- public static class ElementValidityCheckFilter extends DefaultFilter {
196
- private NokogiriErrorHandler errorHandler;
197
-
198
- private ElementValidityCheckFilter(NokogiriErrorHandler errorHandler) {
199
- this.errorHandler = errorHandler;
200
- }
201
-
202
- // element names from xhtml1-strict.dtd
203
- private static String[][] element_names = {
204
- {"a", "abbr", "acronym", "address", "area"},
205
- {"b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button"},
206
- {"caption", "cite", "code", "col", "colgroup"},
207
- {"dd", "del", "dfn", "div", "dl", "dt"},
208
- {"em"},
209
- {"fieldset", "font", "form", "frame", "frameset"},
210
- {}, // g
211
- {"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html"},
212
- {"i", "iframe", "img", "input", "ins"},
213
- {}, // j
214
- {"kbd"},
215
- {"label", "legend", "li", "link"},
216
- {"map", "meta"},
217
- {"noframes", "noscript"},
218
- {"object", "ol", "optgroup", "option"},
219
- {"p", "param", "pre"},
220
- {"q"},
221
- {}, // r
222
- {"s", "samp", "script", "select", "small", "span", "strike", "strong", "style", "sub", "sup"},
223
- {"table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title", "tr", "tt"},
224
- {"u", "ul"},
225
- {"var"},
226
- {}, // w
227
- {}, // x
228
- {}, // y
229
- {} // z
230
- };
231
-
232
- private static boolean isValid(final String name) {
233
- int index = name.charAt(0) - 97;
234
- if (index >= element_names.length) return false;
235
- String[] elementNames = element_names[index];
236
- for (int i=0; i<elementNames.length; i++) {
237
- if (name.equals(elementNames[i])) {
238
- return true;
239
- }
240
- }
241
- return false;
242
- }
243
-
244
- @Override
245
- public void startElement(QName name, XMLAttributes attrs, Augmentations augs) throws XNIException {
246
- if (!isValid(name.rawname)) {
247
- errorHandler.addError(new Exception("Tag " + name.rawname + " invalid"));
248
- }
249
- super.startElement(name, attrs, augs);
228
+
229
+ // element names from xhtml1-strict.dtd
230
+ private static String[][] element_names = {
231
+ {"a", "abbr", "acronym", "address", "area"},
232
+ {"b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button"},
233
+ {"caption", "cite", "code", "col", "colgroup"},
234
+ {"dd", "del", "dfn", "div", "dl", "dt"},
235
+ {"em"},
236
+ {"fieldset", "font", "form", "frame", "frameset"},
237
+ {}, // g
238
+ {"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html"},
239
+ {"i", "iframe", "img", "input", "ins"},
240
+ {}, // j
241
+ {"kbd"},
242
+ {"label", "legend", "li", "link"},
243
+ {"map", "meta"},
244
+ {"noframes", "noscript"},
245
+ {"object", "ol", "optgroup", "option"},
246
+ {"p", "param", "pre"},
247
+ {"q"},
248
+ {}, // r
249
+ {"s", "samp", "script", "select", "small", "span", "strike", "strong", "style", "sub", "sup"},
250
+ {"table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title", "tr", "tt"},
251
+ {"u", "ul"},
252
+ {"var"},
253
+ {}, // w
254
+ {}, // x
255
+ {}, // y
256
+ {} // z
257
+ };
258
+
259
+ private static boolean
260
+ isValid(final String name)
261
+ {
262
+ int index = name.charAt(0) - 97;
263
+ if (index >= element_names.length) { return false; }
264
+ String[] elementNames = element_names[index];
265
+ for (int i = 0; i < elementNames.length; i++) {
266
+ if (name.equals(elementNames[i])) {
267
+ return true;
250
268
  }
269
+ }
270
+ return false;
271
+ }
272
+
273
+ @Override
274
+ public void
275
+ startElement(QName name, XMLAttributes attrs, Augmentations augs) throws XNIException
276
+ {
277
+ if (!isValid(name.rawname)) {
278
+ errorHandler.addError(new Exception("Tag " + name.rawname + " invalid"));
279
+ }
280
+ super.startElement(name, attrs, augs);
251
281
  }
282
+ }
252
283
  }
@@ -4,17 +4,24 @@ import org.xml.sax.ErrorHandler;
4
4
  import org.xml.sax.SAXException;
5
5
  import org.xml.sax.SAXParseException;
6
6
 
7
- public class IgnoreSchemaErrorsErrorHandler implements ErrorHandler {
7
+ public class IgnoreSchemaErrorsErrorHandler implements ErrorHandler
8
+ {
8
9
 
9
- @Override
10
- public void warning(SAXParseException exception) throws SAXException {
11
- }
10
+ @Override
11
+ public void
12
+ warning(SAXParseException exception) throws SAXException
13
+ {
14
+ }
12
15
 
13
- @Override
14
- public void error(SAXParseException exception) throws SAXException {
15
- }
16
+ @Override
17
+ public void
18
+ error(SAXParseException exception) throws SAXException
19
+ {
20
+ }
16
21
 
17
- @Override
18
- public void fatalError(SAXParseException exception) throws SAXException {
19
- }
22
+ @Override
23
+ public void
24
+ fatalError(SAXParseException exception) throws SAXException
25
+ {
26
+ }
20
27
  }