nokogiri 1.11.0.rc1-java → 1.11.2-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/LICENSE.md +1 -1
  5. data/README.md +171 -94
  6. data/ext/java/nokogiri/EncodingHandler.java +78 -59
  7. data/ext/java/nokogiri/HtmlDocument.java +137 -114
  8. data/ext/java/nokogiri/HtmlElementDescription.java +104 -87
  9. data/ext/java/nokogiri/HtmlEntityLookup.java +31 -26
  10. data/ext/java/nokogiri/HtmlSaxParserContext.java +220 -192
  11. data/ext/java/nokogiri/HtmlSaxPushParser.java +164 -139
  12. data/ext/java/nokogiri/NokogiriService.java +597 -526
  13. data/ext/java/nokogiri/XmlAttr.java +120 -96
  14. data/ext/java/nokogiri/XmlAttributeDecl.java +97 -76
  15. data/ext/java/nokogiri/XmlCdata.java +35 -26
  16. data/ext/java/nokogiri/XmlComment.java +48 -37
  17. data/ext/java/nokogiri/XmlDocument.java +642 -540
  18. data/ext/java/nokogiri/XmlDocumentFragment.java +127 -107
  19. data/ext/java/nokogiri/XmlDtd.java +450 -384
  20. data/ext/java/nokogiri/XmlElement.java +25 -18
  21. data/ext/java/nokogiri/XmlElementContent.java +345 -286
  22. data/ext/java/nokogiri/XmlElementDecl.java +126 -95
  23. data/ext/java/nokogiri/XmlEntityDecl.java +121 -97
  24. data/ext/java/nokogiri/XmlEntityReference.java +51 -42
  25. data/ext/java/nokogiri/XmlNamespace.java +177 -145
  26. data/ext/java/nokogiri/XmlNode.java +1843 -1590
  27. data/ext/java/nokogiri/XmlNodeSet.java +361 -299
  28. data/ext/java/nokogiri/XmlProcessingInstruction.java +49 -39
  29. data/ext/java/nokogiri/XmlReader.java +513 -418
  30. data/ext/java/nokogiri/XmlRelaxng.java +92 -72
  31. data/ext/java/nokogiri/XmlSaxParserContext.java +330 -280
  32. data/ext/java/nokogiri/XmlSaxPushParser.java +229 -190
  33. data/ext/java/nokogiri/XmlSchema.java +335 -210
  34. data/ext/java/nokogiri/XmlSyntaxError.java +113 -87
  35. data/ext/java/nokogiri/XmlText.java +57 -46
  36. data/ext/java/nokogiri/XmlXpathContext.java +242 -178
  37. data/ext/java/nokogiri/XsltStylesheet.java +282 -239
  38. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  39. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +203 -160
  40. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  41. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  42. data/ext/java/nokogiri/internals/NokogiriDomParser.java +65 -50
  43. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  44. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +25 -18
  45. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -254
  46. data/ext/java/nokogiri/internals/NokogiriHelpers.java +738 -622
  47. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +186 -143
  48. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +81 -59
  49. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +66 -49
  50. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +86 -69
  51. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +44 -29
  52. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +121 -48
  53. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -22
  54. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +25 -17
  55. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +57 -42
  56. data/ext/java/nokogiri/internals/ParserContext.java +206 -179
  57. data/ext/java/nokogiri/internals/ReaderNode.java +478 -371
  58. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -707
  59. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +28 -19
  60. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  61. data/ext/java/nokogiri/internals/XmlDeclHandler.java +5 -4
  62. data/ext/java/nokogiri/internals/XmlDomParserContext.java +208 -177
  63. data/ext/java/nokogiri/internals/XmlSaxParser.java +24 -17
  64. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  65. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  66. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  67. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  68. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  78. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  81. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  82. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  83. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  84. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  85. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  86. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  87. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  88. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  89. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  90. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  91. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  92. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -570
  93. data/ext/nokogiri/depend +37 -358
  94. data/ext/nokogiri/extconf.rb +585 -374
  95. data/ext/nokogiri/html_document.c +78 -82
  96. data/ext/nokogiri/html_element_description.c +84 -71
  97. data/ext/nokogiri/html_entity_lookup.c +21 -16
  98. data/ext/nokogiri/html_sax_parser_context.c +69 -66
  99. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  100. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  101. data/ext/nokogiri/nokogiri.c +192 -93
  102. data/ext/nokogiri/test_global_handlers.c +40 -0
  103. data/ext/nokogiri/xml_attr.c +15 -15
  104. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  105. data/ext/nokogiri/xml_cdata.c +13 -18
  106. data/ext/nokogiri/xml_comment.c +19 -26
  107. data/ext/nokogiri/xml_document.c +225 -163
  108. data/ext/nokogiri/xml_document_fragment.c +13 -15
  109. data/ext/nokogiri/xml_dtd.c +54 -48
  110. data/ext/nokogiri/xml_element_content.c +30 -27
  111. data/ext/nokogiri/xml_element_decl.c +22 -22
  112. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  113. data/ext/nokogiri/xml_entity_decl.c +32 -30
  114. data/ext/nokogiri/xml_entity_reference.c +16 -18
  115. data/ext/nokogiri/xml_namespace.c +56 -49
  116. data/ext/nokogiri/xml_node.c +338 -286
  117. data/ext/nokogiri/xml_node_set.c +168 -156
  118. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  119. data/ext/nokogiri/xml_reader.c +195 -172
  120. data/ext/nokogiri/xml_relax_ng.c +52 -28
  121. data/ext/nokogiri/xml_sax_parser.c +118 -118
  122. data/ext/nokogiri/xml_sax_parser_context.c +103 -86
  123. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  124. data/ext/nokogiri/xml_schema.c +111 -34
  125. data/ext/nokogiri/xml_syntax_error.c +42 -21
  126. data/ext/nokogiri/xml_text.c +13 -17
  127. data/ext/nokogiri/xml_xpath_context.c +206 -123
  128. data/ext/nokogiri/xslt_stylesheet.c +158 -161
  129. data/lib/nokogiri.rb +4 -8
  130. data/lib/nokogiri/css/parser.rb +62 -62
  131. data/lib/nokogiri/css/parser.y +2 -2
  132. data/lib/nokogiri/css/parser_extras.rb +38 -36
  133. data/lib/nokogiri/css/xpath_visitor.rb +70 -42
  134. data/lib/nokogiri/extension.rb +26 -0
  135. data/lib/nokogiri/html/document.rb +12 -26
  136. data/lib/nokogiri/html/document_fragment.rb +15 -15
  137. data/lib/nokogiri/nokogiri.jar +0 -0
  138. data/lib/nokogiri/version.rb +2 -148
  139. data/lib/nokogiri/version/constant.rb +5 -0
  140. data/lib/nokogiri/version/info.rb +205 -0
  141. data/lib/nokogiri/xml/builder.rb +2 -2
  142. data/lib/nokogiri/xml/document.rb +48 -18
  143. data/lib/nokogiri/xml/document_fragment.rb +4 -6
  144. data/lib/nokogiri/xml/node.rb +599 -279
  145. data/lib/nokogiri/xml/parse_options.rb +6 -0
  146. data/lib/nokogiri/xml/reader.rb +2 -9
  147. data/lib/nokogiri/xml/relax_ng.rb +6 -2
  148. data/lib/nokogiri/xml/schema.rb +12 -4
  149. data/lib/nokogiri/xml/searchable.rb +24 -16
  150. data/lib/nokogiri/xml/xpath.rb +1 -3
  151. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  152. metadata +87 -158
  153. data/ext/nokogiri/html_document.h +0 -10
  154. data/ext/nokogiri/html_element_description.h +0 -10
  155. data/ext/nokogiri/html_entity_lookup.h +0 -8
  156. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  157. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  158. data/ext/nokogiri/nokogiri.h +0 -122
  159. data/ext/nokogiri/xml_attr.h +0 -9
  160. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  161. data/ext/nokogiri/xml_cdata.h +0 -9
  162. data/ext/nokogiri/xml_comment.h +0 -9
  163. data/ext/nokogiri/xml_document.h +0 -23
  164. data/ext/nokogiri/xml_document_fragment.h +0 -10
  165. data/ext/nokogiri/xml_dtd.h +0 -10
  166. data/ext/nokogiri/xml_element_content.h +0 -10
  167. data/ext/nokogiri/xml_element_decl.h +0 -9
  168. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  169. data/ext/nokogiri/xml_entity_decl.h +0 -10
  170. data/ext/nokogiri/xml_entity_reference.h +0 -9
  171. data/ext/nokogiri/xml_io.c +0 -61
  172. data/ext/nokogiri/xml_io.h +0 -11
  173. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  174. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  175. data/ext/nokogiri/xml_namespace.h +0 -14
  176. data/ext/nokogiri/xml_node.h +0 -13
  177. data/ext/nokogiri/xml_node_set.h +0 -12
  178. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  179. data/ext/nokogiri/xml_reader.h +0 -10
  180. data/ext/nokogiri/xml_relax_ng.h +0 -9
  181. data/ext/nokogiri/xml_sax_parser.h +0 -39
  182. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  183. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  184. data/ext/nokogiri/xml_schema.h +0 -9
  185. data/ext/nokogiri/xml_syntax_error.h +0 -13
  186. data/ext/nokogiri/xml_text.h +0 -9
  187. data/ext/nokogiri/xml_xpath_context.h +0 -10
  188. data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -1,9 +1,12 @@
1
1
  package nokogiri.internals;
2
2
 
3
3
  @SuppressWarnings("serial")
4
- public class ClosedStreamException extends Exception {
4
+ public class ClosedStreamException extends Exception
5
+ {
5
6
 
6
- public ClosedStreamException(String message) {
7
+ public
8
+ ClosedStreamException(String message)
9
+ {
7
10
  super(message);
8
11
  }
9
12
 
@@ -17,10 +17,10 @@
17
17
  * distribute, sublicense, and/or sell copies of the Software, and to
18
18
  * permit persons to whom the Software is furnished to do so, subject to
19
19
  * the following conditions:
20
- *
20
+ *
21
21
  * The above copyright notice and this permission notice shall be
22
22
  * included in all copies or substantial portions of the Software.
23
- *
23
+ *
24
24
  * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
25
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
26
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -35,9 +35,11 @@ package nokogiri.internals;
35
35
  import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
36
36
  import static nokogiri.internals.NokogiriHelpers.isNamespace;
37
37
  import static nokogiri.internals.NokogiriHelpers.stringOrNil;
38
+
38
39
  import nokogiri.HtmlDocument;
39
40
  import nokogiri.NokogiriService;
40
41
  import nokogiri.XmlDocument;
42
+ import nokogiri.XmlSyntaxError;
41
43
 
42
44
  import org.apache.xerces.xni.Augmentations;
43
45
  import org.apache.xerces.xni.QName;
@@ -50,6 +52,7 @@ import org.cyberneko.html.filters.DefaultFilter;
50
52
  import org.jruby.Ruby;
51
53
  import org.jruby.RubyClass;
52
54
  import org.jruby.runtime.ThreadContext;
55
+ import org.jruby.runtime.Helpers;
53
56
  import org.jruby.runtime.builtin.IRubyObject;
54
57
  import org.w3c.dom.Document;
55
58
  import org.w3c.dom.NamedNodeMap;
@@ -58,183 +61,223 @@ import org.w3c.dom.NodeList;
58
61
 
59
62
  /**
60
63
  * Parser for HtmlDocument. This class actually parses HtmlDocument using NekoHtml.
61
- *
64
+ *
62
65
  * @author sergio
63
66
  * @author Patrick Mahoney <pat@polycrystal.org>
64
67
  * @author Yoko Harada <yokolet@gmail.com>
65
68
  */
66
- public class HtmlDomParserContext extends XmlDomParserContext {
69
+ public class HtmlDomParserContext extends XmlDomParserContext
70
+ {
67
71
 
68
- public HtmlDomParserContext(Ruby runtime, IRubyObject options) {
69
- super(runtime, options);
70
- }
71
-
72
- public HtmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options) {
73
- super(runtime, encoding, options);
74
- }
72
+ public
73
+ HtmlDomParserContext(Ruby runtime, IRubyObject options)
74
+ {
75
+ this(runtime, runtime.getNil(), options);
76
+ }
75
77
 
76
- @Override
77
- protected void initErrorHandler() {
78
- if (options.strict) {
79
- errorHandler = new NokogiriStrictErrorHandler(options.noError, options.noWarning);
80
- } else {
81
- errorHandler = new NokogiriNonStrictErrorHandler4NekoHtml(options.noError, options.noWarning);
82
- }
83
- }
78
+ public
79
+ HtmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options)
80
+ {
81
+ super(runtime, encoding, options);
82
+ java_encoding = NokogiriHelpers.getValidEncoding(encoding);
83
+ }
84
84
 
85
- @Override
86
- protected void initParser(Ruby runtime) {
87
- XMLParserConfiguration config = new HTMLConfiguration();
88
- //XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter();
89
- XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler);
90
- //XMLDocumentFilter[] filters = { removeNSAttrsFilter, elementValidityCheckFilter};
91
- XMLDocumentFilter[] filters = { elementValidityCheckFilter};
92
-
93
- config.setErrorHandler(this.errorHandler);
94
-
95
- parser = new NokogiriDomParser(config);
96
-
97
- // see http://nekohtml.sourceforge.net/settings.html for details
98
- setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
99
- setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
100
- setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
101
- setProperty("http://cyberneko.org/html/properties/filters", filters);
102
- setFeature("http://cyberneko.org/html/features/report-errors", true);
103
- setFeature("http://xml.org/sax/features/namespaces", false);
104
- }
105
-
106
- @Override
107
- public void setEncoding(String encoding) {
108
- super.setEncoding(encoding);
85
+ @Override
86
+ protected void
87
+ initParser(Ruby runtime)
88
+ {
89
+ XMLParserConfiguration config = new HTMLConfiguration();
90
+ //XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter();
91
+ XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler);
92
+ //XMLDocumentFilter[] filters = { removeNSAttrsFilter, elementValidityCheckFilter};
93
+ XMLDocumentFilter[] filters = { elementValidityCheckFilter};
94
+
95
+ config.setErrorHandler(this.errorHandler);
96
+
97
+ parser = new NokogiriDomParser(config);
98
+
99
+ // see http://nekohtml.sourceforge.net/settings.html for details
100
+ setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
101
+ setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
102
+ setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
103
+ setProperty("http://cyberneko.org/html/properties/filters", filters);
104
+ setFeature("http://cyberneko.org/html/features/report-errors", true);
105
+ setFeature("http://xml.org/sax/features/namespaces", false);
106
+ }
107
+
108
+ @Override
109
+ public void
110
+ setEncoding(String encoding)
111
+ {
112
+ super.setEncoding(encoding);
113
+ }
114
+
115
+ /**
116
+ * Enable NekoHTML feature for balancing tags in a document fragment.
117
+ *
118
+ * This method is used in XmlNode#in_context method.
119
+ */
120
+ public void
121
+ enableDocumentFragment()
122
+ {
123
+ setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
124
+ }
125
+
126
+ @Override
127
+ public XmlDocument
128
+ parse(ThreadContext context, RubyClass klass, IRubyObject url)
129
+ {
130
+ XmlDocument xmlDoc = super.parse(context, klass, url);
131
+
132
+ // let's be consistent in how we handle RECOVER and NORECOVER (a.k.a. STRICT)
133
+ // https://github.com/sparklemotion/nokogiri/issues/2130
134
+ if (!options.recover && errorHandler.getErrors().size() > 0) {
135
+ XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
136
+ String exceptionMsg = String.format("%s: '%s'",
137
+ "Parser without recover option encountered error or warning",
138
+ errorHandler.getErrors().get(0));
139
+ xmlSyntaxError.setException(new Exception(exceptionMsg));
140
+ throw xmlSyntaxError.toThrowable();
109
141
  }
110
142
 
111
- /**
112
- * Enable NekoHTML feature for balancing tags in a document fragment.
113
- *
114
- * This method is used in XmlNode#in_context method.
115
- */
116
- public void enableDocumentFragment() {
117
- setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
143
+ return xmlDoc;
144
+ }
145
+
146
+ @Override
147
+ protected XmlDocument
148
+ wrapDocument(ThreadContext context, RubyClass klass, Document document)
149
+ {
150
+ HtmlDocument htmlDocument = new HtmlDocument(context.runtime, klass, document);
151
+ htmlDocument.setDocumentNode(context.runtime, document);
152
+ Helpers.invoke(context, htmlDocument, "initialize");
153
+
154
+ if (ruby_encoding.isNil()) {
155
+ // ruby_encoding might have detected by HtmlDocument::EncodingReader
156
+ if (detected_encoding != null && !detected_encoding.isNil()) {
157
+ ruby_encoding = detected_encoding;
158
+ } else {
159
+ // no encoding given & no encoding detected, then try to get it
160
+ String charset = tryGetCharsetFromHtml5MetaTag(document);
161
+ ruby_encoding = stringOrNil(context.runtime, charset);
162
+ }
118
163
  }
164
+ htmlDocument.setEncoding(ruby_encoding);
165
+ htmlDocument.setParsedEncoding(java_encoding);
166
+ return htmlDocument;
167
+ }
119
168
 
120
- @Override
121
- protected XmlDocument wrapDocument(ThreadContext context, RubyClass klass, Document document) {
122
- HtmlDocument htmlDocument = new HtmlDocument(context.runtime, klass, document);
123
- htmlDocument.setDocumentNode(context.runtime, document);
124
- if (ruby_encoding.isNil()) {
125
- // ruby_encoding might have detected by HtmlDocument::EncodingReader
126
- if (detected_encoding != null && !detected_encoding.isNil()) {
127
- ruby_encoding = detected_encoding;
128
- } else {
129
- // no encoding given & no encoding detected, then try to get it
130
- String charset = tryGetCharsetFromHtml5MetaTag(document);
131
- ruby_encoding = stringOrNil(context.runtime, charset);
169
+ // NekoHtml doesn't understand HTML5 meta tag format. This fails to detect charset
170
+ // from an HTML5 style meta tag. Luckily, the meta tag and charset exists in DOM tree
171
+ // so, this method attempts to find the charset.
172
+ private static String
173
+ tryGetCharsetFromHtml5MetaTag(Document document)
174
+ {
175
+ if (!"html".equalsIgnoreCase(document.getDocumentElement().getNodeName())) { return null; }
176
+ NodeList list = document.getDocumentElement().getChildNodes();
177
+ Node item;
178
+ for (int i = 0; i < list.getLength(); i++) {
179
+ if ("head".equalsIgnoreCase((item = list.item(i)).getNodeName())) {
180
+ NodeList headers = item.getChildNodes();
181
+ for (int j = 0; j < headers.getLength(); j++) {
182
+ if ("meta".equalsIgnoreCase((item = headers.item(j)).getNodeName())) {
183
+ NamedNodeMap nodeMap = item.getAttributes();
184
+ for (int k = 0; k < nodeMap.getLength(); k++) {
185
+ if ("charset".equalsIgnoreCase((item = nodeMap.item(k)).getNodeName())) {
186
+ return item.getNodeValue();
187
+ }
132
188
  }
189
+ }
133
190
  }
134
- htmlDocument.setEncoding(ruby_encoding);
135
- htmlDocument.setParsedEncoding(java_encoding);
136
- return htmlDocument;
191
+ }
137
192
  }
138
-
139
- // NekoHtml doesn't understand HTML5 meta tag format. This fails to detect charset
140
- // from an HTML5 style meta tag. Luckily, the meta tag and charset exists in DOM tree
141
- // so, this method attempts to find the charset.
142
- private static String tryGetCharsetFromHtml5MetaTag(Document document) {
143
- if (!"html".equalsIgnoreCase(document.getDocumentElement().getNodeName())) return null;
144
- NodeList list = document.getDocumentElement().getChildNodes(); Node item;
145
- for (int i = 0; i < list.getLength(); i++) {
146
- if ("head".equalsIgnoreCase((item = list.item(i)).getNodeName())) {
147
- NodeList headers = item.getChildNodes();
148
- for (int j = 0; j < headers.getLength(); j++) {
149
- if ("meta".equalsIgnoreCase((item = headers.item(j)).getNodeName())) {
150
- NamedNodeMap nodeMap = item.getAttributes();
151
- for (int k = 0; k < nodeMap.getLength(); k++) {
152
- if ("charset".equalsIgnoreCase((item = nodeMap.item(k)).getNodeName())) {
153
- return item.getNodeValue();
154
- }
155
- }
156
- }
157
- }
158
- }
193
+ return null;
194
+ }
195
+
196
+ /**
197
+ * Filter to strip out attributes that pertain to XML namespaces.
198
+ */
199
+ public static class RemoveNSAttrsFilter extends DefaultFilter
200
+ {
201
+ @Override
202
+ public void
203
+ startElement(QName element, XMLAttributes attrs,
204
+ Augmentations augs) throws XNIException
205
+ {
206
+ int i;
207
+ for (i = 0; i < attrs.getLength(); ++i) {
208
+ if (isNamespace(attrs.getQName(i))) {
209
+ attrs.removeAttributeAt(i);
210
+ --i;
159
211
  }
160
- return null;
212
+ }
213
+
214
+ element.uri = null;
215
+ super.startElement(element, attrs, augs);
161
216
  }
217
+ }
162
218
 
163
- /**
164
- * Filter to strip out attributes that pertain to XML namespaces.
165
- */
166
- public static class RemoveNSAttrsFilter extends DefaultFilter {
167
- @Override
168
- public void startElement(QName element, XMLAttributes attrs,
169
- Augmentations augs) throws XNIException {
170
- int i;
171
- for (i = 0; i < attrs.getLength(); ++i) {
172
- if (isNamespace(attrs.getQName(i))) {
173
- attrs.removeAttributeAt(i);
174
- --i;
175
- }
176
- }
219
+ public static class ElementValidityCheckFilter extends DefaultFilter
220
+ {
221
+ private NokogiriErrorHandler errorHandler;
177
222
 
178
- element.uri = null;
179
- super.startElement(element, attrs, augs);
180
- }
223
+ private
224
+ ElementValidityCheckFilter(NokogiriErrorHandler errorHandler)
225
+ {
226
+ this.errorHandler = errorHandler;
181
227
  }
182
-
183
- public static class ElementValidityCheckFilter extends DefaultFilter {
184
- private NokogiriErrorHandler errorHandler;
185
-
186
- private ElementValidityCheckFilter(NokogiriErrorHandler errorHandler) {
187
- this.errorHandler = errorHandler;
188
- }
189
-
190
- // element names from xhtml1-strict.dtd
191
- private static String[][] element_names = {
192
- {"a", "abbr", "acronym", "address", "area"},
193
- {"b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button"},
194
- {"caption", "cite", "code", "col", "colgroup"},
195
- {"dd", "del", "dfn", "div", "dl", "dt"},
196
- {"em"},
197
- {"fieldset", "font", "form", "frame", "frameset"},
198
- {}, // g
199
- {"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html"},
200
- {"i", "iframe", "img", "input", "ins"},
201
- {}, // j
202
- {"kbd"},
203
- {"label", "legend", "li", "link"},
204
- {"map", "meta"},
205
- {"noframes", "noscript"},
206
- {"object", "ol", "optgroup", "option"},
207
- {"p", "param", "pre"},
208
- {"q"},
209
- {}, // r
210
- {"s", "samp", "script", "select", "small", "span", "strike", "strong", "style", "sub", "sup"},
211
- {"table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title", "tr", "tt"},
212
- {"u", "ul"},
213
- {"var"},
214
- {}, // w
215
- {}, // x
216
- {}, // y
217
- {} // z
218
- };
219
-
220
- private static boolean isValid(final String name) {
221
- int index = name.charAt(0) - 97;
222
- if (index >= element_names.length) return false;
223
- String[] elementNames = element_names[index];
224
- for (int i=0; i<elementNames.length; i++) {
225
- if (name.equals(elementNames[i])) {
226
- return true;
227
- }
228
- }
229
- return false;
230
- }
231
-
232
- @Override
233
- public void startElement(QName name, XMLAttributes attrs, Augmentations augs) throws XNIException {
234
- if (!isValid(name.rawname)) {
235
- errorHandler.addError(new Exception("Tag " + name.rawname + " invalid"));
236
- }
237
- super.startElement(name, attrs, augs);
228
+
229
+ // element names from xhtml1-strict.dtd
230
+ private static String[][] element_names = {
231
+ {"a", "abbr", "acronym", "address", "area"},
232
+ {"b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button"},
233
+ {"caption", "cite", "code", "col", "colgroup"},
234
+ {"dd", "del", "dfn", "div", "dl", "dt"},
235
+ {"em"},
236
+ {"fieldset", "font", "form", "frame", "frameset"},
237
+ {}, // g
238
+ {"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html"},
239
+ {"i", "iframe", "img", "input", "ins"},
240
+ {}, // j
241
+ {"kbd"},
242
+ {"label", "legend", "li", "link"},
243
+ {"map", "meta"},
244
+ {"noframes", "noscript"},
245
+ {"object", "ol", "optgroup", "option"},
246
+ {"p", "param", "pre"},
247
+ {"q"},
248
+ {}, // r
249
+ {"s", "samp", "script", "select", "small", "span", "strike", "strong", "style", "sub", "sup"},
250
+ {"table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title", "tr", "tt"},
251
+ {"u", "ul"},
252
+ {"var"},
253
+ {}, // w
254
+ {}, // x
255
+ {}, // y
256
+ {} // z
257
+ };
258
+
259
+ private static boolean
260
+ isValid(final String name)
261
+ {
262
+ int index = name.charAt(0) - 97;
263
+ if (index >= element_names.length) { return false; }
264
+ String[] elementNames = element_names[index];
265
+ for (int i = 0; i < elementNames.length; i++) {
266
+ if (name.equals(elementNames[i])) {
267
+ return true;
238
268
  }
269
+ }
270
+ return false;
271
+ }
272
+
273
+ @Override
274
+ public void
275
+ startElement(QName name, XMLAttributes attrs, Augmentations augs) throws XNIException
276
+ {
277
+ if (!isValid(name.rawname)) {
278
+ errorHandler.addError(new Exception("Tag " + name.rawname + " invalid"));
279
+ }
280
+ super.startElement(name, attrs, augs);
239
281
  }
282
+ }
240
283
  }
@@ -4,17 +4,24 @@ import org.xml.sax.ErrorHandler;
4
4
  import org.xml.sax.SAXException;
5
5
  import org.xml.sax.SAXParseException;
6
6
 
7
- public class IgnoreSchemaErrorsErrorHandler implements ErrorHandler {
7
+ public class IgnoreSchemaErrorsErrorHandler implements ErrorHandler
8
+ {
8
9
 
9
- @Override
10
- public void warning(SAXParseException exception) throws SAXException {
11
- }
10
+ @Override
11
+ public void
12
+ warning(SAXParseException exception) throws SAXException
13
+ {
14
+ }
12
15
 
13
- @Override
14
- public void error(SAXParseException exception) throws SAXException {
15
- }
16
+ @Override
17
+ public void
18
+ error(SAXParseException exception) throws SAXException
19
+ {
20
+ }
16
21
 
17
- @Override
18
- public void fatalError(SAXParseException exception) throws SAXException {
19
- }
22
+ @Override
23
+ public void
24
+ fatalError(SAXParseException exception) throws SAXException
25
+ {
26
+ }
20
27
  }