nokogiri 1.11.0.rc2-java → 1.11.3-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/LICENSE.md +1 -1
  5. data/README.md +171 -94
  6. data/ext/java/nokogiri/EncodingHandler.java +76 -89
  7. data/ext/java/nokogiri/HtmlDocument.java +135 -144
  8. data/ext/java/nokogiri/HtmlElementDescription.java +102 -117
  9. data/ext/java/nokogiri/HtmlEntityLookup.java +33 -60
  10. data/ext/java/nokogiri/HtmlSaxParserContext.java +218 -222
  11. data/ext/java/nokogiri/HtmlSaxPushParser.java +162 -169
  12. data/ext/java/nokogiri/NokogiriService.java +595 -556
  13. data/ext/java/nokogiri/XmlAttr.java +118 -126
  14. data/ext/java/nokogiri/XmlAttributeDecl.java +95 -106
  15. data/ext/java/nokogiri/XmlCdata.java +35 -58
  16. data/ext/java/nokogiri/XmlComment.java +46 -67
  17. data/ext/java/nokogiri/XmlDocument.java +645 -572
  18. data/ext/java/nokogiri/XmlDocumentFragment.java +125 -137
  19. data/ext/java/nokogiri/XmlDtd.java +448 -414
  20. data/ext/java/nokogiri/XmlElement.java +23 -48
  21. data/ext/java/nokogiri/XmlElementContent.java +343 -316
  22. data/ext/java/nokogiri/XmlElementDecl.java +124 -125
  23. data/ext/java/nokogiri/XmlEntityDecl.java +119 -127
  24. data/ext/java/nokogiri/XmlEntityReference.java +49 -72
  25. data/ext/java/nokogiri/XmlNamespace.java +175 -175
  26. data/ext/java/nokogiri/XmlNode.java +1843 -1622
  27. data/ext/java/nokogiri/XmlNodeSet.java +361 -331
  28. data/ext/java/nokogiri/XmlProcessingInstruction.java +47 -69
  29. data/ext/java/nokogiri/XmlReader.java +513 -450
  30. data/ext/java/nokogiri/XmlRelaxng.java +89 -101
  31. data/ext/java/nokogiri/XmlSaxParserContext.java +328 -310
  32. data/ext/java/nokogiri/XmlSaxPushParser.java +227 -220
  33. data/ext/java/nokogiri/XmlSchema.java +335 -242
  34. data/ext/java/nokogiri/XmlSyntaxError.java +113 -119
  35. data/ext/java/nokogiri/XmlText.java +55 -76
  36. data/ext/java/nokogiri/XmlXpathContext.java +242 -210
  37. data/ext/java/nokogiri/XsltStylesheet.java +280 -269
  38. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  39. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +201 -190
  40. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  41. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  42. data/ext/java/nokogiri/internals/NokogiriDomParser.java +63 -80
  43. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  44. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +27 -52
  45. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -286
  46. data/ext/java/nokogiri/internals/NokogiriHelpers.java +736 -652
  47. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +184 -173
  48. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +79 -89
  49. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +64 -79
  50. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +84 -99
  51. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +48 -65
  52. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +119 -78
  53. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -54
  54. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +23 -46
  55. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +55 -72
  56. data/ext/java/nokogiri/internals/ParserContext.java +206 -211
  57. data/ext/java/nokogiri/internals/ReaderNode.java +478 -403
  58. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -739
  59. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +31 -54
  60. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  61. data/ext/java/nokogiri/internals/XmlDeclHandler.java +3 -34
  62. data/ext/java/nokogiri/internals/XmlDomParserContext.java +206 -207
  63. data/ext/java/nokogiri/internals/XmlSaxParser.java +22 -47
  64. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  65. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  66. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  67. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  68. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  78. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  81. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  82. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  83. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  84. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  85. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  86. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  87. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  88. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  89. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  90. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  91. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  92. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -574
  93. data/ext/nokogiri/depend +37 -358
  94. data/ext/nokogiri/extconf.rb +581 -374
  95. data/ext/nokogiri/html_document.c +78 -82
  96. data/ext/nokogiri/html_element_description.c +84 -71
  97. data/ext/nokogiri/html_entity_lookup.c +21 -16
  98. data/ext/nokogiri/html_sax_parser_context.c +69 -66
  99. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  100. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  101. data/ext/nokogiri/nokogiri.c +192 -93
  102. data/ext/nokogiri/test_global_handlers.c +40 -0
  103. data/ext/nokogiri/xml_attr.c +15 -15
  104. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  105. data/ext/nokogiri/xml_cdata.c +13 -18
  106. data/ext/nokogiri/xml_comment.c +19 -26
  107. data/ext/nokogiri/xml_document.c +250 -187
  108. data/ext/nokogiri/xml_document_fragment.c +13 -15
  109. data/ext/nokogiri/xml_dtd.c +54 -48
  110. data/ext/nokogiri/xml_element_content.c +30 -27
  111. data/ext/nokogiri/xml_element_decl.c +22 -22
  112. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  113. data/ext/nokogiri/xml_entity_decl.c +32 -30
  114. data/ext/nokogiri/xml_entity_reference.c +16 -18
  115. data/ext/nokogiri/xml_namespace.c +56 -49
  116. data/ext/nokogiri/xml_node.c +371 -320
  117. data/ext/nokogiri/xml_node_set.c +168 -156
  118. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  119. data/ext/nokogiri/xml_reader.c +191 -157
  120. data/ext/nokogiri/xml_relax_ng.c +52 -28
  121. data/ext/nokogiri/xml_sax_parser.c +118 -118
  122. data/ext/nokogiri/xml_sax_parser_context.c +103 -86
  123. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  124. data/ext/nokogiri/xml_schema.c +95 -47
  125. data/ext/nokogiri/xml_syntax_error.c +42 -21
  126. data/ext/nokogiri/xml_text.c +13 -17
  127. data/ext/nokogiri/xml_xpath_context.c +206 -123
  128. data/ext/nokogiri/xslt_stylesheet.c +158 -161
  129. data/lib/nokogiri.rb +4 -8
  130. data/lib/nokogiri/css/parser.rb +62 -62
  131. data/lib/nokogiri/css/parser.y +2 -2
  132. data/lib/nokogiri/css/xpath_visitor.rb +70 -42
  133. data/lib/nokogiri/extension.rb +26 -0
  134. data/lib/nokogiri/html/document.rb +12 -26
  135. data/lib/nokogiri/html/document_fragment.rb +15 -15
  136. data/lib/nokogiri/nokogiri.jar +0 -0
  137. data/lib/nokogiri/version.rb +2 -148
  138. data/lib/nokogiri/version/constant.rb +5 -0
  139. data/lib/nokogiri/version/info.rb +205 -0
  140. data/lib/nokogiri/xml/builder.rb +2 -2
  141. data/lib/nokogiri/xml/document.rb +91 -35
  142. data/lib/nokogiri/xml/document_fragment.rb +4 -6
  143. data/lib/nokogiri/xml/node.rb +89 -69
  144. data/lib/nokogiri/xml/parse_options.rb +6 -0
  145. data/lib/nokogiri/xml/reader.rb +2 -9
  146. data/lib/nokogiri/xml/relax_ng.rb +6 -2
  147. data/lib/nokogiri/xml/schema.rb +12 -4
  148. data/lib/nokogiri/xml/searchable.rb +3 -1
  149. data/lib/nokogiri/xml/xpath.rb +1 -3
  150. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  151. metadata +87 -164
  152. data/ext/nokogiri/html_document.h +0 -10
  153. data/ext/nokogiri/html_element_description.h +0 -10
  154. data/ext/nokogiri/html_entity_lookup.h +0 -8
  155. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  156. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  157. data/ext/nokogiri/nokogiri.h +0 -134
  158. data/ext/nokogiri/xml_attr.h +0 -9
  159. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  160. data/ext/nokogiri/xml_cdata.h +0 -9
  161. data/ext/nokogiri/xml_comment.h +0 -9
  162. data/ext/nokogiri/xml_document.h +0 -23
  163. data/ext/nokogiri/xml_document_fragment.h +0 -10
  164. data/ext/nokogiri/xml_dtd.h +0 -10
  165. data/ext/nokogiri/xml_element_content.h +0 -10
  166. data/ext/nokogiri/xml_element_decl.h +0 -9
  167. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  168. data/ext/nokogiri/xml_entity_decl.h +0 -10
  169. data/ext/nokogiri/xml_entity_reference.h +0 -9
  170. data/ext/nokogiri/xml_io.c +0 -61
  171. data/ext/nokogiri/xml_io.h +0 -11
  172. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  173. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  174. data/ext/nokogiri/xml_namespace.h +0 -14
  175. data/ext/nokogiri/xml_node.h +0 -13
  176. data/ext/nokogiri/xml_node_set.h +0 -12
  177. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  178. data/ext/nokogiri/xml_reader.h +0 -10
  179. data/ext/nokogiri/xml_relax_ng.h +0 -9
  180. data/ext/nokogiri/xml_sax_parser.h +0 -39
  181. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  182. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  183. data/ext/nokogiri/xml_schema.h +0 -9
  184. data/ext/nokogiri/xml_syntax_error.h +0 -13
  185. data/ext/nokogiri/xml_text.h +0 -9
  186. data/ext/nokogiri/xml_xpath_context.h +0 -10
  187. data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -1,9 +1,12 @@
1
1
  package nokogiri.internals;
2
2
 
3
3
  @SuppressWarnings("serial")
4
- public class ClosedStreamException extends Exception {
4
+ public class ClosedStreamException extends Exception
5
+ {
5
6
 
6
- public ClosedStreamException(String message) {
7
+ public
8
+ ClosedStreamException(String message)
9
+ {
7
10
  super(message);
8
11
  }
9
12
 
@@ -1,43 +1,13 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2012:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
1
  package nokogiri.internals;
34
2
 
35
3
  import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
36
4
  import static nokogiri.internals.NokogiriHelpers.isNamespace;
37
5
  import static nokogiri.internals.NokogiriHelpers.stringOrNil;
6
+
38
7
  import nokogiri.HtmlDocument;
39
8
  import nokogiri.NokogiriService;
40
9
  import nokogiri.XmlDocument;
10
+ import nokogiri.XmlSyntaxError;
41
11
 
42
12
  import org.apache.xerces.xni.Augmentations;
43
13
  import org.apache.xerces.xni.QName;
@@ -50,6 +20,7 @@ import org.cyberneko.html.filters.DefaultFilter;
50
20
  import org.jruby.Ruby;
51
21
  import org.jruby.RubyClass;
52
22
  import org.jruby.runtime.ThreadContext;
23
+ import org.jruby.runtime.Helpers;
53
24
  import org.jruby.runtime.builtin.IRubyObject;
54
25
  import org.w3c.dom.Document;
55
26
  import org.w3c.dom.NamedNodeMap;
@@ -58,183 +29,223 @@ import org.w3c.dom.NodeList;
58
29
 
59
30
  /**
60
31
  * Parser for HtmlDocument. This class actually parses HtmlDocument using NekoHtml.
61
- *
32
+ *
62
33
  * @author sergio
63
34
  * @author Patrick Mahoney <pat@polycrystal.org>
64
35
  * @author Yoko Harada <yokolet@gmail.com>
65
36
  */
66
- public class HtmlDomParserContext extends XmlDomParserContext {
37
+ public class HtmlDomParserContext extends XmlDomParserContext
38
+ {
67
39
 
68
- public HtmlDomParserContext(Ruby runtime, IRubyObject options) {
69
- super(runtime, options);
70
- }
71
-
72
- public HtmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options) {
73
- super(runtime, encoding, options);
74
- }
40
+ public
41
+ HtmlDomParserContext(Ruby runtime, IRubyObject options)
42
+ {
43
+ this(runtime, runtime.getNil(), options);
44
+ }
75
45
 
76
- @Override
77
- protected void initErrorHandler() {
78
- if (options.strict) {
79
- errorHandler = new NokogiriStrictErrorHandler(options.noError, options.noWarning);
80
- } else {
81
- errorHandler = new NokogiriNonStrictErrorHandler4NekoHtml(options.noError, options.noWarning);
82
- }
83
- }
46
+ public
47
+ HtmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options)
48
+ {
49
+ super(runtime, encoding, options);
50
+ java_encoding = NokogiriHelpers.getValidEncoding(encoding);
51
+ }
84
52
 
85
- @Override
86
- protected void initParser(Ruby runtime) {
87
- XMLParserConfiguration config = new HTMLConfiguration();
88
- //XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter();
89
- XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler);
90
- //XMLDocumentFilter[] filters = { removeNSAttrsFilter, elementValidityCheckFilter};
91
- XMLDocumentFilter[] filters = { elementValidityCheckFilter};
92
-
93
- config.setErrorHandler(this.errorHandler);
94
-
95
- parser = new NokogiriDomParser(config);
96
-
97
- // see http://nekohtml.sourceforge.net/settings.html for details
98
- setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
99
- setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
100
- setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
101
- setProperty("http://cyberneko.org/html/properties/filters", filters);
102
- setFeature("http://cyberneko.org/html/features/report-errors", true);
103
- setFeature("http://xml.org/sax/features/namespaces", false);
104
- }
105
-
106
- @Override
107
- public void setEncoding(String encoding) {
108
- super.setEncoding(encoding);
53
+ @Override
54
+ protected void
55
+ initParser(Ruby runtime)
56
+ {
57
+ XMLParserConfiguration config = new HTMLConfiguration();
58
+ //XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter();
59
+ XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler);
60
+ //XMLDocumentFilter[] filters = { removeNSAttrsFilter, elementValidityCheckFilter};
61
+ XMLDocumentFilter[] filters = { elementValidityCheckFilter};
62
+
63
+ config.setErrorHandler(this.errorHandler);
64
+
65
+ parser = new NokogiriDomParser(config);
66
+
67
+ // see http://nekohtml.sourceforge.net/settings.html for details
68
+ setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
69
+ setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
70
+ setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
71
+ setProperty("http://cyberneko.org/html/properties/filters", filters);
72
+ setFeature("http://cyberneko.org/html/features/report-errors", true);
73
+ setFeature("http://xml.org/sax/features/namespaces", false);
74
+ }
75
+
76
+ @Override
77
+ public void
78
+ setEncoding(String encoding)
79
+ {
80
+ super.setEncoding(encoding);
81
+ }
82
+
83
+ /**
84
+ * Enable NekoHTML feature for balancing tags in a document fragment.
85
+ *
86
+ * This method is used in XmlNode#in_context method.
87
+ */
88
+ public void
89
+ enableDocumentFragment()
90
+ {
91
+ setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
92
+ }
93
+
94
+ @Override
95
+ public XmlDocument
96
+ parse(ThreadContext context, RubyClass klass, IRubyObject url)
97
+ {
98
+ XmlDocument xmlDoc = super.parse(context, klass, url);
99
+
100
+ // let's be consistent in how we handle RECOVER and NORECOVER (a.k.a. STRICT)
101
+ // https://github.com/sparklemotion/nokogiri/issues/2130
102
+ if (!options.recover && errorHandler.getErrors().size() > 0) {
103
+ XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
104
+ String exceptionMsg = String.format("%s: '%s'",
105
+ "Parser without recover option encountered error or warning",
106
+ errorHandler.getErrors().get(0));
107
+ xmlSyntaxError.setException(new Exception(exceptionMsg));
108
+ throw xmlSyntaxError.toThrowable();
109
109
  }
110
110
 
111
- /**
112
- * Enable NekoHTML feature for balancing tags in a document fragment.
113
- *
114
- * This method is used in XmlNode#in_context method.
115
- */
116
- public void enableDocumentFragment() {
117
- setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
111
+ return xmlDoc;
112
+ }
113
+
114
+ @Override
115
+ protected XmlDocument
116
+ wrapDocument(ThreadContext context, RubyClass klass, Document document)
117
+ {
118
+ HtmlDocument htmlDocument = new HtmlDocument(context.runtime, klass, document);
119
+ htmlDocument.setDocumentNode(context.runtime, document);
120
+ Helpers.invoke(context, htmlDocument, "initialize");
121
+
122
+ if (ruby_encoding.isNil()) {
123
+ // ruby_encoding might have detected by HtmlDocument::EncodingReader
124
+ if (detected_encoding != null && !detected_encoding.isNil()) {
125
+ ruby_encoding = detected_encoding;
126
+ } else {
127
+ // no encoding given & no encoding detected, then try to get it
128
+ String charset = tryGetCharsetFromHtml5MetaTag(document);
129
+ ruby_encoding = stringOrNil(context.runtime, charset);
130
+ }
118
131
  }
132
+ htmlDocument.setEncoding(ruby_encoding);
133
+ htmlDocument.setParsedEncoding(java_encoding);
134
+ return htmlDocument;
135
+ }
119
136
 
120
- @Override
121
- protected XmlDocument wrapDocument(ThreadContext context, RubyClass klass, Document document) {
122
- HtmlDocument htmlDocument = new HtmlDocument(context.runtime, klass, document);
123
- htmlDocument.setDocumentNode(context.runtime, document);
124
- if (ruby_encoding.isNil()) {
125
- // ruby_encoding might have detected by HtmlDocument::EncodingReader
126
- if (detected_encoding != null && !detected_encoding.isNil()) {
127
- ruby_encoding = detected_encoding;
128
- } else {
129
- // no encoding given & no encoding detected, then try to get it
130
- String charset = tryGetCharsetFromHtml5MetaTag(document);
131
- ruby_encoding = stringOrNil(context.runtime, charset);
137
+ // NekoHtml doesn't understand HTML5 meta tag format. This fails to detect charset
138
+ // from an HTML5 style meta tag. Luckily, the meta tag and charset exists in DOM tree
139
+ // so, this method attempts to find the charset.
140
+ private static String
141
+ tryGetCharsetFromHtml5MetaTag(Document document)
142
+ {
143
+ if (!"html".equalsIgnoreCase(document.getDocumentElement().getNodeName())) { return null; }
144
+ NodeList list = document.getDocumentElement().getChildNodes();
145
+ Node item;
146
+ for (int i = 0; i < list.getLength(); i++) {
147
+ if ("head".equalsIgnoreCase((item = list.item(i)).getNodeName())) {
148
+ NodeList headers = item.getChildNodes();
149
+ for (int j = 0; j < headers.getLength(); j++) {
150
+ if ("meta".equalsIgnoreCase((item = headers.item(j)).getNodeName())) {
151
+ NamedNodeMap nodeMap = item.getAttributes();
152
+ for (int k = 0; k < nodeMap.getLength(); k++) {
153
+ if ("charset".equalsIgnoreCase((item = nodeMap.item(k)).getNodeName())) {
154
+ return item.getNodeValue();
155
+ }
132
156
  }
157
+ }
133
158
  }
134
- htmlDocument.setEncoding(ruby_encoding);
135
- htmlDocument.setParsedEncoding(java_encoding);
136
- return htmlDocument;
159
+ }
137
160
  }
138
-
139
- // NekoHtml doesn't understand HTML5 meta tag format. This fails to detect charset
140
- // from an HTML5 style meta tag. Luckily, the meta tag and charset exists in DOM tree
141
- // so, this method attempts to find the charset.
142
- private static String tryGetCharsetFromHtml5MetaTag(Document document) {
143
- if (!"html".equalsIgnoreCase(document.getDocumentElement().getNodeName())) return null;
144
- NodeList list = document.getDocumentElement().getChildNodes(); Node item;
145
- for (int i = 0; i < list.getLength(); i++) {
146
- if ("head".equalsIgnoreCase((item = list.item(i)).getNodeName())) {
147
- NodeList headers = item.getChildNodes();
148
- for (int j = 0; j < headers.getLength(); j++) {
149
- if ("meta".equalsIgnoreCase((item = headers.item(j)).getNodeName())) {
150
- NamedNodeMap nodeMap = item.getAttributes();
151
- for (int k = 0; k < nodeMap.getLength(); k++) {
152
- if ("charset".equalsIgnoreCase((item = nodeMap.item(k)).getNodeName())) {
153
- return item.getNodeValue();
154
- }
155
- }
156
- }
157
- }
158
- }
161
+ return null;
162
+ }
163
+
164
+ /**
165
+ * Filter to strip out attributes that pertain to XML namespaces.
166
+ */
167
+ public static class RemoveNSAttrsFilter extends DefaultFilter
168
+ {
169
+ @Override
170
+ public void
171
+ startElement(QName element, XMLAttributes attrs,
172
+ Augmentations augs) throws XNIException
173
+ {
174
+ int i;
175
+ for (i = 0; i < attrs.getLength(); ++i) {
176
+ if (isNamespace(attrs.getQName(i))) {
177
+ attrs.removeAttributeAt(i);
178
+ --i;
159
179
  }
160
- return null;
180
+ }
181
+
182
+ element.uri = null;
183
+ super.startElement(element, attrs, augs);
161
184
  }
185
+ }
162
186
 
163
- /**
164
- * Filter to strip out attributes that pertain to XML namespaces.
165
- */
166
- public static class RemoveNSAttrsFilter extends DefaultFilter {
167
- @Override
168
- public void startElement(QName element, XMLAttributes attrs,
169
- Augmentations augs) throws XNIException {
170
- int i;
171
- for (i = 0; i < attrs.getLength(); ++i) {
172
- if (isNamespace(attrs.getQName(i))) {
173
- attrs.removeAttributeAt(i);
174
- --i;
175
- }
176
- }
187
+ public static class ElementValidityCheckFilter extends DefaultFilter
188
+ {
189
+ private NokogiriErrorHandler errorHandler;
177
190
 
178
- element.uri = null;
179
- super.startElement(element, attrs, augs);
180
- }
191
+ private
192
+ ElementValidityCheckFilter(NokogiriErrorHandler errorHandler)
193
+ {
194
+ this.errorHandler = errorHandler;
181
195
  }
182
-
183
- public static class ElementValidityCheckFilter extends DefaultFilter {
184
- private NokogiriErrorHandler errorHandler;
185
-
186
- private ElementValidityCheckFilter(NokogiriErrorHandler errorHandler) {
187
- this.errorHandler = errorHandler;
188
- }
189
-
190
- // element names from xhtml1-strict.dtd
191
- private static String[][] element_names = {
192
- {"a", "abbr", "acronym", "address", "area"},
193
- {"b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button"},
194
- {"caption", "cite", "code", "col", "colgroup"},
195
- {"dd", "del", "dfn", "div", "dl", "dt"},
196
- {"em"},
197
- {"fieldset", "font", "form", "frame", "frameset"},
198
- {}, // g
199
- {"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html"},
200
- {"i", "iframe", "img", "input", "ins"},
201
- {}, // j
202
- {"kbd"},
203
- {"label", "legend", "li", "link"},
204
- {"map", "meta"},
205
- {"noframes", "noscript"},
206
- {"object", "ol", "optgroup", "option"},
207
- {"p", "param", "pre"},
208
- {"q"},
209
- {}, // r
210
- {"s", "samp", "script", "select", "small", "span", "strike", "strong", "style", "sub", "sup"},
211
- {"table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title", "tr", "tt"},
212
- {"u", "ul"},
213
- {"var"},
214
- {}, // w
215
- {}, // x
216
- {}, // y
217
- {} // z
218
- };
219
-
220
- private static boolean isValid(final String name) {
221
- int index = name.charAt(0) - 97;
222
- if (index >= element_names.length) return false;
223
- String[] elementNames = element_names[index];
224
- for (int i=0; i<elementNames.length; i++) {
225
- if (name.equals(elementNames[i])) {
226
- return true;
227
- }
228
- }
229
- return false;
230
- }
231
-
232
- @Override
233
- public void startElement(QName name, XMLAttributes attrs, Augmentations augs) throws XNIException {
234
- if (!isValid(name.rawname)) {
235
- errorHandler.addError(new Exception("Tag " + name.rawname + " invalid"));
236
- }
237
- super.startElement(name, attrs, augs);
196
+
197
+ // element names from xhtml1-strict.dtd
198
+ private static String[][] element_names = {
199
+ {"a", "abbr", "acronym", "address", "area"},
200
+ {"b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button"},
201
+ {"caption", "cite", "code", "col", "colgroup"},
202
+ {"dd", "del", "dfn", "div", "dl", "dt"},
203
+ {"em"},
204
+ {"fieldset", "font", "form", "frame", "frameset"},
205
+ {}, // g
206
+ {"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html"},
207
+ {"i", "iframe", "img", "input", "ins"},
208
+ {}, // j
209
+ {"kbd"},
210
+ {"label", "legend", "li", "link"},
211
+ {"map", "meta"},
212
+ {"noframes", "noscript"},
213
+ {"object", "ol", "optgroup", "option"},
214
+ {"p", "param", "pre"},
215
+ {"q"},
216
+ {}, // r
217
+ {"s", "samp", "script", "select", "small", "span", "strike", "strong", "style", "sub", "sup"},
218
+ {"table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title", "tr", "tt"},
219
+ {"u", "ul"},
220
+ {"var"},
221
+ {}, // w
222
+ {}, // x
223
+ {}, // y
224
+ {} // z
225
+ };
226
+
227
+ private static boolean
228
+ isValid(final String name)
229
+ {
230
+ int index = name.charAt(0) - 97;
231
+ if (index >= element_names.length) { return false; }
232
+ String[] elementNames = element_names[index];
233
+ for (int i = 0; i < elementNames.length; i++) {
234
+ if (name.equals(elementNames[i])) {
235
+ return true;
238
236
  }
237
+ }
238
+ return false;
239
+ }
240
+
241
+ @Override
242
+ public void
243
+ startElement(QName name, XMLAttributes attrs, Augmentations augs) throws XNIException
244
+ {
245
+ if (!isValid(name.rawname)) {
246
+ errorHandler.addError(new Exception("Tag " + name.rawname + " invalid"));
247
+ }
248
+ super.startElement(name, attrs, augs);
239
249
  }
250
+ }
240
251
  }
@@ -4,17 +4,24 @@ import org.xml.sax.ErrorHandler;
4
4
  import org.xml.sax.SAXException;
5
5
  import org.xml.sax.SAXParseException;
6
6
 
7
- public class IgnoreSchemaErrorsErrorHandler implements ErrorHandler {
7
+ public class IgnoreSchemaErrorsErrorHandler implements ErrorHandler
8
+ {
8
9
 
9
- @Override
10
- public void warning(SAXParseException exception) throws SAXException {
11
- }
10
+ @Override
11
+ public void
12
+ warning(SAXParseException exception) throws SAXException
13
+ {
14
+ }
12
15
 
13
- @Override
14
- public void error(SAXParseException exception) throws SAXException {
15
- }
16
+ @Override
17
+ public void
18
+ error(SAXParseException exception) throws SAXException
19
+ {
20
+ }
16
21
 
17
- @Override
18
- public void fatalError(SAXParseException exception) throws SAXException {
19
- }
22
+ @Override
23
+ public void
24
+ fatalError(SAXParseException exception) throws SAXException
25
+ {
26
+ }
20
27
  }