nokogiri 1.11.0.rc3-java → 1.11.4-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (187) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/LICENSE.md +1 -1
  5. data/README.md +168 -91
  6. data/dependencies.yml +12 -12
  7. data/ext/java/nokogiri/EncodingHandler.java +76 -89
  8. data/ext/java/nokogiri/HtmlDocument.java +135 -144
  9. data/ext/java/nokogiri/HtmlElementDescription.java +102 -117
  10. data/ext/java/nokogiri/HtmlEntityLookup.java +33 -60
  11. data/ext/java/nokogiri/HtmlSaxParserContext.java +218 -222
  12. data/ext/java/nokogiri/HtmlSaxPushParser.java +162 -169
  13. data/ext/java/nokogiri/NokogiriService.java +595 -556
  14. data/ext/java/nokogiri/XmlAttr.java +118 -126
  15. data/ext/java/nokogiri/XmlAttributeDecl.java +95 -106
  16. data/ext/java/nokogiri/XmlCdata.java +35 -58
  17. data/ext/java/nokogiri/XmlComment.java +46 -67
  18. data/ext/java/nokogiri/XmlDocument.java +645 -572
  19. data/ext/java/nokogiri/XmlDocumentFragment.java +125 -137
  20. data/ext/java/nokogiri/XmlDtd.java +448 -414
  21. data/ext/java/nokogiri/XmlElement.java +23 -48
  22. data/ext/java/nokogiri/XmlElementContent.java +343 -316
  23. data/ext/java/nokogiri/XmlElementDecl.java +124 -125
  24. data/ext/java/nokogiri/XmlEntityDecl.java +119 -127
  25. data/ext/java/nokogiri/XmlEntityReference.java +49 -72
  26. data/ext/java/nokogiri/XmlNamespace.java +175 -175
  27. data/ext/java/nokogiri/XmlNode.java +1843 -1622
  28. data/ext/java/nokogiri/XmlNodeSet.java +361 -331
  29. data/ext/java/nokogiri/XmlProcessingInstruction.java +47 -69
  30. data/ext/java/nokogiri/XmlReader.java +513 -450
  31. data/ext/java/nokogiri/XmlRelaxng.java +89 -101
  32. data/ext/java/nokogiri/XmlSaxParserContext.java +328 -310
  33. data/ext/java/nokogiri/XmlSaxPushParser.java +227 -220
  34. data/ext/java/nokogiri/XmlSchema.java +335 -242
  35. data/ext/java/nokogiri/XmlSyntaxError.java +113 -119
  36. data/ext/java/nokogiri/XmlText.java +55 -76
  37. data/ext/java/nokogiri/XmlXpathContext.java +242 -210
  38. data/ext/java/nokogiri/XsltStylesheet.java +280 -269
  39. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  40. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +201 -190
  41. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  42. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  43. data/ext/java/nokogiri/internals/NokogiriDomParser.java +63 -80
  44. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  45. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +27 -52
  46. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -286
  47. data/ext/java/nokogiri/internals/NokogiriHelpers.java +736 -652
  48. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +184 -173
  49. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +79 -89
  50. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +64 -79
  51. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +84 -99
  52. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +48 -65
  53. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +119 -78
  54. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -54
  55. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +23 -46
  56. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +55 -72
  57. data/ext/java/nokogiri/internals/ParserContext.java +206 -211
  58. data/ext/java/nokogiri/internals/ReaderNode.java +478 -403
  59. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -739
  60. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +31 -54
  61. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  62. data/ext/java/nokogiri/internals/XmlDeclHandler.java +3 -34
  63. data/ext/java/nokogiri/internals/XmlDomParserContext.java +206 -207
  64. data/ext/java/nokogiri/internals/XmlSaxParser.java +22 -47
  65. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  66. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  67. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  68. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  78. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  81. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  82. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  83. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  84. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  85. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  86. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  87. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  88. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  89. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  90. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  91. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  92. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  93. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -574
  94. data/ext/nokogiri/depend +37 -358
  95. data/ext/nokogiri/extconf.rb +581 -374
  96. data/ext/nokogiri/html_document.c +78 -82
  97. data/ext/nokogiri/html_element_description.c +84 -71
  98. data/ext/nokogiri/html_entity_lookup.c +21 -16
  99. data/ext/nokogiri/html_sax_parser_context.c +69 -66
  100. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  101. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  102. data/ext/nokogiri/nokogiri.c +192 -93
  103. data/ext/nokogiri/test_global_handlers.c +40 -0
  104. data/ext/nokogiri/xml_attr.c +15 -15
  105. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  106. data/ext/nokogiri/xml_cdata.c +13 -18
  107. data/ext/nokogiri/xml_comment.c +19 -26
  108. data/ext/nokogiri/xml_document.c +246 -188
  109. data/ext/nokogiri/xml_document_fragment.c +13 -15
  110. data/ext/nokogiri/xml_dtd.c +54 -48
  111. data/ext/nokogiri/xml_element_content.c +30 -27
  112. data/ext/nokogiri/xml_element_decl.c +22 -22
  113. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  114. data/ext/nokogiri/xml_entity_decl.c +32 -30
  115. data/ext/nokogiri/xml_entity_reference.c +16 -18
  116. data/ext/nokogiri/xml_namespace.c +56 -49
  117. data/ext/nokogiri/xml_node.c +371 -320
  118. data/ext/nokogiri/xml_node_set.c +168 -156
  119. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  120. data/ext/nokogiri/xml_reader.c +191 -157
  121. data/ext/nokogiri/xml_relax_ng.c +52 -28
  122. data/ext/nokogiri/xml_sax_parser.c +118 -118
  123. data/ext/nokogiri/xml_sax_parser_context.c +103 -86
  124. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  125. data/ext/nokogiri/xml_schema.c +95 -47
  126. data/ext/nokogiri/xml_syntax_error.c +42 -21
  127. data/ext/nokogiri/xml_text.c +13 -17
  128. data/ext/nokogiri/xml_xpath_context.c +206 -123
  129. data/ext/nokogiri/xslt_stylesheet.c +158 -161
  130. data/lib/nokogiri.rb +3 -7
  131. data/lib/nokogiri/css/parser.rb +3 -3
  132. data/lib/nokogiri/css/parser.y +2 -2
  133. data/lib/nokogiri/css/xpath_visitor.rb +70 -42
  134. data/lib/nokogiri/extension.rb +26 -0
  135. data/lib/nokogiri/html/document.rb +12 -26
  136. data/lib/nokogiri/html/document_fragment.rb +15 -15
  137. data/lib/nokogiri/nokogiri.jar +0 -0
  138. data/lib/nokogiri/version.rb +2 -149
  139. data/lib/nokogiri/version/constant.rb +5 -0
  140. data/lib/nokogiri/version/info.rb +205 -0
  141. data/lib/nokogiri/xml/document.rb +91 -35
  142. data/lib/nokogiri/xml/document_fragment.rb +4 -6
  143. data/lib/nokogiri/xml/node.rb +89 -69
  144. data/lib/nokogiri/xml/parse_options.rb +6 -0
  145. data/lib/nokogiri/xml/reader.rb +2 -9
  146. data/lib/nokogiri/xml/relax_ng.rb +6 -2
  147. data/lib/nokogiri/xml/schema.rb +12 -4
  148. data/lib/nokogiri/xml/searchable.rb +3 -1
  149. data/lib/nokogiri/xml/xpath.rb +1 -3
  150. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  151. metadata +86 -177
  152. data/ext/nokogiri/html_document.h +0 -10
  153. data/ext/nokogiri/html_element_description.h +0 -10
  154. data/ext/nokogiri/html_entity_lookup.h +0 -8
  155. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  156. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  157. data/ext/nokogiri/nokogiri.h +0 -134
  158. data/ext/nokogiri/xml_attr.h +0 -9
  159. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  160. data/ext/nokogiri/xml_cdata.h +0 -9
  161. data/ext/nokogiri/xml_comment.h +0 -9
  162. data/ext/nokogiri/xml_document.h +0 -23
  163. data/ext/nokogiri/xml_document_fragment.h +0 -10
  164. data/ext/nokogiri/xml_dtd.h +0 -10
  165. data/ext/nokogiri/xml_element_content.h +0 -10
  166. data/ext/nokogiri/xml_element_decl.h +0 -9
  167. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  168. data/ext/nokogiri/xml_entity_decl.h +0 -10
  169. data/ext/nokogiri/xml_entity_reference.h +0 -9
  170. data/ext/nokogiri/xml_io.c +0 -63
  171. data/ext/nokogiri/xml_io.h +0 -11
  172. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  173. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  174. data/ext/nokogiri/xml_namespace.h +0 -14
  175. data/ext/nokogiri/xml_node.h +0 -13
  176. data/ext/nokogiri/xml_node_set.h +0 -12
  177. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  178. data/ext/nokogiri/xml_reader.h +0 -10
  179. data/ext/nokogiri/xml_relax_ng.h +0 -9
  180. data/ext/nokogiri/xml_sax_parser.h +0 -39
  181. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  182. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  183. data/ext/nokogiri/xml_schema.h +0 -9
  184. data/ext/nokogiri/xml_syntax_error.h +0 -13
  185. data/ext/nokogiri/xml_text.h +0 -9
  186. data/ext/nokogiri/xml_xpath_context.h +0 -10
  187. data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -1,9 +1,12 @@
1
1
  package nokogiri.internals;
2
2
 
3
3
  @SuppressWarnings("serial")
4
- public class ClosedStreamException extends Exception {
4
+ public class ClosedStreamException extends Exception
5
+ {
5
6
 
6
- public ClosedStreamException(String message) {
7
+ public
8
+ ClosedStreamException(String message)
9
+ {
7
10
  super(message);
8
11
  }
9
12
 
@@ -1,43 +1,13 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2012:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
1
  package nokogiri.internals;
34
2
 
35
3
  import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
36
4
  import static nokogiri.internals.NokogiriHelpers.isNamespace;
37
5
  import static nokogiri.internals.NokogiriHelpers.stringOrNil;
6
+
38
7
  import nokogiri.HtmlDocument;
39
8
  import nokogiri.NokogiriService;
40
9
  import nokogiri.XmlDocument;
10
+ import nokogiri.XmlSyntaxError;
41
11
 
42
12
  import org.apache.xerces.xni.Augmentations;
43
13
  import org.apache.xerces.xni.QName;
@@ -50,6 +20,7 @@ import org.cyberneko.html.filters.DefaultFilter;
50
20
  import org.jruby.Ruby;
51
21
  import org.jruby.RubyClass;
52
22
  import org.jruby.runtime.ThreadContext;
23
+ import org.jruby.runtime.Helpers;
53
24
  import org.jruby.runtime.builtin.IRubyObject;
54
25
  import org.w3c.dom.Document;
55
26
  import org.w3c.dom.NamedNodeMap;
@@ -58,183 +29,223 @@ import org.w3c.dom.NodeList;
58
29
 
59
30
  /**
60
31
  * Parser for HtmlDocument. This class actually parses HtmlDocument using NekoHtml.
61
- *
32
+ *
62
33
  * @author sergio
63
34
  * @author Patrick Mahoney <pat@polycrystal.org>
64
35
  * @author Yoko Harada <yokolet@gmail.com>
65
36
  */
66
- public class HtmlDomParserContext extends XmlDomParserContext {
37
+ public class HtmlDomParserContext extends XmlDomParserContext
38
+ {
67
39
 
68
- public HtmlDomParserContext(Ruby runtime, IRubyObject options) {
69
- super(runtime, options);
70
- }
71
-
72
- public HtmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options) {
73
- super(runtime, encoding, options);
74
- }
40
+ public
41
+ HtmlDomParserContext(Ruby runtime, IRubyObject options)
42
+ {
43
+ this(runtime, runtime.getNil(), options);
44
+ }
75
45
 
76
- @Override
77
- protected void initErrorHandler() {
78
- if (options.strict) {
79
- errorHandler = new NokogiriStrictErrorHandler(options.noError, options.noWarning);
80
- } else {
81
- errorHandler = new NokogiriNonStrictErrorHandler4NekoHtml(options.noError, options.noWarning);
82
- }
83
- }
46
+ public
47
+ HtmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options)
48
+ {
49
+ super(runtime, encoding, options);
50
+ java_encoding = NokogiriHelpers.getValidEncoding(encoding);
51
+ }
84
52
 
85
- @Override
86
- protected void initParser(Ruby runtime) {
87
- XMLParserConfiguration config = new HTMLConfiguration();
88
- //XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter();
89
- XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler);
90
- //XMLDocumentFilter[] filters = { removeNSAttrsFilter, elementValidityCheckFilter};
91
- XMLDocumentFilter[] filters = { elementValidityCheckFilter};
92
-
93
- config.setErrorHandler(this.errorHandler);
94
-
95
- parser = new NokogiriDomParser(config);
96
-
97
- // see http://nekohtml.sourceforge.net/settings.html for details
98
- setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
99
- setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
100
- setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
101
- setProperty("http://cyberneko.org/html/properties/filters", filters);
102
- setFeature("http://cyberneko.org/html/features/report-errors", true);
103
- setFeature("http://xml.org/sax/features/namespaces", false);
104
- }
105
-
106
- @Override
107
- public void setEncoding(String encoding) {
108
- super.setEncoding(encoding);
53
+ @Override
54
+ protected void
55
+ initParser(Ruby runtime)
56
+ {
57
+ XMLParserConfiguration config = new HTMLConfiguration();
58
+ //XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter();
59
+ XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler);
60
+ //XMLDocumentFilter[] filters = { removeNSAttrsFilter, elementValidityCheckFilter};
61
+ XMLDocumentFilter[] filters = { elementValidityCheckFilter};
62
+
63
+ config.setErrorHandler(this.errorHandler);
64
+
65
+ parser = new NokogiriDomParser(config);
66
+
67
+ // see http://nekohtml.sourceforge.net/settings.html for details
68
+ setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
69
+ setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
70
+ setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
71
+ setProperty("http://cyberneko.org/html/properties/filters", filters);
72
+ setFeature("http://cyberneko.org/html/features/report-errors", true);
73
+ setFeature("http://xml.org/sax/features/namespaces", false);
74
+ }
75
+
76
+ @Override
77
+ public void
78
+ setEncoding(String encoding)
79
+ {
80
+ super.setEncoding(encoding);
81
+ }
82
+
83
+ /**
84
+ * Enable NekoHTML feature for balancing tags in a document fragment.
85
+ *
86
+ * This method is used in XmlNode#in_context method.
87
+ */
88
+ public void
89
+ enableDocumentFragment()
90
+ {
91
+ setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
92
+ }
93
+
94
+ @Override
95
+ public XmlDocument
96
+ parse(ThreadContext context, RubyClass klass, IRubyObject url)
97
+ {
98
+ XmlDocument xmlDoc = super.parse(context, klass, url);
99
+
100
+ // let's be consistent in how we handle RECOVER and NORECOVER (a.k.a. STRICT)
101
+ // https://github.com/sparklemotion/nokogiri/issues/2130
102
+ if (!options.recover && errorHandler.getErrors().size() > 0) {
103
+ XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
104
+ String exceptionMsg = String.format("%s: '%s'",
105
+ "Parser without recover option encountered error or warning",
106
+ errorHandler.getErrors().get(0));
107
+ xmlSyntaxError.setException(new Exception(exceptionMsg));
108
+ throw xmlSyntaxError.toThrowable();
109
109
  }
110
110
 
111
- /**
112
- * Enable NekoHTML feature for balancing tags in a document fragment.
113
- *
114
- * This method is used in XmlNode#in_context method.
115
- */
116
- public void enableDocumentFragment() {
117
- setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
111
+ return xmlDoc;
112
+ }
113
+
114
+ @Override
115
+ protected XmlDocument
116
+ wrapDocument(ThreadContext context, RubyClass klass, Document document)
117
+ {
118
+ HtmlDocument htmlDocument = new HtmlDocument(context.runtime, klass, document);
119
+ htmlDocument.setDocumentNode(context.runtime, document);
120
+ Helpers.invoke(context, htmlDocument, "initialize");
121
+
122
+ if (ruby_encoding.isNil()) {
123
+ // ruby_encoding might have detected by HtmlDocument::EncodingReader
124
+ if (detected_encoding != null && !detected_encoding.isNil()) {
125
+ ruby_encoding = detected_encoding;
126
+ } else {
127
+ // no encoding given & no encoding detected, then try to get it
128
+ String charset = tryGetCharsetFromHtml5MetaTag(document);
129
+ ruby_encoding = stringOrNil(context.runtime, charset);
130
+ }
118
131
  }
132
+ htmlDocument.setEncoding(ruby_encoding);
133
+ htmlDocument.setParsedEncoding(java_encoding);
134
+ return htmlDocument;
135
+ }
119
136
 
120
- @Override
121
- protected XmlDocument wrapDocument(ThreadContext context, RubyClass klass, Document document) {
122
- HtmlDocument htmlDocument = new HtmlDocument(context.runtime, klass, document);
123
- htmlDocument.setDocumentNode(context.runtime, document);
124
- if (ruby_encoding.isNil()) {
125
- // ruby_encoding might have detected by HtmlDocument::EncodingReader
126
- if (detected_encoding != null && !detected_encoding.isNil()) {
127
- ruby_encoding = detected_encoding;
128
- } else {
129
- // no encoding given & no encoding detected, then try to get it
130
- String charset = tryGetCharsetFromHtml5MetaTag(document);
131
- ruby_encoding = stringOrNil(context.runtime, charset);
137
+ // NekoHtml doesn't understand HTML5 meta tag format. This fails to detect charset
138
+ // from an HTML5 style meta tag. Luckily, the meta tag and charset exists in DOM tree
139
+ // so, this method attempts to find the charset.
140
+ private static String
141
+ tryGetCharsetFromHtml5MetaTag(Document document)
142
+ {
143
+ if (!"html".equalsIgnoreCase(document.getDocumentElement().getNodeName())) { return null; }
144
+ NodeList list = document.getDocumentElement().getChildNodes();
145
+ Node item;
146
+ for (int i = 0; i < list.getLength(); i++) {
147
+ if ("head".equalsIgnoreCase((item = list.item(i)).getNodeName())) {
148
+ NodeList headers = item.getChildNodes();
149
+ for (int j = 0; j < headers.getLength(); j++) {
150
+ if ("meta".equalsIgnoreCase((item = headers.item(j)).getNodeName())) {
151
+ NamedNodeMap nodeMap = item.getAttributes();
152
+ for (int k = 0; k < nodeMap.getLength(); k++) {
153
+ if ("charset".equalsIgnoreCase((item = nodeMap.item(k)).getNodeName())) {
154
+ return item.getNodeValue();
155
+ }
132
156
  }
157
+ }
133
158
  }
134
- htmlDocument.setEncoding(ruby_encoding);
135
- htmlDocument.setParsedEncoding(java_encoding);
136
- return htmlDocument;
159
+ }
137
160
  }
138
-
139
- // NekoHtml doesn't understand HTML5 meta tag format. This fails to detect charset
140
- // from an HTML5 style meta tag. Luckily, the meta tag and charset exists in DOM tree
141
- // so, this method attempts to find the charset.
142
- private static String tryGetCharsetFromHtml5MetaTag(Document document) {
143
- if (!"html".equalsIgnoreCase(document.getDocumentElement().getNodeName())) return null;
144
- NodeList list = document.getDocumentElement().getChildNodes(); Node item;
145
- for (int i = 0; i < list.getLength(); i++) {
146
- if ("head".equalsIgnoreCase((item = list.item(i)).getNodeName())) {
147
- NodeList headers = item.getChildNodes();
148
- for (int j = 0; j < headers.getLength(); j++) {
149
- if ("meta".equalsIgnoreCase((item = headers.item(j)).getNodeName())) {
150
- NamedNodeMap nodeMap = item.getAttributes();
151
- for (int k = 0; k < nodeMap.getLength(); k++) {
152
- if ("charset".equalsIgnoreCase((item = nodeMap.item(k)).getNodeName())) {
153
- return item.getNodeValue();
154
- }
155
- }
156
- }
157
- }
158
- }
161
+ return null;
162
+ }
163
+
164
+ /**
165
+ * Filter to strip out attributes that pertain to XML namespaces.
166
+ */
167
+ public static class RemoveNSAttrsFilter extends DefaultFilter
168
+ {
169
+ @Override
170
+ public void
171
+ startElement(QName element, XMLAttributes attrs,
172
+ Augmentations augs) throws XNIException
173
+ {
174
+ int i;
175
+ for (i = 0; i < attrs.getLength(); ++i) {
176
+ if (isNamespace(attrs.getQName(i))) {
177
+ attrs.removeAttributeAt(i);
178
+ --i;
159
179
  }
160
- return null;
180
+ }
181
+
182
+ element.uri = null;
183
+ super.startElement(element, attrs, augs);
161
184
  }
185
+ }
162
186
 
163
- /**
164
- * Filter to strip out attributes that pertain to XML namespaces.
165
- */
166
- public static class RemoveNSAttrsFilter extends DefaultFilter {
167
- @Override
168
- public void startElement(QName element, XMLAttributes attrs,
169
- Augmentations augs) throws XNIException {
170
- int i;
171
- for (i = 0; i < attrs.getLength(); ++i) {
172
- if (isNamespace(attrs.getQName(i))) {
173
- attrs.removeAttributeAt(i);
174
- --i;
175
- }
176
- }
187
+ public static class ElementValidityCheckFilter extends DefaultFilter
188
+ {
189
+ private NokogiriErrorHandler errorHandler;
177
190
 
178
- element.uri = null;
179
- super.startElement(element, attrs, augs);
180
- }
191
+ private
192
+ ElementValidityCheckFilter(NokogiriErrorHandler errorHandler)
193
+ {
194
+ this.errorHandler = errorHandler;
181
195
  }
182
-
183
- public static class ElementValidityCheckFilter extends DefaultFilter {
184
- private NokogiriErrorHandler errorHandler;
185
-
186
- private ElementValidityCheckFilter(NokogiriErrorHandler errorHandler) {
187
- this.errorHandler = errorHandler;
188
- }
189
-
190
- // element names from xhtml1-strict.dtd
191
- private static String[][] element_names = {
192
- {"a", "abbr", "acronym", "address", "area"},
193
- {"b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button"},
194
- {"caption", "cite", "code", "col", "colgroup"},
195
- {"dd", "del", "dfn", "div", "dl", "dt"},
196
- {"em"},
197
- {"fieldset", "font", "form", "frame", "frameset"},
198
- {}, // g
199
- {"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html"},
200
- {"i", "iframe", "img", "input", "ins"},
201
- {}, // j
202
- {"kbd"},
203
- {"label", "legend", "li", "link"},
204
- {"map", "meta"},
205
- {"noframes", "noscript"},
206
- {"object", "ol", "optgroup", "option"},
207
- {"p", "param", "pre"},
208
- {"q"},
209
- {}, // r
210
- {"s", "samp", "script", "select", "small", "span", "strike", "strong", "style", "sub", "sup"},
211
- {"table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title", "tr", "tt"},
212
- {"u", "ul"},
213
- {"var"},
214
- {}, // w
215
- {}, // x
216
- {}, // y
217
- {} // z
218
- };
219
-
220
- private static boolean isValid(final String name) {
221
- int index = name.charAt(0) - 97;
222
- if (index >= element_names.length) return false;
223
- String[] elementNames = element_names[index];
224
- for (int i=0; i<elementNames.length; i++) {
225
- if (name.equals(elementNames[i])) {
226
- return true;
227
- }
228
- }
229
- return false;
230
- }
231
-
232
- @Override
233
- public void startElement(QName name, XMLAttributes attrs, Augmentations augs) throws XNIException {
234
- if (!isValid(name.rawname)) {
235
- errorHandler.addError(new Exception("Tag " + name.rawname + " invalid"));
236
- }
237
- super.startElement(name, attrs, augs);
196
+
197
+ // element names from xhtml1-strict.dtd
198
+ private static String[][] element_names = {
199
+ {"a", "abbr", "acronym", "address", "area"},
200
+ {"b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button"},
201
+ {"caption", "cite", "code", "col", "colgroup"},
202
+ {"dd", "del", "dfn", "div", "dl", "dt"},
203
+ {"em"},
204
+ {"fieldset", "font", "form", "frame", "frameset"},
205
+ {}, // g
206
+ {"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html"},
207
+ {"i", "iframe", "img", "input", "ins"},
208
+ {}, // j
209
+ {"kbd"},
210
+ {"label", "legend", "li", "link"},
211
+ {"map", "meta"},
212
+ {"noframes", "noscript"},
213
+ {"object", "ol", "optgroup", "option"},
214
+ {"p", "param", "pre"},
215
+ {"q"},
216
+ {}, // r
217
+ {"s", "samp", "script", "select", "small", "span", "strike", "strong", "style", "sub", "sup"},
218
+ {"table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title", "tr", "tt"},
219
+ {"u", "ul"},
220
+ {"var"},
221
+ {}, // w
222
+ {}, // x
223
+ {}, // y
224
+ {} // z
225
+ };
226
+
227
+ private static boolean
228
+ isValid(final String name)
229
+ {
230
+ int index = name.charAt(0) - 97;
231
+ if (index >= element_names.length) { return false; }
232
+ String[] elementNames = element_names[index];
233
+ for (int i = 0; i < elementNames.length; i++) {
234
+ if (name.equals(elementNames[i])) {
235
+ return true;
238
236
  }
237
+ }
238
+ return false;
239
+ }
240
+
241
+ @Override
242
+ public void
243
+ startElement(QName name, XMLAttributes attrs, Augmentations augs) throws XNIException
244
+ {
245
+ if (!isValid(name.rawname)) {
246
+ errorHandler.addError(new Exception("Tag " + name.rawname + " invalid"));
247
+ }
248
+ super.startElement(name, attrs, augs);
239
249
  }
250
+ }
240
251
  }
@@ -4,17 +4,24 @@ import org.xml.sax.ErrorHandler;
4
4
  import org.xml.sax.SAXException;
5
5
  import org.xml.sax.SAXParseException;
6
6
 
7
- public class IgnoreSchemaErrorsErrorHandler implements ErrorHandler {
7
+ public class IgnoreSchemaErrorsErrorHandler implements ErrorHandler
8
+ {
8
9
 
9
- @Override
10
- public void warning(SAXParseException exception) throws SAXException {
11
- }
10
+ @Override
11
+ public void
12
+ warning(SAXParseException exception) throws SAXException
13
+ {
14
+ }
12
15
 
13
- @Override
14
- public void error(SAXParseException exception) throws SAXException {
15
- }
16
+ @Override
17
+ public void
18
+ error(SAXParseException exception) throws SAXException
19
+ {
20
+ }
16
21
 
17
- @Override
18
- public void fatalError(SAXParseException exception) throws SAXException {
19
- }
22
+ @Override
23
+ public void
24
+ fatalError(SAXParseException exception) throws SAXException
25
+ {
26
+ }
20
27
  }