nokogiri 1.11.1-java → 1.11.6-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (143)
  1. checksums.yaml +4 -4
  2. data/LICENSE-DEPENDENCIES.md +12 -12
  3. data/LICENSE.md +1 -1
  4. data/README.md +21 -16
  5. data/dependencies.yml +12 -12
  6. data/ext/java/nokogiri/EncodingHandler.java +76 -89
  7. data/ext/java/nokogiri/HtmlDocument.java +135 -144
  8. data/ext/java/nokogiri/HtmlElementDescription.java +102 -117
  9. data/ext/java/nokogiri/HtmlEntityLookup.java +33 -60
  10. data/ext/java/nokogiri/HtmlSaxParserContext.java +218 -222
  11. data/ext/java/nokogiri/HtmlSaxPushParser.java +162 -169
  12. data/ext/java/nokogiri/NokogiriService.java +595 -556
  13. data/ext/java/nokogiri/XmlAttr.java +118 -126
  14. data/ext/java/nokogiri/XmlAttributeDecl.java +95 -106
  15. data/ext/java/nokogiri/XmlCdata.java +35 -58
  16. data/ext/java/nokogiri/XmlComment.java +46 -67
  17. data/ext/java/nokogiri/XmlDocument.java +645 -572
  18. data/ext/java/nokogiri/XmlDocumentFragment.java +125 -137
  19. data/ext/java/nokogiri/XmlDtd.java +448 -414
  20. data/ext/java/nokogiri/XmlElement.java +23 -48
  21. data/ext/java/nokogiri/XmlElementContent.java +343 -316
  22. data/ext/java/nokogiri/XmlElementDecl.java +124 -125
  23. data/ext/java/nokogiri/XmlEntityDecl.java +119 -127
  24. data/ext/java/nokogiri/XmlEntityReference.java +49 -72
  25. data/ext/java/nokogiri/XmlNamespace.java +175 -175
  26. data/ext/java/nokogiri/XmlNode.java +1843 -1620
  27. data/ext/java/nokogiri/XmlNodeSet.java +361 -331
  28. data/ext/java/nokogiri/XmlProcessingInstruction.java +47 -69
  29. data/ext/java/nokogiri/XmlReader.java +513 -450
  30. data/ext/java/nokogiri/XmlRelaxng.java +85 -104
  31. data/ext/java/nokogiri/XmlSaxParserContext.java +328 -315
  32. data/ext/java/nokogiri/XmlSaxPushParser.java +227 -220
  33. data/ext/java/nokogiri/XmlSchema.java +328 -295
  34. data/ext/java/nokogiri/XmlSyntaxError.java +113 -115
  35. data/ext/java/nokogiri/XmlText.java +55 -76
  36. data/ext/java/nokogiri/XmlXpathContext.java +240 -238
  37. data/ext/java/nokogiri/XsltStylesheet.java +280 -269
  38. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  39. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +201 -202
  40. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  41. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  42. data/ext/java/nokogiri/internals/NokogiriDomParser.java +63 -80
  43. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  44. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +27 -52
  45. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -286
  46. data/ext/java/nokogiri/internals/NokogiriHelpers.java +736 -652
  47. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +184 -173
  48. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +81 -98
  49. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +64 -79
  50. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +84 -99
  51. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +48 -65
  52. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +116 -131
  53. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -56
  54. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +23 -46
  55. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +55 -72
  56. data/ext/java/nokogiri/internals/ParserContext.java +206 -211
  57. data/ext/java/nokogiri/internals/ReaderNode.java +478 -403
  58. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -739
  59. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +31 -54
  60. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  61. data/ext/java/nokogiri/internals/XmlDeclHandler.java +3 -34
  62. data/ext/java/nokogiri/internals/XmlDomParserContext.java +206 -207
  63. data/ext/java/nokogiri/internals/XmlSaxParser.java +22 -47
  64. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  65. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  66. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  67. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  68. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  78. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  81. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  82. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  83. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  84. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  85. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  86. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  87. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  88. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  89. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  90. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  91. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  92. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -574
  93. data/ext/nokogiri/depend +34 -474
  94. data/ext/nokogiri/extconf.rb +253 -183
  95. data/ext/nokogiri/html_document.c +10 -15
  96. data/ext/nokogiri/html_element_description.c +84 -71
  97. data/ext/nokogiri/html_entity_lookup.c +21 -16
  98. data/ext/nokogiri/html_sax_parser_context.c +66 -65
  99. data/ext/nokogiri/html_sax_push_parser.c +29 -27
  100. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  101. data/ext/nokogiri/nokogiri.c +190 -63
  102. data/ext/nokogiri/test_global_handlers.c +3 -4
  103. data/ext/nokogiri/xml_attr.c +15 -15
  104. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  105. data/ext/nokogiri/xml_cdata.c +13 -18
  106. data/ext/nokogiri/xml_comment.c +19 -26
  107. data/ext/nokogiri/xml_document.c +246 -188
  108. data/ext/nokogiri/xml_document_fragment.c +13 -15
  109. data/ext/nokogiri/xml_dtd.c +54 -48
  110. data/ext/nokogiri/xml_element_content.c +30 -27
  111. data/ext/nokogiri/xml_element_decl.c +22 -22
  112. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  113. data/ext/nokogiri/xml_entity_decl.c +32 -30
  114. data/ext/nokogiri/xml_entity_reference.c +16 -18
  115. data/ext/nokogiri/xml_namespace.c +56 -49
  116. data/ext/nokogiri/xml_node.c +385 -326
  117. data/ext/nokogiri/xml_node_set.c +168 -156
  118. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  119. data/ext/nokogiri/xml_reader.c +191 -157
  120. data/ext/nokogiri/xml_relax_ng.c +29 -23
  121. data/ext/nokogiri/xml_sax_parser.c +117 -112
  122. data/ext/nokogiri/xml_sax_parser_context.c +100 -85
  123. data/ext/nokogiri/xml_sax_push_parser.c +34 -27
  124. data/ext/nokogiri/xml_schema.c +48 -42
  125. data/ext/nokogiri/xml_syntax_error.c +21 -23
  126. data/ext/nokogiri/xml_text.c +13 -17
  127. data/ext/nokogiri/xml_xpath_context.c +134 -127
  128. data/ext/nokogiri/xslt_stylesheet.c +157 -157
  129. data/lib/nokogiri.rb +1 -22
  130. data/lib/nokogiri/css/parser.rb +1 -1
  131. data/lib/nokogiri/extension.rb +26 -0
  132. data/lib/nokogiri/html/document_fragment.rb +15 -15
  133. data/lib/nokogiri/nokogiri.jar +0 -0
  134. data/lib/nokogiri/version/constant.rb +1 -1
  135. data/lib/nokogiri/version/info.rb +32 -8
  136. data/lib/nokogiri/xml/document.rb +74 -28
  137. data/lib/nokogiri/xml/node.rb +39 -42
  138. data/lib/nokogiri/xml/reader.rb +2 -9
  139. data/lib/nokogiri/xml/xpath.rb +1 -3
  140. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  141. metadata +7 -8
  142. data/ext/nokogiri/xml_io.c +0 -63
  143. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
@@ -1,9 +1,12 @@
1
1
  package nokogiri.internals;
2
2
 
3
3
  @SuppressWarnings("serial")
4
- public class ClosedStreamException extends Exception {
4
+ public class ClosedStreamException extends Exception
5
+ {
5
6
 
6
- public ClosedStreamException(String message) {
7
+ public
8
+ ClosedStreamException(String message)
9
+ {
7
10
  super(message);
8
11
  }
9
12
 
@@ -1,35 +1,3 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2012:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
1
  package nokogiri.internals;
34
2
 
35
3
  import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
@@ -52,6 +20,7 @@ import org.cyberneko.html.filters.DefaultFilter;
52
20
  import org.jruby.Ruby;
53
21
  import org.jruby.RubyClass;
54
22
  import org.jruby.runtime.ThreadContext;
23
+ import org.jruby.runtime.Helpers;
55
24
  import org.jruby.runtime.builtin.IRubyObject;
56
25
  import org.w3c.dom.Document;
57
26
  import org.w3c.dom.NamedNodeMap;
@@ -60,193 +29,223 @@ import org.w3c.dom.NodeList;
60
29
 
61
30
  /**
62
31
  * Parser for HtmlDocument. This class actually parses HtmlDocument using NekoHtml.
63
- *
32
+ *
64
33
  * @author sergio
65
34
  * @author Patrick Mahoney <pat@polycrystal.org>
66
35
  * @author Yoko Harada <yokolet@gmail.com>
67
36
  */
68
- public class HtmlDomParserContext extends XmlDomParserContext {
69
-
70
- public HtmlDomParserContext(Ruby runtime, IRubyObject options) {
71
- this(runtime, runtime.getNil(), options);
37
+ public class HtmlDomParserContext extends XmlDomParserContext
38
+ {
39
+
40
+ public
41
+ HtmlDomParserContext(Ruby runtime, IRubyObject options)
42
+ {
43
+ this(runtime, runtime.getNil(), options);
44
+ }
45
+
46
+ public
47
+ HtmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options)
48
+ {
49
+ super(runtime, encoding, options);
50
+ java_encoding = NokogiriHelpers.getValidEncoding(encoding);
51
+ }
52
+
53
+ @Override
54
+ protected void
55
+ initParser(Ruby runtime)
56
+ {
57
+ XMLParserConfiguration config = new HTMLConfiguration();
58
+ //XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter();
59
+ XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler);
60
+ //XMLDocumentFilter[] filters = { removeNSAttrsFilter, elementValidityCheckFilter};
61
+ XMLDocumentFilter[] filters = { elementValidityCheckFilter};
62
+
63
+ config.setErrorHandler(this.errorHandler);
64
+
65
+ parser = new NokogiriDomParser(config);
66
+
67
+ // see http://nekohtml.sourceforge.net/settings.html for details
68
+ setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
69
+ setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
70
+ setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
71
+ setProperty("http://cyberneko.org/html/properties/filters", filters);
72
+ setFeature("http://cyberneko.org/html/features/report-errors", true);
73
+ setFeature("http://xml.org/sax/features/namespaces", false);
74
+ }
75
+
76
+ @Override
77
+ public void
78
+ setEncoding(String encoding)
79
+ {
80
+ super.setEncoding(encoding);
81
+ }
82
+
83
+ /**
84
+ * Enable NekoHTML feature for balancing tags in a document fragment.
85
+ *
86
+ * This method is used in XmlNode#in_context method.
87
+ */
88
+ public void
89
+ enableDocumentFragment()
90
+ {
91
+ setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
92
+ }
93
+
94
+ @Override
95
+ public XmlDocument
96
+ parse(ThreadContext context, RubyClass klass, IRubyObject url)
97
+ {
98
+ XmlDocument xmlDoc = super.parse(context, klass, url);
99
+
100
+ // let's be consistent in how we handle RECOVER and NORECOVER (a.k.a. STRICT)
101
+ // https://github.com/sparklemotion/nokogiri/issues/2130
102
+ if (!options.recover && errorHandler.getErrors().size() > 0) {
103
+ XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
104
+ String exceptionMsg = String.format("%s: '%s'",
105
+ "Parser without recover option encountered error or warning",
106
+ errorHandler.getErrors().get(0));
107
+ xmlSyntaxError.setException(new Exception(exceptionMsg));
108
+ throw xmlSyntaxError.toThrowable();
72
109
  }
73
110
 
74
- public HtmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options) {
75
- super(runtime, encoding, options);
76
- java_encoding = NokogiriHelpers.getValidEncoding(encoding);
111
+ return xmlDoc;
112
+ }
113
+
114
+ @Override
115
+ protected XmlDocument
116
+ wrapDocument(ThreadContext context, RubyClass klass, Document document)
117
+ {
118
+ HtmlDocument htmlDocument = new HtmlDocument(context.runtime, klass, document);
119
+ htmlDocument.setDocumentNode(context.runtime, document);
120
+ Helpers.invoke(context, htmlDocument, "initialize");
121
+
122
+ if (ruby_encoding.isNil()) {
123
+ // ruby_encoding might have detected by HtmlDocument::EncodingReader
124
+ if (detected_encoding != null && !detected_encoding.isNil()) {
125
+ ruby_encoding = detected_encoding;
126
+ } else {
127
+ // no encoding given & no encoding detected, then try to get it
128
+ String charset = tryGetCharsetFromHtml5MetaTag(document);
129
+ ruby_encoding = stringOrNil(context.runtime, charset);
130
+ }
77
131
  }
78
-
79
- @Override
80
- protected void initParser(Ruby runtime) {
81
- XMLParserConfiguration config = new HTMLConfiguration();
82
- //XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter();
83
- XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler);
84
- //XMLDocumentFilter[] filters = { removeNSAttrsFilter, elementValidityCheckFilter};
85
- XMLDocumentFilter[] filters = { elementValidityCheckFilter};
86
-
87
- config.setErrorHandler(this.errorHandler);
88
-
89
- parser = new NokogiriDomParser(config);
90
-
91
- // see http://nekohtml.sourceforge.net/settings.html for details
92
- setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
93
- setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
94
- setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
95
- setProperty("http://cyberneko.org/html/properties/filters", filters);
96
- setFeature("http://cyberneko.org/html/features/report-errors", true);
97
- setFeature("http://xml.org/sax/features/namespaces", false);
132
+ htmlDocument.setEncoding(ruby_encoding);
133
+ htmlDocument.setParsedEncoding(java_encoding);
134
+ return htmlDocument;
135
+ }
136
+
137
+ // NekoHtml doesn't understand HTML5 meta tag format. This fails to detect charset
138
+ // from an HTML5 style meta tag. Luckily, the meta tag and charset exists in DOM tree
139
+ // so, this method attempts to find the charset.
140
+ private static String
141
+ tryGetCharsetFromHtml5MetaTag(Document document)
142
+ {
143
+ if (!"html".equalsIgnoreCase(document.getDocumentElement().getNodeName())) { return null; }
144
+ NodeList list = document.getDocumentElement().getChildNodes();
145
+ Node item;
146
+ for (int i = 0; i < list.getLength(); i++) {
147
+ if ("head".equalsIgnoreCase((item = list.item(i)).getNodeName())) {
148
+ NodeList headers = item.getChildNodes();
149
+ for (int j = 0; j < headers.getLength(); j++) {
150
+ if ("meta".equalsIgnoreCase((item = headers.item(j)).getNodeName())) {
151
+ NamedNodeMap nodeMap = item.getAttributes();
152
+ for (int k = 0; k < nodeMap.getLength(); k++) {
153
+ if ("charset".equalsIgnoreCase((item = nodeMap.item(k)).getNodeName())) {
154
+ return item.getNodeValue();
155
+ }
156
+ }
157
+ }
158
+ }
159
+ }
98
160
  }
99
-
161
+ return null;
162
+ }
163
+
164
+ /**
165
+ * Filter to strip out attributes that pertain to XML namespaces.
166
+ */
167
+ public static class RemoveNSAttrsFilter extends DefaultFilter
168
+ {
100
169
  @Override
101
- public void setEncoding(String encoding) {
102
- super.setEncoding(encoding);
103
- }
170
+ public void
171
+ startElement(QName element, XMLAttributes attrs,
172
+ Augmentations augs) throws XNIException
173
+ {
174
+ int i;
175
+ for (i = 0; i < attrs.getLength(); ++i) {
176
+ if (isNamespace(attrs.getQName(i))) {
177
+ attrs.removeAttributeAt(i);
178
+ --i;
179
+ }
180
+ }
104
181
 
105
- /**
106
- * Enable NekoHTML feature for balancing tags in a document fragment.
107
- *
108
- * This method is used in XmlNode#in_context method.
109
- */
110
- public void enableDocumentFragment() {
111
- setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
182
+ element.uri = null;
183
+ super.startElement(element, attrs, augs);
112
184
  }
185
+ }
113
186
 
114
- @Override
115
- public XmlDocument parse(ThreadContext context, RubyClass klass, IRubyObject url) {
116
- XmlDocument xmlDoc = super.parse(context, klass, url);
117
-
118
- // let's be consistent in how we handle RECOVER and NORECOVER (a.k.a. STRICT)
119
- // https://github.com/sparklemotion/nokogiri/issues/2130
120
- if (!options.recover && errorHandler.getErrors().size() > 0) {
121
- XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
122
- String exceptionMsg = String.format("%s: '%s'",
123
- "Parser without recover option encountered error or warning",
124
- errorHandler.getErrors().get(0));
125
- xmlSyntaxError.setException(new Exception(exceptionMsg));
126
- throw xmlSyntaxError.toThrowable();
127
- }
187
+ public static class ElementValidityCheckFilter extends DefaultFilter
188
+ {
189
+ private NokogiriErrorHandler errorHandler;
128
190
 
129
- return xmlDoc;
191
+ private
192
+ ElementValidityCheckFilter(NokogiriErrorHandler errorHandler)
193
+ {
194
+ this.errorHandler = errorHandler;
130
195
  }
131
196
 
132
- @Override
133
- protected XmlDocument wrapDocument(ThreadContext context, RubyClass klass, Document document) {
134
- HtmlDocument htmlDocument = new HtmlDocument(context.runtime, klass, document);
135
- htmlDocument.setDocumentNode(context.runtime, document);
136
- if (ruby_encoding.isNil()) {
137
- // ruby_encoding might have detected by HtmlDocument::EncodingReader
138
- if (detected_encoding != null && !detected_encoding.isNil()) {
139
- ruby_encoding = detected_encoding;
140
- } else {
141
- // no encoding given & no encoding detected, then try to get it
142
- String charset = tryGetCharsetFromHtml5MetaTag(document);
143
- ruby_encoding = stringOrNil(context.runtime, charset);
144
- }
145
- }
146
- htmlDocument.setEncoding(ruby_encoding);
147
- htmlDocument.setParsedEncoding(java_encoding);
148
- return htmlDocument;
149
- }
150
-
151
- // NekoHtml doesn't understand HTML5 meta tag format. This fails to detect charset
152
- // from an HTML5 style meta tag. Luckily, the meta tag and charset exists in DOM tree
153
- // so, this method attempts to find the charset.
154
- private static String tryGetCharsetFromHtml5MetaTag(Document document) {
155
- if (!"html".equalsIgnoreCase(document.getDocumentElement().getNodeName())) return null;
156
- NodeList list = document.getDocumentElement().getChildNodes(); Node item;
157
- for (int i = 0; i < list.getLength(); i++) {
158
- if ("head".equalsIgnoreCase((item = list.item(i)).getNodeName())) {
159
- NodeList headers = item.getChildNodes();
160
- for (int j = 0; j < headers.getLength(); j++) {
161
- if ("meta".equalsIgnoreCase((item = headers.item(j)).getNodeName())) {
162
- NamedNodeMap nodeMap = item.getAttributes();
163
- for (int k = 0; k < nodeMap.getLength(); k++) {
164
- if ("charset".equalsIgnoreCase((item = nodeMap.item(k)).getNodeName())) {
165
- return item.getNodeValue();
166
- }
167
- }
168
- }
169
- }
170
- }
197
+ // element names from xhtml1-strict.dtd
198
+ private static String[][] element_names = {
199
+ {"a", "abbr", "acronym", "address", "area"},
200
+ {"b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button"},
201
+ {"caption", "cite", "code", "col", "colgroup"},
202
+ {"dd", "del", "dfn", "div", "dl", "dt"},
203
+ {"em"},
204
+ {"fieldset", "font", "form", "frame", "frameset"},
205
+ {}, // g
206
+ {"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html"},
207
+ {"i", "iframe", "img", "input", "ins"},
208
+ {}, // j
209
+ {"kbd"},
210
+ {"label", "legend", "li", "link"},
211
+ {"map", "meta"},
212
+ {"noframes", "noscript"},
213
+ {"object", "ol", "optgroup", "option"},
214
+ {"p", "param", "pre"},
215
+ {"q"},
216
+ {}, // r
217
+ {"s", "samp", "script", "select", "small", "span", "strike", "strong", "style", "sub", "sup"},
218
+ {"table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title", "tr", "tt"},
219
+ {"u", "ul"},
220
+ {"var"},
221
+ {}, // w
222
+ {}, // x
223
+ {}, // y
224
+ {} // z
225
+ };
226
+
227
+ private static boolean
228
+ isValid(final String name)
229
+ {
230
+ int index = name.charAt(0) - 97;
231
+ if (index >= element_names.length) { return false; }
232
+ String[] elementNames = element_names[index];
233
+ for (int i = 0; i < elementNames.length; i++) {
234
+ if (name.equals(elementNames[i])) {
235
+ return true;
171
236
  }
172
- return null;
237
+ }
238
+ return false;
173
239
  }
174
240
 
175
- /**
176
- * Filter to strip out attributes that pertain to XML namespaces.
177
- */
178
- public static class RemoveNSAttrsFilter extends DefaultFilter {
179
- @Override
180
- public void startElement(QName element, XMLAttributes attrs,
181
- Augmentations augs) throws XNIException {
182
- int i;
183
- for (i = 0; i < attrs.getLength(); ++i) {
184
- if (isNamespace(attrs.getQName(i))) {
185
- attrs.removeAttributeAt(i);
186
- --i;
187
- }
188
- }
189
-
190
- element.uri = null;
191
- super.startElement(element, attrs, augs);
192
- }
193
- }
194
-
195
- public static class ElementValidityCheckFilter extends DefaultFilter {
196
- private NokogiriErrorHandler errorHandler;
197
-
198
- private ElementValidityCheckFilter(NokogiriErrorHandler errorHandler) {
199
- this.errorHandler = errorHandler;
200
- }
201
-
202
- // element names from xhtml1-strict.dtd
203
- private static String[][] element_names = {
204
- {"a", "abbr", "acronym", "address", "area"},
205
- {"b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button"},
206
- {"caption", "cite", "code", "col", "colgroup"},
207
- {"dd", "del", "dfn", "div", "dl", "dt"},
208
- {"em"},
209
- {"fieldset", "font", "form", "frame", "frameset"},
210
- {}, // g
211
- {"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html"},
212
- {"i", "iframe", "img", "input", "ins"},
213
- {}, // j
214
- {"kbd"},
215
- {"label", "legend", "li", "link"},
216
- {"map", "meta"},
217
- {"noframes", "noscript"},
218
- {"object", "ol", "optgroup", "option"},
219
- {"p", "param", "pre"},
220
- {"q"},
221
- {}, // r
222
- {"s", "samp", "script", "select", "small", "span", "strike", "strong", "style", "sub", "sup"},
223
- {"table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title", "tr", "tt"},
224
- {"u", "ul"},
225
- {"var"},
226
- {}, // w
227
- {}, // x
228
- {}, // y
229
- {} // z
230
- };
231
-
232
- private static boolean isValid(final String name) {
233
- int index = name.charAt(0) - 97;
234
- if (index >= element_names.length) return false;
235
- String[] elementNames = element_names[index];
236
- for (int i=0; i<elementNames.length; i++) {
237
- if (name.equals(elementNames[i])) {
238
- return true;
239
- }
240
- }
241
- return false;
242
- }
243
-
244
- @Override
245
- public void startElement(QName name, XMLAttributes attrs, Augmentations augs) throws XNIException {
246
- if (!isValid(name.rawname)) {
247
- errorHandler.addError(new Exception("Tag " + name.rawname + " invalid"));
248
- }
249
- super.startElement(name, attrs, augs);
250
- }
241
+ @Override
242
+ public void
243
+ startElement(QName name, XMLAttributes attrs, Augmentations augs) throws XNIException
244
+ {
245
+ if (!isValid(name.rawname)) {
246
+ errorHandler.addError(new Exception("Tag " + name.rawname + " invalid"));
247
+ }
248
+ super.startElement(name, attrs, augs);
251
249
  }
250
+ }
252
251
  }