nokogiri 1.11.0.rc1-java → 1.11.2-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (188)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/LICENSE.md +1 -1
  5. data/README.md +171 -94
  6. data/ext/java/nokogiri/EncodingHandler.java +78 -59
  7. data/ext/java/nokogiri/HtmlDocument.java +137 -114
  8. data/ext/java/nokogiri/HtmlElementDescription.java +104 -87
  9. data/ext/java/nokogiri/HtmlEntityLookup.java +31 -26
  10. data/ext/java/nokogiri/HtmlSaxParserContext.java +220 -192
  11. data/ext/java/nokogiri/HtmlSaxPushParser.java +164 -139
  12. data/ext/java/nokogiri/NokogiriService.java +597 -526
  13. data/ext/java/nokogiri/XmlAttr.java +120 -96
  14. data/ext/java/nokogiri/XmlAttributeDecl.java +97 -76
  15. data/ext/java/nokogiri/XmlCdata.java +35 -26
  16. data/ext/java/nokogiri/XmlComment.java +48 -37
  17. data/ext/java/nokogiri/XmlDocument.java +642 -540
  18. data/ext/java/nokogiri/XmlDocumentFragment.java +127 -107
  19. data/ext/java/nokogiri/XmlDtd.java +450 -384
  20. data/ext/java/nokogiri/XmlElement.java +25 -18
  21. data/ext/java/nokogiri/XmlElementContent.java +345 -286
  22. data/ext/java/nokogiri/XmlElementDecl.java +126 -95
  23. data/ext/java/nokogiri/XmlEntityDecl.java +121 -97
  24. data/ext/java/nokogiri/XmlEntityReference.java +51 -42
  25. data/ext/java/nokogiri/XmlNamespace.java +177 -145
  26. data/ext/java/nokogiri/XmlNode.java +1843 -1590
  27. data/ext/java/nokogiri/XmlNodeSet.java +361 -299
  28. data/ext/java/nokogiri/XmlProcessingInstruction.java +49 -39
  29. data/ext/java/nokogiri/XmlReader.java +513 -418
  30. data/ext/java/nokogiri/XmlRelaxng.java +92 -72
  31. data/ext/java/nokogiri/XmlSaxParserContext.java +330 -280
  32. data/ext/java/nokogiri/XmlSaxPushParser.java +229 -190
  33. data/ext/java/nokogiri/XmlSchema.java +335 -210
  34. data/ext/java/nokogiri/XmlSyntaxError.java +113 -87
  35. data/ext/java/nokogiri/XmlText.java +57 -46
  36. data/ext/java/nokogiri/XmlXpathContext.java +242 -178
  37. data/ext/java/nokogiri/XsltStylesheet.java +282 -239
  38. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  39. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +203 -160
  40. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  41. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  42. data/ext/java/nokogiri/internals/NokogiriDomParser.java +65 -50
  43. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  44. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +25 -18
  45. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -254
  46. data/ext/java/nokogiri/internals/NokogiriHelpers.java +738 -622
  47. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +186 -143
  48. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +81 -59
  49. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +66 -49
  50. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +86 -69
  51. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +44 -29
  52. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +121 -48
  53. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -22
  54. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +25 -17
  55. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +57 -42
  56. data/ext/java/nokogiri/internals/ParserContext.java +206 -179
  57. data/ext/java/nokogiri/internals/ReaderNode.java +478 -371
  58. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -707
  59. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +28 -19
  60. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  61. data/ext/java/nokogiri/internals/XmlDeclHandler.java +5 -4
  62. data/ext/java/nokogiri/internals/XmlDomParserContext.java +208 -177
  63. data/ext/java/nokogiri/internals/XmlSaxParser.java +24 -17
  64. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  65. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  66. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  67. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  68. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  78. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  81. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  82. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  83. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  84. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  85. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  86. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  87. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  88. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  89. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  90. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  91. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  92. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -570
  93. data/ext/nokogiri/depend +37 -358
  94. data/ext/nokogiri/extconf.rb +585 -374
  95. data/ext/nokogiri/html_document.c +78 -82
  96. data/ext/nokogiri/html_element_description.c +84 -71
  97. data/ext/nokogiri/html_entity_lookup.c +21 -16
  98. data/ext/nokogiri/html_sax_parser_context.c +69 -66
  99. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  100. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  101. data/ext/nokogiri/nokogiri.c +192 -93
  102. data/ext/nokogiri/test_global_handlers.c +40 -0
  103. data/ext/nokogiri/xml_attr.c +15 -15
  104. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  105. data/ext/nokogiri/xml_cdata.c +13 -18
  106. data/ext/nokogiri/xml_comment.c +19 -26
  107. data/ext/nokogiri/xml_document.c +225 -163
  108. data/ext/nokogiri/xml_document_fragment.c +13 -15
  109. data/ext/nokogiri/xml_dtd.c +54 -48
  110. data/ext/nokogiri/xml_element_content.c +30 -27
  111. data/ext/nokogiri/xml_element_decl.c +22 -22
  112. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  113. data/ext/nokogiri/xml_entity_decl.c +32 -30
  114. data/ext/nokogiri/xml_entity_reference.c +16 -18
  115. data/ext/nokogiri/xml_namespace.c +56 -49
  116. data/ext/nokogiri/xml_node.c +338 -286
  117. data/ext/nokogiri/xml_node_set.c +168 -156
  118. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  119. data/ext/nokogiri/xml_reader.c +195 -172
  120. data/ext/nokogiri/xml_relax_ng.c +52 -28
  121. data/ext/nokogiri/xml_sax_parser.c +118 -118
  122. data/ext/nokogiri/xml_sax_parser_context.c +103 -86
  123. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  124. data/ext/nokogiri/xml_schema.c +111 -34
  125. data/ext/nokogiri/xml_syntax_error.c +42 -21
  126. data/ext/nokogiri/xml_text.c +13 -17
  127. data/ext/nokogiri/xml_xpath_context.c +206 -123
  128. data/ext/nokogiri/xslt_stylesheet.c +158 -161
  129. data/lib/nokogiri.rb +4 -8
  130. data/lib/nokogiri/css/parser.rb +62 -62
  131. data/lib/nokogiri/css/parser.y +2 -2
  132. data/lib/nokogiri/css/parser_extras.rb +38 -36
  133. data/lib/nokogiri/css/xpath_visitor.rb +70 -42
  134. data/lib/nokogiri/extension.rb +26 -0
  135. data/lib/nokogiri/html/document.rb +12 -26
  136. data/lib/nokogiri/html/document_fragment.rb +15 -15
  137. data/lib/nokogiri/nokogiri.jar +0 -0
  138. data/lib/nokogiri/version.rb +2 -148
  139. data/lib/nokogiri/version/constant.rb +5 -0
  140. data/lib/nokogiri/version/info.rb +205 -0
  141. data/lib/nokogiri/xml/builder.rb +2 -2
  142. data/lib/nokogiri/xml/document.rb +48 -18
  143. data/lib/nokogiri/xml/document_fragment.rb +4 -6
  144. data/lib/nokogiri/xml/node.rb +599 -279
  145. data/lib/nokogiri/xml/parse_options.rb +6 -0
  146. data/lib/nokogiri/xml/reader.rb +2 -9
  147. data/lib/nokogiri/xml/relax_ng.rb +6 -2
  148. data/lib/nokogiri/xml/schema.rb +12 -4
  149. data/lib/nokogiri/xml/searchable.rb +24 -16
  150. data/lib/nokogiri/xml/xpath.rb +1 -3
  151. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  152. metadata +87 -158
  153. data/ext/nokogiri/html_document.h +0 -10
  154. data/ext/nokogiri/html_element_description.h +0 -10
  155. data/ext/nokogiri/html_entity_lookup.h +0 -8
  156. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  157. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  158. data/ext/nokogiri/nokogiri.h +0 -122
  159. data/ext/nokogiri/xml_attr.h +0 -9
  160. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  161. data/ext/nokogiri/xml_cdata.h +0 -9
  162. data/ext/nokogiri/xml_comment.h +0 -9
  163. data/ext/nokogiri/xml_document.h +0 -23
  164. data/ext/nokogiri/xml_document_fragment.h +0 -10
  165. data/ext/nokogiri/xml_dtd.h +0 -10
  166. data/ext/nokogiri/xml_element_content.h +0 -10
  167. data/ext/nokogiri/xml_element_decl.h +0 -9
  168. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  169. data/ext/nokogiri/xml_entity_decl.h +0 -10
  170. data/ext/nokogiri/xml_entity_reference.h +0 -9
  171. data/ext/nokogiri/xml_io.c +0 -61
  172. data/ext/nokogiri/xml_io.h +0 -11
  173. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  174. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  175. data/ext/nokogiri/xml_namespace.h +0 -14
  176. data/ext/nokogiri/xml_node.h +0 -13
  177. data/ext/nokogiri/xml_node_set.h +0 -12
  178. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  179. data/ext/nokogiri/xml_reader.h +0 -10
  180. data/ext/nokogiri/xml_relax_ng.h +0 -9
  181. data/ext/nokogiri/xml_sax_parser.h +0 -39
  182. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  183. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  184. data/ext/nokogiri/xml_schema.h +0 -9
  185. data/ext/nokogiri/xml_syntax_error.h +0 -13
  186. data/ext/nokogiri/xml_text.h +0 -9
  187. data/ext/nokogiri/xml_xpath_context.h +0 -10
  188. data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -17,10 +17,10 @@
17
17
  * distribute, sublicense, and/or sell copies of the Software, and to
18
18
  * permit persons to whom the Software is furnished to do so, subject to
19
19
  * the following conditions:
20
- *
20
+ *
21
21
  * The above copyright notice and this permission notice shall be
22
22
  * included in all copies or substantial portions of the Software.
23
- *
23
+ *
24
24
  * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
25
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
26
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -45,35 +45,40 @@ import org.jruby.runtime.builtin.IRubyObject;
45
45
 
46
46
  /**
47
47
  * Class for Nokogiri::HTML::EntityLookup.
48
- *
48
+ *
49
49
  * @author Patrick Mahoney <pat@polycrystal.org>
50
50
  */
51
- @JRubyClass(name="Nokogiri::HTML::EntityLookup")
52
- public class HtmlEntityLookup extends RubyObject {
51
+ @JRubyClass(name = "Nokogiri::HTML::EntityLookup")
52
+ public class HtmlEntityLookup extends RubyObject
53
+ {
53
54
 
54
- public HtmlEntityLookup(Ruby runtime, RubyClass rubyClass) {
55
- super(runtime, rubyClass);
56
- }
55
+ public
56
+ HtmlEntityLookup(Ruby runtime, RubyClass rubyClass)
57
+ {
58
+ super(runtime, rubyClass);
59
+ }
57
60
 
58
- /**
59
- * Looks up an HTML entity <code>key</code>.
60
- *
61
- * The description is a bit lacking.
62
- */
63
- @JRubyMethod()
64
- public IRubyObject get(ThreadContext context, IRubyObject key) {
65
- Ruby ruby = context.getRuntime();
66
- String name = key.toString();
67
- int val = HTMLEntities.get(name);
68
- if (val == -1) return ruby.getNil();
61
+ /**
62
+ * Looks up an HTML entity <code>key</code>.
63
+ *
64
+ * The description is a bit lacking.
65
+ */
66
+ @JRubyMethod()
67
+ public IRubyObject
68
+ get(ThreadContext context, IRubyObject key)
69
+ {
70
+ Ruby ruby = context.getRuntime();
71
+ String name = key.toString();
72
+ int val = HTMLEntities.get(name);
73
+ if (val == -1) { return ruby.getNil(); }
69
74
 
70
- IRubyObject edClass =
71
- ruby.getClassFromPath("Nokogiri::HTML::EntityDescription");
72
- IRubyObject edObj = invoke(context, edClass, "new",
73
- ruby.newFixnum(val), ruby.newString(name),
74
- ruby.newString(name + " entity"));
75
+ IRubyObject edClass =
76
+ ruby.getClassFromPath("Nokogiri::HTML::EntityDescription");
77
+ IRubyObject edObj = invoke(context, edClass, "new",
78
+ ruby.newFixnum(val), ruby.newString(name),
79
+ ruby.newString(name + " entity"));
75
80
 
76
- return edObj;
77
- }
81
+ return edObj;
82
+ }
78
83
 
79
84
  }
@@ -17,10 +17,10 @@
17
17
  * distribute, sublicense, and/or sell copies of the Software, and to
18
18
  * permit persons to whom the Software is furnished to do so, subject to
19
19
  * the following conditions:
20
- *
20
+ *
21
21
  * The above copyright notice and this permission notice shall be
22
22
  * included in all copies or substantial portions of the Software.
23
- *
23
+ *
24
24
  * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
25
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
26
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -63,220 +63,248 @@ import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
63
63
  * @author Yoko Harada <yokolet@gmail.com>
64
64
  */
65
65
 
66
- @JRubyClass(name="Nokogiri::HTML::SAX::ParserContext", parent="Nokogiri::XML::SAX::ParserContext")
67
- public class HtmlSaxParserContext extends XmlSaxParserContext {
66
+ @JRubyClass(name = "Nokogiri::HTML::SAX::ParserContext", parent = "Nokogiri::XML::SAX::ParserContext")
67
+ public class HtmlSaxParserContext extends XmlSaxParserContext
68
+ {
68
69
 
69
- static HtmlSaxParserContext newInstance(final Ruby runtime, final RubyClass klazz) {
70
- HtmlSaxParserContext instance = new HtmlSaxParserContext(runtime, klazz);
71
- instance.initialize(runtime);
72
- return instance;
73
- }
70
+ static HtmlSaxParserContext
71
+ newInstance(final Ruby runtime, final RubyClass klazz)
72
+ {
73
+ HtmlSaxParserContext instance = new HtmlSaxParserContext(runtime, klazz);
74
+ instance.initialize(runtime);
75
+ return instance;
76
+ }
74
77
 
75
- public HtmlSaxParserContext(Ruby ruby, RubyClass rubyClass) {
76
- super(ruby, rubyClass);
77
- }
78
+ public
79
+ HtmlSaxParserContext(Ruby ruby, RubyClass rubyClass)
80
+ {
81
+ super(ruby, rubyClass);
82
+ }
83
+
84
+ @Override
85
+ protected AbstractSAXParser
86
+ createParser() throws SAXException
87
+ {
88
+ SAXParser parser = new SAXParser();
78
89
 
79
- @Override
80
- protected AbstractSAXParser createParser() throws SAXException {
81
- SAXParser parser = new SAXParser();
82
-
83
- try{
84
- parser.setProperty(
85
- "http://cyberneko.org/html/properties/names/elems", "lower");
86
- parser.setProperty(
87
- "http://cyberneko.org/html/properties/names/attrs", "lower");
88
-
89
- // NekoHTML should not try to guess the encoding based on the meta
90
- // tags or other information in the document. This is already
91
- // handled by the EncodingReader.
92
- parser.setFeature("http://cyberneko.org/html/features/scanner/ignore-specified-charset", true);
93
- return parser;
94
- } catch(SAXException ex) {
95
- throw new SAXException(
96
- "Problem while creating HTML SAX Parser: " + ex.toString());
97
- }
90
+ try {
91
+ parser.setProperty(
92
+ "http://cyberneko.org/html/properties/names/elems", "lower");
93
+ parser.setProperty(
94
+ "http://cyberneko.org/html/properties/names/attrs", "lower");
95
+
96
+ // NekoHTML should not try to guess the encoding based on the meta
97
+ // tags or other information in the document. This is already
98
+ // handled by the EncodingReader.
99
+ parser.setFeature("http://cyberneko.org/html/features/scanner/ignore-specified-charset", true);
100
+ return parser;
101
+ } catch (SAXException ex) {
102
+ throw new SAXException(
103
+ "Problem while creating HTML SAX Parser: " + ex.toString());
98
104
  }
105
+ }
99
106
 
100
- @JRubyMethod(name="memory", meta=true)
101
- public static IRubyObject parse_memory(ThreadContext context,
102
- IRubyObject klazz,
103
- IRubyObject data,
104
- IRubyObject encoding) {
105
- HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klazz);
106
- String javaEncoding = findEncodingName(context, encoding);
107
- if (javaEncoding != null) {
108
- CharSequence input = applyEncoding(rubyStringToString(data.convertToString()), javaEncoding);
109
- ByteArrayInputStream istream = new ByteArrayInputStream(input.toString().getBytes());
110
- ctx.setInputSource(istream);
111
- ctx.getInputSource().setEncoding(javaEncoding);
112
- }
113
- return ctx;
107
+ @JRubyMethod(name = "memory", meta = true)
108
+ public static IRubyObject
109
+ parse_memory(ThreadContext context,
110
+ IRubyObject klazz,
111
+ IRubyObject data,
112
+ IRubyObject encoding)
113
+ {
114
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klazz);
115
+ String javaEncoding = findEncodingName(context, encoding);
116
+ if (javaEncoding != null) {
117
+ CharSequence input = applyEncoding(rubyStringToString(data.convertToString()), javaEncoding);
118
+ ByteArrayInputStream istream = new ByteArrayInputStream(input.toString().getBytes());
119
+ ctx.setInputSource(istream);
120
+ ctx.getInputSource().setEncoding(javaEncoding);
114
121
  }
122
+ return ctx;
123
+ }
124
+
125
+ public enum EncodingType {
126
+ NONE(0, "NONE"),
127
+ UTF_8(1, "UTF-8"),
128
+ UTF16LE(2, "UTF16LE"),
129
+ UTF16BE(3, "UTF16BE"),
130
+ UCS4LE(4, "UCS4LE"),
131
+ UCS4BE(5, "UCS4BE"),
132
+ EBCDIC(6, "EBCDIC"),
133
+ UCS4_2143(7, "ICS4-2143"),
134
+ UCS4_3412(8, "UCS4-3412"),
135
+ UCS2(9, "UCS2"),
136
+ ISO_8859_1(10, "ISO-8859-1"),
137
+ ISO_8859_2(11, "ISO-8859-2"),
138
+ ISO_8859_3(12, "ISO-8859-3"),
139
+ ISO_8859_4(13, "ISO-8859-4"),
140
+ ISO_8859_5(14, "ISO-8859-5"),
141
+ ISO_8859_6(15, "ISO-8859-6"),
142
+ ISO_8859_7(16, "ISO-8859-7"),
143
+ ISO_8859_8(17, "ISO-8859-8"),
144
+ ISO_8859_9(18, "ISO-8859-9"),
145
+ ISO_2022_JP(19, "ISO-2022-JP"),
146
+ SHIFT_JIS(20, "SHIFT-JIS"),
147
+ EUC_JP(21, "EUC-JP"),
148
+ ASCII(22, "ASCII");
115
149
 
116
- public enum EncodingType {
117
- NONE(0, "NONE"),
118
- UTF_8(1, "UTF-8"),
119
- UTF16LE(2, "UTF16LE"),
120
- UTF16BE(3, "UTF16BE"),
121
- UCS4LE(4, "UCS4LE"),
122
- UCS4BE(5, "UCS4BE"),
123
- EBCDIC(6, "EBCDIC"),
124
- UCS4_2143(7, "ICS4-2143"),
125
- UCS4_3412(8, "UCS4-3412"),
126
- UCS2(9, "UCS2"),
127
- ISO_8859_1(10, "ISO-8859-1"),
128
- ISO_8859_2(11, "ISO-8859-2"),
129
- ISO_8859_3(12, "ISO-8859-3"),
130
- ISO_8859_4(13, "ISO-8859-4"),
131
- ISO_8859_5(14, "ISO-8859-5"),
132
- ISO_8859_6(15, "ISO-8859-6"),
133
- ISO_8859_7(16, "ISO-8859-7"),
134
- ISO_8859_8(17, "ISO-8859-8"),
135
- ISO_8859_9(18, "ISO-8859-9"),
136
- ISO_2022_JP(19, "ISO-2022-JP"),
137
- SHIFT_JIS(20, "SHIFT-JIS"),
138
- EUC_JP(21, "EUC-JP"),
139
- ASCII(22, "ASCII");
140
-
141
- private final int value;
142
- private final String name;
143
-
144
- EncodingType(int value, String name) {
145
- this.value = value;
146
- this.name = name;
147
- }
148
-
149
- public int getValue() {
150
- return value;
151
- }
152
-
153
- public String toString() {
154
- return name;
155
- }
156
-
157
- private static transient EncodingType[] values;
158
-
159
- // NOTE: assuming ordinal == value
160
- static EncodingType get(final int ordinal) {
161
- EncodingType[] values = EncodingType.values;
162
- if (values == null) {
163
- values = EncodingType.values();
164
- EncodingType.values = values;
165
- }
166
- if (ordinal >= 0 && ordinal < values.length) {
167
- return values[ordinal];
168
- }
169
- return null;
170
- }
150
+ private final int value;
151
+ private final String name;
171
152
 
153
+ EncodingType(int value, String name)
154
+ {
155
+ this.value = value;
156
+ this.name = name;
172
157
  }
173
158
 
174
- private static String findEncodingName(final int value) {
175
- EncodingType type = EncodingType.get(value);
176
- if (type == null) return null;
177
- assert type.value == value;
178
- return type.name;
159
+ public int getValue()
160
+ {
161
+ return value;
179
162
  }
180
163
 
181
- private static String findEncodingName(ThreadContext context, IRubyObject encoding) {
182
- String rubyEncoding = null;
183
- if (encoding instanceof RubyString) {
184
- rubyEncoding = rubyStringToString((RubyString) encoding);
185
- }
186
- else if (encoding instanceof RubyFixnum) {
187
- rubyEncoding = findEncodingName(RubyFixnum.fix2int((RubyFixnum) encoding));
188
- }
189
- if (rubyEncoding == null) return null;
190
- try {
191
- return Charset.forName(rubyEncoding).displayName();
192
- }
193
- catch (UnsupportedCharsetException e) {
194
- throw context.getRuntime().newEncodingCompatibilityError(rubyEncoding + "is not supported");
195
- }
196
- catch (IllegalCharsetNameException e) {
197
- throw context.getRuntime().newInvalidEncoding(e.getMessage());
198
- }
164
+ public String toString()
165
+ {
166
+ return name;
199
167
  }
200
168
 
201
- private static final Pattern CHARSET_PATTERN = Pattern.compile("charset(()|\\s)=(()|\\s)([a-z]|-|_|\\d)+", Pattern.CASE_INSENSITIVE);
202
-
203
- private static CharSequence applyEncoding(final String input, final String enc) {
204
- int start_pos = 0; int end_pos = 0;
205
- if (containsIgnoreCase(input, "charset")) {
206
- Matcher m = CHARSET_PATTERN.matcher(input);
207
- while (m.find()) {
208
- start_pos = m.start();
209
- end_pos = m.end();
210
- }
211
- }
212
- if (start_pos != end_pos) {
213
- return new StringBuilder(input).replace(start_pos, end_pos, "charset=" + enc);
214
- }
215
- return input;
169
+ private static transient EncodingType[] values;
170
+
171
+ // NOTE: assuming ordinal == value
172
+ static EncodingType get(final int ordinal)
173
+ {
174
+ EncodingType[] values = EncodingType.values;
175
+ if (values == null) {
176
+ values = EncodingType.values();
177
+ EncodingType.values = values;
178
+ }
179
+ if (ordinal >= 0 && ordinal < values.length) {
180
+ return values[ordinal];
181
+ }
182
+ return null;
216
183
  }
217
184
 
218
- private static boolean containsIgnoreCase(final String str, final String sub) {
219
- final int len = sub.length();
220
- final int max = str.length() - len;
221
-
222
- if (len == 0) return true;
223
- final char c0Lower = Character.toLowerCase(sub.charAt(0));
224
- final char c0Upper = Character.toUpperCase(sub.charAt(0));
225
-
226
- for (int i = 0; i <= max; i++) {
227
- final char ch = str.charAt(i);
228
- if (ch != c0Lower && Character.toLowerCase(ch) != c0Lower && Character.toUpperCase(ch) != c0Upper) {
229
- continue; // first char doesn't match
230
- }
231
-
232
- if (str.regionMatches(true, i + 1, sub, 0 + 1, len - 1)) {
233
- return true;
234
- }
235
- }
236
- return false;
185
+ }
186
+
187
+ private static String
188
+ findEncodingName(final int value)
189
+ {
190
+ EncodingType type = EncodingType.get(value);
191
+ if (type == null) { return null; }
192
+ assert type.value == value;
193
+ return type.name;
194
+ }
195
+
196
+ private static String
197
+ findEncodingName(ThreadContext context, IRubyObject encoding)
198
+ {
199
+ String rubyEncoding = null;
200
+ if (encoding instanceof RubyString) {
201
+ rubyEncoding = rubyStringToString((RubyString) encoding);
202
+ } else if (encoding instanceof RubyFixnum) {
203
+ rubyEncoding = findEncodingName(RubyFixnum.fix2int((RubyFixnum) encoding));
204
+ }
205
+ if (rubyEncoding == null) { return null; }
206
+ try {
207
+ return Charset.forName(rubyEncoding).displayName();
208
+ } catch (UnsupportedCharsetException e) {
209
+ throw context.getRuntime().newEncodingCompatibilityError(rubyEncoding + "is not supported");
210
+ } catch (IllegalCharsetNameException e) {
211
+ throw context.getRuntime().newEncodingError(e.getMessage());
237
212
  }
213
+ }
214
+
215
+ private static final Pattern CHARSET_PATTERN = Pattern.compile("charset(()|\\s)=(()|\\s)([a-z]|-|_|\\d)+",
216
+ Pattern.CASE_INSENSITIVE);
238
217
 
239
- @JRubyMethod(name="file", meta=true)
240
- public static IRubyObject parse_file(ThreadContext context,
241
- IRubyObject klass,
242
- IRubyObject data,
243
- IRubyObject encoding) {
244
- HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
245
- ctx.setInputSourceFile(context, data);
246
- String javaEncoding = findEncodingName(context, encoding);
247
- if (javaEncoding != null) {
248
- ctx.getInputSource().setEncoding(javaEncoding);
249
- }
250
- return ctx;
218
+ private static CharSequence
219
+ applyEncoding(final String input, final String enc)
220
+ {
221
+ int start_pos = 0;
222
+ int end_pos = 0;
223
+ if (containsIgnoreCase(input, "charset")) {
224
+ Matcher m = CHARSET_PATTERN.matcher(input);
225
+ while (m.find()) {
226
+ start_pos = m.start();
227
+ end_pos = m.end();
228
+ }
251
229
  }
230
+ if (start_pos != end_pos) {
231
+ return new StringBuilder(input).replace(start_pos, end_pos, "charset=" + enc);
232
+ }
233
+ return input;
234
+ }
235
+
236
+ private static boolean
237
+ containsIgnoreCase(final String str, final String sub)
238
+ {
239
+ final int len = sub.length();
240
+ final int max = str.length() - len;
252
241
 
253
- @JRubyMethod(name="io", meta=true)
254
- public static IRubyObject parse_io(ThreadContext context,
255
- IRubyObject klass,
256
- IRubyObject data,
257
- IRubyObject encoding) {
258
- HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
259
- ctx.setIOInputSource(context, data, context.nil);
260
- String javaEncoding = findEncodingName(context, encoding);
261
- if (javaEncoding != null) {
262
- ctx.getInputSource().setEncoding(javaEncoding);
263
- }
264
- return ctx;
242
+ if (len == 0) { return true; }
243
+ final char c0Lower = Character.toLowerCase(sub.charAt(0));
244
+ final char c0Upper = Character.toUpperCase(sub.charAt(0));
245
+
246
+ for (int i = 0; i <= max; i++) {
247
+ final char ch = str.charAt(i);
248
+ if (ch != c0Lower && Character.toLowerCase(ch) != c0Lower && Character.toUpperCase(ch) != c0Upper) {
249
+ continue; // first char doesn't match
250
+ }
251
+
252
+ if (str.regionMatches(true, i + 1, sub, 0 + 1, len - 1)) {
253
+ return true;
254
+ }
265
255
  }
256
+ return false;
257
+ }
266
258
 
267
- /**
268
- * Create a new parser context that will read from a raw input stream.
269
- * Meant to be run in a separate thread by HtmlSaxPushParser.
270
- */
271
- static HtmlSaxParserContext parse_stream(final Ruby runtime, RubyClass klass, InputStream stream) {
272
- HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(runtime, klass);
273
- ctx.setInputSource(stream);
274
- return ctx;
259
+ @JRubyMethod(name = "file", meta = true)
260
+ public static IRubyObject
261
+ parse_file(ThreadContext context,
262
+ IRubyObject klass,
263
+ IRubyObject data,
264
+ IRubyObject encoding)
265
+ {
266
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
267
+ ctx.setInputSourceFile(context, data);
268
+ String javaEncoding = findEncodingName(context, encoding);
269
+ if (javaEncoding != null) {
270
+ ctx.getInputSource().setEncoding(javaEncoding);
275
271
  }
272
+ return ctx;
273
+ }
276
274
 
277
- @Override
278
- protected void preParse(final Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler) {
279
- // this function is meant to be empty. It overrides the one in XmlSaxParserContext
275
+ @JRubyMethod(name = "io", meta = true)
276
+ public static IRubyObject
277
+ parse_io(ThreadContext context,
278
+ IRubyObject klass,
279
+ IRubyObject data,
280
+ IRubyObject encoding)
281
+ {
282
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
283
+ ctx.setIOInputSource(context, data, context.nil);
284
+ String javaEncoding = findEncodingName(context, encoding);
285
+ if (javaEncoding != null) {
286
+ ctx.getInputSource().setEncoding(javaEncoding);
280
287
  }
288
+ return ctx;
289
+ }
290
+
291
+ /**
292
+ * Create a new parser context that will read from a raw input stream.
293
+ * Meant to be run in a separate thread by HtmlSaxPushParser.
294
+ */
295
+ static HtmlSaxParserContext
296
+ parse_stream(final Ruby runtime, RubyClass klass, InputStream stream)
297
+ {
298
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(runtime, klass);
299
+ ctx.setInputSource(stream);
300
+ return ctx;
301
+ }
302
+
303
+ @Override
304
+ protected void
305
+ preParse(final Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler)
306
+ {
307
+ // this function is meant to be empty. It overrides the one in XmlSaxParserContext
308
+ }
281
309
 
282
310
  }