nokogiri 1.11.0.rc4-java → 1.11.5-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (144)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +12 -12
  4. data/LICENSE.md +1 -1
  5. data/README.md +168 -91
  6. data/dependencies.yml +12 -12
  7. data/ext/java/nokogiri/EncodingHandler.java +76 -89
  8. data/ext/java/nokogiri/HtmlDocument.java +135 -144
  9. data/ext/java/nokogiri/HtmlElementDescription.java +102 -117
  10. data/ext/java/nokogiri/HtmlEntityLookup.java +33 -60
  11. data/ext/java/nokogiri/HtmlSaxParserContext.java +218 -222
  12. data/ext/java/nokogiri/HtmlSaxPushParser.java +162 -169
  13. data/ext/java/nokogiri/NokogiriService.java +595 -556
  14. data/ext/java/nokogiri/XmlAttr.java +118 -126
  15. data/ext/java/nokogiri/XmlAttributeDecl.java +95 -106
  16. data/ext/java/nokogiri/XmlCdata.java +35 -58
  17. data/ext/java/nokogiri/XmlComment.java +46 -67
  18. data/ext/java/nokogiri/XmlDocument.java +645 -572
  19. data/ext/java/nokogiri/XmlDocumentFragment.java +125 -137
  20. data/ext/java/nokogiri/XmlDtd.java +448 -414
  21. data/ext/java/nokogiri/XmlElement.java +23 -48
  22. data/ext/java/nokogiri/XmlElementContent.java +343 -316
  23. data/ext/java/nokogiri/XmlElementDecl.java +124 -125
  24. data/ext/java/nokogiri/XmlEntityDecl.java +119 -127
  25. data/ext/java/nokogiri/XmlEntityReference.java +49 -72
  26. data/ext/java/nokogiri/XmlNamespace.java +175 -175
  27. data/ext/java/nokogiri/XmlNode.java +1843 -1620
  28. data/ext/java/nokogiri/XmlNodeSet.java +361 -331
  29. data/ext/java/nokogiri/XmlProcessingInstruction.java +47 -69
  30. data/ext/java/nokogiri/XmlReader.java +513 -450
  31. data/ext/java/nokogiri/XmlRelaxng.java +85 -104
  32. data/ext/java/nokogiri/XmlSaxParserContext.java +328 -315
  33. data/ext/java/nokogiri/XmlSaxPushParser.java +227 -220
  34. data/ext/java/nokogiri/XmlSchema.java +328 -295
  35. data/ext/java/nokogiri/XmlSyntaxError.java +113 -115
  36. data/ext/java/nokogiri/XmlText.java +55 -76
  37. data/ext/java/nokogiri/XmlXpathContext.java +240 -238
  38. data/ext/java/nokogiri/XsltStylesheet.java +280 -269
  39. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  40. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +201 -202
  41. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  42. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  43. data/ext/java/nokogiri/internals/NokogiriDomParser.java +63 -80
  44. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  45. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +27 -52
  46. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -286
  47. data/ext/java/nokogiri/internals/NokogiriHelpers.java +736 -652
  48. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +184 -173
  49. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +81 -98
  50. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +64 -79
  51. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +84 -99
  52. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +48 -65
  53. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +116 -131
  54. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -56
  55. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +23 -46
  56. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +55 -72
  57. data/ext/java/nokogiri/internals/ParserContext.java +206 -211
  58. data/ext/java/nokogiri/internals/ReaderNode.java +478 -403
  59. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -739
  60. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +31 -54
  61. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  62. data/ext/java/nokogiri/internals/XmlDeclHandler.java +3 -34
  63. data/ext/java/nokogiri/internals/XmlDomParserContext.java +206 -207
  64. data/ext/java/nokogiri/internals/XmlSaxParser.java +22 -47
  65. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  66. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  67. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  68. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  78. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  81. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  82. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  83. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  84. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  85. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  86. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  87. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  88. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  89. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  90. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  91. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  92. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  93. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -574
  94. data/ext/nokogiri/depend +34 -474
  95. data/ext/nokogiri/extconf.rb +270 -183
  96. data/ext/nokogiri/html_document.c +10 -15
  97. data/ext/nokogiri/html_element_description.c +84 -71
  98. data/ext/nokogiri/html_entity_lookup.c +21 -16
  99. data/ext/nokogiri/html_sax_parser_context.c +67 -64
  100. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  101. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  102. data/ext/nokogiri/nokogiri.c +190 -60
  103. data/ext/nokogiri/test_global_handlers.c +40 -0
  104. data/ext/nokogiri/xml_attr.c +15 -15
  105. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  106. data/ext/nokogiri/xml_cdata.c +13 -18
  107. data/ext/nokogiri/xml_comment.c +19 -26
  108. data/ext/nokogiri/xml_document.c +246 -188
  109. data/ext/nokogiri/xml_document_fragment.c +13 -15
  110. data/ext/nokogiri/xml_dtd.c +54 -48
  111. data/ext/nokogiri/xml_element_content.c +30 -27
  112. data/ext/nokogiri/xml_element_decl.c +22 -22
  113. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  114. data/ext/nokogiri/xml_entity_decl.c +32 -30
  115. data/ext/nokogiri/xml_entity_reference.c +16 -18
  116. data/ext/nokogiri/xml_namespace.c +56 -49
  117. data/ext/nokogiri/xml_node.c +371 -320
  118. data/ext/nokogiri/xml_node_set.c +168 -156
  119. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  120. data/ext/nokogiri/xml_reader.c +191 -157
  121. data/ext/nokogiri/xml_relax_ng.c +29 -23
  122. data/ext/nokogiri/xml_sax_parser.c +117 -112
  123. data/ext/nokogiri/xml_sax_parser_context.c +101 -84
  124. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  125. data/ext/nokogiri/xml_schema.c +48 -42
  126. data/ext/nokogiri/xml_syntax_error.c +42 -21
  127. data/ext/nokogiri/xml_text.c +13 -17
  128. data/ext/nokogiri/xml_xpath_context.c +134 -127
  129. data/ext/nokogiri/xslt_stylesheet.c +157 -157
  130. data/lib/nokogiri.rb +2 -6
  131. data/lib/nokogiri/css/parser.rb +1 -1
  132. data/lib/nokogiri/extension.rb +26 -0
  133. data/lib/nokogiri/html/document_fragment.rb +15 -15
  134. data/lib/nokogiri/nokogiri.jar +0 -0
  135. data/lib/nokogiri/version/constant.rb +1 -1
  136. data/lib/nokogiri/version/info.rb +32 -8
  137. data/lib/nokogiri/xml/document.rb +74 -28
  138. data/lib/nokogiri/xml/node.rb +39 -42
  139. data/lib/nokogiri/xml/reader.rb +2 -9
  140. data/lib/nokogiri/xml/xpath.rb +1 -3
  141. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  142. metadata +62 -127
  143. data/ext/nokogiri/xml_io.c +0 -63
  144. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
@@ -1,35 +1,3 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2012:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
1
  package nokogiri;
34
2
 
35
3
  import static org.jruby.runtime.Helpers.invoke;
@@ -45,35 +13,40 @@ import org.jruby.runtime.builtin.IRubyObject;
45
13
 
46
14
  /**
47
15
  * Class for Nokogiri::HTML::EntityLookup.
48
- *
16
+ *
49
17
  * @author Patrick Mahoney <pat@polycrystal.org>
50
18
  */
51
- @JRubyClass(name="Nokogiri::HTML::EntityLookup")
52
- public class HtmlEntityLookup extends RubyObject {
53
-
54
- public HtmlEntityLookup(Ruby runtime, RubyClass rubyClass) {
55
- super(runtime, rubyClass);
56
- }
57
-
58
- /**
59
- * Looks up an HTML entity <code>key</code>.
60
- *
61
- * The description is a bit lacking.
62
- */
63
- @JRubyMethod()
64
- public IRubyObject get(ThreadContext context, IRubyObject key) {
65
- Ruby ruby = context.getRuntime();
66
- String name = key.toString();
67
- int val = HTMLEntities.get(name);
68
- if (val == -1) return ruby.getNil();
69
-
70
- IRubyObject edClass =
71
- ruby.getClassFromPath("Nokogiri::HTML::EntityDescription");
72
- IRubyObject edObj = invoke(context, edClass, "new",
73
- ruby.newFixnum(val), ruby.newString(name),
74
- ruby.newString(name + " entity"));
75
-
76
- return edObj;
77
- }
19
+ @JRubyClass(name = "Nokogiri::HTML::EntityLookup")
20
+ public class HtmlEntityLookup extends RubyObject
21
+ {
22
+
23
+ public
24
+ HtmlEntityLookup(Ruby runtime, RubyClass rubyClass)
25
+ {
26
+ super(runtime, rubyClass);
27
+ }
28
+
29
+ /**
30
+ * Looks up an HTML entity <code>key</code>.
31
+ *
32
+ * The description is a bit lacking.
33
+ */
34
+ @JRubyMethod()
35
+ public IRubyObject
36
+ get(ThreadContext context, IRubyObject key)
37
+ {
38
+ Ruby ruby = context.getRuntime();
39
+ String name = key.toString();
40
+ int val = HTMLEntities.get(name);
41
+ if (val == -1) { return ruby.getNil(); }
42
+
43
+ IRubyObject edClass =
44
+ ruby.getClassFromPath("Nokogiri::HTML::EntityDescription");
45
+ IRubyObject edObj = invoke(context, edClass, "new",
46
+ ruby.newFixnum(val), ruby.newString(name),
47
+ ruby.newString(name + " entity"));
48
+
49
+ return edObj;
50
+ }
78
51
 
79
52
  }
@@ -1,35 +1,3 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2011:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
1
  package nokogiri;
34
2
 
35
3
  import java.io.ByteArrayInputStream;
@@ -63,220 +31,248 @@ import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
63
31
  * @author Yoko Harada <yokolet@gmail.com>
64
32
  */
65
33
 
66
- @JRubyClass(name="Nokogiri::HTML::SAX::ParserContext", parent="Nokogiri::XML::SAX::ParserContext")
67
- public class HtmlSaxParserContext extends XmlSaxParserContext {
34
+ @JRubyClass(name = "Nokogiri::HTML::SAX::ParserContext", parent = "Nokogiri::XML::SAX::ParserContext")
35
+ public class HtmlSaxParserContext extends XmlSaxParserContext
36
+ {
68
37
 
69
- static HtmlSaxParserContext newInstance(final Ruby runtime, final RubyClass klazz) {
70
- HtmlSaxParserContext instance = new HtmlSaxParserContext(runtime, klazz);
71
- instance.initialize(runtime);
72
- return instance;
73
- }
38
+ static HtmlSaxParserContext
39
+ newInstance(final Ruby runtime, final RubyClass klazz)
40
+ {
41
+ HtmlSaxParserContext instance = new HtmlSaxParserContext(runtime, klazz);
42
+ instance.initialize(runtime);
43
+ return instance;
44
+ }
74
45
 
75
- public HtmlSaxParserContext(Ruby ruby, RubyClass rubyClass) {
76
- super(ruby, rubyClass);
77
- }
46
+ public
47
+ HtmlSaxParserContext(Ruby ruby, RubyClass rubyClass)
48
+ {
49
+ super(ruby, rubyClass);
50
+ }
51
+
52
+ @Override
53
+ protected AbstractSAXParser
54
+ createParser() throws SAXException
55
+ {
56
+ SAXParser parser = new SAXParser();
78
57
 
79
- @Override
80
- protected AbstractSAXParser createParser() throws SAXException {
81
- SAXParser parser = new SAXParser();
82
-
83
- try{
84
- parser.setProperty(
85
- "http://cyberneko.org/html/properties/names/elems", "lower");
86
- parser.setProperty(
87
- "http://cyberneko.org/html/properties/names/attrs", "lower");
88
-
89
- // NekoHTML should not try to guess the encoding based on the meta
90
- // tags or other information in the document. This is already
91
- // handled by the EncodingReader.
92
- parser.setFeature("http://cyberneko.org/html/features/scanner/ignore-specified-charset", true);
93
- return parser;
94
- } catch(SAXException ex) {
95
- throw new SAXException(
96
- "Problem while creating HTML SAX Parser: " + ex.toString());
97
- }
58
+ try {
59
+ parser.setProperty(
60
+ "http://cyberneko.org/html/properties/names/elems", "lower");
61
+ parser.setProperty(
62
+ "http://cyberneko.org/html/properties/names/attrs", "lower");
63
+
64
+ // NekoHTML should not try to guess the encoding based on the meta
65
+ // tags or other information in the document. This is already
66
+ // handled by the EncodingReader.
67
+ parser.setFeature("http://cyberneko.org/html/features/scanner/ignore-specified-charset", true);
68
+ return parser;
69
+ } catch (SAXException ex) {
70
+ throw new SAXException(
71
+ "Problem while creating HTML SAX Parser: " + ex.toString());
98
72
  }
73
+ }
99
74
 
100
- @JRubyMethod(name="memory", meta=true)
101
- public static IRubyObject parse_memory(ThreadContext context,
102
- IRubyObject klazz,
103
- IRubyObject data,
104
- IRubyObject encoding) {
105
- HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klazz);
106
- String javaEncoding = findEncodingName(context, encoding);
107
- if (javaEncoding != null) {
108
- CharSequence input = applyEncoding(rubyStringToString(data.convertToString()), javaEncoding);
109
- ByteArrayInputStream istream = new ByteArrayInputStream(input.toString().getBytes());
110
- ctx.setInputSource(istream);
111
- ctx.getInputSource().setEncoding(javaEncoding);
112
- }
113
- return ctx;
75
+ @JRubyMethod(name = "memory", meta = true)
76
+ public static IRubyObject
77
+ parse_memory(ThreadContext context,
78
+ IRubyObject klazz,
79
+ IRubyObject data,
80
+ IRubyObject encoding)
81
+ {
82
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klazz);
83
+ String javaEncoding = findEncodingName(context, encoding);
84
+ if (javaEncoding != null) {
85
+ CharSequence input = applyEncoding(rubyStringToString(data.convertToString()), javaEncoding);
86
+ ByteArrayInputStream istream = new ByteArrayInputStream(input.toString().getBytes());
87
+ ctx.setInputSource(istream);
88
+ ctx.getInputSource().setEncoding(javaEncoding);
114
89
  }
90
+ return ctx;
91
+ }
92
+
93
+ public enum EncodingType {
94
+ NONE(0, "NONE"),
95
+ UTF_8(1, "UTF-8"),
96
+ UTF16LE(2, "UTF16LE"),
97
+ UTF16BE(3, "UTF16BE"),
98
+ UCS4LE(4, "UCS4LE"),
99
+ UCS4BE(5, "UCS4BE"),
100
+ EBCDIC(6, "EBCDIC"),
101
+ UCS4_2143(7, "ICS4-2143"),
102
+ UCS4_3412(8, "UCS4-3412"),
103
+ UCS2(9, "UCS2"),
104
+ ISO_8859_1(10, "ISO-8859-1"),
105
+ ISO_8859_2(11, "ISO-8859-2"),
106
+ ISO_8859_3(12, "ISO-8859-3"),
107
+ ISO_8859_4(13, "ISO-8859-4"),
108
+ ISO_8859_5(14, "ISO-8859-5"),
109
+ ISO_8859_6(15, "ISO-8859-6"),
110
+ ISO_8859_7(16, "ISO-8859-7"),
111
+ ISO_8859_8(17, "ISO-8859-8"),
112
+ ISO_8859_9(18, "ISO-8859-9"),
113
+ ISO_2022_JP(19, "ISO-2022-JP"),
114
+ SHIFT_JIS(20, "SHIFT-JIS"),
115
+ EUC_JP(21, "EUC-JP"),
116
+ ASCII(22, "ASCII");
115
117
 
116
- public enum EncodingType {
117
- NONE(0, "NONE"),
118
- UTF_8(1, "UTF-8"),
119
- UTF16LE(2, "UTF16LE"),
120
- UTF16BE(3, "UTF16BE"),
121
- UCS4LE(4, "UCS4LE"),
122
- UCS4BE(5, "UCS4BE"),
123
- EBCDIC(6, "EBCDIC"),
124
- UCS4_2143(7, "ICS4-2143"),
125
- UCS4_3412(8, "UCS4-3412"),
126
- UCS2(9, "UCS2"),
127
- ISO_8859_1(10, "ISO-8859-1"),
128
- ISO_8859_2(11, "ISO-8859-2"),
129
- ISO_8859_3(12, "ISO-8859-3"),
130
- ISO_8859_4(13, "ISO-8859-4"),
131
- ISO_8859_5(14, "ISO-8859-5"),
132
- ISO_8859_6(15, "ISO-8859-6"),
133
- ISO_8859_7(16, "ISO-8859-7"),
134
- ISO_8859_8(17, "ISO-8859-8"),
135
- ISO_8859_9(18, "ISO-8859-9"),
136
- ISO_2022_JP(19, "ISO-2022-JP"),
137
- SHIFT_JIS(20, "SHIFT-JIS"),
138
- EUC_JP(21, "EUC-JP"),
139
- ASCII(22, "ASCII");
140
-
141
- private final int value;
142
- private final String name;
143
-
144
- EncodingType(int value, String name) {
145
- this.value = value;
146
- this.name = name;
147
- }
148
-
149
- public int getValue() {
150
- return value;
151
- }
152
-
153
- public String toString() {
154
- return name;
155
- }
156
-
157
- private static transient EncodingType[] values;
158
-
159
- // NOTE: assuming ordinal == value
160
- static EncodingType get(final int ordinal) {
161
- EncodingType[] values = EncodingType.values;
162
- if (values == null) {
163
- values = EncodingType.values();
164
- EncodingType.values = values;
165
- }
166
- if (ordinal >= 0 && ordinal < values.length) {
167
- return values[ordinal];
168
- }
169
- return null;
170
- }
118
+ private final int value;
119
+ private final String name;
171
120
 
121
+ EncodingType(int value, String name)
122
+ {
123
+ this.value = value;
124
+ this.name = name;
172
125
  }
173
126
 
174
- private static String findEncodingName(final int value) {
175
- EncodingType type = EncodingType.get(value);
176
- if (type == null) return null;
177
- assert type.value == value;
178
- return type.name;
127
+ public int getValue()
128
+ {
129
+ return value;
179
130
  }
180
131
 
181
- private static String findEncodingName(ThreadContext context, IRubyObject encoding) {
182
- String rubyEncoding = null;
183
- if (encoding instanceof RubyString) {
184
- rubyEncoding = rubyStringToString((RubyString) encoding);
185
- }
186
- else if (encoding instanceof RubyFixnum) {
187
- rubyEncoding = findEncodingName(RubyFixnum.fix2int((RubyFixnum) encoding));
188
- }
189
- if (rubyEncoding == null) return null;
190
- try {
191
- return Charset.forName(rubyEncoding).displayName();
192
- }
193
- catch (UnsupportedCharsetException e) {
194
- throw context.getRuntime().newEncodingCompatibilityError(rubyEncoding + "is not supported");
195
- }
196
- catch (IllegalCharsetNameException e) {
197
- throw context.getRuntime().newEncodingError(e.getMessage());
198
- }
132
+ public String toString()
133
+ {
134
+ return name;
199
135
  }
200
136
 
201
- private static final Pattern CHARSET_PATTERN = Pattern.compile("charset(()|\\s)=(()|\\s)([a-z]|-|_|\\d)+", Pattern.CASE_INSENSITIVE);
202
-
203
- private static CharSequence applyEncoding(final String input, final String enc) {
204
- int start_pos = 0; int end_pos = 0;
205
- if (containsIgnoreCase(input, "charset")) {
206
- Matcher m = CHARSET_PATTERN.matcher(input);
207
- while (m.find()) {
208
- start_pos = m.start();
209
- end_pos = m.end();
210
- }
211
- }
212
- if (start_pos != end_pos) {
213
- return new StringBuilder(input).replace(start_pos, end_pos, "charset=" + enc);
214
- }
215
- return input;
137
+ private static transient EncodingType[] values;
138
+
139
+ // NOTE: assuming ordinal == value
140
+ static EncodingType get(final int ordinal)
141
+ {
142
+ EncodingType[] values = EncodingType.values;
143
+ if (values == null) {
144
+ values = EncodingType.values();
145
+ EncodingType.values = values;
146
+ }
147
+ if (ordinal >= 0 && ordinal < values.length) {
148
+ return values[ordinal];
149
+ }
150
+ return null;
216
151
  }
217
152
 
218
- private static boolean containsIgnoreCase(final String str, final String sub) {
219
- final int len = sub.length();
220
- final int max = str.length() - len;
221
-
222
- if (len == 0) return true;
223
- final char c0Lower = Character.toLowerCase(sub.charAt(0));
224
- final char c0Upper = Character.toUpperCase(sub.charAt(0));
225
-
226
- for (int i = 0; i <= max; i++) {
227
- final char ch = str.charAt(i);
228
- if (ch != c0Lower && Character.toLowerCase(ch) != c0Lower && Character.toUpperCase(ch) != c0Upper) {
229
- continue; // first char doesn't match
230
- }
231
-
232
- if (str.regionMatches(true, i + 1, sub, 0 + 1, len - 1)) {
233
- return true;
234
- }
235
- }
236
- return false;
153
+ }
154
+
155
+ private static String
156
+ findEncodingName(final int value)
157
+ {
158
+ EncodingType type = EncodingType.get(value);
159
+ if (type == null) { return null; }
160
+ assert type.value == value;
161
+ return type.name;
162
+ }
163
+
164
+ private static String
165
+ findEncodingName(ThreadContext context, IRubyObject encoding)
166
+ {
167
+ String rubyEncoding = null;
168
+ if (encoding instanceof RubyString) {
169
+ rubyEncoding = rubyStringToString((RubyString) encoding);
170
+ } else if (encoding instanceof RubyFixnum) {
171
+ rubyEncoding = findEncodingName(RubyFixnum.fix2int((RubyFixnum) encoding));
237
172
  }
173
+ if (rubyEncoding == null) { return null; }
174
+ try {
175
+ return Charset.forName(rubyEncoding).displayName();
176
+ } catch (UnsupportedCharsetException e) {
177
+ throw context.getRuntime().newEncodingCompatibilityError(rubyEncoding + "is not supported");
178
+ } catch (IllegalCharsetNameException e) {
179
+ throw context.getRuntime().newEncodingError(e.getMessage());
180
+ }
181
+ }
182
+
183
+ private static final Pattern CHARSET_PATTERN = Pattern.compile("charset(()|\\s)=(()|\\s)([a-z]|-|_|\\d)+",
184
+ Pattern.CASE_INSENSITIVE);
238
185
 
239
- @JRubyMethod(name="file", meta=true)
240
- public static IRubyObject parse_file(ThreadContext context,
241
- IRubyObject klass,
242
- IRubyObject data,
243
- IRubyObject encoding) {
244
- HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
245
- ctx.setInputSourceFile(context, data);
246
- String javaEncoding = findEncodingName(context, encoding);
247
- if (javaEncoding != null) {
248
- ctx.getInputSource().setEncoding(javaEncoding);
249
- }
250
- return ctx;
186
+ private static CharSequence
187
+ applyEncoding(final String input, final String enc)
188
+ {
189
+ int start_pos = 0;
190
+ int end_pos = 0;
191
+ if (containsIgnoreCase(input, "charset")) {
192
+ Matcher m = CHARSET_PATTERN.matcher(input);
193
+ while (m.find()) {
194
+ start_pos = m.start();
195
+ end_pos = m.end();
196
+ }
197
+ }
198
+ if (start_pos != end_pos) {
199
+ return new StringBuilder(input).replace(start_pos, end_pos, "charset=" + enc);
251
200
  }
201
+ return input;
202
+ }
252
203
 
253
- @JRubyMethod(name="io", meta=true)
254
- public static IRubyObject parse_io(ThreadContext context,
255
- IRubyObject klass,
256
- IRubyObject data,
257
- IRubyObject encoding) {
258
- HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
259
- ctx.setIOInputSource(context, data, context.nil);
260
- String javaEncoding = findEncodingName(context, encoding);
261
- if (javaEncoding != null) {
262
- ctx.getInputSource().setEncoding(javaEncoding);
263
- }
264
- return ctx;
204
+ private static boolean
205
+ containsIgnoreCase(final String str, final String sub)
206
+ {
207
+ final int len = sub.length();
208
+ final int max = str.length() - len;
209
+
210
+ if (len == 0) { return true; }
211
+ final char c0Lower = Character.toLowerCase(sub.charAt(0));
212
+ final char c0Upper = Character.toUpperCase(sub.charAt(0));
213
+
214
+ for (int i = 0; i <= max; i++) {
215
+ final char ch = str.charAt(i);
216
+ if (ch != c0Lower && Character.toLowerCase(ch) != c0Lower && Character.toUpperCase(ch) != c0Upper) {
217
+ continue; // first char doesn't match
218
+ }
219
+
220
+ if (str.regionMatches(true, i + 1, sub, 0 + 1, len - 1)) {
221
+ return true;
222
+ }
265
223
  }
224
+ return false;
225
+ }
266
226
 
267
- /**
268
- * Create a new parser context that will read from a raw input stream.
269
- * Meant to be run in a separate thread by HtmlSaxPushParser.
270
- */
271
- static HtmlSaxParserContext parse_stream(final Ruby runtime, RubyClass klass, InputStream stream) {
272
- HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(runtime, klass);
273
- ctx.setInputSource(stream);
274
- return ctx;
227
+ @JRubyMethod(name = "file", meta = true)
228
+ public static IRubyObject
229
+ parse_file(ThreadContext context,
230
+ IRubyObject klass,
231
+ IRubyObject data,
232
+ IRubyObject encoding)
233
+ {
234
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
235
+ ctx.setInputSourceFile(context, data);
236
+ String javaEncoding = findEncodingName(context, encoding);
237
+ if (javaEncoding != null) {
238
+ ctx.getInputSource().setEncoding(javaEncoding);
275
239
  }
240
+ return ctx;
241
+ }
276
242
 
277
- @Override
278
- protected void preParse(final Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler) {
279
- // this function is meant to be empty. It overrides the one in XmlSaxParserContext
243
+ @JRubyMethod(name = "io", meta = true)
244
+ public static IRubyObject
245
+ parse_io(ThreadContext context,
246
+ IRubyObject klass,
247
+ IRubyObject data,
248
+ IRubyObject encoding)
249
+ {
250
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
251
+ ctx.setIOInputSource(context, data, context.nil);
252
+ String javaEncoding = findEncodingName(context, encoding);
253
+ if (javaEncoding != null) {
254
+ ctx.getInputSource().setEncoding(javaEncoding);
280
255
  }
256
+ return ctx;
257
+ }
258
+
259
+ /**
260
+ * Create a new parser context that will read from a raw input stream.
261
+ * Meant to be run in a separate thread by HtmlSaxPushParser.
262
+ */
263
+ static HtmlSaxParserContext
264
+ parse_stream(final Ruby runtime, RubyClass klass, InputStream stream)
265
+ {
266
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(runtime, klass);
267
+ ctx.setInputSource(stream);
268
+ return ctx;
269
+ }
270
+
271
+ @Override
272
+ protected void
273
+ preParse(final Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler)
274
+ {
275
+ // this function is meant to be empty. It overrides the one in XmlSaxParserContext
276
+ }
281
277
 
282
278
  }