nokogiri 1.11.0.rc2-java → 1.11.3-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/LICENSE.md +1 -1
  5. data/README.md +171 -94
  6. data/ext/java/nokogiri/EncodingHandler.java +76 -89
  7. data/ext/java/nokogiri/HtmlDocument.java +135 -144
  8. data/ext/java/nokogiri/HtmlElementDescription.java +102 -117
  9. data/ext/java/nokogiri/HtmlEntityLookup.java +33 -60
  10. data/ext/java/nokogiri/HtmlSaxParserContext.java +218 -222
  11. data/ext/java/nokogiri/HtmlSaxPushParser.java +162 -169
  12. data/ext/java/nokogiri/NokogiriService.java +595 -556
  13. data/ext/java/nokogiri/XmlAttr.java +118 -126
  14. data/ext/java/nokogiri/XmlAttributeDecl.java +95 -106
  15. data/ext/java/nokogiri/XmlCdata.java +35 -58
  16. data/ext/java/nokogiri/XmlComment.java +46 -67
  17. data/ext/java/nokogiri/XmlDocument.java +645 -572
  18. data/ext/java/nokogiri/XmlDocumentFragment.java +125 -137
  19. data/ext/java/nokogiri/XmlDtd.java +448 -414
  20. data/ext/java/nokogiri/XmlElement.java +23 -48
  21. data/ext/java/nokogiri/XmlElementContent.java +343 -316
  22. data/ext/java/nokogiri/XmlElementDecl.java +124 -125
  23. data/ext/java/nokogiri/XmlEntityDecl.java +119 -127
  24. data/ext/java/nokogiri/XmlEntityReference.java +49 -72
  25. data/ext/java/nokogiri/XmlNamespace.java +175 -175
  26. data/ext/java/nokogiri/XmlNode.java +1843 -1622
  27. data/ext/java/nokogiri/XmlNodeSet.java +361 -331
  28. data/ext/java/nokogiri/XmlProcessingInstruction.java +47 -69
  29. data/ext/java/nokogiri/XmlReader.java +513 -450
  30. data/ext/java/nokogiri/XmlRelaxng.java +89 -101
  31. data/ext/java/nokogiri/XmlSaxParserContext.java +328 -310
  32. data/ext/java/nokogiri/XmlSaxPushParser.java +227 -220
  33. data/ext/java/nokogiri/XmlSchema.java +335 -242
  34. data/ext/java/nokogiri/XmlSyntaxError.java +113 -119
  35. data/ext/java/nokogiri/XmlText.java +55 -76
  36. data/ext/java/nokogiri/XmlXpathContext.java +242 -210
  37. data/ext/java/nokogiri/XsltStylesheet.java +280 -269
  38. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  39. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +201 -190
  40. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  41. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  42. data/ext/java/nokogiri/internals/NokogiriDomParser.java +63 -80
  43. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  44. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +27 -52
  45. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -286
  46. data/ext/java/nokogiri/internals/NokogiriHelpers.java +736 -652
  47. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +184 -173
  48. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +79 -89
  49. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +64 -79
  50. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +84 -99
  51. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +48 -65
  52. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +119 -78
  53. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -54
  54. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +23 -46
  55. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +55 -72
  56. data/ext/java/nokogiri/internals/ParserContext.java +206 -211
  57. data/ext/java/nokogiri/internals/ReaderNode.java +478 -403
  58. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -739
  59. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +31 -54
  60. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  61. data/ext/java/nokogiri/internals/XmlDeclHandler.java +3 -34
  62. data/ext/java/nokogiri/internals/XmlDomParserContext.java +206 -207
  63. data/ext/java/nokogiri/internals/XmlSaxParser.java +22 -47
  64. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  65. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  66. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  67. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  68. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  78. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  81. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  82. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  83. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  84. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  85. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  86. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  87. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  88. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  89. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  90. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  91. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  92. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -574
  93. data/ext/nokogiri/depend +37 -358
  94. data/ext/nokogiri/extconf.rb +581 -374
  95. data/ext/nokogiri/html_document.c +78 -82
  96. data/ext/nokogiri/html_element_description.c +84 -71
  97. data/ext/nokogiri/html_entity_lookup.c +21 -16
  98. data/ext/nokogiri/html_sax_parser_context.c +69 -66
  99. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  100. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  101. data/ext/nokogiri/nokogiri.c +192 -93
  102. data/ext/nokogiri/test_global_handlers.c +40 -0
  103. data/ext/nokogiri/xml_attr.c +15 -15
  104. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  105. data/ext/nokogiri/xml_cdata.c +13 -18
  106. data/ext/nokogiri/xml_comment.c +19 -26
  107. data/ext/nokogiri/xml_document.c +250 -187
  108. data/ext/nokogiri/xml_document_fragment.c +13 -15
  109. data/ext/nokogiri/xml_dtd.c +54 -48
  110. data/ext/nokogiri/xml_element_content.c +30 -27
  111. data/ext/nokogiri/xml_element_decl.c +22 -22
  112. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  113. data/ext/nokogiri/xml_entity_decl.c +32 -30
  114. data/ext/nokogiri/xml_entity_reference.c +16 -18
  115. data/ext/nokogiri/xml_namespace.c +56 -49
  116. data/ext/nokogiri/xml_node.c +371 -320
  117. data/ext/nokogiri/xml_node_set.c +168 -156
  118. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  119. data/ext/nokogiri/xml_reader.c +191 -157
  120. data/ext/nokogiri/xml_relax_ng.c +52 -28
  121. data/ext/nokogiri/xml_sax_parser.c +118 -118
  122. data/ext/nokogiri/xml_sax_parser_context.c +103 -86
  123. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  124. data/ext/nokogiri/xml_schema.c +95 -47
  125. data/ext/nokogiri/xml_syntax_error.c +42 -21
  126. data/ext/nokogiri/xml_text.c +13 -17
  127. data/ext/nokogiri/xml_xpath_context.c +206 -123
  128. data/ext/nokogiri/xslt_stylesheet.c +158 -161
  129. data/lib/nokogiri.rb +4 -8
  130. data/lib/nokogiri/css/parser.rb +62 -62
  131. data/lib/nokogiri/css/parser.y +2 -2
  132. data/lib/nokogiri/css/xpath_visitor.rb +70 -42
  133. data/lib/nokogiri/extension.rb +26 -0
  134. data/lib/nokogiri/html/document.rb +12 -26
  135. data/lib/nokogiri/html/document_fragment.rb +15 -15
  136. data/lib/nokogiri/nokogiri.jar +0 -0
  137. data/lib/nokogiri/version.rb +2 -148
  138. data/lib/nokogiri/version/constant.rb +5 -0
  139. data/lib/nokogiri/version/info.rb +205 -0
  140. data/lib/nokogiri/xml/builder.rb +2 -2
  141. data/lib/nokogiri/xml/document.rb +91 -35
  142. data/lib/nokogiri/xml/document_fragment.rb +4 -6
  143. data/lib/nokogiri/xml/node.rb +89 -69
  144. data/lib/nokogiri/xml/parse_options.rb +6 -0
  145. data/lib/nokogiri/xml/reader.rb +2 -9
  146. data/lib/nokogiri/xml/relax_ng.rb +6 -2
  147. data/lib/nokogiri/xml/schema.rb +12 -4
  148. data/lib/nokogiri/xml/searchable.rb +3 -1
  149. data/lib/nokogiri/xml/xpath.rb +1 -3
  150. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  151. metadata +87 -164
  152. data/ext/nokogiri/html_document.h +0 -10
  153. data/ext/nokogiri/html_element_description.h +0 -10
  154. data/ext/nokogiri/html_entity_lookup.h +0 -8
  155. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  156. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  157. data/ext/nokogiri/nokogiri.h +0 -134
  158. data/ext/nokogiri/xml_attr.h +0 -9
  159. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  160. data/ext/nokogiri/xml_cdata.h +0 -9
  161. data/ext/nokogiri/xml_comment.h +0 -9
  162. data/ext/nokogiri/xml_document.h +0 -23
  163. data/ext/nokogiri/xml_document_fragment.h +0 -10
  164. data/ext/nokogiri/xml_dtd.h +0 -10
  165. data/ext/nokogiri/xml_element_content.h +0 -10
  166. data/ext/nokogiri/xml_element_decl.h +0 -9
  167. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  168. data/ext/nokogiri/xml_entity_decl.h +0 -10
  169. data/ext/nokogiri/xml_entity_reference.h +0 -9
  170. data/ext/nokogiri/xml_io.c +0 -61
  171. data/ext/nokogiri/xml_io.h +0 -11
  172. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  173. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  174. data/ext/nokogiri/xml_namespace.h +0 -14
  175. data/ext/nokogiri/xml_node.h +0 -13
  176. data/ext/nokogiri/xml_node_set.h +0 -12
  177. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  178. data/ext/nokogiri/xml_reader.h +0 -10
  179. data/ext/nokogiri/xml_relax_ng.h +0 -9
  180. data/ext/nokogiri/xml_sax_parser.h +0 -39
  181. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  182. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  183. data/ext/nokogiri/xml_schema.h +0 -9
  184. data/ext/nokogiri/xml_syntax_error.h +0 -13
  185. data/ext/nokogiri/xml_text.h +0 -9
  186. data/ext/nokogiri/xml_xpath_context.h +0 -10
  187. data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -1,35 +1,3 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2012:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
1
  package nokogiri;
34
2
 
35
3
  import static org.jruby.runtime.Helpers.invoke;
@@ -45,35 +13,40 @@ import org.jruby.runtime.builtin.IRubyObject;
45
13
 
46
14
  /**
47
15
  * Class for Nokogiri::HTML::EntityLookup.
48
- *
16
+ *
49
17
  * @author Patrick Mahoney <pat@polycrystal.org>
50
18
  */
51
- @JRubyClass(name="Nokogiri::HTML::EntityLookup")
52
- public class HtmlEntityLookup extends RubyObject {
53
-
54
- public HtmlEntityLookup(Ruby runtime, RubyClass rubyClass) {
55
- super(runtime, rubyClass);
56
- }
57
-
58
- /**
59
- * Looks up an HTML entity <code>key</code>.
60
- *
61
- * The description is a bit lacking.
62
- */
63
- @JRubyMethod()
64
- public IRubyObject get(ThreadContext context, IRubyObject key) {
65
- Ruby ruby = context.getRuntime();
66
- String name = key.toString();
67
- int val = HTMLEntities.get(name);
68
- if (val == -1) return ruby.getNil();
69
-
70
- IRubyObject edClass =
71
- ruby.getClassFromPath("Nokogiri::HTML::EntityDescription");
72
- IRubyObject edObj = invoke(context, edClass, "new",
73
- ruby.newFixnum(val), ruby.newString(name),
74
- ruby.newString(name + " entity"));
75
-
76
- return edObj;
77
- }
19
+ @JRubyClass(name = "Nokogiri::HTML::EntityLookup")
20
+ public class HtmlEntityLookup extends RubyObject
21
+ {
22
+
23
+ public
24
+ HtmlEntityLookup(Ruby runtime, RubyClass rubyClass)
25
+ {
26
+ super(runtime, rubyClass);
27
+ }
28
+
29
+ /**
30
+ * Looks up an HTML entity <code>key</code>.
31
+ *
32
+ * The description is a bit lacking.
33
+ */
34
+ @JRubyMethod()
35
+ public IRubyObject
36
+ get(ThreadContext context, IRubyObject key)
37
+ {
38
+ Ruby ruby = context.getRuntime();
39
+ String name = key.toString();
40
+ int val = HTMLEntities.get(name);
41
+ if (val == -1) { return ruby.getNil(); }
42
+
43
+ IRubyObject edClass =
44
+ ruby.getClassFromPath("Nokogiri::HTML::EntityDescription");
45
+ IRubyObject edObj = invoke(context, edClass, "new",
46
+ ruby.newFixnum(val), ruby.newString(name),
47
+ ruby.newString(name + " entity"));
48
+
49
+ return edObj;
50
+ }
78
51
 
79
52
  }
@@ -1,35 +1,3 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2011:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
1
  package nokogiri;
34
2
 
35
3
  import java.io.ByteArrayInputStream;
@@ -63,220 +31,248 @@ import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
63
31
  * @author Yoko Harada <yokolet@gmail.com>
64
32
  */
65
33
 
66
- @JRubyClass(name="Nokogiri::HTML::SAX::ParserContext", parent="Nokogiri::XML::SAX::ParserContext")
67
- public class HtmlSaxParserContext extends XmlSaxParserContext {
34
+ @JRubyClass(name = "Nokogiri::HTML::SAX::ParserContext", parent = "Nokogiri::XML::SAX::ParserContext")
35
+ public class HtmlSaxParserContext extends XmlSaxParserContext
36
+ {
68
37
 
69
- static HtmlSaxParserContext newInstance(final Ruby runtime, final RubyClass klazz) {
70
- HtmlSaxParserContext instance = new HtmlSaxParserContext(runtime, klazz);
71
- instance.initialize(runtime);
72
- return instance;
73
- }
38
+ static HtmlSaxParserContext
39
+ newInstance(final Ruby runtime, final RubyClass klazz)
40
+ {
41
+ HtmlSaxParserContext instance = new HtmlSaxParserContext(runtime, klazz);
42
+ instance.initialize(runtime);
43
+ return instance;
44
+ }
74
45
 
75
- public HtmlSaxParserContext(Ruby ruby, RubyClass rubyClass) {
76
- super(ruby, rubyClass);
77
- }
46
+ public
47
+ HtmlSaxParserContext(Ruby ruby, RubyClass rubyClass)
48
+ {
49
+ super(ruby, rubyClass);
50
+ }
51
+
52
+ @Override
53
+ protected AbstractSAXParser
54
+ createParser() throws SAXException
55
+ {
56
+ SAXParser parser = new SAXParser();
78
57
 
79
- @Override
80
- protected AbstractSAXParser createParser() throws SAXException {
81
- SAXParser parser = new SAXParser();
82
-
83
- try{
84
- parser.setProperty(
85
- "http://cyberneko.org/html/properties/names/elems", "lower");
86
- parser.setProperty(
87
- "http://cyberneko.org/html/properties/names/attrs", "lower");
88
-
89
- // NekoHTML should not try to guess the encoding based on the meta
90
- // tags or other information in the document. This is already
91
- // handled by the EncodingReader.
92
- parser.setFeature("http://cyberneko.org/html/features/scanner/ignore-specified-charset", true);
93
- return parser;
94
- } catch(SAXException ex) {
95
- throw new SAXException(
96
- "Problem while creating HTML SAX Parser: " + ex.toString());
97
- }
58
+ try {
59
+ parser.setProperty(
60
+ "http://cyberneko.org/html/properties/names/elems", "lower");
61
+ parser.setProperty(
62
+ "http://cyberneko.org/html/properties/names/attrs", "lower");
63
+
64
+ // NekoHTML should not try to guess the encoding based on the meta
65
+ // tags or other information in the document. This is already
66
+ // handled by the EncodingReader.
67
+ parser.setFeature("http://cyberneko.org/html/features/scanner/ignore-specified-charset", true);
68
+ return parser;
69
+ } catch (SAXException ex) {
70
+ throw new SAXException(
71
+ "Problem while creating HTML SAX Parser: " + ex.toString());
98
72
  }
73
+ }
99
74
 
100
- @JRubyMethod(name="memory", meta=true)
101
- public static IRubyObject parse_memory(ThreadContext context,
102
- IRubyObject klazz,
103
- IRubyObject data,
104
- IRubyObject encoding) {
105
- HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klazz);
106
- String javaEncoding = findEncodingName(context, encoding);
107
- if (javaEncoding != null) {
108
- CharSequence input = applyEncoding(rubyStringToString(data.convertToString()), javaEncoding);
109
- ByteArrayInputStream istream = new ByteArrayInputStream(input.toString().getBytes());
110
- ctx.setInputSource(istream);
111
- ctx.getInputSource().setEncoding(javaEncoding);
112
- }
113
- return ctx;
75
+ @JRubyMethod(name = "memory", meta = true)
76
+ public static IRubyObject
77
+ parse_memory(ThreadContext context,
78
+ IRubyObject klazz,
79
+ IRubyObject data,
80
+ IRubyObject encoding)
81
+ {
82
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klazz);
83
+ String javaEncoding = findEncodingName(context, encoding);
84
+ if (javaEncoding != null) {
85
+ CharSequence input = applyEncoding(rubyStringToString(data.convertToString()), javaEncoding);
86
+ ByteArrayInputStream istream = new ByteArrayInputStream(input.toString().getBytes());
87
+ ctx.setInputSource(istream);
88
+ ctx.getInputSource().setEncoding(javaEncoding);
114
89
  }
90
+ return ctx;
91
+ }
92
+
93
+ public enum EncodingType {
94
+ NONE(0, "NONE"),
95
+ UTF_8(1, "UTF-8"),
96
+ UTF16LE(2, "UTF16LE"),
97
+ UTF16BE(3, "UTF16BE"),
98
+ UCS4LE(4, "UCS4LE"),
99
+ UCS4BE(5, "UCS4BE"),
100
+ EBCDIC(6, "EBCDIC"),
101
+ UCS4_2143(7, "ICS4-2143"),
102
+ UCS4_3412(8, "UCS4-3412"),
103
+ UCS2(9, "UCS2"),
104
+ ISO_8859_1(10, "ISO-8859-1"),
105
+ ISO_8859_2(11, "ISO-8859-2"),
106
+ ISO_8859_3(12, "ISO-8859-3"),
107
+ ISO_8859_4(13, "ISO-8859-4"),
108
+ ISO_8859_5(14, "ISO-8859-5"),
109
+ ISO_8859_6(15, "ISO-8859-6"),
110
+ ISO_8859_7(16, "ISO-8859-7"),
111
+ ISO_8859_8(17, "ISO-8859-8"),
112
+ ISO_8859_9(18, "ISO-8859-9"),
113
+ ISO_2022_JP(19, "ISO-2022-JP"),
114
+ SHIFT_JIS(20, "SHIFT-JIS"),
115
+ EUC_JP(21, "EUC-JP"),
116
+ ASCII(22, "ASCII");
115
117
 
116
- public enum EncodingType {
117
- NONE(0, "NONE"),
118
- UTF_8(1, "UTF-8"),
119
- UTF16LE(2, "UTF16LE"),
120
- UTF16BE(3, "UTF16BE"),
121
- UCS4LE(4, "UCS4LE"),
122
- UCS4BE(5, "UCS4BE"),
123
- EBCDIC(6, "EBCDIC"),
124
- UCS4_2143(7, "ICS4-2143"),
125
- UCS4_3412(8, "UCS4-3412"),
126
- UCS2(9, "UCS2"),
127
- ISO_8859_1(10, "ISO-8859-1"),
128
- ISO_8859_2(11, "ISO-8859-2"),
129
- ISO_8859_3(12, "ISO-8859-3"),
130
- ISO_8859_4(13, "ISO-8859-4"),
131
- ISO_8859_5(14, "ISO-8859-5"),
132
- ISO_8859_6(15, "ISO-8859-6"),
133
- ISO_8859_7(16, "ISO-8859-7"),
134
- ISO_8859_8(17, "ISO-8859-8"),
135
- ISO_8859_9(18, "ISO-8859-9"),
136
- ISO_2022_JP(19, "ISO-2022-JP"),
137
- SHIFT_JIS(20, "SHIFT-JIS"),
138
- EUC_JP(21, "EUC-JP"),
139
- ASCII(22, "ASCII");
140
-
141
- private final int value;
142
- private final String name;
143
-
144
- EncodingType(int value, String name) {
145
- this.value = value;
146
- this.name = name;
147
- }
148
-
149
- public int getValue() {
150
- return value;
151
- }
152
-
153
- public String toString() {
154
- return name;
155
- }
156
-
157
- private static transient EncodingType[] values;
158
-
159
- // NOTE: assuming ordinal == value
160
- static EncodingType get(final int ordinal) {
161
- EncodingType[] values = EncodingType.values;
162
- if (values == null) {
163
- values = EncodingType.values();
164
- EncodingType.values = values;
165
- }
166
- if (ordinal >= 0 && ordinal < values.length) {
167
- return values[ordinal];
168
- }
169
- return null;
170
- }
118
+ private final int value;
119
+ private final String name;
171
120
 
121
+ EncodingType(int value, String name)
122
+ {
123
+ this.value = value;
124
+ this.name = name;
172
125
  }
173
126
 
174
- private static String findEncodingName(final int value) {
175
- EncodingType type = EncodingType.get(value);
176
- if (type == null) return null;
177
- assert type.value == value;
178
- return type.name;
127
+ public int getValue()
128
+ {
129
+ return value;
179
130
  }
180
131
 
181
- private static String findEncodingName(ThreadContext context, IRubyObject encoding) {
182
- String rubyEncoding = null;
183
- if (encoding instanceof RubyString) {
184
- rubyEncoding = rubyStringToString((RubyString) encoding);
185
- }
186
- else if (encoding instanceof RubyFixnum) {
187
- rubyEncoding = findEncodingName(RubyFixnum.fix2int((RubyFixnum) encoding));
188
- }
189
- if (rubyEncoding == null) return null;
190
- try {
191
- return Charset.forName(rubyEncoding).displayName();
192
- }
193
- catch (UnsupportedCharsetException e) {
194
- throw context.getRuntime().newEncodingCompatibilityError(rubyEncoding + "is not supported");
195
- }
196
- catch (IllegalCharsetNameException e) {
197
- throw context.getRuntime().newInvalidEncoding(e.getMessage());
198
- }
132
+ public String toString()
133
+ {
134
+ return name;
199
135
  }
200
136
 
201
- private static final Pattern CHARSET_PATTERN = Pattern.compile("charset(()|\\s)=(()|\\s)([a-z]|-|_|\\d)+", Pattern.CASE_INSENSITIVE);
202
-
203
- private static CharSequence applyEncoding(final String input, final String enc) {
204
- int start_pos = 0; int end_pos = 0;
205
- if (containsIgnoreCase(input, "charset")) {
206
- Matcher m = CHARSET_PATTERN.matcher(input);
207
- while (m.find()) {
208
- start_pos = m.start();
209
- end_pos = m.end();
210
- }
211
- }
212
- if (start_pos != end_pos) {
213
- return new StringBuilder(input).replace(start_pos, end_pos, "charset=" + enc);
214
- }
215
- return input;
137
+ private static transient EncodingType[] values;
138
+
139
+ // NOTE: assuming ordinal == value
140
+ static EncodingType get(final int ordinal)
141
+ {
142
+ EncodingType[] values = EncodingType.values;
143
+ if (values == null) {
144
+ values = EncodingType.values();
145
+ EncodingType.values = values;
146
+ }
147
+ if (ordinal >= 0 && ordinal < values.length) {
148
+ return values[ordinal];
149
+ }
150
+ return null;
216
151
  }
217
152
 
218
- private static boolean containsIgnoreCase(final String str, final String sub) {
219
- final int len = sub.length();
220
- final int max = str.length() - len;
221
-
222
- if (len == 0) return true;
223
- final char c0Lower = Character.toLowerCase(sub.charAt(0));
224
- final char c0Upper = Character.toUpperCase(sub.charAt(0));
225
-
226
- for (int i = 0; i <= max; i++) {
227
- final char ch = str.charAt(i);
228
- if (ch != c0Lower && Character.toLowerCase(ch) != c0Lower && Character.toUpperCase(ch) != c0Upper) {
229
- continue; // first char doesn't match
230
- }
231
-
232
- if (str.regionMatches(true, i + 1, sub, 0 + 1, len - 1)) {
233
- return true;
234
- }
235
- }
236
- return false;
153
+ }
154
+
155
+ private static String
156
+ findEncodingName(final int value)
157
+ {
158
+ EncodingType type = EncodingType.get(value);
159
+ if (type == null) { return null; }
160
+ assert type.value == value;
161
+ return type.name;
162
+ }
163
+
164
+ private static String
165
+ findEncodingName(ThreadContext context, IRubyObject encoding)
166
+ {
167
+ String rubyEncoding = null;
168
+ if (encoding instanceof RubyString) {
169
+ rubyEncoding = rubyStringToString((RubyString) encoding);
170
+ } else if (encoding instanceof RubyFixnum) {
171
+ rubyEncoding = findEncodingName(RubyFixnum.fix2int((RubyFixnum) encoding));
237
172
  }
173
+ if (rubyEncoding == null) { return null; }
174
+ try {
175
+ return Charset.forName(rubyEncoding).displayName();
176
+ } catch (UnsupportedCharsetException e) {
177
+ throw context.getRuntime().newEncodingCompatibilityError(rubyEncoding + "is not supported");
178
+ } catch (IllegalCharsetNameException e) {
179
+ throw context.getRuntime().newEncodingError(e.getMessage());
180
+ }
181
+ }
182
+
183
+ private static final Pattern CHARSET_PATTERN = Pattern.compile("charset(()|\\s)=(()|\\s)([a-z]|-|_|\\d)+",
184
+ Pattern.CASE_INSENSITIVE);
238
185
 
239
- @JRubyMethod(name="file", meta=true)
240
- public static IRubyObject parse_file(ThreadContext context,
241
- IRubyObject klass,
242
- IRubyObject data,
243
- IRubyObject encoding) {
244
- HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
245
- ctx.setInputSourceFile(context, data);
246
- String javaEncoding = findEncodingName(context, encoding);
247
- if (javaEncoding != null) {
248
- ctx.getInputSource().setEncoding(javaEncoding);
249
- }
250
- return ctx;
186
+ private static CharSequence
187
+ applyEncoding(final String input, final String enc)
188
+ {
189
+ int start_pos = 0;
190
+ int end_pos = 0;
191
+ if (containsIgnoreCase(input, "charset")) {
192
+ Matcher m = CHARSET_PATTERN.matcher(input);
193
+ while (m.find()) {
194
+ start_pos = m.start();
195
+ end_pos = m.end();
196
+ }
197
+ }
198
+ if (start_pos != end_pos) {
199
+ return new StringBuilder(input).replace(start_pos, end_pos, "charset=" + enc);
251
200
  }
201
+ return input;
202
+ }
252
203
 
253
- @JRubyMethod(name="io", meta=true)
254
- public static IRubyObject parse_io(ThreadContext context,
255
- IRubyObject klass,
256
- IRubyObject data,
257
- IRubyObject encoding) {
258
- HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
259
- ctx.setIOInputSource(context, data, context.nil);
260
- String javaEncoding = findEncodingName(context, encoding);
261
- if (javaEncoding != null) {
262
- ctx.getInputSource().setEncoding(javaEncoding);
263
- }
264
- return ctx;
204
+ private static boolean
205
+ containsIgnoreCase(final String str, final String sub)
206
+ {
207
+ final int len = sub.length();
208
+ final int max = str.length() - len;
209
+
210
+ if (len == 0) { return true; }
211
+ final char c0Lower = Character.toLowerCase(sub.charAt(0));
212
+ final char c0Upper = Character.toUpperCase(sub.charAt(0));
213
+
214
+ for (int i = 0; i <= max; i++) {
215
+ final char ch = str.charAt(i);
216
+ if (ch != c0Lower && Character.toLowerCase(ch) != c0Lower && Character.toUpperCase(ch) != c0Upper) {
217
+ continue; // first char doesn't match
218
+ }
219
+
220
+ if (str.regionMatches(true, i + 1, sub, 0 + 1, len - 1)) {
221
+ return true;
222
+ }
265
223
  }
224
+ return false;
225
+ }
266
226
 
267
- /**
268
- * Create a new parser context that will read from a raw input stream.
269
- * Meant to be run in a separate thread by HtmlSaxPushParser.
270
- */
271
- static HtmlSaxParserContext parse_stream(final Ruby runtime, RubyClass klass, InputStream stream) {
272
- HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(runtime, klass);
273
- ctx.setInputSource(stream);
274
- return ctx;
227
+ @JRubyMethod(name = "file", meta = true)
228
+ public static IRubyObject
229
+ parse_file(ThreadContext context,
230
+ IRubyObject klass,
231
+ IRubyObject data,
232
+ IRubyObject encoding)
233
+ {
234
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
235
+ ctx.setInputSourceFile(context, data);
236
+ String javaEncoding = findEncodingName(context, encoding);
237
+ if (javaEncoding != null) {
238
+ ctx.getInputSource().setEncoding(javaEncoding);
275
239
  }
240
+ return ctx;
241
+ }
276
242
 
277
- @Override
278
- protected void preParse(final Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler) {
279
- // this function is meant to be empty. It overrides the one in XmlSaxParserContext
243
+ @JRubyMethod(name = "io", meta = true)
244
+ public static IRubyObject
245
+ parse_io(ThreadContext context,
246
+ IRubyObject klass,
247
+ IRubyObject data,
248
+ IRubyObject encoding)
249
+ {
250
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
251
+ ctx.setIOInputSource(context, data, context.nil);
252
+ String javaEncoding = findEncodingName(context, encoding);
253
+ if (javaEncoding != null) {
254
+ ctx.getInputSource().setEncoding(javaEncoding);
280
255
  }
256
+ return ctx;
257
+ }
258
+
259
+ /**
260
+ * Create a new parser context that will read from a raw input stream.
261
+ * Meant to be run in a separate thread by HtmlSaxPushParser.
262
+ */
263
+ static HtmlSaxParserContext
264
+ parse_stream(final Ruby runtime, RubyClass klass, InputStream stream)
265
+ {
266
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(runtime, klass);
267
+ ctx.setInputSource(stream);
268
+ return ctx;
269
+ }
270
+
271
+ @Override
272
+ protected void
273
+ preParse(final Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler)
274
+ {
275
+ // this function is meant to be empty. It overrides the one in XmlSaxParserContext
276
+ }
281
277
 
282
278
  }