nokogiri-maven 1.5.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278) hide show
  1. data/CHANGELOG.ja.rdoc +544 -0
  2. data/CHANGELOG.rdoc +532 -0
  3. data/Manifest.txt +283 -0
  4. data/README.ja.rdoc +106 -0
  5. data/README.rdoc +174 -0
  6. data/Rakefile +164 -0
  7. data/bin/nokogiri +53 -0
  8. data/ext/java/nokogiri/EncodingHandler.java +124 -0
  9. data/ext/java/nokogiri/HtmlDocument.java +119 -0
  10. data/ext/java/nokogiri/HtmlElementDescription.java +145 -0
  11. data/ext/java/nokogiri/HtmlEntityLookup.java +79 -0
  12. data/ext/java/nokogiri/HtmlSaxParserContext.java +259 -0
  13. data/ext/java/nokogiri/NokogiriService.java +590 -0
  14. data/ext/java/nokogiri/XmlAttr.java +180 -0
  15. data/ext/java/nokogiri/XmlAttributeDecl.java +130 -0
  16. data/ext/java/nokogiri/XmlCdata.java +84 -0
  17. data/ext/java/nokogiri/XmlComment.java +86 -0
  18. data/ext/java/nokogiri/XmlDocument.java +519 -0
  19. data/ext/java/nokogiri/XmlDocumentFragment.java +223 -0
  20. data/ext/java/nokogiri/XmlDtd.java +469 -0
  21. data/ext/java/nokogiri/XmlElement.java +195 -0
  22. data/ext/java/nokogiri/XmlElementContent.java +382 -0
  23. data/ext/java/nokogiri/XmlElementDecl.java +152 -0
  24. data/ext/java/nokogiri/XmlEntityDecl.java +162 -0
  25. data/ext/java/nokogiri/XmlEntityReference.java +97 -0
  26. data/ext/java/nokogiri/XmlNamespace.java +183 -0
  27. data/ext/java/nokogiri/XmlNode.java +1378 -0
  28. data/ext/java/nokogiri/XmlNodeSet.java +267 -0
  29. data/ext/java/nokogiri/XmlProcessingInstruction.java +99 -0
  30. data/ext/java/nokogiri/XmlReader.java +408 -0
  31. data/ext/java/nokogiri/XmlRelaxng.java +144 -0
  32. data/ext/java/nokogiri/XmlSaxParserContext.java +367 -0
  33. data/ext/java/nokogiri/XmlSaxPushParser.java +184 -0
  34. data/ext/java/nokogiri/XmlSchema.java +324 -0
  35. data/ext/java/nokogiri/XmlSyntaxError.java +119 -0
  36. data/ext/java/nokogiri/XmlText.java +119 -0
  37. data/ext/java/nokogiri/XmlXpathContext.java +199 -0
  38. data/ext/java/nokogiri/XsltStylesheet.java +197 -0
  39. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +204 -0
  40. data/ext/java/nokogiri/internals/NokogiriDocumentCache.java +73 -0
  41. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +86 -0
  42. data/ext/java/nokogiri/internals/NokogiriHandler.java +327 -0
  43. data/ext/java/nokogiri/internals/NokogiriHelpers.java +639 -0
  44. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +167 -0
  45. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +130 -0
  46. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +74 -0
  47. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +121 -0
  48. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +79 -0
  49. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +141 -0
  50. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +73 -0
  51. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +67 -0
  52. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +86 -0
  53. data/ext/java/nokogiri/internals/ParserContext.java +276 -0
  54. data/ext/java/nokogiri/internals/PushInputStream.java +411 -0
  55. data/ext/java/nokogiri/internals/ReaderNode.java +531 -0
  56. data/ext/java/nokogiri/internals/SaveContextVisitor.java +567 -0
  57. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +76 -0
  58. data/ext/java/nokogiri/internals/XmlDeclHandler.java +42 -0
  59. data/ext/java/nokogiri/internals/XmlDomParser.java +76 -0
  60. data/ext/java/nokogiri/internals/XmlDomParserContext.java +244 -0
  61. data/ext/java/nokogiri/internals/XmlSaxParser.java +65 -0
  62. data/ext/java/nokogiri/internals/XsltExtensionFunction.java +72 -0
  63. data/ext/nokogiri/depend +358 -0
  64. data/ext/nokogiri/extconf.rb +124 -0
  65. data/ext/nokogiri/html_document.c +154 -0
  66. data/ext/nokogiri/html_document.h +10 -0
  67. data/ext/nokogiri/html_element_description.c +276 -0
  68. data/ext/nokogiri/html_element_description.h +10 -0
  69. data/ext/nokogiri/html_entity_lookup.c +32 -0
  70. data/ext/nokogiri/html_entity_lookup.h +8 -0
  71. data/ext/nokogiri/html_sax_parser_context.c +94 -0
  72. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  73. data/ext/nokogiri/nokogiri.c +115 -0
  74. data/ext/nokogiri/nokogiri.h +160 -0
  75. data/ext/nokogiri/xml_attr.c +94 -0
  76. data/ext/nokogiri/xml_attr.h +9 -0
  77. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  78. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  79. data/ext/nokogiri/xml_cdata.c +56 -0
  80. data/ext/nokogiri/xml_cdata.h +9 -0
  81. data/ext/nokogiri/xml_comment.c +54 -0
  82. data/ext/nokogiri/xml_comment.h +9 -0
  83. data/ext/nokogiri/xml_document.c +478 -0
  84. data/ext/nokogiri/xml_document.h +23 -0
  85. data/ext/nokogiri/xml_document_fragment.c +48 -0
  86. data/ext/nokogiri/xml_document_fragment.h +10 -0
  87. data/ext/nokogiri/xml_dtd.c +202 -0
  88. data/ext/nokogiri/xml_dtd.h +10 -0
  89. data/ext/nokogiri/xml_element_content.c +123 -0
  90. data/ext/nokogiri/xml_element_content.h +10 -0
  91. data/ext/nokogiri/xml_element_decl.c +69 -0
  92. data/ext/nokogiri/xml_element_decl.h +9 -0
  93. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  94. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  95. data/ext/nokogiri/xml_entity_decl.c +110 -0
  96. data/ext/nokogiri/xml_entity_decl.h +10 -0
  97. data/ext/nokogiri/xml_entity_reference.c +52 -0
  98. data/ext/nokogiri/xml_entity_reference.h +9 -0
  99. data/ext/nokogiri/xml_io.c +56 -0
  100. data/ext/nokogiri/xml_io.h +11 -0
  101. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  102. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  103. data/ext/nokogiri/xml_namespace.c +84 -0
  104. data/ext/nokogiri/xml_namespace.h +13 -0
  105. data/ext/nokogiri/xml_node.c +1385 -0
  106. data/ext/nokogiri/xml_node.h +13 -0
  107. data/ext/nokogiri/xml_node_set.c +418 -0
  108. data/ext/nokogiri/xml_node_set.h +9 -0
  109. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  110. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  111. data/ext/nokogiri/xml_reader.c +684 -0
  112. data/ext/nokogiri/xml_reader.h +10 -0
  113. data/ext/nokogiri/xml_relax_ng.c +161 -0
  114. data/ext/nokogiri/xml_relax_ng.h +9 -0
  115. data/ext/nokogiri/xml_sax_parser.c +293 -0
  116. data/ext/nokogiri/xml_sax_parser.h +39 -0
  117. data/ext/nokogiri/xml_sax_parser_context.c +199 -0
  118. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  119. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  120. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  121. data/ext/nokogiri/xml_schema.c +205 -0
  122. data/ext/nokogiri/xml_schema.h +9 -0
  123. data/ext/nokogiri/xml_syntax_error.c +58 -0
  124. data/ext/nokogiri/xml_syntax_error.h +13 -0
  125. data/ext/nokogiri/xml_text.c +50 -0
  126. data/ext/nokogiri/xml_text.h +9 -0
  127. data/ext/nokogiri/xml_xpath_context.c +309 -0
  128. data/ext/nokogiri/xml_xpath_context.h +9 -0
  129. data/ext/nokogiri/xslt_stylesheet.c +264 -0
  130. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  131. data/lib/nokogiri.rb +127 -0
  132. data/lib/nokogiri/css.rb +27 -0
  133. data/lib/nokogiri/css/node.rb +99 -0
  134. data/lib/nokogiri/css/parser.rb +677 -0
  135. data/lib/nokogiri/css/parser.y +237 -0
  136. data/lib/nokogiri/css/parser_extras.rb +91 -0
  137. data/lib/nokogiri/css/syntax_error.rb +7 -0
  138. data/lib/nokogiri/css/tokenizer.rb +152 -0
  139. data/lib/nokogiri/css/tokenizer.rex +55 -0
  140. data/lib/nokogiri/css/xpath_visitor.rb +171 -0
  141. data/lib/nokogiri/decorators/slop.rb +35 -0
  142. data/lib/nokogiri/html.rb +36 -0
  143. data/lib/nokogiri/html/builder.rb +35 -0
  144. data/lib/nokogiri/html/document.rb +213 -0
  145. data/lib/nokogiri/html/document_fragment.rb +41 -0
  146. data/lib/nokogiri/html/element_description.rb +23 -0
  147. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  148. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  149. data/lib/nokogiri/html/sax/parser.rb +52 -0
  150. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  151. data/lib/nokogiri/nokogiri.jar +0 -0
  152. data/lib/nokogiri/syntax_error.rb +4 -0
  153. data/lib/nokogiri/version.rb +88 -0
  154. data/lib/nokogiri/xml.rb +67 -0
  155. data/lib/nokogiri/xml/attr.rb +14 -0
  156. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  157. data/lib/nokogiri/xml/builder.rb +425 -0
  158. data/lib/nokogiri/xml/cdata.rb +11 -0
  159. data/lib/nokogiri/xml/character_data.rb +7 -0
  160. data/lib/nokogiri/xml/document.rb +234 -0
  161. data/lib/nokogiri/xml/document_fragment.rb +98 -0
  162. data/lib/nokogiri/xml/dtd.rb +22 -0
  163. data/lib/nokogiri/xml/element_content.rb +36 -0
  164. data/lib/nokogiri/xml/element_decl.rb +13 -0
  165. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  166. data/lib/nokogiri/xml/namespace.rb +13 -0
  167. data/lib/nokogiri/xml/node.rb +915 -0
  168. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  169. data/lib/nokogiri/xml/node_set.rb +357 -0
  170. data/lib/nokogiri/xml/notation.rb +6 -0
  171. data/lib/nokogiri/xml/parse_options.rb +93 -0
  172. data/lib/nokogiri/xml/pp.rb +2 -0
  173. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  174. data/lib/nokogiri/xml/pp/node.rb +56 -0
  175. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  176. data/lib/nokogiri/xml/reader.rb +112 -0
  177. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  178. data/lib/nokogiri/xml/sax.rb +4 -0
  179. data/lib/nokogiri/xml/sax/document.rb +164 -0
  180. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  181. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  182. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  183. data/lib/nokogiri/xml/schema.rb +63 -0
  184. data/lib/nokogiri/xml/syntax_error.rb +47 -0
  185. data/lib/nokogiri/xml/text.rb +9 -0
  186. data/lib/nokogiri/xml/xpath.rb +10 -0
  187. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  188. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  189. data/lib/nokogiri/xslt.rb +52 -0
  190. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  191. data/lib/xsd/xmlparser/nokogiri.rb +90 -0
  192. data/nokogiri_help_responses.md +40 -0
  193. data/tasks/cross_compile.rb +152 -0
  194. data/tasks/nokogiri.org.rb +18 -0
  195. data/tasks/test.rb +94 -0
  196. data/test/css/test_nthiness.rb +159 -0
  197. data/test/css/test_parser.rb +303 -0
  198. data/test/css/test_tokenizer.rb +198 -0
  199. data/test/css/test_xpath_visitor.rb +85 -0
  200. data/test/decorators/test_slop.rb +16 -0
  201. data/test/files/2ch.html +108 -0
  202. data/test/files/address_book.rlx +12 -0
  203. data/test/files/address_book.xml +10 -0
  204. data/test/files/bar/bar.xsd +4 -0
  205. data/test/files/dont_hurt_em_why.xml +422 -0
  206. data/test/files/encoding.html +82 -0
  207. data/test/files/encoding.xhtml +84 -0
  208. data/test/files/exslt.xml +8 -0
  209. data/test/files/exslt.xslt +35 -0
  210. data/test/files/foo/foo.xsd +4 -0
  211. data/test/files/metacharset.html +10 -0
  212. data/test/files/noencoding.html +47 -0
  213. data/test/files/po.xml +32 -0
  214. data/test/files/po.xsd +66 -0
  215. data/test/files/shift_jis.html +10 -0
  216. data/test/files/shift_jis.xml +5 -0
  217. data/test/files/snuggles.xml +3 -0
  218. data/test/files/staff.dtd +10 -0
  219. data/test/files/staff.xml +59 -0
  220. data/test/files/staff.xslt +32 -0
  221. data/test/files/tlm.html +850 -0
  222. data/test/files/valid_bar.xml +2 -0
  223. data/test/helper.rb +173 -0
  224. data/test/html/sax/test_parser.rb +136 -0
  225. data/test/html/sax/test_parser_context.rb +48 -0
  226. data/test/html/test_builder.rb +164 -0
  227. data/test/html/test_document.rb +472 -0
  228. data/test/html/test_document_encoding.rb +138 -0
  229. data/test/html/test_document_fragment.rb +255 -0
  230. data/test/html/test_element_description.rb +100 -0
  231. data/test/html/test_named_characters.rb +14 -0
  232. data/test/html/test_node.rb +190 -0
  233. data/test/html/test_node_encoding.rb +27 -0
  234. data/test/test_convert_xpath.rb +135 -0
  235. data/test/test_css_cache.rb +45 -0
  236. data/test/test_encoding_handler.rb +46 -0
  237. data/test/test_memory_leak.rb +72 -0
  238. data/test/test_nokogiri.rb +132 -0
  239. data/test/test_reader.rb +425 -0
  240. data/test/test_soap4r_sax.rb +52 -0
  241. data/test/test_xslt_transforms.rb +193 -0
  242. data/test/xml/node/test_save_options.rb +28 -0
  243. data/test/xml/node/test_subclass.rb +44 -0
  244. data/test/xml/sax/test_parser.rb +338 -0
  245. data/test/xml/sax/test_parser_context.rb +113 -0
  246. data/test/xml/sax/test_push_parser.rb +156 -0
  247. data/test/xml/test_attr.rb +65 -0
  248. data/test/xml/test_attribute_decl.rb +86 -0
  249. data/test/xml/test_builder.rb +227 -0
  250. data/test/xml/test_cdata.rb +50 -0
  251. data/test/xml/test_comment.rb +29 -0
  252. data/test/xml/test_document.rb +697 -0
  253. data/test/xml/test_document_encoding.rb +26 -0
  254. data/test/xml/test_document_fragment.rb +192 -0
  255. data/test/xml/test_dtd.rb +107 -0
  256. data/test/xml/test_dtd_encoding.rb +33 -0
  257. data/test/xml/test_element_content.rb +56 -0
  258. data/test/xml/test_element_decl.rb +73 -0
  259. data/test/xml/test_entity_decl.rb +122 -0
  260. data/test/xml/test_entity_reference.rb +21 -0
  261. data/test/xml/test_namespace.rb +70 -0
  262. data/test/xml/test_node.rb +917 -0
  263. data/test/xml/test_node_attributes.rb +34 -0
  264. data/test/xml/test_node_encoding.rb +107 -0
  265. data/test/xml/test_node_reparenting.rb +334 -0
  266. data/test/xml/test_node_set.rb +742 -0
  267. data/test/xml/test_parse_options.rb +52 -0
  268. data/test/xml/test_processing_instruction.rb +30 -0
  269. data/test/xml/test_reader_encoding.rb +126 -0
  270. data/test/xml/test_relax_ng.rb +60 -0
  271. data/test/xml/test_schema.rb +94 -0
  272. data/test/xml/test_syntax_error.rb +12 -0
  273. data/test/xml/test_text.rb +47 -0
  274. data/test/xml/test_unparented_node.rb +381 -0
  275. data/test/xml/test_xpath.rb +237 -0
  276. data/test/xslt/test_custom_functions.rb +94 -0
  277. data/test/xslt/test_exception_handling.rb +37 -0
  278. metadata +552 -0
@@ -0,0 +1,276 @@
1
+ /**
2
+ * (The MIT License)
3
+ *
4
+ * Copyright (c) 2008 - 2011:
5
+ *
6
+ * * {Aaron Patterson}[http://tenderlovemaking.com]
7
+ * * {Mike Dalessio}[http://mike.daless.io]
8
+ * * {Charles Nutter}[http://blog.headius.com]
9
+ * * {Sergio Arbeo}[http://www.serabe.com]
10
+ * * {Patrick Mahoney}[http://polycrystal.org]
11
+ * * {Yoko Harada}[http://yokolet.blogspot.com]
12
+ *
13
+ * Permission is hereby granted, free of charge, to any person obtaining
14
+ * a copy of this software and associated documentation files (the
15
+ * 'Software'), to deal in the Software without restriction, including
16
+ * without limitation the rights to use, copy, modify, merge, publish,
17
+ * distribute, sublicense, and/or sell copies of the Software, and to
18
+ * permit persons to whom the Software is furnished to do so, subject to
19
+ * the following conditions:
20
+ *
21
+ * The above copyright notice and this permission notice shall be
22
+ * included in all copies or substantial portions of the Software.
23
+ *
24
+ * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
+ */
32
+
33
+ package nokogiri.internals;
34
+
35
+ import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
36
+ import static nokogiri.internals.NokogiriHelpers.adjustSystemIdIfNecessary;
37
+ import static org.jruby.javasupport.util.RuntimeHelpers.invoke;
38
+
39
+ import java.io.ByteArrayInputStream;
40
+ import java.io.File;
41
+ import java.io.IOException;
42
+ import java.io.InputStream;
43
+
44
+ import org.jruby.Ruby;
45
+ import org.jruby.RubyClass;
46
+ import org.jruby.RubyIO;
47
+ import org.jruby.RubyObject;
48
+ import org.jruby.RubyString;
49
+ import org.jruby.RubyStringIO;
50
+ import org.jruby.exceptions.RaiseException;
51
+ import org.jruby.runtime.ThreadContext;
52
+ import org.jruby.runtime.builtin.IRubyObject;
53
+ import org.jruby.util.ByteList;
54
+ import org.jruby.util.TypeConverter;
55
+ import org.xml.sax.InputSource;
56
+ import org.xml.sax.SAXException;
57
+ import org.xml.sax.ext.EntityResolver2;
58
+
59
+ /**
60
+ * Base class for the various parser contexts. Handles converting
61
+ * Ruby objects to InputSource objects.
62
+ *
63
+ * @author Patrick Mahoney <pat@polycrystal.org>
64
+ * @author Yoko Harada <yokolet@gmail.com>
65
+ */
66
+ public class ParserContext extends RubyObject {
67
+ protected InputSource source = null;
68
+
69
+ /**
70
+ * Create a file base input source taking into account the current
71
+ * directory of <code>runtime</code>.
72
+ */
73
+ public static InputSource resolveEntity(Ruby runtime, String publicId, String baseURI, String systemId)
74
+ throws IOException {
75
+ InputSource s = new InputSource();
76
+ String adjusted = adjustSystemIdIfNecessary(runtime.getCurrentDirectory(), runtime.getInstanceConfig().getScriptFileName(), baseURI, systemId);
77
+ if (adjusted == null && publicId == null) {
78
+ throw runtime.newRuntimeError("SystemId \"" + systemId + "\" is not correct.");
79
+ }
80
+ s.setSystemId(adjusted);
81
+ s.setPublicId(publicId);
82
+ return s;
83
+ }
84
+
85
+ public ParserContext(Ruby runtime) {
86
+ // default to class 'Object' because this class isn't exposed to Ruby
87
+ super(runtime, runtime.getObject());
88
+ }
89
+
90
+ public ParserContext(Ruby runtime, RubyClass klass) {
91
+ super(runtime, klass);
92
+ }
93
+
94
+ protected InputSource getInputSource() {
95
+ return source;
96
+ }
97
+
98
+ /**
99
+ * Set the InputSource from <code>url</code> or <code>data</code>,
100
+ * which may be an IO object, a String, or a StringIO.
101
+ */
102
+ public void setInputSource(ThreadContext context, IRubyObject data, IRubyObject url) {
103
+ Ruby ruby = context.getRuntime();
104
+ String path = (String) url.toJava(String.class);
105
+ if (isAbsolutePath(path)) {
106
+ source = new InputSource();
107
+ source.setSystemId(path);
108
+ return;
109
+ }
110
+ RubyString stringData = null;
111
+ if (invoke(context, data, "respond_to?",
112
+ ruby.newSymbol("to_io").to_sym()).isTrue()) {
113
+ /* IO or other object that responds to :to_io */
114
+ RubyIO io =
115
+ (RubyIO) TypeConverter.convertToType(data,
116
+ ruby.getIO(),
117
+ "to_io");
118
+ source = new InputSource(io.getInStream());
119
+ } else if (((RubyObject)data).getInstanceVariable("@io") != null) {
120
+ // in case of EncodingReader is used
121
+ // since EncodingReader won't respond to :to_io
122
+ RubyObject dataObject = (RubyObject) ((RubyObject)data).getInstanceVariable("@io");
123
+ if (dataObject instanceof RubyIO) {
124
+ RubyIO io = (RubyIO)dataObject;
125
+ source = new InputSource(io.getInStream());
126
+ } else if (dataObject instanceof RubyStringIO) {
127
+ stringData = (RubyString)((RubyStringIO)dataObject).string();
128
+ }
129
+ } else {
130
+ if (invoke(context, data, "respond_to?",
131
+ ruby.newSymbol("string").to_sym()).isTrue()) {
132
+ /* StringIO or other object that responds to :string */
133
+ stringData = invoke(context, data, "string").convertToString();
134
+ } else if (data instanceof RubyString) {
135
+ stringData = (RubyString) data;
136
+ } else {
137
+ throw ruby.newArgumentError(
138
+ "must be kind_of String or respond to :to_io or :string");
139
+ }
140
+ }
141
+ if (stringData != null) {
142
+ ByteList bytes = stringData.getByteList();
143
+ source = new InputSource(new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length()));
144
+ }
145
+ }
146
+
147
+ private boolean isAbsolutePath(String url) {
148
+ if (url == null) return false;
149
+ return (new File(url)).isAbsolute();
150
+ }
151
+
152
+ /**
153
+ * Set the InputSource to read from <code>file</code>, a String filename.
154
+ */
155
+ public void setInputSourceFile(ThreadContext context, IRubyObject file) {
156
+ String filename = rubyStringToString(file);
157
+
158
+ try{
159
+ source = resolveEntity(context.getRuntime(), null, null, filename);
160
+ } catch (Exception e) {
161
+ throw RaiseException.createNativeRaiseException(context.getRuntime(), e);
162
+ }
163
+
164
+ }
165
+
166
+ /**
167
+ * Set the InputSource from <code>stream</code>.
168
+ */
169
+ public void setInputSource(InputStream stream) {
170
+ source = new InputSource(stream);
171
+ }
172
+
173
+ /**
174
+ * Wrap Nokogiri parser options in a utility class. This is
175
+ * read-only.
176
+ */
177
+ public static class Options {
178
+ protected static final long STRICT = 0;
179
+ protected static final long RECOVER = 1;
180
+ protected static final long NOENT = 2;
181
+ protected static final long DTDLOAD = 4;
182
+ protected static final long DTDATTR = 8;
183
+ protected static final long DTDVALID = 16;
184
+ protected static final long NOERROR = 32;
185
+ protected static final long NOWARNING = 64;
186
+ protected static final long PEDANTIC = 128;
187
+ protected static final long NOBLANKS = 256;
188
+ protected static final long SAX1 = 512;
189
+ protected static final long XINCLUDE = 1024;
190
+ protected static final long NONET = 2048;
191
+ protected static final long NODICT = 4096;
192
+ protected static final long NSCLEAN = 8192;
193
+ protected static final long NOCDATA = 16384;
194
+ protected static final long NOXINCNODE = 32768;
195
+
196
+ public boolean strict;
197
+ public boolean recover;
198
+ public boolean noEnt;
199
+ public boolean dtdLoad;
200
+ public boolean dtdAttr;
201
+ public boolean dtdValid;
202
+ public boolean noError;
203
+ public boolean noWarning;
204
+ public boolean pedantic;
205
+ public boolean noBlanks;
206
+ public boolean sax1;
207
+ public boolean xInclude;
208
+ public boolean noNet;
209
+ public boolean noDict;
210
+ public boolean nsClean;
211
+ public boolean noCdata;
212
+ public boolean noXIncNode;
213
+
214
+ protected static boolean test(long options, long mask) {
215
+ return ((options & mask) == mask);
216
+ }
217
+
218
+ public Options(long options) {
219
+ strict = ((options & RECOVER) == STRICT);
220
+ recover = test(options, RECOVER);
221
+ noEnt = test(options, NOENT);
222
+ dtdLoad = test(options, DTDLOAD);
223
+ dtdAttr = test(options, DTDATTR);
224
+ dtdValid = test(options, DTDVALID);
225
+ noError = test(options, NOERROR);
226
+ noWarning = test(options, NOWARNING);
227
+ pedantic = test(options, PEDANTIC);
228
+ noBlanks = test(options, NOBLANKS);
229
+ sax1 = test(options, SAX1);
230
+ xInclude = test(options, XINCLUDE);
231
+ noNet = test(options, NONET);
232
+ noDict = test(options, NODICT);
233
+ nsClean = test(options, NSCLEAN);
234
+ noCdata = test(options, NOCDATA);
235
+ noXIncNode = test(options, NOXINCNODE);
236
+ }
237
+ }
238
+
239
+ /**
240
+ * An entity resolver aware of the fact that the Ruby runtime can
241
+ * change directory but the JVM cannot. Thus any file based
242
+ * entity resolution that uses relative paths must be translated
243
+ * to be relative to the current directory of the Ruby runtime.
244
+ */
245
+ public static class ChdirEntityResolver implements EntityResolver2 {
246
+ protected Ruby runtime;
247
+
248
+ public ChdirEntityResolver(Ruby runtime) {
249
+ super();
250
+ this.runtime = runtime;
251
+ }
252
+
253
+ @Override
254
+ public InputSource getExternalSubset(String name, String baseURI)
255
+ throws SAXException, IOException {
256
+ return null;
257
+ }
258
+
259
+ @Override
260
+ public InputSource resolveEntity(String publicId, String systemId)
261
+ throws SAXException, IOException {
262
+ return resolveEntity(null, publicId, null, systemId);
263
+ }
264
+
265
+ @Override
266
+ public InputSource resolveEntity(String name,
267
+ String publicId,
268
+ String baseURI,
269
+ String systemId)
270
+ throws SAXException, IOException {
271
+ return ParserContext.resolveEntity(runtime, publicId, baseURI, systemId);
272
+ }
273
+
274
+ }
275
+
276
+ }
@@ -0,0 +1,411 @@
1
+ /**
2
+ * (The MIT License)
3
+ *
4
+ * Copyright (c) 2008 - 2011:
5
+ *
6
+ * * {Aaron Patterson}[http://tenderlovemaking.com]
7
+ * * {Mike Dalessio}[http://mike.daless.io]
8
+ * * {Charles Nutter}[http://blog.headius.com/]
9
+ * * {Sergio Arbeo}[http://www.serabe.com/]
10
+ * * {Patrick Mahoney}[http://polycrystal.org/]
11
+ * * {Yoko Harada}[http://yokolet.blogspot.com/]
12
+ *
13
+ * Permission is hereby granted, free of charge, to any person obtaining
14
+ * a copy of this software and associated documentation files (the
15
+ * 'Software'), to deal in the Software without restriction, including
16
+ * without limitation the rights to use, copy, modify, merge, publish,
17
+ * distribute, sublicense, and/or sell copies of the Software, and to
18
+ * permit persons to whom the Software is furnished to do so, subject to
19
+ * the following conditions:
20
+ *
21
+ * The above copyright notice and this permission notice shall be
22
+ * included in all copies or substantial portions of the Software.
23
+ *
24
+ * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
+ */
32
+
33
+ package nokogiri.internals;
34
+
35
+ import java.io.IOException;
36
+ import java.io.InputStream;
37
+ import java.nio.channels.ClosedChannelException;
38
+ import java.util.ArrayList;
39
+
40
+
41
/**
 * Implements a "push" InputStream.  An owner thread creates the stream
 * and passes it to a second thread.  The owner thread calls
 * {@link #write(byte[])} to append data to the stream.  The second
 * thread calls {@link #read()} and the other InputStream methods to
 * consume it.
 *
 * All stream methods synchronize on the stream object and use
 * wait/notifyAll to hand data from the writer to the reader.
 *
 * You should ensure that only one thread writes to, and only one
 * thread reads from, this stream, though nothing enforces this
 * strictly.
 */
public class PushInputStream extends InputStream {
    /**
     * Current position in the stream relative to the start of the
     * buffer.
     */
    protected int pos;

    /**
     * Current mark position, or -1 if there is no mark.
     */
    protected int mark;

    /**
     * Read limit passed to the last {@link #mark(int)} call, or -1.
     */
    protected int readlimit;

    /**
     * State is open or closed.
     */
    protected boolean isOpen;

    /**
     * Pending data; set to null once the stream is closed.
     */
    protected Buffer buffer;

    /**
     * True while a reader has found the buffer empty and announced that
     * it is about to block.  Lets writeAndWaitForRead tell a genuine
     * "reader is waiting" notification from a spurious wakeup.
     */
    private boolean readerBlocked;

    /** Unused; retained so existing references keep compiling. */
    int nClose = 0;

    public PushInputStream() {
        pos = 0;
        mark = -1;
        readlimit = -1;
        isOpen = true;
        readerBlocked = false;

        buffer = new Buffer(512);
    }

    /**
     * @throws IOException (a ClosedChannelException) if the stream is closed
     */
    protected synchronized void ensureOpen() throws IOException {
        if (!isOpen) {
            throw new ClosedChannelException();
        }
    }

    /**
     * Write data that can be read from the stream and wake any waiting
     * reader.
     *
     * @param b bytes to append; must not be null
     * @throws NullPointerException  if <code>b</code> is null
     * @throws IllegalStateException if the stream has been closed
     */
    public synchronized void write(byte[] b) {
        if (b == null) {
            throw new NullPointerException("byte array is null");
        }
        if (buffer == null) {
            // close() discards the buffer; fail clearly instead of with
            // an anonymous NullPointerException.
            throw new IllegalStateException("stream is closed");
        }
        buffer.put(b);
        notifyAll(); // notify readers waiting
    }

    /**
     * Write data and then wait until all the data has been read
     * (waits until the thread reading from this stream is blocked in
     * a read()), or until the stream is closed.
     *
     * @throws IOException if the stream is already closed
     */
    public synchronized void writeAndWaitForRead(byte[] b) throws IOException {
        ensureOpen();
        write(b);
        // Any earlier "blocked" announcement predates this write.
        readerBlocked = false;

        boolean interrupted = false;
        try {
            while (isOpen && !readerBlocked) {
                try {
                    wait();
                } catch (InterruptedException e) {
                    // continue waiting, but remember the interrupt
                    interrupted = true;
                }
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /*
     *------------------------------------------------------------
     * InputStream methods
     *------------------------------------------------------------
     */

    /**
     * @return number of bytes that can be read without blocking
     * @see InputStream#available()
     */
    @Override
    public synchronized int available() throws IOException {
        ensureOpen();
        return buffer.size() - pos;
    }

    /**
     * Close the stream, discard buffered data and wake every waiting
     * thread.  Closing an already closed stream has no effect.
     *
     * @see InputStream#close()
     */
    @Override
    public synchronized void close() throws IOException {
        if (!isOpen) return;
        isOpen = false;
        buffer = null;
        notifyAll();
    }

    /**
     * @see InputStream#mark(int)
     */
    @Override
    public synchronized void mark(int readlimit) {
        this.mark = pos;
        this.readlimit = readlimit;
    }

    /**
     * Mark the current position in this stream.  Supported by
     * PushInputStream.
     *
     * @see InputStream#markSupported()
     */
    @Override
    public synchronized boolean markSupported() {
        return true;
    }

    /**
     * Read a single byte, blocking until one is available.
     *
     * @return the byte as a value in 0..255, or -1 if the stream was
     *         closed while waiting
     * @throws IOException if the stream is closed on entry
     * @see InputStream#read()
     */
    @Override
    public synchronized int read() throws IOException {
        ensureOpen();
        byte[] single = new byte[1];
        int n = read(single, 0, 1);
        if (n < 1) {
            return -1;
        }
        // Mask to avoid sign extension: 0x80..0xFF must not look like EOF.
        return single[0] & 0xff;
    }

    /**
     * @see InputStream#read(byte[])
     */
    @Override
    public synchronized int read(byte[] b) throws IOException {
        ensureOpen();
        return read(b, 0, b.length);
    }

    /**
     * @return true if a mark is set and no more than
     *         <code>readlimit</code> bytes have been consumed since
     */
    protected synchronized boolean markIsValid() {
        // Written as a difference so that a huge readlimit such as
        // Integer.MAX_VALUE cannot overflow.
        return (mark >= 0 && pos - mark <= readlimit);
    }

    /**
     * Read up to <code>len</code> bytes, blocking until at least one
     * byte is available or the stream is closed.
     *
     * @return number of bytes read, 0 if <code>len</code> is 0, or -1
     *         if the stream is closed
     * @see InputStream#read(byte[], int, int)
     */
    @Override
    public synchronized int read(byte[] b, int off, int len) throws IOException {
        if (b == null) {
            throw new NullPointerException("destination array is null");
        }
        if (off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException();
        }
        if (len == 0) {
            return 0;
        }

        boolean interrupted = false;
        try {
            while (isOpen && available() == 0) {
                /* block until data available */
                try {
                    readerBlocked = true;
                    notifyAll(); // notify writers waiting
                    wait();
                } catch (InterruptedException e) {
                    // continue waiting, but remember the interrupt
                    interrupted = true;
                }
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }

        if (!isOpen) {
            return -1;
        }

        int readLen = Math.min(available(), len);

        buffer.get(pos, readLen, b, off);
        pos += readLen;

        // Drop everything the reader can no longer return to: up to the
        // mark while it is valid, otherwise up to the current position.
        int reduce = markIsValid() ? mark : pos;

        buffer.truncateFromStart(buffer.size() - reduce);
        pos -= reduce;
        mark -= reduce;
        if (mark < 0) mark = -1; // an invalidated mark collapses to "no mark"

        return readLen;
    }

    /**
     * Return to the marked position if the mark is still valid;
     * otherwise the position is left unchanged.
     *
     * @see InputStream#reset()
     */
    @Override
    public synchronized void reset() throws IOException {
        ensureOpen();
        if (markIsValid()) {
            pos = mark;
        }
    }

    /**
     * Skip over buffered bytes without blocking.
     *
     * @return number of bytes actually skipped, never more than
     *         {@link #available()} and never negative
     * @see InputStream#skip(long)
     */
    @Override
    public synchronized long skip(long n) throws IOException {
        ensureOpen();
        if (n <= 0) {
            return 0;
        }
        // Never move past the buffered data, or available() would go negative.
        int skipped = (int) Math.min(n, (long) available());
        pos += skipped;
        return skipped;
    }

    /*
     *------------------------------------------------------------
     * Data Buffer
     *------------------------------------------------------------
     */

    /**
     * Fixed-size chunk of storage.
     */
    public static class Block {
        protected byte[] data;

        public Block(int size) {
            data = new byte[size];
        }

        /** Copy <code>length</code> bytes from <code>src</code> into this block. */
        public void copyIn(byte[] src, int srcPos, int destPos, int length) {
            System.arraycopy(src, srcPos, data, destPos, length);
        }

        /** Copy <code>length</code> bytes from this block into <code>dest</code>. */
        public void copyOut(int srcPos, byte[] dest, int destPos, int length) {
            System.arraycopy(data, srcPos, dest, destPos, length);
        }
    }

    /**
     * ArrayList of blocks that exposes removeRange publicly.
     */
    public static class BlockList extends ArrayList<Block> {
        public BlockList() {
            super();
        }

        @Override
        public void removeRange(int fromIndex, int toIndex) {
            super.removeRange(fromIndex, toIndex);
        }
    }

    /**
     * Growable byte queue stored as a list of equally sized blocks.
     * Data is appended at the end and discarded from the start.
     */
    public static class Buffer {
        protected int blockSize;
        protected BlockList blocks;

        /**
         * Offset (position) to the first logical byte in the buffer.
         */
        protected int offset;

        /**
         * Logical size of the buffer.
         */
        protected int size;

        public Buffer(int blockSize) {
            this.blockSize = blockSize;
            this.blocks = new BlockList();
            this.offset = 0;
            this.size = 0;
        }

        public int size() {
            return size;
        }

        /**
         * A contiguous run of bytes inside a single block.
         */
        protected class Segment {
            /**
             * Block index.
             */
            protected int block;

            /**
             * Offset into the block.
             */
            protected int off;

            /**
             * Length of segment.
             */
            protected int len;

            /**
             * Calculate the block number and block offset given a position.
             */
            protected Segment(int pos) {
                int absPos = offset + pos;
                block = absPos / blockSize;
                off = absPos % blockSize;
                len = -1;
            }
        }

        /**
         * Split the logical range [pos, pos + size) into per-block
         * segments, in order.
         *
         * @return the segments; empty when <code>size</code> is not positive
         */
        protected Segment[] accessList(int pos, int size) {
            if (size <= 0) {
                return new Segment[0];
            }

            // Use the last byte (inclusive) to count blocks, so a range
            // ending exactly on a block boundary does not yield a
            // trailing segment pointing at an unallocated block.
            Segment first = new Segment(pos);
            Segment last = new Segment(pos + size - 1);
            int nBlocks = last.block - first.block + 1;
            Segment[] segs = new Segment[nBlocks];

            int currPos = pos;
            int remaining = size;
            for (int i = 0; i < nBlocks; i++) {
                Segment seg = new Segment(currPos);
                seg.len = Math.min(remaining, blockSize - seg.off);
                segs[i] = seg;
                currPos += seg.len;
                remaining -= seg.len;
            }

            return segs;
        }

        /**
         * Allocate blocks so that logical positions below
         * <code>pos</code> are backed by storage.
         */
        protected void ensureCapacity(int pos) {
            if (pos <= 0) {
                return;
            }
            int lastBlock = (offset + pos - 1) / blockSize;

            while (blocks.size() < (lastBlock + 1))
                blocks.add(new Block(blockSize));
        }

        /** Append a single byte. */
        public void put(byte b) {
            byte[] buf = new byte[1];
            buf[0] = b;
            put(buf);
        }

        /** Append all bytes of <code>b</code> to the end of the buffer. */
        public void put(byte[] b) {
            ensureCapacity(size + b.length);
            Segment[] segs = accessList(size, b.length);

            int srcOff = 0;
            for (int i = 0; i < segs.length; i++) {
                Block block = blocks.get(segs[i].block);
                block.copyIn(b, srcOff, segs[i].off, segs[i].len);
                srcOff += segs[i].len; // advance through the source array
            }

            size += b.length;
        }

        /**
         * @return a new array holding <code>len</code> bytes starting at <code>pos</code>
         * @throws IndexOutOfBoundsException if the range lies outside the buffer
         */
        public byte[] get(int pos, int len) {
            if (pos < 0 || len < 0 || len > size - pos) {
                throw new IndexOutOfBoundsException();
            }
            byte[] b = new byte[len];
            get(pos, len, b, 0);
            return b;
        }

        /**
         * Copy <code>len</code> bytes starting at logical position
         * <code>pos</code> into <code>b</code> at <code>off</code>.
         *
         * Throws IndexOutOfBoundsException.
         */
        public void get(int pos, int len, byte[] b, int off) {
            if (pos < 0 || len < 0 || len > size - pos) {
                throw new IndexOutOfBoundsException();
            }
            Segment[] segs = accessList(pos, len);
            int destOff = off;
            for (int i = 0; i < segs.length; i++) {
                Block block = blocks.get(segs[i].block);
                block.copyOut(segs[i].off, b, destOff, segs[i].len);
                destOff += segs[i].len; // advance through the destination array
            }
        }

        /**
         * Truncate the buffer to <code>newSize</code> by removing
         * data from the start of the buffer.
         *
         * @throws IllegalArgumentException if <code>newSize</code> is
         *         negative or larger than the current size
         */
        public void truncateFromStart(int newSize) {
            if (newSize > size || newSize < 0)
                throw new IllegalArgumentException("invalid size");

            // Blocks wholly before the new first byte are released; the
            // remainder is addressed through the new in-block offset.
            Segment newStart = new Segment(size - newSize);
            blocks.removeRange(0, newStart.block);

            size = newSize;
            offset = newStart.off;
        }
    }
}