coupa-libxml-ruby 1.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (280) hide show
  1. data/CHANGES +488 -0
  2. data/LICENSE +22 -0
  3. data/README +161 -0
  4. data/Rakefile +188 -0
  5. data/doc/css/normal.css +182 -0
  6. data/doc/img/raze-tiny.png +0 -0
  7. data/doc/img/red-cube.jpg +0 -0
  8. data/doc/img/xml-ruby.png +0 -0
  9. data/doc/index.xml +43 -0
  10. data/doc/install.xml +77 -0
  11. data/doc/layout.rhtml +38 -0
  12. data/doc/layout.xsl +67 -0
  13. data/doc/license.xml +32 -0
  14. data/doc/log/changelog.xml +1324 -0
  15. data/doc/log/changelog.xsl +42 -0
  16. data/doc/rdoc/classes/LibXML.html +207 -0
  17. data/doc/rdoc/classes/LibXML/XML.html +407 -0
  18. data/doc/rdoc/classes/LibXML/XML/Attr.html +521 -0
  19. data/doc/rdoc/classes/LibXML/XML/AttrDecl.html +360 -0
  20. data/doc/rdoc/classes/LibXML/XML/Attributes.html +149 -0
  21. data/doc/rdoc/classes/LibXML/XML/Document.html +460 -0
  22. data/doc/rdoc/classes/LibXML/XML/Error.html +129 -0
  23. data/doc/rdoc/classes/LibXML/XML/HTMLParser.html +263 -0
  24. data/doc/rdoc/classes/LibXML/XML/Namespace.html +248 -0
  25. data/doc/rdoc/classes/LibXML/XML/Namespaces.html +200 -0
  26. data/doc/rdoc/classes/LibXML/XML/Node.html +1351 -0
  27. data/doc/rdoc/classes/LibXML/XML/Parser.html +328 -0
  28. data/doc/rdoc/classes/LibXML/XML/Reader.html +172 -0
  29. data/doc/rdoc/classes/LibXML/XML/SaxParser.html +232 -0
  30. data/doc/rdoc/classes/LibXML/XML/SaxParser/Callbacks.html +506 -0
  31. data/doc/rdoc/classes/LibXML/XML/SaxParser/VerboseCallbacks.html +555 -0
  32. data/doc/rdoc/classes/LibXML/XML/XPath.html +111 -0
  33. data/doc/rdoc/classes/LibXML/XML/XPath/Object.html +162 -0
  34. data/doc/rdoc/classes/cXMLDtd.html +114 -0
  35. data/doc/rdoc/classes/cXMLNode.html +114 -0
  36. data/doc/rdoc/created.rid +1 -0
  37. data/doc/rdoc/files/CHANGES.html +794 -0
  38. data/doc/rdoc/files/LICENSE.html +131 -0
  39. data/doc/rdoc/files/README.html +343 -0
  40. data/doc/rdoc/files/ext/libxml/libxml_c.html +101 -0
  41. data/doc/rdoc/files/ext/libxml/ruby_xml_attr_c.html +101 -0
  42. data/doc/rdoc/files/ext/libxml/ruby_xml_attr_decl_c.html +101 -0
  43. data/doc/rdoc/files/ext/libxml/ruby_xml_attributes_c.html +101 -0
  44. data/doc/rdoc/files/ext/libxml/ruby_xml_c.html +101 -0
  45. data/doc/rdoc/files/ext/libxml/ruby_xml_cbg_c.html +101 -0
  46. data/doc/rdoc/files/ext/libxml/ruby_xml_document_c.html +101 -0
  47. data/doc/rdoc/files/ext/libxml/ruby_xml_dtd_c.html +101 -0
  48. data/doc/rdoc/files/ext/libxml/ruby_xml_encoding_c.html +101 -0
  49. data/doc/rdoc/files/ext/libxml/ruby_xml_error_c.html +101 -0
  50. data/doc/rdoc/files/ext/libxml/ruby_xml_html_parser_c.html +101 -0
  51. data/doc/rdoc/files/ext/libxml/ruby_xml_html_parser_context_c.html +101 -0
  52. data/doc/rdoc/files/ext/libxml/ruby_xml_html_parser_options_c.html +101 -0
  53. data/doc/rdoc/files/ext/libxml/ruby_xml_input_cbg_c.html +101 -0
  54. data/doc/rdoc/files/ext/libxml/ruby_xml_io_c.html +101 -0
  55. data/doc/rdoc/files/ext/libxml/ruby_xml_namespace_c.html +101 -0
  56. data/doc/rdoc/files/ext/libxml/ruby_xml_namespaces_c.html +101 -0
  57. data/doc/rdoc/files/ext/libxml/ruby_xml_node_c.html +101 -0
  58. data/doc/rdoc/files/ext/libxml/ruby_xml_parser_c.html +101 -0
  59. data/doc/rdoc/files/ext/libxml/ruby_xml_parser_context_c.html +101 -0
  60. data/doc/rdoc/files/ext/libxml/ruby_xml_parser_options_c.html +101 -0
  61. data/doc/rdoc/files/ext/libxml/ruby_xml_reader_c.html +101 -0
  62. data/doc/rdoc/files/ext/libxml/ruby_xml_relaxng_c.html +101 -0
  63. data/doc/rdoc/files/ext/libxml/ruby_xml_sax2_handler_c.html +101 -0
  64. data/doc/rdoc/files/ext/libxml/ruby_xml_sax_parser_c.html +101 -0
  65. data/doc/rdoc/files/ext/libxml/ruby_xml_schema_c.html +101 -0
  66. data/doc/rdoc/files/ext/libxml/ruby_xml_xinclude_c.html +101 -0
  67. data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_c.html +101 -0
  68. data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_context_c.html +101 -0
  69. data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_expression_c.html +101 -0
  70. data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_object_c.html +101 -0
  71. data/doc/rdoc/files/ext/libxml/ruby_xml_xpointer_c.html +101 -0
  72. data/doc/rdoc/files/lib/libxml/attr_decl_rb.html +101 -0
  73. data/doc/rdoc/files/lib/libxml/attr_rb.html +101 -0
  74. data/doc/rdoc/files/lib/libxml/attributes_rb.html +101 -0
  75. data/doc/rdoc/files/lib/libxml/document_rb.html +101 -0
  76. data/doc/rdoc/files/lib/libxml/error_rb.html +101 -0
  77. data/doc/rdoc/files/lib/libxml/hpricot_rb.html +192 -0
  78. data/doc/rdoc/files/lib/libxml/html_parser_rb.html +101 -0
  79. data/doc/rdoc/files/lib/libxml/namespace_rb.html +101 -0
  80. data/doc/rdoc/files/lib/libxml/namespaces_rb.html +101 -0
  81. data/doc/rdoc/files/lib/libxml/node_rb.html +108 -0
  82. data/doc/rdoc/files/lib/libxml/ns_rb.html +101 -0
  83. data/doc/rdoc/files/lib/libxml/parser_rb.html +101 -0
  84. data/doc/rdoc/files/lib/libxml/properties_rb.html +101 -0
  85. data/doc/rdoc/files/lib/libxml/reader_rb.html +101 -0
  86. data/doc/rdoc/files/lib/libxml/sax_callbacks_rb.html +101 -0
  87. data/doc/rdoc/files/lib/libxml/sax_parser_rb.html +101 -0
  88. data/doc/rdoc/files/lib/libxml/tree_rb.html +101 -0
  89. data/doc/rdoc/files/lib/libxml/xpath_object_rb.html +101 -0
  90. data/doc/rdoc/files/lib/libxml_rb.html +133 -0
  91. data/doc/rdoc/files/lib/xml/libxml_rb.html +124 -0
  92. data/doc/rdoc/files/lib/xml_rb.html +134 -0
  93. data/doc/rdoc/fr_class_index.html +46 -0
  94. data/doc/rdoc/fr_file_index.html +84 -0
  95. data/doc/rdoc/fr_method_index.html +155 -0
  96. data/doc/rdoc/index.html +24 -0
  97. data/doc/rdoc/rdoc-style.css +208 -0
  98. data/ext/libxml/Makefile +157 -0
  99. data/ext/libxml/build.log +4 -0
  100. data/ext/libxml/extconf.h +5 -0
  101. data/ext/libxml/extconf.rb +278 -0
  102. data/ext/libxml/libxml.c +77 -0
  103. data/ext/libxml/libxml.o +0 -0
  104. data/ext/libxml/libxml_ruby.bundle +0 -0
  105. data/ext/libxml/mkmf.log +228 -0
  106. data/ext/libxml/ruby_libxml.h +93 -0
  107. data/ext/libxml/ruby_xml.c +893 -0
  108. data/ext/libxml/ruby_xml.h +10 -0
  109. data/ext/libxml/ruby_xml.o +0 -0
  110. data/ext/libxml/ruby_xml_attr.c +352 -0
  111. data/ext/libxml/ruby_xml_attr.h +14 -0
  112. data/ext/libxml/ruby_xml_attr.o +0 -0
  113. data/ext/libxml/ruby_xml_attr_decl.c +171 -0
  114. data/ext/libxml/ruby_xml_attr_decl.h +13 -0
  115. data/ext/libxml/ruby_xml_attr_decl.o +0 -0
  116. data/ext/libxml/ruby_xml_attributes.c +277 -0
  117. data/ext/libxml/ruby_xml_attributes.h +17 -0
  118. data/ext/libxml/ruby_xml_attributes.o +0 -0
  119. data/ext/libxml/ruby_xml_cbg.c +86 -0
  120. data/ext/libxml/ruby_xml_cbg.o +0 -0
  121. data/ext/libxml/ruby_xml_document.c +1006 -0
  122. data/ext/libxml/ruby_xml_document.c.old +936 -0
  123. data/ext/libxml/ruby_xml_document.h +17 -0
  124. data/ext/libxml/ruby_xml_document.o +0 -0
  125. data/ext/libxml/ruby_xml_dtd.c +257 -0
  126. data/ext/libxml/ruby_xml_dtd.h +9 -0
  127. data/ext/libxml/ruby_xml_dtd.o +0 -0
  128. data/ext/libxml/ruby_xml_encoding.c +134 -0
  129. data/ext/libxml/ruby_xml_encoding.h +12 -0
  130. data/ext/libxml/ruby_xml_encoding.o +0 -0
  131. data/ext/libxml/ruby_xml_error.c +1004 -0
  132. data/ext/libxml/ruby_xml_error.h +14 -0
  133. data/ext/libxml/ruby_xml_error.o +0 -0
  134. data/ext/libxml/ruby_xml_html_parser.c +92 -0
  135. data/ext/libxml/ruby_xml_html_parser.h +12 -0
  136. data/ext/libxml/ruby_xml_html_parser.o +0 -0
  137. data/ext/libxml/ruby_xml_html_parser_context.c +308 -0
  138. data/ext/libxml/ruby_xml_html_parser_context.h +12 -0
  139. data/ext/libxml/ruby_xml_html_parser_context.o +0 -0
  140. data/ext/libxml/ruby_xml_html_parser_options.c +40 -0
  141. data/ext/libxml/ruby_xml_html_parser_options.h +12 -0
  142. data/ext/libxml/ruby_xml_html_parser_options.o +0 -0
  143. data/ext/libxml/ruby_xml_input_cbg.c +191 -0
  144. data/ext/libxml/ruby_xml_input_cbg.h +20 -0
  145. data/ext/libxml/ruby_xml_input_cbg.o +0 -0
  146. data/ext/libxml/ruby_xml_io.c +30 -0
  147. data/ext/libxml/ruby_xml_io.h +9 -0
  148. data/ext/libxml/ruby_xml_io.o +0 -0
  149. data/ext/libxml/ruby_xml_namespace.c +170 -0
  150. data/ext/libxml/ruby_xml_namespace.h +12 -0
  151. data/ext/libxml/ruby_xml_namespace.o +0 -0
  152. data/ext/libxml/ruby_xml_namespaces.c +295 -0
  153. data/ext/libxml/ruby_xml_namespaces.h +11 -0
  154. data/ext/libxml/ruby_xml_namespaces.o +0 -0
  155. data/ext/libxml/ruby_xml_node.c +1386 -0
  156. data/ext/libxml/ruby_xml_node.h +13 -0
  157. data/ext/libxml/ruby_xml_node.o +0 -0
  158. data/ext/libxml/ruby_xml_parser.c +94 -0
  159. data/ext/libxml/ruby_xml_parser.h +14 -0
  160. data/ext/libxml/ruby_xml_parser.o +0 -0
  161. data/ext/libxml/ruby_xml_parser_context.c +982 -0
  162. data/ext/libxml/ruby_xml_parser_context.h +12 -0
  163. data/ext/libxml/ruby_xml_parser_context.o +0 -0
  164. data/ext/libxml/ruby_xml_parser_options.c +68 -0
  165. data/ext/libxml/ruby_xml_parser_options.h +14 -0
  166. data/ext/libxml/ruby_xml_parser_options.o +0 -0
  167. data/ext/libxml/ruby_xml_reader.c +1002 -0
  168. data/ext/libxml/ruby_xml_reader.h +14 -0
  169. data/ext/libxml/ruby_xml_reader.o +0 -0
  170. data/ext/libxml/ruby_xml_relaxng.c +111 -0
  171. data/ext/libxml/ruby_xml_relaxng.h +10 -0
  172. data/ext/libxml/ruby_xml_relaxng.o +0 -0
  173. data/ext/libxml/ruby_xml_sax2_handler.c +322 -0
  174. data/ext/libxml/ruby_xml_sax2_handler.h +12 -0
  175. data/ext/libxml/ruby_xml_sax2_handler.o +0 -0
  176. data/ext/libxml/ruby_xml_sax_parser.c +137 -0
  177. data/ext/libxml/ruby_xml_sax_parser.h +12 -0
  178. data/ext/libxml/ruby_xml_sax_parser.o +0 -0
  179. data/ext/libxml/ruby_xml_schema.c +159 -0
  180. data/ext/libxml/ruby_xml_schema.h +11 -0
  181. data/ext/libxml/ruby_xml_schema.o +0 -0
  182. data/ext/libxml/ruby_xml_version.h +9 -0
  183. data/ext/libxml/ruby_xml_xinclude.c +18 -0
  184. data/ext/libxml/ruby_xml_xinclude.h +13 -0
  185. data/ext/libxml/ruby_xml_xinclude.o +0 -0
  186. data/ext/libxml/ruby_xml_xpath.c +107 -0
  187. data/ext/libxml/ruby_xml_xpath.h +12 -0
  188. data/ext/libxml/ruby_xml_xpath.o +0 -0
  189. data/ext/libxml/ruby_xml_xpath_context.c +387 -0
  190. data/ext/libxml/ruby_xml_xpath_context.h +11 -0
  191. data/ext/libxml/ruby_xml_xpath_context.o +0 -0
  192. data/ext/libxml/ruby_xml_xpath_expression.c +83 -0
  193. data/ext/libxml/ruby_xml_xpath_expression.h +12 -0
  194. data/ext/libxml/ruby_xml_xpath_expression.o +0 -0
  195. data/ext/libxml/ruby_xml_xpath_object.c +336 -0
  196. data/ext/libxml/ruby_xml_xpath_object.h +19 -0
  197. data/ext/libxml/ruby_xml_xpath_object.o +0 -0
  198. data/ext/libxml/ruby_xml_xpointer.c +101 -0
  199. data/ext/libxml/ruby_xml_xpointer.h +13 -0
  200. data/ext/libxml/ruby_xml_xpointer.o +0 -0
  201. data/ext/mingw/Rakefile +34 -0
  202. data/ext/mingw/build.rake +41 -0
  203. data/ext/vc/libxml_ruby.sln +26 -0
  204. data/lib/libxml.rb +30 -0
  205. data/lib/libxml/attr.rb +111 -0
  206. data/lib/libxml/attr_decl.rb +78 -0
  207. data/lib/libxml/attributes.rb +12 -0
  208. data/lib/libxml/document.rb +190 -0
  209. data/lib/libxml/error.rb +88 -0
  210. data/lib/libxml/hpricot.rb +76 -0
  211. data/lib/libxml/html_parser.rb +94 -0
  212. data/lib/libxml/namespace.rb +60 -0
  213. data/lib/libxml/namespaces.rb +36 -0
  214. data/lib/libxml/node.rb +385 -0
  215. data/lib/libxml/ns.rb +20 -0
  216. data/lib/libxml/parser.rb +365 -0
  217. data/lib/libxml/properties.rb +21 -0
  218. data/lib/libxml/reader.rb +27 -0
  219. data/lib/libxml/sax_callbacks.rb +178 -0
  220. data/lib/libxml/sax_parser.rb +56 -0
  221. data/lib/libxml/tree.rb +27 -0
  222. data/lib/libxml/xpath_object.rb +14 -0
  223. data/lib/xml.rb +14 -0
  224. data/lib/xml/libxml.rb +8 -0
  225. data/setup.rb +1585 -0
  226. data/test/etc_doc_to_s.rb +19 -0
  227. data/test/ets_doc_file.rb +15 -0
  228. data/test/ets_doc_to_s.rb +21 -0
  229. data/test/ets_gpx.rb +26 -0
  230. data/test/ets_node_gc.rb +21 -0
  231. data/test/ets_test.xml +2 -0
  232. data/test/ets_tsr.rb +9 -0
  233. data/test/model/atom.xml +13 -0
  234. data/test/model/bands.xml +5 -0
  235. data/test/model/books.xml +146 -0
  236. data/test/model/merge_bug_data.xml +58 -0
  237. data/test/model/ruby-lang.html +238 -0
  238. data/test/model/rubynet.xml +79 -0
  239. data/test/model/rubynet_project +1 -0
  240. data/test/model/shiporder.rnc +28 -0
  241. data/test/model/shiporder.rng +86 -0
  242. data/test/model/shiporder.xml +23 -0
  243. data/test/model/shiporder.xsd +31 -0
  244. data/test/model/soap.xml +27 -0
  245. data/test/model/xinclude.xml +5 -0
  246. data/test/tc_attr.rb +170 -0
  247. data/test/tc_attr_decl.rb +131 -0
  248. data/test/tc_attributes.rb +133 -0
  249. data/test/tc_deprecated_require.rb +11 -0
  250. data/test/tc_document.rb +113 -0
  251. data/test/tc_document_write.rb +118 -0
  252. data/test/tc_dtd.rb +123 -0
  253. data/test/tc_error.rb +136 -0
  254. data/test/tc_html_parser.rb +138 -0
  255. data/test/tc_namespace.rb +59 -0
  256. data/test/tc_namespaces.rb +174 -0
  257. data/test/tc_node.rb +181 -0
  258. data/test/tc_node_cdata.rb +49 -0
  259. data/test/tc_node_comment.rb +30 -0
  260. data/test/tc_node_copy.rb +40 -0
  261. data/test/tc_node_edit.rb +158 -0
  262. data/test/tc_node_text.rb +69 -0
  263. data/test/tc_node_write.rb +83 -0
  264. data/test/tc_node_xlink.rb +26 -0
  265. data/test/tc_parser.rb +330 -0
  266. data/test/tc_parser_context.rb +186 -0
  267. data/test/tc_properties.rb +36 -0
  268. data/test/tc_reader.rb +284 -0
  269. data/test/tc_relaxng.rb +51 -0
  270. data/test/tc_sax_parser.rb +274 -0
  271. data/test/tc_schema.rb +51 -0
  272. data/test/tc_traversal.rb +220 -0
  273. data/test/tc_xinclude.rb +19 -0
  274. data/test/tc_xml.rb +224 -0
  275. data/test/tc_xpath.rb +193 -0
  276. data/test/tc_xpath_context.rb +78 -0
  277. data/test/tc_xpath_expression.rb +35 -0
  278. data/test/tc_xpointer.rb +72 -0
  279. data/test/test_suite.rb +33 -0
  280. metadata +376 -0
@@ -0,0 +1,14 @@
1
+ /* $Id: rxml_ns.h 324 2008-07-08 23:00:02Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #ifndef __RXML_ERROR__
6
+ #define __RXML_ERROR__
7
+
8
+ extern VALUE eXMLError;
9
+
10
+ void rxml_init_error(void);
11
+ VALUE rxml_error_wrap(xmlErrorPtr xerror);
12
+ void rxml_raise(xmlErrorPtr xerror);
13
+
14
+ #endif
@@ -0,0 +1,92 @@
1
+ /* $Id: ruby_xml_html_parser.c 758 2009-01-25 20:36:03Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #include "ruby_libxml.h"
6
+
7
+ /* Document-class: LibXML::XML::HTMLParser
8
+ *
9
+ * The HTML parser implements an HTML 4.0 non-verifying parser with an API
10
+ * compatible with the XML::Parser. In contrast with the XML::Parser,
11
+ * it can parse "real world" HTML, even if it is severely broken from a
12
+ * specification point of view.
13
+ *
14
+ * The HTML parser creates an in-memory document object
15
+ * that consists of any number of XML::Node instances. This is a simple
16
+ * and powerful model, but has the major limitation that the size of
17
+ * the document that can be processed is limited by the amount of
18
+ * memory available.
19
+ *
20
+ * Using the html parser is simple:
21
+ *
22
+ * parser = XML::HTMLParser.file('my_file')
23
+ * doc = parser.parse
24
+ *
25
+ * You can also parse documents (see XML::HTMLParser.document),
26
+ * strings (see XML::HTMLParser.string) and io objects (see
27
+ * XML::HTMLParser.io).
28
+ */
29
+
30
+ VALUE cXMLHtmlParser;
31
+ static ID CONTEXT_ATTR;
32
+
33
+
34
+ /* call-seq:
35
+ * XML::HTMLParser.new(context = nil) -> parser
36
+ *
37
+ * Initializes a new parser for the given context. Omitting the context is deprecated: a warning is issued and a default context is created.
38
+ */
39
+ static VALUE rxml_html_parser_initialize(int argc, VALUE *argv, VALUE self)
40
+ {
41
+ VALUE context = Qnil;
42
+
43
+ rb_scan_args(argc, argv, "01", &context);
44
+
45
+ if (context == Qnil)
46
+ {
47
+ rb_warn("Passing no parameters to XML::HTMLParser.new is deprecated. Pass an instance of XML::Parser::Context instead.");
48
+ context = rb_class_new_instance(0, NULL, cXMLParserContext);
49
+ }
50
+
51
+ rb_ivar_set(self, CONTEXT_ATTR, context);
52
+ return self;
53
+ }
54
+
55
+ /*
56
+ * call-seq:
57
+ * parser.parse -> XML::Document
58
+ *
59
+ * Parse the input HTML and create an XML::Document with
60
+ * its content. If parsing fails and the context is not in
61
+ * recovery mode, the context's last error is raised.
62
+ */
63
+ static VALUE rxml_html_parser_parse(VALUE self)
64
+ {
65
+ xmlParserCtxtPtr ctxt;
66
+ VALUE context = rb_ivar_get(self, CONTEXT_ATTR);
67
+
68
+ Data_Get_Struct(context, xmlParserCtxt, ctxt);
69
+
70
+ if (htmlParseDocument(ctxt) == -1 && ! ctxt->recovery)
71
+ {
72
+ xmlFreeDoc(ctxt->myDoc); /* xmlFreeDoc ignores a NULL document */
73
+ ctxt->myDoc = NULL; /* do not leave a dangling pointer in the context */
74
+ rxml_raise(&ctxt->lastError);
75
+ }
76
+
77
+ return rxml_document_wrap(ctxt->myDoc);
78
+ }
79
+ /* Defines the HTMLParser class under mXML and registers its attribute and instance methods. */
80
+ void rxml_init_html_parser(void)
81
+ {
82
+ CONTEXT_ATTR = rb_intern("@context");
83
+
84
+ cXMLHtmlParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
85
+
86
+ /* Attributes */
87
+ rb_define_attr(cXMLHtmlParser, "input", 1, 0);
88
+
89
+ /* Instance methods */
90
+ rb_define_method(cXMLHtmlParser, "initialize", rxml_html_parser_initialize, -1);
91
+ rb_define_method(cXMLHtmlParser, "parse", rxml_html_parser_parse, 0);
92
+ }
@@ -0,0 +1,12 @@
1
+ /* $Id: ruby_xml_html_parser.h 758 2009-01-25 20:36:03Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #ifndef __RXML_HTML_PARSER__
6
+ #define __RXML_HTML_PARSER__
7
+
8
+ extern VALUE cXMLHtmlParser;
9
+
10
+ void rxml_init_html_parser(void);
11
+
12
+ #endif
@@ -0,0 +1,308 @@
1
+ /* $Id: ruby_xml_parser_context.c 710 2009-01-20 05:30:51Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #include "ruby_libxml.h"
6
+ #include "ruby_xml_html_parser_context.h"
7
+
8
+
9
+ /*
10
+ * Document-class: LibXML::XML::HTMLParser::Context
11
+ *
12
+ * The XML::HTMLParser::Context class provides in-depth control over how
13
+ * a document is parsed.
14
+ */
15
+
16
+ VALUE cXMLHtmlParserContext;
17
+ static ID IO_ATTR;
18
+
19
+ /* OS X 10.5 ships with libxml2 version 2.6.16 which does not expose the
20
+ htmlNewParserCtxt (or htmlInitParserCtxt which it uses) method. htmlNewParserCtxt
21
+ wasn't added to the libxml2 header files until 2.6.27. So the next two
22
+ methods are simply copied from a newer version of libxml2 (2.7.2). */
23
+ #if LIBXML_VERSION < 20627
24
+ #define XML_CTXT_FINISH_DTD_0 0xabcd1234
25
+ static int htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
26
+ {
27
+ htmlSAXHandler *sax;
28
+ if (ctxt == NULL) return(-1);
29
+ /* Zero the context, then allocate its dictionary and a private SAX handler. Returns 0 on success. */
30
+ memset(ctxt, 0, sizeof(htmlParserCtxt));
31
+ ctxt->dict = xmlDictCreate();
32
+ if (ctxt->dict == NULL) {
33
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
34
+ return(-1);
35
+ }
36
+ sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler));
37
+ if (sax == NULL) {
38
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
39
+ return(-1);
40
+ }
41
+ else
42
+ memset(sax, 0, sizeof(htmlSAXHandler));
43
+ /* Input table: 5 slots. NOTE(review): rb_raise is documented as non-returning, so the statements after it in each failure branch look unreachable -- confirm. */
44
+ ctxt->inputTab = (htmlParserInputPtr *) xmlMalloc(5 * sizeof(htmlParserInputPtr));
45
+ if (ctxt->inputTab == NULL) {
46
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
47
+ ctxt->inputNr = 0;
48
+ ctxt->inputMax = 0;
49
+ ctxt->input = NULL;
50
+ return(-1);
51
+ }
52
+ ctxt->inputNr = 0;
53
+ ctxt->inputMax = 5;
54
+ ctxt->input = NULL;
55
+ ctxt->version = NULL;
56
+ ctxt->encoding = NULL;
57
+ ctxt->standalone = -1;
58
+ ctxt->instate = XML_PARSER_START;
59
+ /* Node table: 10 slots. */
60
+ ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr));
61
+ if (ctxt->nodeTab == NULL) {
62
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
63
+ ctxt->nodeNr = 0;
64
+ ctxt->nodeMax = 0;
65
+ ctxt->node = NULL;
66
+ ctxt->inputNr = 0;
67
+ ctxt->inputMax = 0;
68
+ ctxt->input = NULL;
69
+ return(-1);
70
+ }
71
+ ctxt->nodeNr = 0;
72
+ ctxt->nodeMax = 10;
73
+ ctxt->node = NULL;
74
+ /* Name table: 10 slots. */
75
+ ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
76
+ if (ctxt->nameTab == NULL) {
77
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
78
+ ctxt->nameNr = 0;
79
+ ctxt->nameMax = 10;
80
+ ctxt->name = NULL;
81
+ ctxt->nodeNr = 0;
82
+ ctxt->nodeMax = 0;
83
+ ctxt->node = NULL;
84
+ ctxt->inputNr = 0;
85
+ ctxt->inputMax = 0;
86
+ ctxt->input = NULL;
87
+ return(-1);
88
+ }
89
+ ctxt->nameNr = 0;
90
+ ctxt->nameMax = 10;
91
+ ctxt->name = NULL;
92
+ /* sax was already checked for NULL right after its allocation, so only the else branch is expected to run. */
93
+ if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
94
+ else {
95
+ ctxt->sax = sax;
96
+ memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
97
+ }
98
+ ctxt->userData = ctxt;
99
+ ctxt->myDoc = NULL;
100
+ ctxt->wellFormed = 1;
101
+ ctxt->replaceEntities = 0;
102
+ ctxt->linenumbers = xmlLineNumbersDefaultValue;
103
+ ctxt->html = 1;
104
+ ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
105
+ ctxt->vctxt.userData = ctxt;
106
+ ctxt->vctxt.error = xmlParserValidityError;
107
+ ctxt->vctxt.warning = xmlParserValidityWarning;
108
+ ctxt->record_info = 0;
109
+ ctxt->validate = 0;
110
+ ctxt->nbChars = 0;
111
+ ctxt->checkIndex = 0;
112
+ ctxt->catalogs = NULL;
113
+ xmlInitNodeInfoSeq(&ctxt->node_seq);
114
+ return(0);
115
+ }
116
+ /* Allocates a zeroed parser context and initializes it with htmlInitParserCtxt; raises NoMemError when the allocation fails. */
117
+ static htmlParserCtxtPtr htmlNewParserCtxt(void)
118
+ {
119
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
120
+ if (ctxt == NULL) {
121
+ rb_raise(rb_eNoMemError, "NewParserCtxt: out of memory\n");
122
+ return(NULL);
123
+ }
124
+ memset(ctxt, 0, sizeof(xmlParserCtxt));
125
+ if (htmlInitParserCtxt(ctxt) < 0) {
126
+ htmlFreeParserCtxt(ctxt);
127
+ return(NULL);
128
+ }
129
+ return(ctxt);
130
+ }
131
+ #endif
132
+ /* Free callback handed to Data_Wrap_Struct: releases the wrapped HTML parser context. */
133
+ static void rxml_html_parser_context_free(htmlParserCtxtPtr ctxt)
134
+ {
135
+ htmlFreeParserCtxt(ctxt);
136
+ }
137
+ /* Wraps ctxt in a cXMLHtmlParserContext object with no mark function and rxml_html_parser_context_free as its free function. */
138
+ static VALUE rxml_html_parser_context_wrap(htmlParserCtxtPtr ctxt)
139
+ {
140
+ return Data_Wrap_Struct(cXMLHtmlParserContext, NULL, rxml_html_parser_context_free, ctxt);
141
+ }
142
+
143
+ /* call-seq:
144
+ * XML::HTMLParser::Context.file(file) -> XML::HTMLParser::Context
145
+ *
146
+ * Builds a new parser context that reads from the given file or uri.
147
+ *
148
+ * Parameters:
149
+ *
150
+ * file - A filename or uri.
151
+ */
152
+ static VALUE rxml_html_parser_context_file(VALUE klass, VALUE file)
153
+ {
154
+ htmlParserCtxtPtr ctxt;
155
+ ctxt = htmlCreateFileParserCtxt(StringValuePtr(file), NULL);
156
+ if (ctxt == NULL)
157
+ rxml_raise(&xmlLastError);
158
+ /* The context creation above (xmlInitParserCtxt) and the options call
159
+ below (xmlCtxtUseOptionsInternal) do not initialize the same context
160
+ options: XML_PARSE_NODICT, for one, is set to 0 by the former and to 1
161
+ by the latter. Both steps are therefore required. */
162
+ htmlCtxtUseOptions(ctxt, rxml_libxml_default_options());
163
+
164
+ return rxml_html_parser_context_wrap(ctxt);
165
+ }
166
+
167
+ /* call-seq:
168
+ * XML::HTMLParser::Context.io(io) -> XML::HTMLParser::Context
169
+ *
170
+ * Creates a new parser context that reads its input from the specified io object.
171
+ *
172
+ * Parameters:
173
+ *
174
+ * io - A ruby IO object. Passing nil raises a TypeError.
175
+ */
176
+ static VALUE rxml_html_parser_context_io(VALUE klass, VALUE io)
177
+ {
178
+ VALUE result;
179
+ htmlParserCtxtPtr ctxt;
180
+ xmlParserInputBufferPtr input;
181
+ xmlParserInputPtr stream;
182
+
183
+ if (NIL_P(io))
184
+ rb_raise(rb_eTypeError, "Must pass in an IO object");
185
+
186
+ input = xmlParserInputBufferCreateIO((xmlInputReadCallback) rxml_read_callback, NULL,
187
+ (void*)io, XML_CHAR_ENCODING_NONE);
188
+ /* io is handed to the read callback as its opaque context pointer; no close callback is registered. */
189
+ ctxt = htmlNewParserCtxt();
190
+ if (!ctxt)
191
+ {
192
+ xmlFreeParserInputBuffer(input);
193
+ rxml_raise(&xmlLastError);
194
+ }
195
+
196
+ /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
197
+ xmlCtxtUseOptionsInternal (called below) initialize slightly different
198
+ context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
199
+ sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
200
+ htmlCtxtUseOptions(ctxt, rxml_libxml_default_options());
201
+
202
+ stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
203
+
204
+ if (!stream)
205
+ {
206
+ xmlFreeParserInputBuffer(input);
207
+ htmlFreeParserCtxt(ctxt); /* matches htmlNewParserCtxt and the wrapper's free function */
208
+ rxml_raise(&xmlLastError);
209
+ }
210
+ inputPush(ctxt, stream);
211
+ result = rxml_html_parser_context_wrap(ctxt);
212
+
213
+ /* Reference io from the wrapper object so it is not collected while the context is alive. */
214
+ rb_ivar_set(result, IO_ATTR, io);
215
+
216
+ return result;
217
+ }
218
+
219
+ /* call-seq:
220
+ * XML::HTMLParser::Context.string(string) -> XML::HTMLParser::Context
221
+ *
222
+ * Builds a new parser context that reads from the given string.
223
+ *
224
+ * Parameters:
225
+ *
226
+ * string - A non-empty string holding the data to parse.
227
+ */
228
+ static VALUE rxml_html_parser_context_string(VALUE klass, VALUE string)
229
+ {
230
+ htmlParserCtxtPtr ctxt;
231
+ Check_Type(string, T_STRING);
232
+
233
+ if (!RSTRING_LEN(string))
234
+ rb_raise(rb_eArgError, "Must specify a string with one or more characters");
235
+
236
+ ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(string),
237
+ RSTRING_LEN(string));
238
+ if (ctxt == NULL)
239
+ rxml_raise(&xmlLastError);
240
+
241
+ /* The context creation above (xmlInitParserCtxt) and the options call
242
+ below (xmlCtxtUseOptionsInternal) do not initialize the same context
243
+ options: XML_PARSE_NODICT, for one, is set to 0 by the former and to 1
244
+ by the latter. Both steps are therefore required. */
245
+ htmlCtxtUseOptions(ctxt, rxml_libxml_default_options());
246
+
247
+ htmlDefaultSAXHandlerInit();
248
+ if (ctxt->sax)
249
+ memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
250
+
251
+ return rxml_html_parser_context_wrap(ctxt);
252
+ }
253
+
254
+ /*
255
+ * call-seq:
256
+ * context.disable_cdata = (true|false)
257
+ *
258
+ * Control whether CDATA nodes will be created in this context. nil and false both re-enable them.
259
+ */
260
+ static VALUE rxml_html_parser_context_disable_cdata_set(VALUE self, VALUE bool)
261
+ {
262
+ htmlParserCtxtPtr ctxt;
263
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
264
+
265
+ if (ctxt->sax == NULL)
266
+ rb_raise(rb_eRuntimeError, "Sax handler is not yet set");
267
+
268
+ /* LibXML controls this internally with the default SAX handler. RTEST is needed because nil is a non-zero VALUE. */
269
+ if (RTEST(bool))
270
+ ctxt->sax->cdataBlock = NULL;
271
+ else
272
+ ctxt->sax->cdataBlock = htmlDefaultSAXHandler.cdataBlock;
273
+
274
+ return bool;
275
+ }
276
+
277
+ /*
278
+ * call-seq:
279
+ * context.options = XML::Parser::Options::NOENT |
280
+ * XML::Parser::Options::NOCDATA
281
+ *
282
+ * Provides control over the execution of a parser. Valid values
283
+ * are the constants defined on XML::Parser::Options. Multiple
284
+ * options can be combined by using Bitwise OR (|).
285
+ */
286
+ static VALUE rxml_html_parser_context_options_set(VALUE self, VALUE options)
287
+ {
288
+ htmlParserCtxtPtr ctxt;
289
+
290
+ Check_Type(options, T_FIXNUM);
291
+
292
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
293
+ htmlCtxtUseOptions(ctxt, NUM2INT(options));
294
+
295
+ return self;
296
+ }
297
+ /* Defines XML::HTMLParser::Context as a subclass of cXMLParserContext and registers its constructors and setters. */
298
+ void rxml_init_html_parser_context(void)
299
+ {
300
+ IO_ATTR = rb_intern("@io"); /* IO_ATTR is an ID passed to rb_ivar_set, so it must not be a Symbol VALUE */
301
+ cXMLHtmlParserContext = rb_define_class_under(cXMLHtmlParser, "Context", cXMLParserContext);
302
+
303
+ rb_define_singleton_method(cXMLHtmlParserContext, "file", rxml_html_parser_context_file, 1);
304
+ rb_define_singleton_method(cXMLHtmlParserContext, "io", rxml_html_parser_context_io, 1);
305
+ rb_define_singleton_method(cXMLHtmlParserContext, "string", rxml_html_parser_context_string, 1);
306
+ rb_define_method(cXMLHtmlParserContext, "disable_cdata=", rxml_html_parser_context_disable_cdata_set, 1);
307
+ rb_define_method(cXMLHtmlParserContext, "options=", rxml_html_parser_context_options_set, 1);
308
+ }
@@ -0,0 +1,12 @@
1
+ /* $Id: ruby_xml_parser_context.h 666 2008-12-07 00:16:50Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #ifndef __RXML_HTML_PARSER_CONTEXT__
6
+ #define __RXML_HTML_PARSER_CONTEXT__
7
+
8
+ extern VALUE cXMLHtmlParserContext;
9
+
10
+ void rxml_init_html_parser_context(void);
11
+
12
+ #endif