libxml-ruby 2.8.0 → 3.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (204) hide show
  1. checksums.yaml +5 -5
  2. data/HISTORY +842 -775
  3. data/LICENSE +20 -20
  4. data/MANIFEST +166 -166
  5. data/README.rdoc +217 -184
  6. data/Rakefile +90 -78
  7. data/ext/libxml/extconf.h +3 -0
  8. data/ext/libxml/extconf.rb +61 -116
  9. data/ext/libxml/libxml.c +80 -76
  10. data/ext/libxml/ruby_libxml.h +67 -75
  11. data/ext/libxml/ruby_xml.c +933 -893
  12. data/ext/libxml/ruby_xml.h +10 -10
  13. data/ext/libxml/ruby_xml_attr.c +333 -333
  14. data/ext/libxml/ruby_xml_attr.h +12 -12
  15. data/ext/libxml/ruby_xml_attr_decl.c +153 -153
  16. data/ext/libxml/ruby_xml_attr_decl.h +11 -11
  17. data/ext/libxml/ruby_xml_attributes.c +275 -275
  18. data/ext/libxml/ruby_xml_attributes.h +15 -15
  19. data/ext/libxml/ruby_xml_cbg.c +85 -85
  20. data/ext/libxml/ruby_xml_document.c +1123 -1147
  21. data/ext/libxml/ruby_xml_document.h +11 -11
  22. data/ext/libxml/ruby_xml_dtd.c +248 -268
  23. data/ext/libxml/ruby_xml_dtd.h +9 -9
  24. data/ext/libxml/ruby_xml_encoding.c +250 -260
  25. data/ext/libxml/ruby_xml_encoding.h +16 -19
  26. data/ext/libxml/ruby_xml_error.c +996 -996
  27. data/ext/libxml/ruby_xml_error.h +12 -12
  28. data/ext/libxml/ruby_xml_html_parser.c +89 -92
  29. data/ext/libxml/ruby_xml_html_parser.h +10 -10
  30. data/ext/libxml/ruby_xml_html_parser_context.c +337 -338
  31. data/ext/libxml/ruby_xml_html_parser_context.h +10 -10
  32. data/ext/libxml/ruby_xml_html_parser_options.c +46 -46
  33. data/ext/libxml/ruby_xml_html_parser_options.h +10 -10
  34. data/ext/libxml/ruby_xml_input_cbg.c +191 -191
  35. data/ext/libxml/ruby_xml_input_cbg.h +20 -20
  36. data/ext/libxml/ruby_xml_io.c +47 -50
  37. data/ext/libxml/ruby_xml_io.h +10 -10
  38. data/ext/libxml/ruby_xml_namespace.c +153 -153
  39. data/ext/libxml/ruby_xml_namespace.h +10 -10
  40. data/ext/libxml/ruby_xml_namespaces.c +293 -293
  41. data/ext/libxml/ruby_xml_namespaces.h +9 -9
  42. data/ext/libxml/ruby_xml_node.c +1402 -1452
  43. data/ext/libxml/ruby_xml_node.h +13 -11
  44. data/ext/libxml/ruby_xml_parser.c +91 -94
  45. data/ext/libxml/ruby_xml_parser.h +12 -12
  46. data/ext/libxml/ruby_xml_parser_context.c +999 -1001
  47. data/ext/libxml/ruby_xml_parser_context.h +10 -10
  48. data/ext/libxml/ruby_xml_parser_options.c +66 -66
  49. data/ext/libxml/ruby_xml_parser_options.h +12 -12
  50. data/ext/libxml/ruby_xml_reader.c +1239 -1228
  51. data/ext/libxml/ruby_xml_reader.h +17 -17
  52. data/ext/libxml/ruby_xml_relaxng.c +110 -111
  53. data/ext/libxml/ruby_xml_relaxng.h +10 -10
  54. data/ext/libxml/ruby_xml_sax2_handler.c +326 -328
  55. data/ext/libxml/ruby_xml_sax2_handler.h +10 -10
  56. data/ext/libxml/ruby_xml_sax_parser.c +116 -120
  57. data/ext/libxml/ruby_xml_sax_parser.h +10 -10
  58. data/ext/libxml/ruby_xml_schema.c +278 -301
  59. data/ext/libxml/ruby_xml_schema.h +809 -809
  60. data/ext/libxml/ruby_xml_schema_attribute.c +109 -109
  61. data/ext/libxml/ruby_xml_schema_attribute.h +15 -15
  62. data/ext/libxml/ruby_xml_schema_element.c +95 -94
  63. data/ext/libxml/ruby_xml_schema_element.h +14 -14
  64. data/ext/libxml/ruby_xml_schema_facet.c +52 -52
  65. data/ext/libxml/ruby_xml_schema_facet.h +13 -13
  66. data/ext/libxml/ruby_xml_schema_type.c +232 -259
  67. data/ext/libxml/ruby_xml_schema_type.h +9 -9
  68. data/ext/libxml/ruby_xml_version.h +9 -9
  69. data/ext/libxml/ruby_xml_writer.c +1133 -1137
  70. data/ext/libxml/ruby_xml_writer.h +10 -10
  71. data/ext/libxml/ruby_xml_xinclude.c +16 -16
  72. data/ext/libxml/ruby_xml_xinclude.h +11 -11
  73. data/ext/libxml/ruby_xml_xpath.c +194 -188
  74. data/ext/libxml/ruby_xml_xpath.h +13 -13
  75. data/ext/libxml/ruby_xml_xpath_context.c +360 -361
  76. data/ext/libxml/ruby_xml_xpath_context.h +9 -9
  77. data/ext/libxml/ruby_xml_xpath_expression.c +81 -81
  78. data/ext/libxml/ruby_xml_xpath_expression.h +10 -10
  79. data/ext/libxml/ruby_xml_xpath_object.c +338 -335
  80. data/ext/libxml/ruby_xml_xpath_object.h +17 -17
  81. data/ext/libxml/ruby_xml_xpointer.c +99 -99
  82. data/ext/libxml/ruby_xml_xpointer.h +11 -11
  83. data/ext/vc/libxml_ruby.sln +17 -15
  84. data/lib/libxml-ruby.rb +30 -0
  85. data/lib/libxml.rb +3 -33
  86. data/lib/libxml/node.rb +2 -78
  87. data/lib/libxml/parser.rb +0 -266
  88. data/lib/libxml/sax_parser.rb +0 -17
  89. data/lib/libxml/schema.rb +47 -66
  90. data/lib/libxml/schema/attribute.rb +19 -19
  91. data/lib/libxml/schema/element.rb +19 -27
  92. data/lib/libxml/schema/type.rb +21 -29
  93. data/libxml-ruby.gemspec +48 -44
  94. data/script/benchmark/depixelate +634 -634
  95. data/script/benchmark/hamlet.xml +9054 -9054
  96. data/script/benchmark/parsecount +170 -170
  97. data/script/benchmark/throughput +41 -41
  98. data/script/test +6 -6
  99. data/setup.rb +0 -1
  100. data/test/c14n/given/example-1.xml +14 -14
  101. data/test/c14n/given/example-2.xml +11 -11
  102. data/test/c14n/given/example-3.xml +18 -18
  103. data/test/c14n/given/example-4.xml +9 -9
  104. data/test/c14n/given/example-5.xml +12 -12
  105. data/test/c14n/given/example-6.xml +2 -2
  106. data/test/c14n/given/example-7.xml +11 -11
  107. data/test/c14n/given/example-8.xml +11 -11
  108. data/test/c14n/given/example-8.xpath +9 -9
  109. data/test/c14n/result/1-1-without-comments/example-1 +3 -3
  110. data/test/c14n/result/1-1-without-comments/example-2 +10 -10
  111. data/test/c14n/result/1-1-without-comments/example-3 +13 -13
  112. data/test/c14n/result/1-1-without-comments/example-4 +8 -8
  113. data/test/c14n/result/1-1-without-comments/example-5 +2 -2
  114. data/test/c14n/result/with-comments/example-1 +5 -5
  115. data/test/c14n/result/with-comments/example-2 +10 -10
  116. data/test/c14n/result/with-comments/example-3 +13 -13
  117. data/test/c14n/result/with-comments/example-4 +8 -8
  118. data/test/c14n/result/with-comments/example-5 +3 -3
  119. data/test/c14n/result/without-comments/example-1 +3 -3
  120. data/test/c14n/result/without-comments/example-2 +10 -10
  121. data/test/c14n/result/without-comments/example-3 +13 -13
  122. data/test/c14n/result/without-comments/example-4 +8 -8
  123. data/test/c14n/result/without-comments/example-5 +2 -2
  124. data/test/model/atom.xml +12 -12
  125. data/test/model/bands.iso-8859-1.xml +4 -4
  126. data/test/model/bands.utf-8.xml +4 -4
  127. data/test/model/bands.xml +4 -4
  128. data/test/model/books.xml +153 -153
  129. data/test/model/cwm_1_0.xml +11336 -0
  130. data/test/model/merge_bug_data.xml +58 -58
  131. data/test/model/ruby-lang.html +238 -238
  132. data/test/model/rubynet.xml +79 -79
  133. data/test/model/shiporder.rnc +28 -28
  134. data/test/model/shiporder.rng +86 -86
  135. data/test/model/shiporder.xml +22 -22
  136. data/test/model/shiporder.xsd +39 -39
  137. data/test/model/soap.xml +27 -27
  138. data/test/model/xinclude.xml +4 -4
  139. data/test/test.xml +2 -0
  140. data/test/{tc_attr.rb → test_attr.rb} +23 -25
  141. data/test/{tc_attr_decl.rb → test_attr_decl.rb} +13 -14
  142. data/test/{tc_attributes.rb → test_attributes.rb} +11 -18
  143. data/test/{tc_canonicalize.rb → test_canonicalize.rb} +36 -41
  144. data/test/test_deprecated_require.rb +12 -0
  145. data/test/{tc_document.rb → test_document.rb} +32 -27
  146. data/test/test_document_write.rb +146 -0
  147. data/test/{tc_dtd.rb → test_dtd.rb} +28 -29
  148. data/test/{tc_encoding.rb → test_encoding.rb} +129 -126
  149. data/test/{tc_encoding_sax.rb → test_encoding_sax.rb} +7 -6
  150. data/test/test_error.rb +178 -0
  151. data/test/test_helper.rb +3 -10
  152. data/test/test_html_parser.rb +162 -0
  153. data/test/test_html_parser_context.rb +23 -0
  154. data/test/test_namespace.rb +60 -0
  155. data/test/{tc_namespaces.rb → test_namespaces.rb} +34 -44
  156. data/test/{tc_node.rb → test_node.rb} +68 -47
  157. data/test/{tc_node_cdata.rb → test_node_cdata.rb} +12 -13
  158. data/test/{tc_node_comment.rb → test_node_comment.rb} +7 -8
  159. data/test/{tc_node_copy.rb → test_node_copy.rb} +4 -6
  160. data/test/{tc_node_edit.rb → test_node_edit.rb} +23 -41
  161. data/test/{tc_node_pi.rb → test_node_pi.rb} +37 -40
  162. data/test/{tc_node_text.rb → test_node_text.rb} +10 -12
  163. data/test/{tc_node_write.rb → test_node_write.rb} +18 -29
  164. data/test/test_node_xlink.rb +28 -0
  165. data/test/test_parser.rb +324 -0
  166. data/test/{tc_parser_context.rb → test_parser_context.rb} +41 -42
  167. data/test/{tc_properties.rb → test_properties.rb} +6 -7
  168. data/test/test_reader.rb +363 -0
  169. data/test/test_relaxng.rb +53 -0
  170. data/test/{tc_sax_parser.rb → test_sax_parser.rb} +36 -37
  171. data/test/{tc_schema.rb → test_schema.rb} +43 -37
  172. data/test/test_suite.rb +38 -40
  173. data/test/{tc_traversal.rb → test_traversal.rb} +5 -6
  174. data/test/{tc_writer.rb → test_writer.rb} +468 -448
  175. data/test/{tc_xinclude.rb → test_xinclude.rb} +4 -5
  176. data/test/test_xml.rb +262 -0
  177. data/test/{tc_xpath.rb → test_xpath.rb} +31 -32
  178. data/test/{tc_xpath_context.rb → test_xpath_context.rb} +8 -9
  179. data/test/test_xpath_expression.rb +37 -0
  180. data/test/{tc_xpointer.rb → test_xpointer.rb} +16 -18
  181. metadata +117 -95
  182. data/lib/libxml/ns.rb +0 -22
  183. data/lib/libxml/properties.rb +0 -23
  184. data/lib/libxml/reader.rb +0 -29
  185. data/lib/libxml/xpath_object.rb +0 -16
  186. data/test/etc_doc_to_s.rb +0 -21
  187. data/test/ets_doc_file.rb +0 -17
  188. data/test/ets_doc_to_s.rb +0 -23
  189. data/test/ets_gpx.rb +0 -28
  190. data/test/ets_node_gc.rb +0 -23
  191. data/test/ets_test.xml +0 -2
  192. data/test/ets_tsr.rb +0 -11
  193. data/test/tc_deprecated_require.rb +0 -13
  194. data/test/tc_document_write.rb +0 -196
  195. data/test/tc_error.rb +0 -180
  196. data/test/tc_html_parser.rb +0 -153
  197. data/test/tc_html_parser_context.rb +0 -24
  198. data/test/tc_namespace.rb +0 -62
  199. data/test/tc_node_xlink.rb +0 -29
  200. data/test/tc_parser.rb +0 -381
  201. data/test/tc_reader.rb +0 -400
  202. data/test/tc_relaxng.rb +0 -54
  203. data/test/tc_xml.rb +0 -226
  204. data/test/tc_xpath_expression.rb +0 -38
@@ -1,12 +1,12 @@
1
- /* Please see the LICENSE file for copyright and distribution information */
2
-
3
- #ifndef __RXML_ERROR__
4
- #define __RXML_ERROR__
5
-
6
- extern VALUE eXMLError;
7
-
8
- void rxml_init_error();
9
- VALUE rxml_error_wrap(xmlErrorPtr xerror);
10
- void rxml_raise(xmlErrorPtr xerror);
11
-
12
- #endif
1
+ /* Please see the LICENSE file for copyright and distribution information */
2
+
3
+ #ifndef __RXML_ERROR__
4
+ #define __RXML_ERROR__
5
+
6
+ extern VALUE eXMLError;
7
+
8
+ void rxml_init_error();
9
+ VALUE rxml_error_wrap(xmlErrorPtr xerror);
10
+ NORETURN(void rxml_raise(xmlErrorPtr xerror));
11
+
12
+ #endif
@@ -1,92 +1,89 @@
1
- /* Please see the LICENSE file for copyright and distribution information */
2
-
3
- #include "ruby_libxml.h"
4
-
5
- /* Document-class: LibXML::XML::HTMLParser
6
- *
7
- * The HTML parser implements an HTML 4.0 non-verifying parser with an API
8
- * compatible with the XML::Parser. In contrast with the XML::Parser,
9
- * it can parse "real world" HTML, even if it severely broken from a
10
- * specification point of view.
11
- *
12
- * The HTML parser creates an in-memory document object
13
- * that consist of any number of XML::Node instances. This is simple
14
- * and powerful model, but has the major limitation that the size of
15
- * the document that can be processed is limited by the amount of
16
- * memory available.
17
- *
18
- * Using the html parser is simple:
19
- *
20
- * parser = XML::HTMLParser.file('my_file')
21
- * doc = parser.parse
22
- *
23
- * You can also parse documents (see XML::HTMLParser.document),
24
- * strings (see XML::HTMLParser.string) and io objects (see
25
- * XML::HTMLParser.io).
26
- */
27
-
28
- VALUE cXMLHtmlParser;
29
- static ID CONTEXT_ATTR;
30
-
31
-
32
- /* call-seq:
33
- * XML::HTMLParser.initialize -> parser
34
- *
35
- * Initializes a new parser instance with no pre-determined source.
36
- */
37
- static VALUE rxml_html_parser_initialize(int argc, VALUE *argv, VALUE self)
38
- {
39
- VALUE context = Qnil;
40
-
41
- rb_scan_args(argc, argv, "01", &context);
42
-
43
- if (context == Qnil)
44
- {
45
- rb_warn("Passing no parameters to XML::HTMLParser.new is deprecated. Pass an instance of XML::Parser::Context instead.");
46
- context = rb_class_new_instance(0, NULL, cXMLParserContext);
47
- }
48
-
49
- rb_ivar_set(self, CONTEXT_ATTR, context);
50
- return self;
51
- }
52
-
53
- /*
54
- * call-seq:
55
- * parser.parse -> XML::Document
56
- *
57
- * Parse the input XML and create an XML::Document with
58
- * it's content. If an error occurs, XML::Parser::ParseError
59
- * is thrown.
60
- */
61
- static VALUE rxml_html_parser_parse(VALUE self)
62
- {
63
- xmlParserCtxtPtr ctxt;
64
- VALUE context = rb_ivar_get(self, CONTEXT_ATTR);
65
-
66
- Data_Get_Struct(context, xmlParserCtxt, ctxt);
67
-
68
- if (htmlParseDocument(ctxt) == -1 && ! ctxt->recovery)
69
- {
70
- if (ctxt->myDoc)
71
- xmlFreeDoc(ctxt->myDoc);
72
- rxml_raise(&ctxt->lastError);
73
- }
74
-
75
- rb_funcall(context, rb_intern("close"), 0);
76
-
77
- return rxml_document_wrap(ctxt->myDoc);
78
- }
79
-
80
- void rxml_init_html_parser(void)
81
- {
82
- CONTEXT_ATTR = rb_intern("@context");
83
-
84
- cXMLHtmlParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
85
-
86
- /* Atributes */
87
- rb_define_attr(cXMLHtmlParser, "input", 1, 0);
88
-
89
- /* Instance methods */
90
- rb_define_method(cXMLHtmlParser, "initialize", rxml_html_parser_initialize, -1);
91
- rb_define_method(cXMLHtmlParser, "parse", rxml_html_parser_parse, 0);
92
- }
1
+ /* Please see the LICENSE file for copyright and distribution information */
2
+
3
+ #include "ruby_libxml.h"
4
+
5
+ /* Document-class: LibXML::XML::HTMLParser
6
+ *
7
+ * The HTML parser implements an HTML 4.0 non-verifying parser with an API
8
+ * compatible with the XML::Parser. In contrast with the XML::Parser,
9
+ * it can parse "real world" HTML, even if it is severely broken from a
10
+ * specification point of view.
11
+ *
12
+ * The HTML parser creates an in-memory document object
13
+ * that consists of any number of XML::Node instances. This is a simple
14
+ * and powerful model, but has the major limitation that the size of
15
+ * the document that can be processed is limited by the amount of
16
+ * memory available.
17
+ *
18
+ * Using the html parser is simple:
19
+ *
20
+ * parser = XML::HTMLParser.file('my_file')
21
+ * doc = parser.parse
22
+ *
23
+ * You can also parse documents (see XML::HTMLParser.document),
24
+ * strings (see XML::HTMLParser.string) and io objects (see
25
+ * XML::HTMLParser.io).
26
+ */
27
+
28
+ VALUE cXMLHtmlParser;
29
+ static ID CONTEXT_ATTR;
30
+
31
+
32
+ /* call-seq:
33
+ * XML::HTMLParser.initialize -> parser
34
+ *
35
+ * Initializes a new parser instance from the given XML::Parser::Context (required; ArgumentError is raised if it is omitted).
36
+ */
37
+ static VALUE rxml_html_parser_initialize(int argc, VALUE *argv, VALUE self)
38
+ {
39
+ VALUE context = Qnil;
40
+
41
+ rb_scan_args(argc, argv, "01", &context);
42
+
43
+ if (context == Qnil)
44
+ {
45
+ rb_raise(rb_eArgError, "An instance of a XML::Parser::Context must be passed to XML::HTMLParser.new");
46
+ }
47
+
48
+ rb_ivar_set(self, CONTEXT_ATTR, context);
49
+ return self;
50
+ }
51
+
52
+ /*
53
+ * call-seq:
54
+ * parser.parse -> XML::Document
55
+ *
56
+ * Parse the input HTML and create an XML::Document with
57
+ * its content. If an error occurs, XML::Parser::ParseError
58
+ * is thrown.
59
+ */
60
+ static VALUE rxml_html_parser_parse(VALUE self)
61
+ {
62
+ xmlParserCtxtPtr ctxt;
63
+ VALUE context = rb_ivar_get(self, CONTEXT_ATTR);
64
+
65
+ Data_Get_Struct(context, xmlParserCtxt, ctxt);
66
+
67
+ if (htmlParseDocument(ctxt) == -1 && ! ctxt->recovery)
68
+ {
69
+ rxml_raise(&ctxt->lastError);
70
+ }
71
+
72
+ rb_funcall(context, rb_intern("close"), 0);
73
+
74
+ return rxml_document_wrap(ctxt->myDoc);
75
+ }
76
+
77
+ void rxml_init_html_parser(void)
78
+ {
79
+ CONTEXT_ATTR = rb_intern("@context");
80
+
81
+ cXMLHtmlParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
82
+
83
+ /* Attributes */
84
+ rb_define_attr(cXMLHtmlParser, "input", 1, 0);
85
+
86
+ /* Instance methods */
87
+ rb_define_method(cXMLHtmlParser, "initialize", rxml_html_parser_initialize, -1);
88
+ rb_define_method(cXMLHtmlParser, "parse", rxml_html_parser_parse, 0);
89
+ }
@@ -1,10 +1,10 @@
1
- /* Please see the LICENSE file for copyright and distribution information */
2
-
3
- #ifndef __RXML_HTML_PARSER__
4
- #define __RXML_HTML_PARSER__
5
-
6
- extern VALUE cXMLHtmlParser;
7
-
8
- void rxml_init_html_parser(void);
9
-
10
- #endif
1
+ /* Please see the LICENSE file for copyright and distribution information */
2
+
3
+ #ifndef __RXML_HTML_PARSER__
4
+ #define __RXML_HTML_PARSER__
5
+
6
+ extern VALUE cXMLHtmlParser;
7
+
8
+ void rxml_init_html_parser(void);
9
+
10
+ #endif
@@ -1,338 +1,337 @@
1
- /* Please see the LICENSE file for copyright and distribution information */
2
-
3
- #include "ruby_libxml.h"
4
- #include "ruby_xml_html_parser_context.h"
5
-
6
-
7
- /*
8
- * Document-class: LibXML::XML::HTMLParser::Context
9
- *
10
- * The XML::HTMLParser::Context class provides in-depth control over how
11
- * a document is parsed.
12
- */
13
-
14
- VALUE cXMLHtmlParserContext;
15
- static ID IO_ATTR;
16
-
17
- /* OS X 10.5 ships with libxml2 version 2.6.16 which does not expose the
18
- htmlNewParserCtxt (or htmlInitParserCtxt which it uses) method. htmlNewParserCtxt
19
- wasn't added to the libxml2 header files until 2.6.27. So the next two
20
- methods are simply copied from a newer version of libxml2 (2.7.2). */
21
- #if LIBXML_VERSION < 20627
22
- #define XML_CTXT_FINISH_DTD_0 0xabcd1234
23
- static int htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
24
- {
25
- htmlSAXHandler *sax;
26
- if (ctxt == NULL) return(-1);
27
-
28
- memset(ctxt, 0, sizeof(htmlParserCtxt));
29
- ctxt->dict = xmlDictCreate();
30
- if (ctxt->dict == NULL) {
31
- rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
32
- return(-1);
33
- }
34
- sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler));
35
- if (sax == NULL) {
36
- rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
37
- return(-1);
38
- }
39
- else
40
- memset(sax, 0, sizeof(htmlSAXHandler));
41
-
42
- ctxt->inputTab = (htmlParserInputPtr *) xmlMalloc(5 * sizeof(htmlParserInputPtr));
43
- if (ctxt->inputTab == NULL) {
44
- rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
45
- ctxt->inputNr = 0;
46
- ctxt->inputMax = 0;
47
- ctxt->input = NULL;
48
- return(-1);
49
- }
50
- ctxt->inputNr = 0;
51
- ctxt->inputMax = 5;
52
- ctxt->input = NULL;
53
- ctxt->version = NULL;
54
- ctxt->encoding = NULL;
55
- ctxt->standalone = -1;
56
- ctxt->instate = XML_PARSER_START;
57
-
58
- ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr));
59
- if (ctxt->nodeTab == NULL) {
60
- rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
61
- ctxt->nodeNr = 0;
62
- ctxt->nodeMax = 0;
63
- ctxt->node = NULL;
64
- ctxt->inputNr = 0;
65
- ctxt->inputMax = 0;
66
- ctxt->input = NULL;
67
- return(-1);
68
- }
69
- ctxt->nodeNr = 0;
70
- ctxt->nodeMax = 10;
71
- ctxt->node = NULL;
72
-
73
- ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
74
- if (ctxt->nameTab == NULL) {
75
- rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
76
- ctxt->nameNr = 0;
77
- ctxt->nameMax = 10;
78
- ctxt->name = NULL;
79
- ctxt->nodeNr = 0;
80
- ctxt->nodeMax = 0;
81
- ctxt->node = NULL;
82
- ctxt->inputNr = 0;
83
- ctxt->inputMax = 0;
84
- ctxt->input = NULL;
85
- return(-1);
86
- }
87
- ctxt->nameNr = 0;
88
- ctxt->nameMax = 10;
89
- ctxt->name = NULL;
90
-
91
- if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
92
- else {
93
- ctxt->sax = sax;
94
- memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
95
- }
96
- ctxt->userData = ctxt;
97
- ctxt->myDoc = NULL;
98
- ctxt->wellFormed = 1;
99
- ctxt->replaceEntities = 0;
100
- ctxt->linenumbers = xmlLineNumbersDefaultValue;
101
- ctxt->html = 1;
102
- ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
103
- ctxt->vctxt.userData = ctxt;
104
- ctxt->vctxt.error = xmlParserValidityError;
105
- ctxt->vctxt.warning = xmlParserValidityWarning;
106
- ctxt->record_info = 0;
107
- ctxt->validate = 0;
108
- ctxt->nbChars = 0;
109
- ctxt->checkIndex = 0;
110
- ctxt->catalogs = NULL;
111
- xmlInitNodeInfoSeq(&ctxt->node_seq);
112
- return(0);
113
- }
114
-
115
- static htmlParserCtxtPtr htmlNewParserCtxt(void)
116
- {
117
- xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
118
- if (ctxt == NULL) {
119
- rb_raise(rb_eNoMemError, "NewParserCtxt: out of memory\n");
120
- return(NULL);
121
- }
122
- memset(ctxt, 0, sizeof(xmlParserCtxt));
123
- if (htmlInitParserCtxt(ctxt) < 0) {
124
- htmlFreeParserCtxt(ctxt);
125
- return(NULL);
126
- }
127
- return(ctxt);
128
- }
129
- #endif
130
-
131
- static void rxml_html_parser_context_free(htmlParserCtxtPtr ctxt)
132
- {
133
- htmlFreeParserCtxt(ctxt);
134
- }
135
-
136
- static VALUE rxml_html_parser_context_wrap(htmlParserCtxtPtr ctxt)
137
- {
138
- return Data_Wrap_Struct(cXMLHtmlParserContext, NULL, rxml_html_parser_context_free, ctxt);
139
- }
140
-
141
- /* call-seq:
142
- * XML::HTMLParser::Context.file(file) -> XML::HTMLParser::Context
143
- *
144
- * Creates a new parser context based on the specified file or uri.
145
- *
146
- * Parameters:
147
- *
148
- * file - A filename or uri.
149
- */
150
- static VALUE rxml_html_parser_context_file(VALUE klass, VALUE file)
151
- {
152
- htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(StringValuePtr(file), NULL);
153
- if (!ctxt)
154
- rxml_raise(&xmlLastError);
155
-
156
- /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
157
- xmlCtxtUseOptionsInternal (called below) initialize slightly different
158
- context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
159
- sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
160
- htmlCtxtUseOptions(ctxt, rxml_libxml_default_options());
161
-
162
- return rxml_html_parser_context_wrap(ctxt);
163
- }
164
-
165
- /* call-seq:
166
- * XML::HTMLParser::Context.io(io) -> XML::HTMLParser::Context
167
- *
168
- * Creates a new parser context based on the specified io object.
169
- *
170
- * Parameters:
171
- *
172
- * io - A ruby IO object.
173
- */
174
- static VALUE rxml_html_parser_context_io(VALUE klass, VALUE io)
175
- {
176
- VALUE result;
177
- htmlParserCtxtPtr ctxt;
178
- xmlParserInputBufferPtr input;
179
- xmlParserInputPtr stream;
180
-
181
- if (NIL_P(io))
182
- rb_raise(rb_eTypeError, "Must pass in an IO object");
183
-
184
- input = xmlParserInputBufferCreateIO((xmlInputReadCallback) rxml_read_callback, NULL,
185
- (void*)io, XML_CHAR_ENCODING_NONE);
186
-
187
- ctxt = htmlNewParserCtxt();
188
- if (!ctxt)
189
- {
190
- xmlFreeParserInputBuffer(input);
191
- rxml_raise(&xmlLastError);
192
- }
193
-
194
- /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
195
- xmlCtxtUseOptionsInternal (called below) initialize slightly different
196
- context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
197
- sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
198
- htmlCtxtUseOptions(ctxt, rxml_libxml_default_options());
199
-
200
- stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
201
-
202
- if (!stream)
203
- {
204
- xmlFreeParserInputBuffer(input);
205
- xmlFreeParserCtxt(ctxt);
206
- rxml_raise(&xmlLastError);
207
- }
208
- inputPush(ctxt, stream);
209
- result = rxml_html_parser_context_wrap(ctxt);
210
-
211
- /* Attach io object to parser so it won't get freed.*/
212
- rb_ivar_set(result, IO_ATTR, io);
213
-
214
- return result;
215
- }
216
-
217
- /* call-seq:
218
- * XML::HTMLParser::Context.string(string) -> XML::HTMLParser::Context
219
- *
220
- * Creates a new parser context based on the specified string.
221
- *
222
- * Parameters:
223
- *
224
- * string - A string that contains the data to parse.
225
- */
226
- static VALUE rxml_html_parser_context_string(VALUE klass, VALUE string)
227
- {
228
- htmlParserCtxtPtr ctxt;
229
- Check_Type(string, T_STRING);
230
-
231
- if (RSTRING_LEN(string) == 0)
232
- rb_raise(rb_eArgError, "Must specify a string with one or more characters");
233
-
234
- ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(string),
235
- RSTRING_LEN(string));
236
- if (!ctxt)
237
- rxml_raise(&xmlLastError);
238
-
239
- /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
240
- xmlCtxtUseOptionsInternal (called below) initialize slightly different
241
- context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
242
- sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
243
- htmlCtxtUseOptions(ctxt, rxml_libxml_default_options());
244
-
245
- htmlDefaultSAXHandlerInit();
246
- if (ctxt->sax != NULL)
247
- memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
248
-
249
- return rxml_html_parser_context_wrap(ctxt);
250
- }
251
-
252
- /*
253
- * call-seq:
254
- * context.close -> nil
255
- *
256
- * Closes the underlying input streams. This is useful when parsing a large amount of
257
- * files and you want to close the files without relying on Ruby's garbage collector
258
- * to run.
259
- */
260
- static VALUE rxml_html_parser_context_close(VALUE self)
261
- {
262
- htmlParserCtxtPtr ctxt;
263
- xmlParserInputPtr xinput;
264
- Data_Get_Struct(self, htmlParserCtxt, ctxt);
265
-
266
- while ((xinput = inputPop(ctxt)) != NULL)
267
- {
268
- xmlFreeInputStream(xinput);
269
- }
270
- return Qnil;
271
- }
272
-
273
- /*
274
- * call-seq:
275
- * context.disable_cdata = (true|false)
276
- *
277
- * Control whether the CDATA nodes will be created in this context.
278
- */
279
- static VALUE rxml_html_parser_context_disable_cdata_set(VALUE self, VALUE bool)
280
- {
281
- htmlParserCtxtPtr ctxt;
282
- Data_Get_Struct(self, htmlParserCtxt, ctxt);
283
-
284
- if (ctxt->sax == NULL)
285
- rb_raise(rb_eRuntimeError, "Sax handler is not yet set");
286
-
287
- /* LibXML controls this internally with the default SAX handler. */
288
- if (bool)
289
- ctxt->sax->cdataBlock = NULL;
290
- else
291
- ctxt->sax->cdataBlock = htmlDefaultSAXHandler.cdataBlock;
292
-
293
- return bool;
294
- }
295
-
296
- /*
297
- * call-seq:
298
- * context.options = XML::Parser::Options::NOENT |
299
- XML::Parser::Options::NOCDATA
300
- *
301
- * Provides control over the execution of a parser. Valid values
302
- * are the constants defined on XML::Parser::Options. Multiple
303
- * options can be combined by using Bitwise OR (|).
304
- */
305
- static VALUE rxml_html_parser_context_options_set(VALUE self, VALUE options)
306
- {
307
- int result;
308
- int xml_options = NUM2INT(options);
309
- htmlParserCtxtPtr ctxt;
310
- Check_Type(options, T_FIXNUM);
311
-
312
- Data_Get_Struct(self, htmlParserCtxt, ctxt);
313
- result = htmlCtxtUseOptions(ctxt, xml_options);
314
-
315
- #if LIBXML_VERSION >= 20707
316
- /* Big hack here, but htmlCtxtUseOptions doens't support HTML_PARSE_NOIMPLIED.
317
- So do it ourselves. There must be a better way??? */
318
- if (xml_options & HTML_PARSE_NOIMPLIED)
319
- {
320
- ctxt->options |= HTML_PARSE_NOIMPLIED;
321
- }
322
- #endif
323
-
324
- return self;
325
- }
326
-
327
- void rxml_init_html_parser_context(void)
328
- {
329
- IO_ATTR = ID2SYM(rb_intern("@io"));
330
- cXMLHtmlParserContext = rb_define_class_under(cXMLHtmlParser, "Context", cXMLParserContext);
331
-
332
- rb_define_singleton_method(cXMLHtmlParserContext, "file", rxml_html_parser_context_file, 1);
333
- rb_define_singleton_method(cXMLHtmlParserContext, "io", rxml_html_parser_context_io, 1);
334
- rb_define_singleton_method(cXMLHtmlParserContext, "string", rxml_html_parser_context_string, 1);
335
- rb_define_method(cXMLHtmlParserContext, "close", rxml_html_parser_context_close, 0);
336
- rb_define_method(cXMLHtmlParserContext, "disable_cdata=", rxml_html_parser_context_disable_cdata_set, 1);
337
- rb_define_method(cXMLHtmlParserContext, "options=", rxml_html_parser_context_options_set, 1);
338
- }
1
+ /* Please see the LICENSE file for copyright and distribution information */
2
+
3
+ #include "ruby_libxml.h"
4
+ #include "ruby_xml_html_parser_context.h"
5
+
6
+
7
+ /*
8
+ * Document-class: LibXML::XML::HTMLParser::Context
9
+ *
10
+ * The XML::HTMLParser::Context class provides in-depth control over how
11
+ * a document is parsed.
12
+ */
13
+
14
+ VALUE cXMLHtmlParserContext;
15
+ static ID IO_ATTR;
16
+
17
+ /* OS X 10.5 ships with libxml2 version 2.6.16 which does not expose the
18
+ htmlNewParserCtxt (or htmlInitParserCtxt which it uses) method. htmlNewParserCtxt
19
+ wasn't added to the libxml2 header files until 2.6.27. So the next two
20
+ methods are simply copied from a newer version of libxml2 (2.7.2). */
21
+ #if LIBXML_VERSION < 20627
22
+ #define XML_CTXT_FINISH_DTD_0 0xabcd1234
23
+ static int htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
24
+ {
25
+ htmlSAXHandler *sax;
26
+ if (ctxt == NULL) return(-1);
27
+
28
+ memset(ctxt, 0, sizeof(htmlParserCtxt));
29
+ ctxt->dict = xmlDictCreate();
30
+ if (ctxt->dict == NULL) {
31
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
32
+ return(-1);
33
+ }
34
+ sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler));
35
+ if (sax == NULL) {
36
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
37
+ return(-1);
38
+ }
39
+ else
40
+ memset(sax, 0, sizeof(htmlSAXHandler));
41
+
42
+ ctxt->inputTab = (htmlParserInputPtr *) xmlMalloc(5 * sizeof(htmlParserInputPtr));
43
+ if (ctxt->inputTab == NULL) {
44
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
45
+ ctxt->inputNr = 0;
46
+ ctxt->inputMax = 0;
47
+ ctxt->input = NULL;
48
+ return(-1);
49
+ }
50
+ ctxt->inputNr = 0;
51
+ ctxt->inputMax = 5;
52
+ ctxt->input = NULL;
53
+ ctxt->version = NULL;
54
+ ctxt->encoding = NULL;
55
+ ctxt->standalone = -1;
56
+ ctxt->instate = XML_PARSER_START;
57
+
58
+ ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr));
59
+ if (ctxt->nodeTab == NULL) {
60
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
61
+ ctxt->nodeNr = 0;
62
+ ctxt->nodeMax = 0;
63
+ ctxt->node = NULL;
64
+ ctxt->inputNr = 0;
65
+ ctxt->inputMax = 0;
66
+ ctxt->input = NULL;
67
+ return(-1);
68
+ }
69
+ ctxt->nodeNr = 0;
70
+ ctxt->nodeMax = 10;
71
+ ctxt->node = NULL;
72
+
73
+ ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
74
+ if (ctxt->nameTab == NULL) {
75
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
76
+ ctxt->nameNr = 0;
77
+ ctxt->nameMax = 10;
78
+ ctxt->name = NULL;
79
+ ctxt->nodeNr = 0;
80
+ ctxt->nodeMax = 0;
81
+ ctxt->node = NULL;
82
+ ctxt->inputNr = 0;
83
+ ctxt->inputMax = 0;
84
+ ctxt->input = NULL;
85
+ return(-1);
86
+ }
87
+ ctxt->nameNr = 0;
88
+ ctxt->nameMax = 10;
89
+ ctxt->name = NULL;
90
+
91
+ if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
92
+ else {
93
+ ctxt->sax = sax;
94
+ memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
95
+ }
96
+ ctxt->userData = ctxt;
97
+ ctxt->myDoc = NULL;
98
+ ctxt->wellFormed = 1;
99
+ ctxt->replaceEntities = 0;
100
+ ctxt->linenumbers = xmlLineNumbersDefaultValue;
101
+ ctxt->html = 1;
102
+ ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
103
+ ctxt->vctxt.userData = ctxt;
104
+ ctxt->vctxt.error = xmlParserValidityError;
105
+ ctxt->vctxt.warning = xmlParserValidityWarning;
106
+ ctxt->record_info = 0;
107
+ ctxt->validate = 0;
108
+ ctxt->nbChars = 0;
109
+ ctxt->checkIndex = 0;
110
+ ctxt->catalogs = NULL;
111
+ xmlInitNodeInfoSeq(&ctxt->node_seq);
112
+ return(0);
113
+ }
114
+
115
+ static htmlParserCtxtPtr htmlNewParserCtxt(void)
116
+ {
117
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
118
+ if (ctxt == NULL) {
119
+ rb_raise(rb_eNoMemError, "NewParserCtxt: out of memory\n");
120
+ return(NULL);
121
+ }
122
+ memset(ctxt, 0, sizeof(xmlParserCtxt));
123
+ if (htmlInitParserCtxt(ctxt) < 0) {
124
+ htmlFreeParserCtxt(ctxt);
125
+ return(NULL);
126
+ }
127
+ return(ctxt);
128
+ }
129
+ #endif
130
+
131
+ static void rxml_html_parser_context_free(htmlParserCtxtPtr ctxt)
132
+ {
133
+ htmlFreeParserCtxt(ctxt);
134
+ }
135
+
136
+ static VALUE rxml_html_parser_context_wrap(htmlParserCtxtPtr ctxt)
137
+ {
138
+ return Data_Wrap_Struct(cXMLHtmlParserContext, NULL, rxml_html_parser_context_free, ctxt);
139
+ }
140
+
141
+ /* call-seq:
142
+ * XML::HTMLParser::Context.file(file) -> XML::HTMLParser::Context
143
+ *
144
+ * Creates a new parser context based on the specified file or uri.
145
+ *
146
+ * Parameters:
147
+ *
148
+ * file - A filename or uri.
149
+ */
150
+ static VALUE rxml_html_parser_context_file(VALUE klass, VALUE file)
151
+ {
152
+ htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(StringValuePtr(file), NULL);
153
+ if (!ctxt)
154
+ rxml_raise(&xmlLastError);
155
+
156
+ /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
157
+ xmlCtxtUseOptionsInternal (called below) initialize slightly different
158
+ context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
159
+ sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
160
+ htmlCtxtUseOptions(ctxt, rxml_libxml_default_options());
161
+
162
+ return rxml_html_parser_context_wrap(ctxt);
163
+ }
164
+
165
+ /* call-seq:
166
+ * XML::HTMLParser::Context.io(io) -> XML::HTMLParser::Context
167
+ *
168
+ * Creates a new parser context based on the specified io object.
169
+ *
170
+ * Parameters:
171
+ *
172
+ * io - A ruby IO object.
173
+ */
174
+ static VALUE rxml_html_parser_context_io(VALUE klass, VALUE io)
175
+ {
176
+ VALUE result;
177
+ htmlParserCtxtPtr ctxt;
178
+ xmlParserInputBufferPtr input;
179
+ xmlParserInputPtr stream;
180
+
181
+ if (NIL_P(io))
182
+ rb_raise(rb_eTypeError, "Must pass in an IO object");
183
+
184
+ input = xmlParserInputBufferCreateIO((xmlInputReadCallback) rxml_read_callback, NULL,
185
+ (void*)io, XML_CHAR_ENCODING_NONE);
186
+
187
+ ctxt = htmlNewParserCtxt();
188
+ if (!ctxt)
189
+ {
190
+ xmlFreeParserInputBuffer(input);
191
+ rxml_raise(&xmlLastError);
192
+ }
193
+
194
+ /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
195
+ xmlCtxtUseOptionsInternal (called below) initialize slightly different
196
+ context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
197
+ sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
198
+ htmlCtxtUseOptions(ctxt, rxml_libxml_default_options());
199
+
200
+ stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
201
+
202
+ if (!stream)
203
+ {
204
+ xmlFreeParserInputBuffer(input);
205
+ xmlFreeParserCtxt(ctxt);
206
+ rxml_raise(&xmlLastError);
207
+ }
208
+ inputPush(ctxt, stream);
209
+ result = rxml_html_parser_context_wrap(ctxt);
210
+
211
+ /* Attach io object to parser so it won't get freed.*/
212
+ rb_ivar_set(result, IO_ATTR, io);
213
+
214
+ return result;
215
+ }
216
+
217
+ /* call-seq:
218
+ * XML::HTMLParser::Context.string(string) -> XML::HTMLParser::Context
219
+ *
220
+ * Creates a new parser context based on the specified string.
221
+ *
222
+ * Parameters:
223
+ *
224
+ * string - A string that contains the data to parse.
225
+ */
226
+ static VALUE rxml_html_parser_context_string(VALUE klass, VALUE string)
227
+ {
228
+ htmlParserCtxtPtr ctxt;
229
+ Check_Type(string, T_STRING);
230
+
231
+ if (RSTRING_LEN(string) == 0)
232
+ rb_raise(rb_eArgError, "Must specify a string with one or more characters");
233
+
234
+ ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(string),
235
+ (int)RSTRING_LEN(string));
236
+ if (!ctxt)
237
+ rxml_raise(&xmlLastError);
238
+
239
+ /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
240
+ xmlCtxtUseOptionsInternal (called below) initialize slightly different
241
+ context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
242
+ sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
243
+ htmlCtxtUseOptions(ctxt, rxml_libxml_default_options());
244
+
245
+ htmlDefaultSAXHandlerInit();
246
+ if (ctxt->sax != NULL)
247
+ memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
248
+
249
+ return rxml_html_parser_context_wrap(ctxt);
250
+ }
251
+
252
+ /*
253
+ * call-seq:
254
+ * context.close -> nil
255
+ *
256
+ * Closes the underlying input streams. This is useful when parsing a large amount of
257
+ * files and you want to close the files without relying on Ruby's garbage collector
258
+ * to run.
259
+ */
260
+ static VALUE rxml_html_parser_context_close(VALUE self)
261
+ {
262
+ htmlParserCtxtPtr ctxt;
263
+ xmlParserInputPtr xinput;
264
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
265
+
266
+ while ((xinput = inputPop(ctxt)) != NULL)
267
+ {
268
+ xmlFreeInputStream(xinput);
269
+ }
270
+ return Qnil;
271
+ }
272
+
273
+ /*
274
+ * call-seq:
275
+ * context.disable_cdata = (true|false)
276
+ *
277
+ * Control whether the CDATA nodes will be created in this context.
278
+ */
279
+ static VALUE rxml_html_parser_context_disable_cdata_set(VALUE self, VALUE value)
280
+ {
281
+ htmlParserCtxtPtr ctxt;
282
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
283
+
284
+ if (ctxt->sax == NULL)
285
+ rb_raise(rb_eRuntimeError, "Sax handler is not yet set");
286
+
287
+ /* LibXML controls this internally with the default SAX handler. */
288
+ if (value)
289
+ ctxt->sax->cdataBlock = NULL;
290
+ else
291
+ ctxt->sax->cdataBlock = htmlDefaultSAXHandler.cdataBlock;
292
+
293
+ return value;
294
+ }
295
+
296
+ /*
297
+ * call-seq:
298
+ * context.options = XML::Parser::Options::NOENT |
299
+ XML::Parser::Options::NOCDATA
300
+ *
301
+ * Provides control over the execution of a parser. Valid values
302
+ * are the constants defined on XML::Parser::Options. Multiple
303
+ * options can be combined by using Bitwise OR (|).
304
+ */
305
+ static VALUE rxml_html_parser_context_options_set(VALUE self, VALUE options)
306
+ {
307
+ int xml_options = NUM2INT(options);
308
+ htmlParserCtxtPtr ctxt;
309
+ Check_Type(options, T_FIXNUM);
310
+
311
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
312
+ htmlCtxtUseOptions(ctxt, xml_options);
313
+
314
+ #if LIBXML_VERSION >= 20707
315
+ /* Big hack here, but htmlCtxtUseOptions doens't support HTML_PARSE_NOIMPLIED.
316
+ So do it ourselves. There must be a better way??? */
317
+ if (xml_options & HTML_PARSE_NOIMPLIED)
318
+ {
319
+ ctxt->options |= HTML_PARSE_NOIMPLIED;
320
+ }
321
+ #endif
322
+
323
+ return self;
324
+ }
325
+
326
+ void rxml_init_html_parser_context(void)
327
+ {
328
+ IO_ATTR = ID2SYM(rb_intern("@io"));
329
+ cXMLHtmlParserContext = rb_define_class_under(cXMLHtmlParser, "Context", cXMLParserContext);
330
+
331
+ rb_define_singleton_method(cXMLHtmlParserContext, "file", rxml_html_parser_context_file, 1);
332
+ rb_define_singleton_method(cXMLHtmlParserContext, "io", rxml_html_parser_context_io, 1);
333
+ rb_define_singleton_method(cXMLHtmlParserContext, "string", rxml_html_parser_context_string, 1);
334
+ rb_define_method(cXMLHtmlParserContext, "close", rxml_html_parser_context_close, 0);
335
+ rb_define_method(cXMLHtmlParserContext, "disable_cdata=", rxml_html_parser_context_disable_cdata_set, 1);
336
+ rb_define_method(cXMLHtmlParserContext, "options=", rxml_html_parser_context_options_set, 1);
337
+ }