libxml-ruby 2.8.0 → 2.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. checksums.yaml +4 -4
  2. data/HISTORY +15 -0
  3. data/README.rdoc +7 -7
  4. data/Rakefile +80 -78
  5. data/ext/libxml/extconf.h +4 -0
  6. data/ext/libxml/extconf.rb +57 -116
  7. data/ext/libxml/libxml.c +4 -0
  8. data/ext/libxml/ruby_xml.c +977 -893
  9. data/ext/libxml/ruby_xml.h +20 -10
  10. data/ext/libxml/ruby_xml_attr.c +333 -333
  11. data/ext/libxml/ruby_xml_attr_decl.c +2 -2
  12. data/ext/libxml/ruby_xml_cbg.c +85 -85
  13. data/ext/libxml/ruby_xml_document.c +1133 -1147
  14. data/ext/libxml/ruby_xml_dtd.c +261 -268
  15. data/ext/libxml/ruby_xml_encoding.c +262 -260
  16. data/ext/libxml/ruby_xml_encoding.h +19 -19
  17. data/ext/libxml/ruby_xml_html_parser_context.c +337 -338
  18. data/ext/libxml/ruby_xml_input_cbg.c +191 -191
  19. data/ext/libxml/ruby_xml_io.c +52 -50
  20. data/ext/libxml/ruby_xml_namespace.c +2 -2
  21. data/ext/libxml/ruby_xml_node.c +1446 -1452
  22. data/ext/libxml/ruby_xml_parser_context.c +999 -1001
  23. data/ext/libxml/ruby_xml_reader.c +1226 -1228
  24. data/ext/libxml/ruby_xml_relaxng.c +110 -111
  25. data/ext/libxml/ruby_xml_sax2_handler.c +326 -328
  26. data/ext/libxml/ruby_xml_schema.c +300 -301
  27. data/ext/libxml/ruby_xml_version.h +3 -3
  28. data/ext/libxml/ruby_xml_writer.c +14 -15
  29. data/ext/libxml/ruby_xml_xpath.c +188 -188
  30. data/ext/libxml/ruby_xml_xpath_context.c +360 -361
  31. data/ext/libxml/ruby_xml_xpath_object.c +335 -335
  32. data/libxml-ruby.gemspec +47 -44
  33. data/test/tc_attr.rb +5 -7
  34. data/test/tc_attr_decl.rb +5 -6
  35. data/test/tc_attributes.rb +1 -2
  36. data/test/tc_canonicalize.rb +1 -2
  37. data/test/tc_deprecated_require.rb +1 -2
  38. data/test/tc_document.rb +4 -5
  39. data/test/tc_document_write.rb +2 -3
  40. data/test/tc_dtd.rb +4 -5
  41. data/test/tc_encoding.rb +126 -126
  42. data/test/tc_encoding_sax.rb +4 -3
  43. data/test/tc_error.rb +14 -15
  44. data/test/tc_html_parser.rb +15 -7
  45. data/test/tc_html_parser_context.rb +1 -2
  46. data/test/tc_namespace.rb +2 -3
  47. data/test/tc_namespaces.rb +5 -6
  48. data/test/tc_node.rb +2 -3
  49. data/test/tc_node_cdata.rb +2 -3
  50. data/test/tc_node_comment.rb +1 -2
  51. data/test/tc_node_copy.rb +1 -2
  52. data/test/tc_node_edit.rb +5 -7
  53. data/test/tc_node_pi.rb +1 -2
  54. data/test/tc_node_text.rb +2 -3
  55. data/test/tc_node_write.rb +2 -3
  56. data/test/tc_node_xlink.rb +1 -2
  57. data/test/tc_parser.rb +18 -24
  58. data/test/tc_parser_context.rb +6 -7
  59. data/test/tc_properties.rb +1 -2
  60. data/test/tc_reader.rb +9 -10
  61. data/test/tc_relaxng.rb +4 -5
  62. data/test/tc_sax_parser.rb +9 -10
  63. data/test/tc_schema.rb +4 -5
  64. data/test/tc_traversal.rb +1 -2
  65. data/test/tc_writer.rb +1 -2
  66. data/test/tc_xinclude.rb +1 -2
  67. data/test/tc_xml.rb +1 -2
  68. data/test/tc_xpath.rb +8 -9
  69. data/test/tc_xpath_context.rb +3 -4
  70. data/test/tc_xpath_expression.rb +3 -4
  71. data/test/tc_xpointer.rb +1 -3
  72. data/test/test_helper.rb +3 -1
  73. data/test/test_suite.rb +0 -1
  74. metadata +47 -11
  75. data/test/etc_doc_to_s.rb +0 -21
  76. data/test/ets_doc_file.rb +0 -17
  77. data/test/ets_doc_to_s.rb +0 -23
  78. data/test/ets_gpx.rb +0 -28
  79. data/test/ets_node_gc.rb +0 -23
  80. data/test/ets_test.xml +0 -2
  81. data/test/ets_tsr.rb +0 -11
@@ -1,19 +1,19 @@
1
- /* Please see the LICENSE file for copyright and distribution information */
2
-
3
- #ifndef __RXML_ENCODING__
4
- #define __RXML_ENCODING__
5
-
6
- extern VALUE mXMLEncoding;
7
-
8
- void rxml_init_encoding();
9
-
10
- // Ruby 1.8/1.9 encoding compatibility
11
- VALUE rxml_new_cstr(const char* xstr, const char* xencoding);
12
- VALUE rxml_new_cstr_len(const char* xstr, const int length, const char* xencoding);
13
-
14
- #ifdef HAVE_RUBY_ENCODING_H
15
- rb_encoding* rxml_xml_encoding_to_rb_encoding(VALUE klass, xmlCharEncoding xmlEncoding);
16
- rb_encoding* rxml_figure_encoding(const char* xencoding);
17
- #endif
18
-
19
- #endif
1
+ /* Please see the LICENSE file for copyright and distribution information */
2
+
3
+ #ifndef __RXML_ENCODING__
4
+ #define __RXML_ENCODING__
5
+
6
+ extern VALUE mXMLEncoding;
7
+
8
+ void rxml_init_encoding();
9
+
10
+ // Ruby 1.8/1.9 encoding compatibility
11
+ VALUE rxml_new_cstr(const xmlChar* xstr, const xmlChar* xencoding);
12
+ VALUE rxml_new_cstr_len(const xmlChar* xstr, const long length, const xmlChar* xencoding);
13
+
14
+ #ifdef HAVE_RUBY_ENCODING_H
15
+ rb_encoding* rxml_xml_encoding_to_rb_encoding(VALUE klass, xmlCharEncoding xmlEncoding);
16
+ rb_encoding* rxml_figure_encoding(const xmlChar* xencoding);
17
+ #endif
18
+
19
+ #endif
@@ -1,338 +1,337 @@
1
- /* Please see the LICENSE file for copyright and distribution information */
2
-
3
- #include "ruby_libxml.h"
4
- #include "ruby_xml_html_parser_context.h"
5
-
6
-
7
- /*
8
- * Document-class: LibXML::XML::HTMLParser::Context
9
- *
10
- * The XML::HTMLParser::Context class provides in-depth control over how
11
- * a document is parsed.
12
- */
13
-
14
- VALUE cXMLHtmlParserContext;
15
- static ID IO_ATTR;
16
-
17
- /* OS X 10.5 ships with libxml2 version 2.6.16 which does not expose the
18
- htmlNewParserCtxt (or htmlInitParserCtxt which it uses) method. htmlNewParserCtxt
19
- wasn't added to the libxml2 header files until 2.6.27. So the next two
20
- methods are simply copied from a newer version of libxml2 (2.7.2). */
21
- #if LIBXML_VERSION < 20627
22
- #define XML_CTXT_FINISH_DTD_0 0xabcd1234
23
- static int htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
24
- {
25
- htmlSAXHandler *sax;
26
- if (ctxt == NULL) return(-1);
27
-
28
- memset(ctxt, 0, sizeof(htmlParserCtxt));
29
- ctxt->dict = xmlDictCreate();
30
- if (ctxt->dict == NULL) {
31
- rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
32
- return(-1);
33
- }
34
- sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler));
35
- if (sax == NULL) {
36
- rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
37
- return(-1);
38
- }
39
- else
40
- memset(sax, 0, sizeof(htmlSAXHandler));
41
-
42
- ctxt->inputTab = (htmlParserInputPtr *) xmlMalloc(5 * sizeof(htmlParserInputPtr));
43
- if (ctxt->inputTab == NULL) {
44
- rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
45
- ctxt->inputNr = 0;
46
- ctxt->inputMax = 0;
47
- ctxt->input = NULL;
48
- return(-1);
49
- }
50
- ctxt->inputNr = 0;
51
- ctxt->inputMax = 5;
52
- ctxt->input = NULL;
53
- ctxt->version = NULL;
54
- ctxt->encoding = NULL;
55
- ctxt->standalone = -1;
56
- ctxt->instate = XML_PARSER_START;
57
-
58
- ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr));
59
- if (ctxt->nodeTab == NULL) {
60
- rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
61
- ctxt->nodeNr = 0;
62
- ctxt->nodeMax = 0;
63
- ctxt->node = NULL;
64
- ctxt->inputNr = 0;
65
- ctxt->inputMax = 0;
66
- ctxt->input = NULL;
67
- return(-1);
68
- }
69
- ctxt->nodeNr = 0;
70
- ctxt->nodeMax = 10;
71
- ctxt->node = NULL;
72
-
73
- ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
74
- if (ctxt->nameTab == NULL) {
75
- rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
76
- ctxt->nameNr = 0;
77
- ctxt->nameMax = 10;
78
- ctxt->name = NULL;
79
- ctxt->nodeNr = 0;
80
- ctxt->nodeMax = 0;
81
- ctxt->node = NULL;
82
- ctxt->inputNr = 0;
83
- ctxt->inputMax = 0;
84
- ctxt->input = NULL;
85
- return(-1);
86
- }
87
- ctxt->nameNr = 0;
88
- ctxt->nameMax = 10;
89
- ctxt->name = NULL;
90
-
91
- if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
92
- else {
93
- ctxt->sax = sax;
94
- memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
95
- }
96
- ctxt->userData = ctxt;
97
- ctxt->myDoc = NULL;
98
- ctxt->wellFormed = 1;
99
- ctxt->replaceEntities = 0;
100
- ctxt->linenumbers = xmlLineNumbersDefaultValue;
101
- ctxt->html = 1;
102
- ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
103
- ctxt->vctxt.userData = ctxt;
104
- ctxt->vctxt.error = xmlParserValidityError;
105
- ctxt->vctxt.warning = xmlParserValidityWarning;
106
- ctxt->record_info = 0;
107
- ctxt->validate = 0;
108
- ctxt->nbChars = 0;
109
- ctxt->checkIndex = 0;
110
- ctxt->catalogs = NULL;
111
- xmlInitNodeInfoSeq(&ctxt->node_seq);
112
- return(0);
113
- }
114
-
115
- static htmlParserCtxtPtr htmlNewParserCtxt(void)
116
- {
117
- xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
118
- if (ctxt == NULL) {
119
- rb_raise(rb_eNoMemError, "NewParserCtxt: out of memory\n");
120
- return(NULL);
121
- }
122
- memset(ctxt, 0, sizeof(xmlParserCtxt));
123
- if (htmlInitParserCtxt(ctxt) < 0) {
124
- htmlFreeParserCtxt(ctxt);
125
- return(NULL);
126
- }
127
- return(ctxt);
128
- }
129
- #endif
130
-
131
- static void rxml_html_parser_context_free(htmlParserCtxtPtr ctxt)
132
- {
133
- htmlFreeParserCtxt(ctxt);
134
- }
135
-
136
- static VALUE rxml_html_parser_context_wrap(htmlParserCtxtPtr ctxt)
137
- {
138
- return Data_Wrap_Struct(cXMLHtmlParserContext, NULL, rxml_html_parser_context_free, ctxt);
139
- }
140
-
141
- /* call-seq:
142
- * XML::HTMLParser::Context.file(file) -> XML::HTMLParser::Context
143
- *
144
- * Creates a new parser context based on the specified file or uri.
145
- *
146
- * Parameters:
147
- *
148
- * file - A filename or uri.
149
- */
150
- static VALUE rxml_html_parser_context_file(VALUE klass, VALUE file)
151
- {
152
- htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(StringValuePtr(file), NULL);
153
- if (!ctxt)
154
- rxml_raise(&xmlLastError);
155
-
156
- /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
157
- xmlCtxtUseOptionsInternal (called below) initialize slightly different
158
- context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
159
- sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
160
- htmlCtxtUseOptions(ctxt, rxml_libxml_default_options());
161
-
162
- return rxml_html_parser_context_wrap(ctxt);
163
- }
164
-
165
- /* call-seq:
166
- * XML::HTMLParser::Context.io(io) -> XML::HTMLParser::Context
167
- *
168
- * Creates a new parser context based on the specified io object.
169
- *
170
- * Parameters:
171
- *
172
- * io - A ruby IO object.
173
- */
174
- static VALUE rxml_html_parser_context_io(VALUE klass, VALUE io)
175
- {
176
- VALUE result;
177
- htmlParserCtxtPtr ctxt;
178
- xmlParserInputBufferPtr input;
179
- xmlParserInputPtr stream;
180
-
181
- if (NIL_P(io))
182
- rb_raise(rb_eTypeError, "Must pass in an IO object");
183
-
184
- input = xmlParserInputBufferCreateIO((xmlInputReadCallback) rxml_read_callback, NULL,
185
- (void*)io, XML_CHAR_ENCODING_NONE);
186
-
187
- ctxt = htmlNewParserCtxt();
188
- if (!ctxt)
189
- {
190
- xmlFreeParserInputBuffer(input);
191
- rxml_raise(&xmlLastError);
192
- }
193
-
194
- /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
195
- xmlCtxtUseOptionsInternal (called below) initialize slightly different
196
- context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
197
- sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
198
- htmlCtxtUseOptions(ctxt, rxml_libxml_default_options());
199
-
200
- stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
201
-
202
- if (!stream)
203
- {
204
- xmlFreeParserInputBuffer(input);
205
- xmlFreeParserCtxt(ctxt);
206
- rxml_raise(&xmlLastError);
207
- }
208
- inputPush(ctxt, stream);
209
- result = rxml_html_parser_context_wrap(ctxt);
210
-
211
- /* Attach io object to parser so it won't get freed.*/
212
- rb_ivar_set(result, IO_ATTR, io);
213
-
214
- return result;
215
- }
216
-
217
- /* call-seq:
218
- * XML::HTMLParser::Context.string(string) -> XML::HTMLParser::Context
219
- *
220
- * Creates a new parser context based on the specified string.
221
- *
222
- * Parameters:
223
- *
224
- * string - A string that contains the data to parse.
225
- */
226
- static VALUE rxml_html_parser_context_string(VALUE klass, VALUE string)
227
- {
228
- htmlParserCtxtPtr ctxt;
229
- Check_Type(string, T_STRING);
230
-
231
- if (RSTRING_LEN(string) == 0)
232
- rb_raise(rb_eArgError, "Must specify a string with one or more characters");
233
-
234
- ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(string),
235
- RSTRING_LEN(string));
236
- if (!ctxt)
237
- rxml_raise(&xmlLastError);
238
-
239
- /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
240
- xmlCtxtUseOptionsInternal (called below) initialize slightly different
241
- context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
242
- sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
243
- htmlCtxtUseOptions(ctxt, rxml_libxml_default_options());
244
-
245
- htmlDefaultSAXHandlerInit();
246
- if (ctxt->sax != NULL)
247
- memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
248
-
249
- return rxml_html_parser_context_wrap(ctxt);
250
- }
251
-
252
- /*
253
- * call-seq:
254
- * context.close -> nil
255
- *
256
- * Closes the underlying input streams. This is useful when parsing a large number of
257
- * files and you want to close the files without relying on Ruby's garbage collector
258
- * to run.
259
- */
260
- static VALUE rxml_html_parser_context_close(VALUE self)
261
- {
262
- htmlParserCtxtPtr ctxt;
263
- xmlParserInputPtr xinput;
264
- Data_Get_Struct(self, htmlParserCtxt, ctxt);
265
-
266
- while ((xinput = inputPop(ctxt)) != NULL)
267
- {
268
- xmlFreeInputStream(xinput);
269
- }
270
- return Qnil;
271
- }
272
-
273
- /*
274
- * call-seq:
275
- * context.disable_cdata = (true|false)
276
- *
277
- * Control whether the CDATA nodes will be created in this context.
278
- */
279
- static VALUE rxml_html_parser_context_disable_cdata_set(VALUE self, VALUE bool)
280
- {
281
- htmlParserCtxtPtr ctxt;
282
- Data_Get_Struct(self, htmlParserCtxt, ctxt);
283
-
284
- if (ctxt->sax == NULL)
285
- rb_raise(rb_eRuntimeError, "Sax handler is not yet set");
286
-
287
- /* LibXML controls this internally with the default SAX handler. */
288
- if (bool)
289
- ctxt->sax->cdataBlock = NULL;
290
- else
291
- ctxt->sax->cdataBlock = htmlDefaultSAXHandler.cdataBlock;
292
-
293
- return bool;
294
- }
295
-
296
- /*
297
- * call-seq:
298
- * context.options = XML::Parser::Options::NOENT |
299
- XML::Parser::Options::NOCDATA
300
- *
301
- * Provides control over the execution of a parser. Valid values
302
- * are the constants defined on XML::Parser::Options. Multiple
303
- * options can be combined by using Bitwise OR (|).
304
- */
305
- static VALUE rxml_html_parser_context_options_set(VALUE self, VALUE options)
306
- {
307
- int result;
308
- int xml_options = NUM2INT(options);
309
- htmlParserCtxtPtr ctxt;
310
- Check_Type(options, T_FIXNUM);
311
-
312
- Data_Get_Struct(self, htmlParserCtxt, ctxt);
313
- result = htmlCtxtUseOptions(ctxt, xml_options);
314
-
315
- #if LIBXML_VERSION >= 20707
316
- /* Big hack here, but htmlCtxtUseOptions doesn't support HTML_PARSE_NOIMPLIED.
317
- So do it ourselves. There must be a better way??? */
318
- if (xml_options & HTML_PARSE_NOIMPLIED)
319
- {
320
- ctxt->options |= HTML_PARSE_NOIMPLIED;
321
- }
322
- #endif
323
-
324
- return self;
325
- }
326
-
327
- void rxml_init_html_parser_context(void)
328
- {
329
- IO_ATTR = ID2SYM(rb_intern("@io"));
330
- cXMLHtmlParserContext = rb_define_class_under(cXMLHtmlParser, "Context", cXMLParserContext);
331
-
332
- rb_define_singleton_method(cXMLHtmlParserContext, "file", rxml_html_parser_context_file, 1);
333
- rb_define_singleton_method(cXMLHtmlParserContext, "io", rxml_html_parser_context_io, 1);
334
- rb_define_singleton_method(cXMLHtmlParserContext, "string", rxml_html_parser_context_string, 1);
335
- rb_define_method(cXMLHtmlParserContext, "close", rxml_html_parser_context_close, 0);
336
- rb_define_method(cXMLHtmlParserContext, "disable_cdata=", rxml_html_parser_context_disable_cdata_set, 1);
337
- rb_define_method(cXMLHtmlParserContext, "options=", rxml_html_parser_context_options_set, 1);
338
- }
1
+ /* Please see the LICENSE file for copyright and distribution information */
2
+
3
+ #include "ruby_libxml.h"
4
+ #include "ruby_xml_html_parser_context.h"
5
+
6
+
7
+ /*
8
+ * Document-class: LibXML::XML::HTMLParser::Context
9
+ *
10
+ * The XML::HTMLParser::Context class provides in-depth control over how
11
+ * a document is parsed.
12
+ */
13
+
14
+ VALUE cXMLHtmlParserContext;
15
+ static ID IO_ATTR;
16
+
17
+ /* OS X 10.5 ships with libxml2 version 2.6.16 which does not expose the
18
+ htmlNewParserCtxt (or htmlInitParserCtxt which it uses) method. htmlNewParserCtxt
19
+ wasn't added to the libxml2 header files until 2.6.27. So the next two
20
+ methods are simply copied from a newer version of libxml2 (2.7.2). */
21
+ #if LIBXML_VERSION < 20627
22
+ #define XML_CTXT_FINISH_DTD_0 0xabcd1234
23
+ static int htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
24
+ {
25
+ htmlSAXHandler *sax;
26
+ if (ctxt == NULL) return(-1);
27
+
28
+ memset(ctxt, 0, sizeof(htmlParserCtxt));
29
+ ctxt->dict = xmlDictCreate();
30
+ if (ctxt->dict == NULL) {
31
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
32
+ return(-1);
33
+ }
34
+ sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler));
35
+ if (sax == NULL) {
36
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
37
+ return(-1);
38
+ }
39
+ else
40
+ memset(sax, 0, sizeof(htmlSAXHandler));
41
+
42
+ ctxt->inputTab = (htmlParserInputPtr *) xmlMalloc(5 * sizeof(htmlParserInputPtr));
43
+ if (ctxt->inputTab == NULL) {
44
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
45
+ ctxt->inputNr = 0;
46
+ ctxt->inputMax = 0;
47
+ ctxt->input = NULL;
48
+ return(-1);
49
+ }
50
+ ctxt->inputNr = 0;
51
+ ctxt->inputMax = 5;
52
+ ctxt->input = NULL;
53
+ ctxt->version = NULL;
54
+ ctxt->encoding = NULL;
55
+ ctxt->standalone = -1;
56
+ ctxt->instate = XML_PARSER_START;
57
+
58
+ ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr));
59
+ if (ctxt->nodeTab == NULL) {
60
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
61
+ ctxt->nodeNr = 0;
62
+ ctxt->nodeMax = 0;
63
+ ctxt->node = NULL;
64
+ ctxt->inputNr = 0;
65
+ ctxt->inputMax = 0;
66
+ ctxt->input = NULL;
67
+ return(-1);
68
+ }
69
+ ctxt->nodeNr = 0;
70
+ ctxt->nodeMax = 10;
71
+ ctxt->node = NULL;
72
+
73
+ ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
74
+ if (ctxt->nameTab == NULL) {
75
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
76
+ ctxt->nameNr = 0;
77
+ ctxt->nameMax = 10;
78
+ ctxt->name = NULL;
79
+ ctxt->nodeNr = 0;
80
+ ctxt->nodeMax = 0;
81
+ ctxt->node = NULL;
82
+ ctxt->inputNr = 0;
83
+ ctxt->inputMax = 0;
84
+ ctxt->input = NULL;
85
+ return(-1);
86
+ }
87
+ ctxt->nameNr = 0;
88
+ ctxt->nameMax = 10;
89
+ ctxt->name = NULL;
90
+
91
+ if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
92
+ else {
93
+ ctxt->sax = sax;
94
+ memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
95
+ }
96
+ ctxt->userData = ctxt;
97
+ ctxt->myDoc = NULL;
98
+ ctxt->wellFormed = 1;
99
+ ctxt->replaceEntities = 0;
100
+ ctxt->linenumbers = xmlLineNumbersDefaultValue;
101
+ ctxt->html = 1;
102
+ ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
103
+ ctxt->vctxt.userData = ctxt;
104
+ ctxt->vctxt.error = xmlParserValidityError;
105
+ ctxt->vctxt.warning = xmlParserValidityWarning;
106
+ ctxt->record_info = 0;
107
+ ctxt->validate = 0;
108
+ ctxt->nbChars = 0;
109
+ ctxt->checkIndex = 0;
110
+ ctxt->catalogs = NULL;
111
+ xmlInitNodeInfoSeq(&ctxt->node_seq);
112
+ return(0);
113
+ }
114
+
115
+ static htmlParserCtxtPtr htmlNewParserCtxt(void)
116
+ {
117
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
118
+ if (ctxt == NULL) {
119
+ rb_raise(rb_eNoMemError, "NewParserCtxt: out of memory\n");
120
+ return(NULL);
121
+ }
122
+ memset(ctxt, 0, sizeof(xmlParserCtxt));
123
+ if (htmlInitParserCtxt(ctxt) < 0) {
124
+ htmlFreeParserCtxt(ctxt);
125
+ return(NULL);
126
+ }
127
+ return(ctxt);
128
+ }
129
+ #endif
130
+
131
+ static void rxml_html_parser_context_free(htmlParserCtxtPtr ctxt)
132
+ {
133
+ htmlFreeParserCtxt(ctxt);
134
+ }
135
+
136
+ static VALUE rxml_html_parser_context_wrap(htmlParserCtxtPtr ctxt)
137
+ {
138
+ return Data_Wrap_Struct(cXMLHtmlParserContext, NULL, rxml_html_parser_context_free, ctxt);
139
+ }
140
+
141
+ /* call-seq:
142
+ * XML::HTMLParser::Context.file(file) -> XML::HTMLParser::Context
143
+ *
144
+ * Creates a new parser context based on the specified file or uri.
145
+ *
146
+ * Parameters:
147
+ *
148
+ * file - A filename or uri.
149
+ */
150
+ static VALUE rxml_html_parser_context_file(VALUE klass, VALUE file)
151
+ {
152
+ htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(StringValuePtr(file), NULL);
153
+ if (!ctxt)
154
+ rxml_raise(&xmlLastError);
155
+
156
+ /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
157
+ xmlCtxtUseOptionsInternal (called below) initialize slightly different
158
+ context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
159
+ sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
160
+ htmlCtxtUseOptions(ctxt, rxml_libxml_default_options());
161
+
162
+ return rxml_html_parser_context_wrap(ctxt);
163
+ }
164
+
165
+ /* call-seq:
166
+ * XML::HTMLParser::Context.io(io) -> XML::HTMLParser::Context
167
+ *
168
+ * Creates a new parser context based on the specified io object.
169
+ *
170
+ * Parameters:
171
+ *
172
+ * io - A ruby IO object.
173
+ */
174
+ static VALUE rxml_html_parser_context_io(VALUE klass, VALUE io)
175
+ {
176
+ VALUE result;
177
+ htmlParserCtxtPtr ctxt;
178
+ xmlParserInputBufferPtr input;
179
+ xmlParserInputPtr stream;
180
+
181
+ if (NIL_P(io))
182
+ rb_raise(rb_eTypeError, "Must pass in an IO object");
183
+
184
+ input = xmlParserInputBufferCreateIO((xmlInputReadCallback) rxml_read_callback, NULL,
185
+ (void*)io, XML_CHAR_ENCODING_NONE);
186
+
187
+ ctxt = htmlNewParserCtxt();
188
+ if (!ctxt)
189
+ {
190
+ xmlFreeParserInputBuffer(input);
191
+ rxml_raise(&xmlLastError);
192
+ }
193
+
194
+ /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
195
+ xmlCtxtUseOptionsInternal (called below) initialize slightly different
196
+ context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
197
+ sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
198
+ htmlCtxtUseOptions(ctxt, rxml_libxml_default_options());
199
+
200
+ stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
201
+
202
+ if (!stream)
203
+ {
204
+ xmlFreeParserInputBuffer(input);
205
+ xmlFreeParserCtxt(ctxt);
206
+ rxml_raise(&xmlLastError);
207
+ }
208
+ inputPush(ctxt, stream);
209
+ result = rxml_html_parser_context_wrap(ctxt);
210
+
211
+ /* Attach io object to parser so it won't get freed.*/
212
+ rb_ivar_set(result, IO_ATTR, io);
213
+
214
+ return result;
215
+ }
216
+
217
+ /* call-seq:
218
+ * XML::HTMLParser::Context.string(string) -> XML::HTMLParser::Context
219
+ *
220
+ * Creates a new parser context based on the specified string.
221
+ *
222
+ * Parameters:
223
+ *
224
+ * string - A string that contains the data to parse.
225
+ */
226
+ static VALUE rxml_html_parser_context_string(VALUE klass, VALUE string)
227
+ {
228
+ htmlParserCtxtPtr ctxt;
229
+ Check_Type(string, T_STRING);
230
+
231
+ if (RSTRING_LEN(string) == 0)
232
+ rb_raise(rb_eArgError, "Must specify a string with one or more characters");
233
+
234
+ ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(string),
235
+ (int)RSTRING_LEN(string));
236
+ if (!ctxt)
237
+ rxml_raise(&xmlLastError);
238
+
239
+ /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
240
+ xmlCtxtUseOptionsInternal (called below) initialize slightly different
241
+ context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
242
+ sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
243
+ htmlCtxtUseOptions(ctxt, rxml_libxml_default_options());
244
+
245
+ htmlDefaultSAXHandlerInit();
246
+ if (ctxt->sax != NULL)
247
+ memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
248
+
249
+ return rxml_html_parser_context_wrap(ctxt);
250
+ }
251
+
252
+ /*
253
+ * call-seq:
254
+ * context.close -> nil
255
+ *
256
+ * Closes the underlying input streams. This is useful when parsing a large number of
257
+ * files and you want to close the files without relying on Ruby's garbage collector
258
+ * to run.
259
+ */
260
+ static VALUE rxml_html_parser_context_close(VALUE self)
261
+ {
262
+ htmlParserCtxtPtr ctxt;
263
+ xmlParserInputPtr xinput;
264
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
265
+
266
+ while ((xinput = inputPop(ctxt)) != NULL)
267
+ {
268
+ xmlFreeInputStream(xinput);
269
+ }
270
+ return Qnil;
271
+ }
272
+
273
+ /*
274
+ * call-seq:
275
+ * context.disable_cdata = (true|false)
276
+ *
277
+ * Control whether the CDATA nodes will be created in this context.
278
+ */
279
+ static VALUE rxml_html_parser_context_disable_cdata_set(VALUE self, VALUE value)
280
+ {
281
+ htmlParserCtxtPtr ctxt;
282
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
283
+
284
+ if (ctxt->sax == NULL)
285
+ rb_raise(rb_eRuntimeError, "Sax handler is not yet set");
286
+
287
+ /* LibXML controls this internally with the default SAX handler. */
288
+ if (value)
289
+ ctxt->sax->cdataBlock = NULL;
290
+ else
291
+ ctxt->sax->cdataBlock = htmlDefaultSAXHandler.cdataBlock;
292
+
293
+ return value;
294
+ }
295
+
296
+ /*
297
+ * call-seq:
298
+ * context.options = XML::Parser::Options::NOENT |
299
+ XML::Parser::Options::NOCDATA
300
+ *
301
+ * Provides control over the execution of a parser. Valid values
302
+ * are the constants defined on XML::Parser::Options. Multiple
303
+ * options can be combined by using Bitwise OR (|).
304
+ */
305
+ static VALUE rxml_html_parser_context_options_set(VALUE self, VALUE options)
306
+ {
307
+ int xml_options = NUM2INT(options);
308
+ htmlParserCtxtPtr ctxt;
309
+ Check_Type(options, T_FIXNUM);
310
+
311
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
312
+ htmlCtxtUseOptions(ctxt, xml_options);
313
+
314
+ #if LIBXML_VERSION >= 20707
315
+ /* Big hack here, but htmlCtxtUseOptions doesn't support HTML_PARSE_NOIMPLIED.
316
+ So do it ourselves. There must be a better way??? */
317
+ if (xml_options & HTML_PARSE_NOIMPLIED)
318
+ {
319
+ ctxt->options |= HTML_PARSE_NOIMPLIED;
320
+ }
321
+ #endif
322
+
323
+ return self;
324
+ }
325
+
326
+ void rxml_init_html_parser_context(void)
327
+ {
328
+ IO_ATTR = ID2SYM(rb_intern("@io"));
329
+ cXMLHtmlParserContext = rb_define_class_under(cXMLHtmlParser, "Context", cXMLParserContext);
330
+
331
+ rb_define_singleton_method(cXMLHtmlParserContext, "file", rxml_html_parser_context_file, 1);
332
+ rb_define_singleton_method(cXMLHtmlParserContext, "io", rxml_html_parser_context_io, 1);
333
+ rb_define_singleton_method(cXMLHtmlParserContext, "string", rxml_html_parser_context_string, 1);
334
+ rb_define_method(cXMLHtmlParserContext, "close", rxml_html_parser_context_close, 0);
335
+ rb_define_method(cXMLHtmlParserContext, "disable_cdata=", rxml_html_parser_context_disable_cdata_set, 1);
336
+ rb_define_method(cXMLHtmlParserContext, "options=", rxml_html_parser_context_options_set, 1);
337
+ }