libxml-ruby 0.3.8.4 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. data/CHANGELOG +6 -0
  2. data/LICENSE +1 -1
  3. data/README +1 -1
  4. data/Rakefile +8 -5
  5. data/TODO +1 -1
  6. data/ext/xml/extconf.rb +4 -5
  7. data/ext/xml/libxml.c +5 -2
  8. data/ext/xml/libxml.h +16 -7
  9. data/ext/xml/libxml.rb +3 -3
  10. data/ext/xml/ruby_xml_attr.c +118 -99
  11. data/ext/xml/ruby_xml_attr.h +4 -7
  12. data/ext/xml/ruby_xml_document.c +131 -170
  13. data/ext/xml/ruby_xml_document.h +5 -9
  14. data/ext/xml/ruby_xml_html_parser.c +453 -0
  15. data/ext/xml/ruby_xml_html_parser.h +29 -0
  16. data/ext/xml/ruby_xml_node.c +219 -253
  17. data/ext/xml/ruby_xml_node.h +4 -7
  18. data/ext/xml/ruby_xml_node_set.c +6 -6
  19. data/ext/xml/ruby_xml_node_set.h +1 -1
  20. data/ext/xml/ruby_xml_ns.c +1 -1
  21. data/ext/xml/ruby_xml_ns.h +1 -1
  22. data/ext/xml/ruby_xml_parser.c +5 -8
  23. data/ext/xml/ruby_xml_parser.h +1 -1
  24. data/ext/xml/ruby_xml_parser_context.c +3 -4
  25. data/ext/xml/ruby_xml_parser_context.h +1 -1
  26. data/ext/xml/ruby_xml_reader.c +893 -0
  27. data/ext/xml/ruby_xml_reader.h +14 -0
  28. data/ext/xml/ruby_xml_sax_parser.c +255 -204
  29. data/ext/xml/ruby_xml_sax_parser.h +6 -2
  30. data/ext/xml/ruby_xml_tree.c +1 -1
  31. data/ext/xml/ruby_xml_tree.h +1 -1
  32. data/ext/xml/ruby_xml_xinclude.c +1 -1
  33. data/ext/xml/ruby_xml_xinclude.h +1 -1
  34. data/ext/xml/ruby_xml_xpath.c +3 -2
  35. data/ext/xml/ruby_xml_xpath.h +1 -1
  36. data/ext/xml/ruby_xml_xpath_context.c +4 -4
  37. data/ext/xml/ruby_xml_xpath_context.h +1 -1
  38. data/ext/xml/ruby_xml_xpointer.c +10 -4
  39. data/ext/xml/ruby_xml_xpointer.h +1 -1
  40. data/ext/xml/ruby_xml_xpointer_context.c +1 -1
  41. data/ext/xml/ruby_xml_xpointer_context.h +1 -1
  42. data/ext/xml/sax_parser_callbacks.inc +55 -54
  43. data/tests/model/rubynet_project +1 -1
  44. data/tests/model/simple.xml +7 -0
  45. data/tests/tc_xml_document.rb +1 -1
  46. data/tests/tc_xml_document_write.rb +1 -1
  47. data/tests/tc_xml_document_write2.rb +1 -1
  48. data/tests/tc_xml_document_write3.rb +1 -1
  49. data/tests/tc_xml_html_parser.rb +60 -0
  50. data/tests/tc_xml_node.rb +1 -1
  51. data/tests/tc_xml_node2.rb +1 -1
  52. data/tests/tc_xml_node3.rb +1 -1
  53. data/tests/tc_xml_node4.rb +8 -5
  54. data/tests/tc_xml_node5.rb +1 -1
  55. data/tests/tc_xml_node6.rb +1 -1
  56. data/tests/tc_xml_node7.rb +1 -1
  57. data/tests/tc_xml_node_set.rb +1 -1
  58. data/tests/tc_xml_node_set2.rb +1 -1
  59. data/tests/tc_xml_node_xlink.rb +1 -1
  60. data/tests/tc_xml_parser.rb +5 -1
  61. data/tests/tc_xml_parser2.rb +1 -1
  62. data/tests/tc_xml_parser3.rb +1 -1
  63. data/tests/tc_xml_parser4.rb +1 -1
  64. data/tests/tc_xml_parser5.rb +1 -1
  65. data/tests/tc_xml_parser6.rb +1 -1
  66. data/tests/tc_xml_parser7.rb +1 -1
  67. data/tests/tc_xml_parser8.rb +1 -1
  68. data/tests/tc_xml_parser_context.rb +1 -1
  69. data/tests/tc_xml_reader.rb +101 -0
  70. data/tests/tc_xml_sax_parser.rb +95 -0
  71. data/tests/tc_xml_xinclude.rb +1 -1
  72. data/tests/tc_xml_xpath.rb +1 -1
  73. data/tests/tc_xml_xpointer.rb +1 -1
  74. metadata +79 -73
  75. data/ext/xml/ruby_xml_attribute.c +0 -224
  76. data/ext/xml/ruby_xml_attribute.h +0 -21
  77. data/tests/test_xml_sax_parser.rb +0 -64
@@ -0,0 +1,453 @@
1
+ /* $Id: ruby_xml_html_parser.c 138 2007-08-29 18:00:35Z danj $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #include "libxml.h"
6
+
7
+ VALUE cXMLHTMLParser; /* Ruby class object for XML::HTMLParser; assigned in ruby_init_html_parser() */
8
+
9
+ //static int
10
+ //ctxtRead(FILE *f, char * buf, int len) {
11
+ // return(fread(buf, 1, len, f));
12
+ //}
13
+
14
+
15
+ /*
16
+ * call-seq:
17
+ * parser.filename => "filename"
18
+ *
19
+ * Obtain the filename this parser will read from.
20
+ */
21
+ /*
22
+ VALUE
23
+ ruby_xml_html_parser_filename_get(VALUE self) {
24
+ ruby_xml_html_parser *rxp;
25
+ rx_file_data *data;
26
+
27
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
28
+ if (rxp->data == NULL)
29
+ return(Qnil);
30
+
31
+ if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE)
32
+ return(Qnil);
33
+
34
+ data = (rx_file_data *)rxp->data;
35
+ return(data->filename);
36
+ }
37
+ */
38
+
39
+ /*
40
+ * call-seq:
41
+ * parser.filename = "filename"
42
+ *
43
+ * Set the filename this parser will read from.
44
+ */
45
+ /*
46
+ VALUE
47
+ ruby_xml_html_parser_filename_set(VALUE self, VALUE filename) {
48
+ ruby_xml_html_parser *rxp;
49
+ ruby_xml_parser_context *rxpc;
50
+ rx_file_data *data;
51
+
52
+ Check_Type(filename, T_STRING);
53
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
54
+
55
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
56
+ if (rxp->data != NULL)
57
+ rb_fatal("crap, this should be null");
58
+
59
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
60
+ data = ALLOC(rx_file_data);
61
+ rxp->data = data;
62
+ } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE) {
63
+ return(Qnil);
64
+ }
65
+
66
+ rxp->ctxt = ruby_xml_parser_context_new3();
67
+ data = (rx_file_data *)rxp->data;
68
+ data->filename = filename;
69
+
70
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
71
+ rxpc->ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename));
72
+ if (rxpc->ctxt == NULL)
73
+ rb_sys_fail(StringValuePtr(filename));
74
+
75
+ return(data->filename);
76
+ }
77
+ */
78
+
79
+ void
80
+ ruby_xml_html_parser_free(ruby_xml_html_parser *rxp) {
81
+ void *data;
82
+
83
+ ruby_xml_parser_count--;
84
+ if (ruby_xml_parser_count == 0)
85
+ xmlCleanupParser();
86
+
87
+ switch(rxp->data_type) {
88
+ case RUBY_LIBXML_SRC_TYPE_NULL:
89
+ break;
90
+ case RUBY_LIBXML_SRC_TYPE_FILE:
91
+ data = (void *)(rx_file_data *)rxp->data;
92
+ free((rx_file_data *)data);
93
+ break;
94
+ case RUBY_LIBXML_SRC_TYPE_STRING:
95
+ data = (void *)(rx_string_data *)rxp->data;
96
+ free((rx_string_data *)data);
97
+ break;
98
+ case RUBY_LIBXML_SRC_TYPE_IO:
99
+ data = (void *)(rx_io_data *)rxp->data;
100
+ free((rx_io_data *)data);
101
+ break;
102
+ default:
103
+ rb_fatal("Unknown data type, %d", rxp->data_type);
104
+ }
105
+
106
+ free(rxp);
107
+ }
108
+
109
+
110
+ /*
111
+ * call-seq:
112
+ * parser.io => IO
113
+ *
114
+ * Obtain the IO instance this parser works with.
115
+ */
116
+ /*
117
+ VALUE
118
+ ruby_xml_html_parser_io_get(VALUE self, VALUE io) {
119
+ ruby_xml_html_parser *rxp;
120
+ rx_io_data *data;
121
+
122
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
123
+
124
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL ||
125
+ rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO ||
126
+ rxp->data == NULL)
127
+ return(Qnil);
128
+
129
+ data = (rx_io_data *)rxp->data;
130
+
131
+ return(data->io);
132
+ }
133
+ */
134
+
135
+ /*
136
+ * call-seq:
137
+ * parser.io = IO
138
+ *
139
+ * Set the IO instance this parser works with.
140
+ */
141
+ /*
142
+ VALUE
143
+ ruby_xml_html_parser_io_set(VALUE self, VALUE io) {
144
+ ruby_xml_html_parser *rxp;
145
+ ruby_xml_parser_context *rxpc;
146
+ rx_io_data *data;
147
+ OpenFile *fptr;
148
+ FILE *f;
149
+
150
+ if (!rb_obj_is_kind_of(io, rb_cIO))
151
+ rb_raise(rb_eTypeError, "need an IO object");
152
+
153
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
154
+
155
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
156
+ if (rxp->data != NULL)
157
+ rb_fatal("crap, this should be null");
158
+
159
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
160
+ data = ALLOC(rx_io_data);
161
+ rxp->data = data;
162
+ } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO) {
163
+ return(Qnil);
164
+ }
165
+
166
+ rxp->ctxt = ruby_xml_parser_context_new3();
167
+ data = (rx_io_data *)rxp->data;
168
+ data->io = io;
169
+
170
+ GetOpenFile(io, fptr);
171
+ rb_io_check_readable(fptr);
172
+ f = GetWriteFile(fptr);
173
+
174
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
175
+ rxpc->ctxt = htmlCreateIOParserCtxt(NULL, NULL,
176
+ (xmlInputReadCallback) ctxtRead,
177
+ NULL, f, XML_CHAR_ENCODING_NONE);
178
+ if (NIL_P(rxpc->ctxt))
179
+ rb_sys_fail(0);
180
+
181
+ return(data->io);
182
+ }
183
+ */
184
+
185
+ void
186
+ ruby_xml_html_parser_mark(ruby_xml_html_parser *rxp) {
187
+ if (rxp == NULL) return;
188
+ if (!NIL_P(rxp->ctxt)) rb_gc_mark(rxp->ctxt);
189
+
190
+ switch(rxp->data_type) {
191
+ case RUBY_LIBXML_SRC_TYPE_NULL:
192
+ break;
193
+ case RUBY_LIBXML_SRC_TYPE_FILE:
194
+ if (!NIL_P(((rx_file_data *)rxp->data)->filename))
195
+ rb_gc_mark(((rx_file_data *)rxp->data)->filename);
196
+ break;
197
+ case RUBY_LIBXML_SRC_TYPE_STRING:
198
+ if (!NIL_P(((rx_string_data *)rxp->data)->str))
199
+ rb_gc_mark(((rx_string_data *)rxp->data)->str);
200
+ break;
201
+ case RUBY_LIBXML_SRC_TYPE_IO:
202
+ if (!NIL_P(((rx_io_data *)rxp->data)->io))
203
+ rb_gc_mark(((rx_io_data *)rxp->data)->io);
204
+ break;
205
+ default:
206
+ rb_fatal("unknown datatype: %d", rxp->data_type);
207
+ }
208
+ }
209
+
210
+
211
+ /*
212
+ * call-seq:
213
+ * XML::HTMLParser.new => parser
214
+ *
215
+ * Create a new parser instance with no pre-determined source.
216
+ */
217
+ VALUE
218
+ ruby_xml_html_parser_new(VALUE class) {
219
+ ruby_xml_html_parser *rxp;
220
+
221
+ ruby_xml_parser_count++;
222
+ rxp = ALLOC(ruby_xml_html_parser);
223
+ rxp->ctxt = Qnil;
224
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
225
+ rxp->data = NULL;
226
+ rxp->parsed = 0;
227
+
228
+ return(Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
229
+ ruby_xml_html_parser_free, rxp));
230
+ }
231
+
232
+
233
+ /*
234
+ * call-seq:
235
+ * XML::HTMLParser.file => parser
236
+ *
237
+ * Create a new parser instance that will read the specified file.
238
+ */
239
+ /*
240
+ VALUE
241
+ ruby_xml_html_parser_new_file(VALUE class, VALUE filename) {
242
+ VALUE obj;
243
+ ruby_xml_html_parser *rxp;
244
+ rx_file_data *data;
245
+
246
+ obj = ruby_xml_html_parser_new(class);
247
+ Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
248
+
249
+ data = ALLOC(rx_file_data);
250
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
251
+ rxp->data = data;
252
+
253
+ ruby_xml_html_parser_filename_set(obj, filename);
254
+
255
+ return(obj);
256
+ }
257
+ */
258
+
259
+ /*
260
+ * call-seq:
261
+ * XML::HTMLParser.io => parser
262
+ *
263
+ * Create a new parser instance that will read from the
264
+ * specified IO object.
265
+ */
266
+ /*
267
+ VALUE
268
+ ruby_xml_html_parser_new_io(VALUE class, VALUE io) {
269
+ VALUE obj;
270
+ ruby_xml_html_parser *rxp;
271
+ rx_io_data *data;
272
+
273
+ obj = ruby_xml_html_parser_new(class);
274
+ Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
275
+
276
+ data = ALLOC(rx_io_data);
277
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
278
+ rxp->data = data;
279
+
280
+ ruby_xml_html_parser_io_set(obj, io);
281
+
282
+ return(obj);
283
+ }
284
+ */
285
+
286
+ /*
287
+ * call-seq:
288
+ * XML::HTMLParser.string => parser
289
+ *
290
+ * Create a new parser instance that will parse the given
291
+ * string.
292
+ */
293
+ VALUE
294
+ ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
295
+ VALUE obj;
296
+ ruby_xml_html_parser *rxp;
297
+ rx_string_data *data;
298
+
299
+ obj = ruby_xml_html_parser_new(class);
300
+ Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
301
+
302
+ data = ALLOC(rx_string_data);
303
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
304
+ rxp->data = data;
305
+
306
+ ruby_xml_html_parser_str_set(obj, str);
307
+
308
+ return(obj);
309
+ }
310
+
311
+
312
+ /*
313
+ * call-seq:
314
+ * parser.parse => document
315
+ *
316
+ * Parse the input XML and create an XML::Document with
317
+ * it's content. If an error occurs, XML::Parser::ParseError
318
+ * is thrown.
319
+ */
320
+ VALUE
321
+ ruby_xml_html_parser_parse(VALUE self) {
322
+ ruby_xml_document_t *rxd;
323
+ ruby_xml_html_parser *rxp;
324
+ ruby_xml_parser_context *rxpc;
325
+ htmlDocPtr xdp;
326
+ VALUE doc;
327
+
328
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
329
+
330
+ switch (rxp->data_type) {
331
+ case RUBY_LIBXML_SRC_TYPE_NULL:
332
+ return(Qnil);
333
+ case RUBY_LIBXML_SRC_TYPE_STRING:
334
+ //case RUBY_LIBXML_SRC_TYPE_FILE:
335
+ //case RUBY_LIBXML_SRC_TYPE_IO:
336
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
337
+
338
+ /* don't check return values here, the HTML parser returns errors
339
+ * but still allows the resulting tree to be used.
340
+ */
341
+ htmlParseDocument(rxpc->ctxt);
342
+ xdp = rxpc->ctxt->myDoc;
343
+ rxp->parsed = 1;
344
+
345
+ doc = ruby_xml_document_wrap(cXMLDocument, xdp);
346
+ break;
347
+ default:
348
+ rb_fatal("Unknown data type, %d", rxp->data_type);
349
+ }
350
+
351
+ return(doc);
352
+ }
353
+
354
+
355
+ /*
356
+ * call-seq:
357
+ * parser.context => context
358
+ *
359
+ * Obtain the XML::Parser::Context associated with this
360
+ * parser.
361
+ */
362
+ VALUE
363
+ ruby_xml_html_parser_context_get(VALUE self) {
364
+ ruby_xml_html_parser *rxp;
365
+
366
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
367
+ if (rxp->ctxt == Qnil)
368
+ return(Qnil);
369
+ else
370
+ return(rxp->ctxt);
371
+ }
372
+
373
+
374
+ /*
375
+ * call-seq:
376
+ * parser.string => "string"
377
+ *
378
+ * Obtain the string this parser works with.
379
+ */
380
+ VALUE
381
+ ruby_xml_html_parser_str_get(VALUE self) {
382
+ ruby_xml_html_parser *rxp;
383
+ rx_string_data *data;
384
+
385
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
386
+ if (rxp->data == NULL || rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING)
387
+ return(Qnil);
388
+
389
+ data = (rx_string_data *)rxp->data;
390
+ return(data->str);
391
+ }
392
+
393
+
394
+ /*
395
+ * call-seq:
396
+ * parser.string = "string"
397
+ *
398
+ * Set the string this parser works with.
399
+ */
400
+ VALUE
401
+ ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
402
+ ruby_xml_html_parser *rxp;
403
+ ruby_xml_parser_context *rxpc;
404
+ rx_string_data *data;
405
+
406
+ Check_Type(str, T_STRING);
407
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
408
+
409
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
410
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
411
+ data = ALLOC(rx_string_data);
412
+ rxp->data = data;
413
+ } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
414
+ return(Qnil);
415
+ }
416
+
417
+ rxp->ctxt = ruby_xml_parser_context_new3();
418
+ data = (rx_string_data *)rxp->data;
419
+ data->str = str;
420
+
421
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
422
+ rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));
423
+
424
+ return(data->str);
425
+ }
426
+
427
+
428
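+ // For RDoc only: lets RDoc see that mXML is the "XML" module (the guard macro is presumably never defined, so this is not compiled).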
429
+ #ifdef RDOC_NEVER_DEFINED
430
+ mXML = rb_define_module("XML");
431
+ #endif
432
+
433
+ void
434
+ ruby_init_html_parser(void) {
435
+ cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
436
+
437
+ /*
438
+ rb_define_singleton_method(cXMLHTMLParser, "file", ruby_xml_html_parser_new_file, 1);
439
+ rb_define_singleton_method(cXMLHTMLParser, "io", ruby_xml_html_parser_new_io, 1);
440
+ */
441
+ rb_define_singleton_method(cXMLHTMLParser, "new", ruby_xml_html_parser_new, 0);
442
+ rb_define_singleton_method(cXMLHTMLParser, "string", ruby_xml_html_parser_new_string, 1);
443
+ /*
444
+ rb_define_method(cXMLHTMLParser, "filename", ruby_xml_html_parser_filename_get, 0);
445
+ rb_define_method(cXMLHTMLParser, "filename=", ruby_xml_html_parser_filename_set, 1);
446
+ rb_define_method(cXMLHTMLParser, "io", ruby_xml_html_parser_io_get, 0);
447
+ rb_define_method(cXMLHTMLParser, "io=", ruby_xml_html_parser_io_set, 1);
448
+ */
449
+ rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
450
+ rb_define_method(cXMLHTMLParser, "parser_context", ruby_xml_html_parser_context_get, 0);
451
+ rb_define_method(cXMLHTMLParser, "string", ruby_xml_html_parser_str_get, 0);
452
+ rb_define_method(cXMLHTMLParser, "string=", ruby_xml_html_parser_str_set, 1);
453
+ }
@@ -0,0 +1,29 @@
1
+ /* $Id: ruby_xml_html_parser.h 111 2006-11-20 01:39:14Z roscopeco $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #ifndef __RUBY_XML_HTML_PARSER__
6
+ #define __RUBY_XML_HTML_PARSER__
7
+
8
+ extern int ruby_xml_html_parser_count;
9
+ extern VALUE cXMLHTMLParser;
10
+
11
+ typedef struct ruby_xml_html_parser {
12
+ VALUE ctxt;
13
+ int parsed;
14
+ void *data;
15
+ int data_type;
16
+ } ruby_xml_html_parser;
17
+
18
+ /*
19
+ * VALUE ruby_xml_html_parser_filename_get(VALUE self);
20
+ VALUE ruby_xml_html_parser_filename_set(VALUE self, VALUE filename);
21
+ VALUE ruby_xml_html_parser_new(VALUE class);
22
+ */
23
+ VALUE ruby_xml_html_parser_parse(VALUE self);
24
+ VALUE ruby_xml_html_parser_str_get(VALUE self);
25
+ VALUE ruby_xml_html_parser_str_set(VALUE self, VALUE str);
26
+
27
+ void ruby_init_html_parser(void);
28
+
29
+ #endif