libxml-ruby 0.5.4 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. data/LICENSE +23 -23
  2. data/README +144 -144
  3. data/ext/libxml/extconf.rb +26 -27
  4. data/ext/libxml/libxml.c +7 -37
  5. data/ext/libxml/{libxml.h → ruby_libxml.h} +93 -98
  6. data/ext/libxml/ruby_xml_attr.c +405 -387
  7. data/ext/libxml/ruby_xml_attr.h +19 -18
  8. data/ext/libxml/ruby_xml_document.c +1111 -1115
  9. data/ext/libxml/ruby_xml_document.h +27 -24
  10. data/ext/libxml/ruby_xml_dtd.c +168 -168
  11. data/ext/libxml/ruby_xml_html_parser.c +449 -450
  12. data/ext/libxml/ruby_xml_html_parser.h +1 -1
  13. data/ext/libxml/ruby_xml_input_cbg.c +158 -158
  14. data/ext/libxml/ruby_xml_node.c +2410 -2395
  15. data/ext/libxml/ruby_xml_node.h +1 -1
  16. data/ext/libxml/ruby_xml_node_set.c +170 -170
  17. data/ext/libxml/ruby_xml_node_set.h +1 -1
  18. data/ext/libxml/ruby_xml_ns.c +153 -153
  19. data/ext/libxml/ruby_xml_ns.h +1 -1
  20. data/ext/libxml/ruby_xml_parser.c +1425 -1422
  21. data/ext/libxml/ruby_xml_parser.h +1 -1
  22. data/ext/libxml/ruby_xml_parser_context.c +750 -716
  23. data/ext/libxml/ruby_xml_parser_context.h +1 -1
  24. data/ext/libxml/ruby_xml_reader.c +900 -896
  25. data/ext/libxml/ruby_xml_sax_parser.c +485 -485
  26. data/ext/libxml/ruby_xml_sax_parser.h +1 -1
  27. data/ext/libxml/ruby_xml_schema.c +146 -142
  28. data/ext/libxml/ruby_xml_state.c +5 -6
  29. data/ext/libxml/ruby_xml_state.h +1 -0
  30. data/ext/libxml/ruby_xml_tree.c +43 -43
  31. data/ext/libxml/ruby_xml_tree.h +1 -1
  32. data/ext/libxml/ruby_xml_xinclude.c +20 -20
  33. data/ext/libxml/ruby_xml_xinclude.h +1 -1
  34. data/ext/libxml/ruby_xml_xpath.c +243 -252
  35. data/ext/libxml/ruby_xml_xpath.h +1 -1
  36. data/ext/libxml/ruby_xml_xpath_context.c +118 -118
  37. data/ext/libxml/ruby_xml_xpath_context.h +1 -1
  38. data/ext/libxml/ruby_xml_xpath_object.c +43 -29
  39. data/ext/libxml/ruby_xml_xpath_object.h +0 -1
  40. data/ext/libxml/ruby_xml_xpointer.c +100 -100
  41. data/ext/libxml/ruby_xml_xpointer.h +1 -1
  42. data/ext/libxml/ruby_xml_xpointer_context.c +21 -21
  43. data/ext/libxml/ruby_xml_xpointer_context.h +1 -1
  44. data/ext/libxml/sax_parser_callbacks.inc +213 -213
  45. data/ext/libxml/version.h +9 -9
  46. data/lib/libxml.rb +24 -3
  47. data/mingw/libiconv-2.dll +0 -0
  48. data/mingw/libxml2-2.dll +0 -0
  49. data/mingw/libxml_ruby.so +0 -0
  50. data/mingw/mingw.rake +36 -0
  51. data/test/dtd-test.rb +24 -24
  52. data/test/etc_doc_to_s.rb +1 -3
  53. data/test/ets_copy_bug.rb +21 -21
  54. data/test/ets_copy_bug2.rb +32 -32
  55. data/test/ets_copy_bug3.rb +38 -0
  56. data/test/ets_doc_file.rb +1 -0
  57. data/test/{model/default_validation_bug.rb → gc.log} +0 -0
  58. data/test/merge_bug.rb +55 -55
  59. data/test/schema-test.rb +74 -74
  60. data/test/tc_well_formed.rb +11 -0
  61. data/test/tc_xml_document.rb +52 -52
  62. data/test/tc_xml_document_write.rb +24 -24
  63. data/test/tc_xml_document_write2.rb +54 -54
  64. data/test/tc_xml_document_write3.rb +96 -96
  65. data/test/tc_xml_html_parser.rb +63 -63
  66. data/test/tc_xml_node.rb +59 -59
  67. data/test/tc_xml_node2.rb +25 -25
  68. data/test/tc_xml_node3.rb +27 -27
  69. data/test/tc_xml_node4.rb +86 -86
  70. data/test/tc_xml_node5.rb +52 -52
  71. data/test/tc_xml_node6.rb +27 -27
  72. data/test/tc_xml_node7.rb +35 -35
  73. data/test/tc_xml_node8.rb +32 -32
  74. data/test/tc_xml_node9.rb +32 -32
  75. data/test/tc_xml_node_set.rb +24 -24
  76. data/test/tc_xml_node_set2.rb +37 -37
  77. data/test/tc_xml_node_xlink.rb +28 -28
  78. data/test/tc_xml_parser.rb +190 -178
  79. data/test/tc_xml_parser2.rb +16 -17
  80. data/test/tc_xml_parser3.rb +23 -23
  81. data/test/tc_xml_parser4.rb +33 -33
  82. data/test/tc_xml_parser5.rb +27 -27
  83. data/test/tc_xml_parser6.rb +23 -23
  84. data/test/tc_xml_parser7.rb +28 -28
  85. data/test/tc_xml_parser8.rb +32 -32
  86. data/test/tc_xml_parser9.rb +11 -0
  87. data/test/tc_xml_parser_context.rb +88 -88
  88. data/test/tc_xml_reader.rb +112 -109
  89. data/test/tc_xml_sax_parser.rb +104 -94
  90. data/test/tc_xml_sax_parser2.rb +51 -0
  91. data/test/tc_xml_xinclude.rb +30 -30
  92. data/test/tc_xml_xpath.rb +38 -38
  93. data/test/tc_xml_xpath2.rb +14 -0
  94. data/test/tc_xml_xpointer.rb +78 -78
  95. data/vc/libxml.sln +20 -0
  96. data/vc/libxml.vcproj +389 -0
  97. data/work/Rakefile +247 -0
  98. data/work/task/make +26 -0
  99. data/work/task/memory +37 -0
  100. data/work/task/rdoc +39 -0
  101. data/work/task/setup +1616 -0
  102. data/work/task/test +29 -0
  103. data/work/test/ets_runner.rb +33 -0
  104. data/work/test/libxml_test.rb +3 -0
  105. data/work/test/runner.rb +0 -0
  106. data/work/test/runner_ets.rb +33 -0
  107. data/work/vc/debug/libxml.exp +0 -0
  108. data/work/vc/debug/libxml.ilk +0 -0
  109. data/work/vc/debug/libxml.lib +0 -0
  110. data/work/vc/debug/libxml.pdb +0 -0
  111. data/work/vc/debug/libxml.so +0 -0
  112. metadata +158 -189
  113. data/MANIFEST +0 -138
  114. data/NOTES +0 -9
  115. data/Rakefile +0 -38
  116. data/TODO +0 -75
  117. data/VERSION +0 -1
  118. data/log/Changelog-0.txt +0 -426
  119. data/log/Changelog.txt +0 -435
  120. data/meta/project.yaml +0 -27
  121. data/meta/unixname +0 -1
  122. data/setup.rb +0 -1472
  123. data/site/css/normal.css +0 -182
  124. data/site/img/raze-tiny.png +0 -0
  125. data/site/img/red-cube.jpg +0 -0
  126. data/site/img/xml-ruby.png +0 -0
  127. data/site/index.xml +0 -43
  128. data/site/install.xml +0 -77
  129. data/site/layout.rhtml +0 -38
  130. data/site/layout.xsl +0 -67
  131. data/site/license.xml +0 -32
  132. data/site/log/changelog.xml +0 -1324
  133. data/site/log/changelog.xsl +0 -42
  134. data/test/model/merge_bug_data.xml +0 -58
  135. data/test/model/rubynet.xml +0 -78
  136. data/test/model/rubynet_project +0 -13
  137. data/test/model/saxtest.xml +0 -5
  138. data/test/model/simple.xml +0 -7
  139. data/test/model/xinclude.xml +0 -5
@@ -1,450 +1,449 @@
1
- /* $Id: ruby_xml_html_parser.c 225 2007-12-07 04:58:09Z transami $ */
2
-
3
- /* Please see the LICENSE file for copyright and distribution information */
4
-
5
- #include "libxml.h"
6
-
7
- VALUE cXMLHTMLParser;
8
-
9
- //static int
10
- //ctxtRead(FILE *f, char * buf, int len) {
11
- // return(fread(buf, 1, len, f));
12
- //}
13
-
14
-
15
- /*
16
- * call-seq:
17
- * parser.filename => "filename"
18
- *
19
- * Obtain the filename this parser will read from.
20
- */
21
- /*
22
- VALUE
23
- ruby_xml_html_parser_filename_get(VALUE self) {
24
- ruby_xml_html_parser *rxp;
25
- rx_file_data *data;
26
-
27
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
28
- if (rxp->data == NULL)
29
- return(Qnil);
30
-
31
- if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE)
32
- return(Qnil);
33
-
34
- data = (rx_file_data *)rxp->data;
35
- return(data->filename);
36
- }
37
- */
38
-
39
- /*
40
- * call-seq:
41
- * parser.filename = "filename"
42
- *
43
- * Set the filename this parser will read from.
44
- */
45
- /*
46
- VALUE
47
- ruby_xml_html_parser_filename_set(VALUE self, VALUE filename) {
48
- ruby_xml_html_parser *rxp;
49
- ruby_xml_parser_context *rxpc;
50
- rx_file_data *data;
51
-
52
- Check_Type(filename, T_STRING);
53
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
54
-
55
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
56
- if (rxp->data != NULL)
57
- rb_fatal("crap, this should be null");
58
-
59
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
60
- data = ALLOC(rx_file_data);
61
- rxp->data = data;
62
- } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE) {
63
- return(Qnil);
64
- }
65
-
66
- rxp->ctxt = ruby_xml_parser_context_new3();
67
- data = (rx_file_data *)rxp->data;
68
- data->filename = filename;
69
-
70
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
71
- rxpc->ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename));
72
- if (rxpc->ctxt == NULL)
73
- rb_sys_fail(StringValuePtr(filename));
74
-
75
- return(data->filename);
76
- }
77
- */
78
-
79
- void
80
- ruby_xml_html_parser_free(ruby_xml_html_parser *rxp) {
81
- void *data;
82
-
83
- switch(rxp->data_type) {
84
- case RUBY_LIBXML_SRC_TYPE_NULL:
85
- break;
86
- case RUBY_LIBXML_SRC_TYPE_FILE:
87
- data = (void *)(rx_file_data *)rxp->data;
88
- free((rx_file_data *)data);
89
- break;
90
- case RUBY_LIBXML_SRC_TYPE_STRING:
91
- data = (void *)(rx_string_data *)rxp->data;
92
- free((rx_string_data *)data);
93
- break;
94
- case RUBY_LIBXML_SRC_TYPE_IO:
95
- data = (void *)(rx_io_data *)rxp->data;
96
- free((rx_io_data *)data);
97
- break;
98
- default:
99
- rb_fatal("Unknown data type, %d", rxp->data_type);
100
- }
101
-
102
- free(rxp);
103
- }
104
-
105
-
106
- /*
107
- * call-seq:
108
- * parser.io => IO
109
- *
110
- * Obtain the IO instance this parser works with.
111
- */
112
- /*
113
- VALUE
114
- ruby_xml_html_parser_io_get(VALUE self, VALUE io) {
115
- ruby_xml_html_parser *rxp;
116
- rx_io_data *data;
117
-
118
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
119
-
120
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL ||
121
- rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO ||
122
- rxp->data == NULL)
123
- return(Qnil);
124
-
125
- data = (rx_io_data *)rxp->data;
126
-
127
- return(data->io);
128
- }
129
- */
130
-
131
- /*
132
- * call-seq:
133
- * parser.io = IO
134
- *
135
- * Set the IO instance this parser works with.
136
- */
137
- /*
138
- VALUE
139
- ruby_xml_html_parser_io_set(VALUE self, VALUE io) {
140
- ruby_xml_html_parser *rxp;
141
- ruby_xml_parser_context *rxpc;
142
- rx_io_data *data;
143
- OpenFile *fptr;
144
- FILE *f;
145
-
146
- if (!rb_obj_is_kind_of(io, rb_cIO))
147
- rb_raise(rb_eTypeError, "need an IO object");
148
-
149
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
150
-
151
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
152
- if (rxp->data != NULL)
153
- rb_fatal("crap, this should be null");
154
-
155
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
156
- data = ALLOC(rx_io_data);
157
- rxp->data = data;
158
- } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO) {
159
- return(Qnil);
160
- }
161
-
162
- rxp->ctxt = ruby_xml_parser_context_new3();
163
- data = (rx_io_data *)rxp->data;
164
- data->io = io;
165
-
166
- GetOpenFile(io, fptr);
167
- rb_io_check_readable(fptr);
168
- f = GetWriteFile(fptr);
169
-
170
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
171
- rxpc->ctxt = htmlCreateIOParserCtxt(NULL, NULL,
172
- (xmlInputReadCallback) ctxtRead,
173
- NULL, f, XML_CHAR_ENCODING_NONE);
174
- if (NIL_P(rxpc->ctxt))
175
- rb_sys_fail(0);
176
-
177
- return(data->io);
178
- }
179
- */
180
-
181
- void
182
- ruby_xml_html_parser_mark(ruby_xml_html_parser *rxp) {
183
- if (rxp == NULL) return;
184
- if (!NIL_P(rxp->ctxt)) rb_gc_mark(rxp->ctxt);
185
-
186
- ruby_xml_state_marker();
187
-
188
- switch(rxp->data_type) {
189
- case RUBY_LIBXML_SRC_TYPE_NULL:
190
- break;
191
- case RUBY_LIBXML_SRC_TYPE_FILE:
192
- if (!NIL_P(((rx_file_data *)rxp->data)->filename))
193
- rb_gc_mark(((rx_file_data *)rxp->data)->filename);
194
- break;
195
- case RUBY_LIBXML_SRC_TYPE_STRING:
196
- if (!NIL_P(((rx_string_data *)rxp->data)->str))
197
- rb_gc_mark(((rx_string_data *)rxp->data)->str);
198
- break;
199
- case RUBY_LIBXML_SRC_TYPE_IO:
200
- if (!NIL_P(((rx_io_data *)rxp->data)->io))
201
- rb_gc_mark(((rx_io_data *)rxp->data)->io);
202
- break;
203
- default:
204
- rb_fatal("unknown datatype: %d", rxp->data_type);
205
- }
206
- }
207
-
208
-
209
- /*
210
- * call-seq:
211
- * XML::HTMLParser.new => parser
212
- *
213
- * Create a new parser instance with no pre-determined source.
214
- */
215
- VALUE
216
- ruby_xml_html_parser_new(VALUE class) {
217
- ruby_xml_html_parser *rxp;
218
-
219
- rxp = ALLOC(ruby_xml_html_parser);
220
- rxp->ctxt = Qnil;
221
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
222
- rxp->data = NULL;
223
- rxp->parsed = 0;
224
-
225
- return(Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
226
- ruby_xml_html_parser_free, rxp));
227
- }
228
-
229
-
230
- /*
231
- * call-seq:
232
- * XML::HTMLParser.file => parser
233
- *
234
- * Create a new parser instance that will read the specified file.
235
- */
236
- /*
237
- VALUE
238
- ruby_xml_html_parser_new_file(VALUE class, VALUE filename) {
239
- VALUE obj;
240
- ruby_xml_html_parser *rxp;
241
- rx_file_data *data;
242
-
243
- obj = ruby_xml_html_parser_new(class);
244
- Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
245
-
246
- data = ALLOC(rx_file_data);
247
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
248
- rxp->data = data;
249
-
250
- ruby_xml_html_parser_filename_set(obj, filename);
251
-
252
- return(obj);
253
- }
254
- */
255
-
256
- /*
257
- * call-seq:
258
- * XML::HTMLParser.io => parser
259
- *
260
- * Create a new parser instance that will read from the
261
- * specified IO object.
262
- */
263
- /*
264
- VALUE
265
- ruby_xml_html_parser_new_io(VALUE class, VALUE io) {
266
- VALUE obj;
267
- ruby_xml_html_parser *rxp;
268
- rx_io_data *data;
269
-
270
- obj = ruby_xml_html_parser_new(class);
271
- Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
272
-
273
- data = ALLOC(rx_io_data);
274
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
275
- rxp->data = data;
276
-
277
- ruby_xml_html_parser_io_set(obj, io);
278
-
279
- return(obj);
280
- }
281
- */
282
-
283
- /*
284
- * call-seq:
285
- * XML::HTMLParser.string => parser
286
- *
287
- * Create a new parser instance that will parse the given
288
- * string.
289
- */
290
- VALUE
291
- ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
292
- VALUE obj;
293
- ruby_xml_html_parser *rxp;
294
- rx_string_data *data;
295
-
296
- obj = ruby_xml_html_parser_new(class);
297
- Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
298
-
299
- data = ALLOC(rx_string_data);
300
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
301
- rxp->data = data;
302
-
303
- ruby_xml_html_parser_str_set(obj, str);
304
-
305
- return(obj);
306
- }
307
-
308
-
309
- /*
310
- * call-seq:
311
- * parser.parse => document
312
- *
313
- * Parse the input XML and create an XML::Document with
314
- * it's content. If an error occurs, XML::Parser::ParseError
315
- * is thrown.
316
- */
317
- VALUE
318
- ruby_xml_html_parser_parse(VALUE self) {
319
- ruby_xml_document_t *rxd;
320
- ruby_xml_html_parser *rxp;
321
- ruby_xml_parser_context *rxpc;
322
- htmlDocPtr xdp;
323
- VALUE doc;
324
-
325
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
326
-
327
- switch (rxp->data_type) {
328
- case RUBY_LIBXML_SRC_TYPE_NULL:
329
- return(Qnil);
330
- case RUBY_LIBXML_SRC_TYPE_STRING:
331
- //case RUBY_LIBXML_SRC_TYPE_FILE:
332
- //case RUBY_LIBXML_SRC_TYPE_IO:
333
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
334
-
335
- /* don't check return values here, the HTML parser returns errors
336
- * but still allows the resulting tree to be used.
337
- */
338
- htmlParseDocument(rxpc->ctxt);
339
- xdp = rxpc->ctxt->myDoc;
340
- rxp->parsed = 1;
341
-
342
- doc = ruby_xml_document_wrap(cXMLDocument, xdp);
343
- break;
344
- default:
345
- rb_fatal("Unknown data type, %d", rxp->data_type);
346
- }
347
-
348
- return(doc);
349
- }
350
-
351
-
352
- /*
353
- * call-seq:
354
- * parser.context => context
355
- *
356
- * Obtain the XML::Parser::Context associated with this
357
- * parser.
358
- */
359
- VALUE
360
- ruby_xml_html_parser_context_get(VALUE self) {
361
- ruby_xml_html_parser *rxp;
362
-
363
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
364
- if (rxp->ctxt == Qnil)
365
- return(Qnil);
366
- else
367
- return(rxp->ctxt);
368
- }
369
-
370
-
371
- /*
372
- * call-seq:
373
- * parser.string => "string"
374
- *
375
- * Obtain the string this parser works with.
376
- */
377
- VALUE
378
- ruby_xml_html_parser_str_get(VALUE self) {
379
- ruby_xml_html_parser *rxp;
380
- rx_string_data *data;
381
-
382
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
383
- if (rxp->data == NULL || rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING)
384
- return(Qnil);
385
-
386
- data = (rx_string_data *)rxp->data;
387
- return(data->str);
388
- }
389
-
390
-
391
- /*
392
- * call-seq:
393
- * parser.string = "string"
394
- *
395
- * Set the string this parser works with.
396
- */
397
- VALUE
398
- ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
399
- ruby_xml_html_parser *rxp;
400
- ruby_xml_parser_context *rxpc;
401
- rx_string_data *data;
402
-
403
- Check_Type(str, T_STRING);
404
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
405
-
406
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
407
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
408
- data = ALLOC(rx_string_data);
409
- rxp->data = data;
410
- } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
411
- return(Qnil);
412
- }
413
-
414
- rxp->ctxt = ruby_xml_parser_context_new3();
415
- data = (rx_string_data *)rxp->data;
416
- data->str = str;
417
-
418
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
419
- rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));
420
-
421
- return(data->str);
422
- }
423
-
424
-
425
- // Rdoc needs to know
426
- #ifdef RDOC_NEVER_DEFINED
427
- mXML = rb_define_module("XML");
428
- #endif
429
-
430
- void
431
- ruby_init_html_parser(void) {
432
- cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
433
-
434
- /*
435
- rb_define_singleton_method(cXMLHTMLParser, "file", ruby_xml_html_parser_new_file, 1);
436
- rb_define_singleton_method(cXMLHTMLParser, "io", ruby_xml_html_parser_new_io, 1);
437
- */
438
- rb_define_singleton_method(cXMLHTMLParser, "new", ruby_xml_html_parser_new, 0);
439
- rb_define_singleton_method(cXMLHTMLParser, "string", ruby_xml_html_parser_new_string, 1);
440
- /*
441
- rb_define_method(cXMLHTMLParser, "filename", ruby_xml_html_parser_filename_get, 0);
442
- rb_define_method(cXMLHTMLParser, "filename=", ruby_xml_html_parser_filename_set, 1);
443
- rb_define_method(cXMLHTMLParser, "io", ruby_xml_html_parser_io_get, 0);
444
- rb_define_method(cXMLHTMLParser, "io=", ruby_xml_html_parser_io_set, 1);
445
- */
446
- rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
447
- rb_define_method(cXMLHTMLParser, "parser_context", ruby_xml_html_parser_context_get, 0);
448
- rb_define_method(cXMLHTMLParser, "string", ruby_xml_html_parser_str_get, 0);
449
- rb_define_method(cXMLHTMLParser, "string=", ruby_xml_html_parser_str_set, 1);
450
- }
1
+ /* $Id: ruby_xml_html_parser.c 300 2008-07-01 19:14:15Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #include "ruby_libxml.h"
6
+
7
+ VALUE cXMLHTMLParser;
8
+
9
+ //static int
10
+ //ctxtRead(FILE *f, char * buf, int len) {
11
+ // return(fread(buf, 1, len, f));
12
+ //}
13
+
14
+
15
+ /*
16
+ * call-seq:
17
+ * parser.filename => "filename"
18
+ *
19
+ * Obtain the filename this parser will read from.
20
+ */
21
+ /*
22
+ VALUE
23
+ ruby_xml_html_parser_filename_get(VALUE self) {
24
+ ruby_xml_html_parser *rxp;
25
+ rx_file_data *data;
26
+
27
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
28
+ if (rxp->data == NULL)
29
+ return(Qnil);
30
+
31
+ if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE)
32
+ return(Qnil);
33
+
34
+ data = (rx_file_data *)rxp->data;
35
+ return(data->filename);
36
+ }
37
+ */
38
+
39
+ /*
40
+ * call-seq:
41
+ * parser.filename = "filename"
42
+ *
43
+ * Set the filename this parser will read from.
44
+ */
45
+ /*
46
+ VALUE
47
+ ruby_xml_html_parser_filename_set(VALUE self, VALUE filename) {
48
+ ruby_xml_html_parser *rxp;
49
+ ruby_xml_parser_context *rxpc;
50
+ rx_file_data *data;
51
+
52
+ Check_Type(filename, T_STRING);
53
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
54
+
55
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
56
+ if (rxp->data != NULL)
57
+ rb_fatal("crap, this should be null");
58
+
59
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
60
+ data = ALLOC(rx_file_data);
61
+ rxp->data = data;
62
+ } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE) {
63
+ return(Qnil);
64
+ }
65
+
66
+ rxp->ctxt = ruby_xml_parser_context_new3();
67
+ data = (rx_file_data *)rxp->data;
68
+ data->filename = filename;
69
+
70
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
71
+ rxpc->ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename));
72
+ if (rxpc->ctxt == NULL)
73
+ rb_sys_fail(StringValuePtr(filename));
74
+
75
+ return(data->filename);
76
+ }
77
+ */
78
+
79
+ void
80
+ ruby_xml_html_parser_free(ruby_xml_html_parser *rxp) {
81
+ void *data;
82
+
83
+ switch(rxp->data_type) {
84
+ case RUBY_LIBXML_SRC_TYPE_NULL:
85
+ break;
86
+ case RUBY_LIBXML_SRC_TYPE_FILE:
87
+ data = (void *)(rx_file_data *)rxp->data;
88
+ ruby_xfree((rx_file_data *)data);
89
+ break;
90
+ case RUBY_LIBXML_SRC_TYPE_STRING:
91
+ data = (void *)(rx_string_data *)rxp->data;
92
+ ruby_xfree((rx_string_data *)data);
93
+ break;
94
+ case RUBY_LIBXML_SRC_TYPE_IO:
95
+ data = (void *)(rx_io_data *)rxp->data;
96
+ ruby_xfree((rx_io_data *)data);
97
+ break;
98
+ default:
99
+ rb_fatal("Unknown data type, %d", rxp->data_type);
100
+ }
101
+
102
+ ruby_xfree(rxp);
103
+ }
104
+
105
+
106
+ /*
107
+ * call-seq:
108
+ * parser.io => IO
109
+ *
110
+ * Obtain the IO instance this parser works with.
111
+ */
112
+ /*
113
+ VALUE
114
+ ruby_xml_html_parser_io_get(VALUE self, VALUE io) {
115
+ ruby_xml_html_parser *rxp;
116
+ rx_io_data *data;
117
+
118
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
119
+
120
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL ||
121
+ rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO ||
122
+ rxp->data == NULL)
123
+ return(Qnil);
124
+
125
+ data = (rx_io_data *)rxp->data;
126
+
127
+ return(data->io);
128
+ }
129
+ */
130
+
131
+ /*
132
+ * call-seq:
133
+ * parser.io = IO
134
+ *
135
+ * Set the IO instance this parser works with.
136
+ */
137
+ /*
138
+ VALUE
139
+ ruby_xml_html_parser_io_set(VALUE self, VALUE io) {
140
+ ruby_xml_html_parser *rxp;
141
+ ruby_xml_parser_context *rxpc;
142
+ rx_io_data *data;
143
+ OpenFile *fptr;
144
+ FILE *f;
145
+
146
+ if (!rb_obj_is_kind_of(io, rb_cIO))
147
+ rb_raise(rb_eTypeError, "need an IO object");
148
+
149
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
150
+
151
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
152
+ if (rxp->data != NULL)
153
+ rb_fatal("crap, this should be null");
154
+
155
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
156
+ data = ALLOC(rx_io_data);
157
+ rxp->data = data;
158
+ } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO) {
159
+ return(Qnil);
160
+ }
161
+
162
+ rxp->ctxt = ruby_xml_parser_context_new3();
163
+ data = (rx_io_data *)rxp->data;
164
+ data->io = io;
165
+
166
+ GetOpenFile(io, fptr);
167
+ rb_io_check_readable(fptr);
168
+ f = GetWriteFile(fptr);
169
+
170
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
171
+ rxpc->ctxt = htmlCreateIOParserCtxt(NULL, NULL,
172
+ (xmlInputReadCallback) ctxtRead,
173
+ NULL, f, XML_CHAR_ENCODING_NONE);
174
+ if (NIL_P(rxpc->ctxt))
175
+ rb_sys_fail(0);
176
+
177
+ return(data->io);
178
+ }
179
+ */
180
+
181
+ void
182
+ ruby_xml_html_parser_mark(ruby_xml_html_parser *rxp) {
183
+ if (rxp == NULL) return;
184
+ if (!NIL_P(rxp->ctxt)) rb_gc_mark(rxp->ctxt);
185
+
186
+ ruby_xml_state_marker();
187
+
188
+ switch(rxp->data_type) {
189
+ case RUBY_LIBXML_SRC_TYPE_NULL:
190
+ break;
191
+ case RUBY_LIBXML_SRC_TYPE_FILE:
192
+ if (!NIL_P(((rx_file_data *)rxp->data)->filename))
193
+ rb_gc_mark(((rx_file_data *)rxp->data)->filename);
194
+ break;
195
+ case RUBY_LIBXML_SRC_TYPE_STRING:
196
+ if (!NIL_P(((rx_string_data *)rxp->data)->str))
197
+ rb_gc_mark(((rx_string_data *)rxp->data)->str);
198
+ break;
199
+ case RUBY_LIBXML_SRC_TYPE_IO:
200
+ if (!NIL_P(((rx_io_data *)rxp->data)->io))
201
+ rb_gc_mark(((rx_io_data *)rxp->data)->io);
202
+ break;
203
+ default:
204
+ rb_fatal("unknown datatype: %d", rxp->data_type);
205
+ }
206
+ }
207
+
208
+
209
+ /*
210
+ * call-seq:
211
+ * XML::HTMLParser.new => parser
212
+ *
213
+ * Create a new parser instance with no pre-determined source.
214
+ */
215
+ VALUE
216
+ ruby_xml_html_parser_new(VALUE class) {
217
+ ruby_xml_html_parser *rxp;
218
+
219
+ rxp = ALLOC(ruby_xml_html_parser);
220
+ rxp->ctxt = Qnil;
221
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
222
+ rxp->data = NULL;
223
+ rxp->parsed = 0;
224
+
225
+ return(Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
226
+ ruby_xml_html_parser_free, rxp));
227
+ }
228
+
229
+
230
+ /*
231
+ * call-seq:
232
+ * XML::HTMLParser.file => parser
233
+ *
234
+ * Create a new parser instance that will read the specified file.
235
+ */
236
+ /*
237
+ VALUE
238
+ ruby_xml_html_parser_new_file(VALUE class, VALUE filename) {
239
+ VALUE obj;
240
+ ruby_xml_html_parser *rxp;
241
+ rx_file_data *data;
242
+
243
+ obj = ruby_xml_html_parser_new(class);
244
+ Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
245
+
246
+ data = ALLOC(rx_file_data);
247
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
248
+ rxp->data = data;
249
+
250
+ ruby_xml_html_parser_filename_set(obj, filename);
251
+
252
+ return(obj);
253
+ }
254
+ */
255
+
256
+ /*
257
+ * call-seq:
258
+ * XML::HTMLParser.io => parser
259
+ *
260
+ * Create a new parser instance that will read from the
261
+ * specified IO object.
262
+ */
263
+ /*
264
+ VALUE
265
+ ruby_xml_html_parser_new_io(VALUE class, VALUE io) {
266
+ VALUE obj;
267
+ ruby_xml_html_parser *rxp;
268
+ rx_io_data *data;
269
+
270
+ obj = ruby_xml_html_parser_new(class);
271
+ Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
272
+
273
+ data = ALLOC(rx_io_data);
274
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
275
+ rxp->data = data;
276
+
277
+ ruby_xml_html_parser_io_set(obj, io);
278
+
279
+ return(obj);
280
+ }
281
+ */
282
+
283
+ /*
284
+ * call-seq:
285
+ * XML::HTMLParser.string => parser
286
+ *
287
+ * Create a new parser instance that will parse the given
288
+ * string.
289
+ */
290
+ VALUE
291
+ ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
292
+ VALUE obj;
293
+ ruby_xml_html_parser *rxp;
294
+ rx_string_data *data;
295
+
296
+ obj = ruby_xml_html_parser_new(class);
297
+ Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
298
+
299
+ data = ALLOC(rx_string_data);
300
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
301
+ rxp->data = data;
302
+
303
+ ruby_xml_html_parser_str_set(obj, str);
304
+
305
+ return(obj);
306
+ }
307
+
308
+
309
+ /*
310
+ * call-seq:
311
+ * parser.parse => document
312
+ *
313
+ * Parse the input HTML and create an XML::Document with
313
+ * its content. If an error occurs, XML::Parser::ParseError
315
+ * is thrown.
316
+ */
317
+ VALUE
318
+ ruby_xml_html_parser_parse(VALUE self) {
319
+ ruby_xml_html_parser *rxp;
320
+ ruby_xml_parser_context *rxpc;
321
+ htmlDocPtr xdp;
322
+ VALUE doc;
323
+
324
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
325
+
326
+ switch (rxp->data_type) {
327
+ case RUBY_LIBXML_SRC_TYPE_NULL:
328
+ return(Qnil);
329
+ case RUBY_LIBXML_SRC_TYPE_STRING:
330
+ //case RUBY_LIBXML_SRC_TYPE_FILE:
331
+ //case RUBY_LIBXML_SRC_TYPE_IO:
332
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
333
+
334
+ /* don't check return values here, the HTML parser returns errors
335
+ * but still allows the resulting tree to be used.
336
+ */
337
+ htmlParseDocument(rxpc->ctxt);
338
+ xdp = rxpc->ctxt->myDoc;
339
+ rxp->parsed = 1;
340
+
341
+ doc = ruby_xml_document_wrap(xdp);
342
+ break;
343
+ default:
344
+ rb_fatal("Unknown data type, %d", rxp->data_type);
345
+ }
346
+
347
+ return(doc);
348
+ }
349
+
350
+
351
+ /*
352
+ * call-seq:
353
+ * parser.context => context
354
+ *
355
+ * Obtain the XML::Parser::Context associated with this
356
+ * parser.
357
+ */
358
+ VALUE
359
+ ruby_xml_html_parser_context_get(VALUE self) {
360
+ ruby_xml_html_parser *rxp;
361
+
362
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
363
+ if (rxp->ctxt == Qnil)
364
+ return(Qnil);
365
+ else
366
+ return(rxp->ctxt);
367
+ }
368
+
369
+
370
+ /*
371
+ * call-seq:
372
+ * parser.string => "string"
373
+ *
374
+ * Obtain the string this parser works with.
375
+ */
376
+ VALUE
377
+ ruby_xml_html_parser_str_get(VALUE self) {
378
+ ruby_xml_html_parser *rxp;
379
+ rx_string_data *data;
380
+
381
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
382
+ if (rxp->data == NULL || rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING)
383
+ return(Qnil);
384
+
385
+ data = (rx_string_data *)rxp->data;
386
+ return(data->str);
387
+ }
388
+
389
+
390
+ /*
391
+ * call-seq:
392
+ * parser.string = "string"
393
+ *
394
+ * Set the string this parser works with.
395
+ */
396
+ VALUE
397
+ ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
398
+ ruby_xml_html_parser *rxp;
399
+ ruby_xml_parser_context *rxpc;
400
+ rx_string_data *data;
401
+
402
+ Check_Type(str, T_STRING);
403
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
404
+
405
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
406
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
407
+ data = ALLOC(rx_string_data);
408
+ rxp->data = data;
409
+ } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
410
+ return(Qnil);
411
+ }
412
+
413
+ rxp->ctxt = ruby_xml_parser_context_new3();
414
+ data = (rx_string_data *)rxp->data;
415
+ data->str = str;
416
+
417
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
418
+ rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));
419
+
420
+ return(data->str);
421
+ }
422
+
423
+
424
+ // Rdoc needs to know
425
+ #ifdef RDOC_NEVER_DEFINED
426
+ mXML = rb_define_module("XML");
427
+ #endif
428
+
429
+ void
430
+ ruby_init_html_parser(void) {
431
+ cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
432
+
433
+ /*
434
+ rb_define_singleton_method(cXMLHTMLParser, "file", ruby_xml_html_parser_new_file, 1);
435
+ rb_define_singleton_method(cXMLHTMLParser, "io", ruby_xml_html_parser_new_io, 1);
436
+ */
437
+ rb_define_singleton_method(cXMLHTMLParser, "new", ruby_xml_html_parser_new, 0);
438
+ rb_define_singleton_method(cXMLHTMLParser, "string", ruby_xml_html_parser_new_string, 1);
439
+ /*
440
+ rb_define_method(cXMLHTMLParser, "filename", ruby_xml_html_parser_filename_get, 0);
441
+ rb_define_method(cXMLHTMLParser, "filename=", ruby_xml_html_parser_filename_set, 1);
442
+ rb_define_method(cXMLHTMLParser, "io", ruby_xml_html_parser_io_get, 0);
443
+ rb_define_method(cXMLHTMLParser, "io=", ruby_xml_html_parser_io_set, 1);
444
+ */
445
+ rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
446
+ rb_define_method(cXMLHTMLParser, "context", ruby_xml_html_parser_context_get, 0);
447
+ rb_define_method(cXMLHTMLParser, "string", ruby_xml_html_parser_str_get, 0);
448
+ rb_define_method(cXMLHTMLParser, "string=", ruby_xml_html_parser_str_set, 1);
449
+ }