libxml-ruby 0.5.4 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (139) hide show
  1. data/LICENSE +23 -23
  2. data/README +144 -144
  3. data/ext/libxml/extconf.rb +26 -27
  4. data/ext/libxml/libxml.c +7 -37
  5. data/ext/libxml/{libxml.h → ruby_libxml.h} +93 -98
  6. data/ext/libxml/ruby_xml_attr.c +405 -387
  7. data/ext/libxml/ruby_xml_attr.h +19 -18
  8. data/ext/libxml/ruby_xml_document.c +1111 -1115
  9. data/ext/libxml/ruby_xml_document.h +27 -24
  10. data/ext/libxml/ruby_xml_dtd.c +168 -168
  11. data/ext/libxml/ruby_xml_html_parser.c +449 -450
  12. data/ext/libxml/ruby_xml_html_parser.h +1 -1
  13. data/ext/libxml/ruby_xml_input_cbg.c +158 -158
  14. data/ext/libxml/ruby_xml_node.c +2410 -2395
  15. data/ext/libxml/ruby_xml_node.h +1 -1
  16. data/ext/libxml/ruby_xml_node_set.c +170 -170
  17. data/ext/libxml/ruby_xml_node_set.h +1 -1
  18. data/ext/libxml/ruby_xml_ns.c +153 -153
  19. data/ext/libxml/ruby_xml_ns.h +1 -1
  20. data/ext/libxml/ruby_xml_parser.c +1425 -1422
  21. data/ext/libxml/ruby_xml_parser.h +1 -1
  22. data/ext/libxml/ruby_xml_parser_context.c +750 -716
  23. data/ext/libxml/ruby_xml_parser_context.h +1 -1
  24. data/ext/libxml/ruby_xml_reader.c +900 -896
  25. data/ext/libxml/ruby_xml_sax_parser.c +485 -485
  26. data/ext/libxml/ruby_xml_sax_parser.h +1 -1
  27. data/ext/libxml/ruby_xml_schema.c +146 -142
  28. data/ext/libxml/ruby_xml_state.c +5 -6
  29. data/ext/libxml/ruby_xml_state.h +1 -0
  30. data/ext/libxml/ruby_xml_tree.c +43 -43
  31. data/ext/libxml/ruby_xml_tree.h +1 -1
  32. data/ext/libxml/ruby_xml_xinclude.c +20 -20
  33. data/ext/libxml/ruby_xml_xinclude.h +1 -1
  34. data/ext/libxml/ruby_xml_xpath.c +243 -252
  35. data/ext/libxml/ruby_xml_xpath.h +1 -1
  36. data/ext/libxml/ruby_xml_xpath_context.c +118 -118
  37. data/ext/libxml/ruby_xml_xpath_context.h +1 -1
  38. data/ext/libxml/ruby_xml_xpath_object.c +43 -29
  39. data/ext/libxml/ruby_xml_xpath_object.h +0 -1
  40. data/ext/libxml/ruby_xml_xpointer.c +100 -100
  41. data/ext/libxml/ruby_xml_xpointer.h +1 -1
  42. data/ext/libxml/ruby_xml_xpointer_context.c +21 -21
  43. data/ext/libxml/ruby_xml_xpointer_context.h +1 -1
  44. data/ext/libxml/sax_parser_callbacks.inc +213 -213
  45. data/ext/libxml/version.h +9 -9
  46. data/lib/libxml.rb +24 -3
  47. data/mingw/libiconv-2.dll +0 -0
  48. data/mingw/libxml2-2.dll +0 -0
  49. data/mingw/libxml_ruby.so +0 -0
  50. data/mingw/mingw.rake +36 -0
  51. data/test/dtd-test.rb +24 -24
  52. data/test/etc_doc_to_s.rb +1 -3
  53. data/test/ets_copy_bug.rb +21 -21
  54. data/test/ets_copy_bug2.rb +32 -32
  55. data/test/ets_copy_bug3.rb +38 -0
  56. data/test/ets_doc_file.rb +1 -0
  57. data/test/{model/default_validation_bug.rb → gc.log} +0 -0
  58. data/test/merge_bug.rb +55 -55
  59. data/test/schema-test.rb +74 -74
  60. data/test/tc_well_formed.rb +11 -0
  61. data/test/tc_xml_document.rb +52 -52
  62. data/test/tc_xml_document_write.rb +24 -24
  63. data/test/tc_xml_document_write2.rb +54 -54
  64. data/test/tc_xml_document_write3.rb +96 -96
  65. data/test/tc_xml_html_parser.rb +63 -63
  66. data/test/tc_xml_node.rb +59 -59
  67. data/test/tc_xml_node2.rb +25 -25
  68. data/test/tc_xml_node3.rb +27 -27
  69. data/test/tc_xml_node4.rb +86 -86
  70. data/test/tc_xml_node5.rb +52 -52
  71. data/test/tc_xml_node6.rb +27 -27
  72. data/test/tc_xml_node7.rb +35 -35
  73. data/test/tc_xml_node8.rb +32 -32
  74. data/test/tc_xml_node9.rb +32 -32
  75. data/test/tc_xml_node_set.rb +24 -24
  76. data/test/tc_xml_node_set2.rb +37 -37
  77. data/test/tc_xml_node_xlink.rb +28 -28
  78. data/test/tc_xml_parser.rb +190 -178
  79. data/test/tc_xml_parser2.rb +16 -17
  80. data/test/tc_xml_parser3.rb +23 -23
  81. data/test/tc_xml_parser4.rb +33 -33
  82. data/test/tc_xml_parser5.rb +27 -27
  83. data/test/tc_xml_parser6.rb +23 -23
  84. data/test/tc_xml_parser7.rb +28 -28
  85. data/test/tc_xml_parser8.rb +32 -32
  86. data/test/tc_xml_parser9.rb +11 -0
  87. data/test/tc_xml_parser_context.rb +88 -88
  88. data/test/tc_xml_reader.rb +112 -109
  89. data/test/tc_xml_sax_parser.rb +104 -94
  90. data/test/tc_xml_sax_parser2.rb +51 -0
  91. data/test/tc_xml_xinclude.rb +30 -30
  92. data/test/tc_xml_xpath.rb +38 -38
  93. data/test/tc_xml_xpath2.rb +14 -0
  94. data/test/tc_xml_xpointer.rb +78 -78
  95. data/vc/libxml.sln +20 -0
  96. data/vc/libxml.vcproj +389 -0
  97. data/work/Rakefile +247 -0
  98. data/work/task/make +26 -0
  99. data/work/task/memory +37 -0
  100. data/work/task/rdoc +39 -0
  101. data/work/task/setup +1616 -0
  102. data/work/task/test +29 -0
  103. data/work/test/ets_runner.rb +33 -0
  104. data/work/test/libxml_test.rb +3 -0
  105. data/work/test/runner.rb +0 -0
  106. data/work/test/runner_ets.rb +33 -0
  107. data/work/vc/debug/libxml.exp +0 -0
  108. data/work/vc/debug/libxml.ilk +0 -0
  109. data/work/vc/debug/libxml.lib +0 -0
  110. data/work/vc/debug/libxml.pdb +0 -0
  111. data/work/vc/debug/libxml.so +0 -0
  112. metadata +158 -189
  113. data/MANIFEST +0 -138
  114. data/NOTES +0 -9
  115. data/Rakefile +0 -38
  116. data/TODO +0 -75
  117. data/VERSION +0 -1
  118. data/log/Changelog-0.txt +0 -426
  119. data/log/Changelog.txt +0 -435
  120. data/meta/project.yaml +0 -27
  121. data/meta/unixname +0 -1
  122. data/setup.rb +0 -1472
  123. data/site/css/normal.css +0 -182
  124. data/site/img/raze-tiny.png +0 -0
  125. data/site/img/red-cube.jpg +0 -0
  126. data/site/img/xml-ruby.png +0 -0
  127. data/site/index.xml +0 -43
  128. data/site/install.xml +0 -77
  129. data/site/layout.rhtml +0 -38
  130. data/site/layout.xsl +0 -67
  131. data/site/license.xml +0 -32
  132. data/site/log/changelog.xml +0 -1324
  133. data/site/log/changelog.xsl +0 -42
  134. data/test/model/merge_bug_data.xml +0 -58
  135. data/test/model/rubynet.xml +0 -78
  136. data/test/model/rubynet_project +0 -13
  137. data/test/model/saxtest.xml +0 -5
  138. data/test/model/simple.xml +0 -7
  139. data/test/model/xinclude.xml +0 -5
@@ -1,450 +1,449 @@
1
- /* $Id: ruby_xml_html_parser.c 225 2007-12-07 04:58:09Z transami $ */
2
-
3
- /* Please see the LICENSE file for copyright and distribution information */
4
-
5
- #include "libxml.h"
6
-
7
- VALUE cXMLHTMLParser;
8
-
9
- //static int
10
- //ctxtRead(FILE *f, char * buf, int len) {
11
- // return(fread(buf, 1, len, f));
12
- //}
13
-
14
-
15
- /*
16
- * call-seq:
17
- * parser.filename => "filename"
18
- *
19
- * Obtain the filename this parser will read from.
20
- */
21
- /*
22
- VALUE
23
- ruby_xml_html_parser_filename_get(VALUE self) {
24
- ruby_xml_html_parser *rxp;
25
- rx_file_data *data;
26
-
27
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
28
- if (rxp->data == NULL)
29
- return(Qnil);
30
-
31
- if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE)
32
- return(Qnil);
33
-
34
- data = (rx_file_data *)rxp->data;
35
- return(data->filename);
36
- }
37
- */
38
-
39
- /*
40
- * call-seq:
41
- * parser.filename = "filename"
42
- *
43
- * Set the filename this parser will read from.
44
- */
45
- /*
46
- VALUE
47
- ruby_xml_html_parser_filename_set(VALUE self, VALUE filename) {
48
- ruby_xml_html_parser *rxp;
49
- ruby_xml_parser_context *rxpc;
50
- rx_file_data *data;
51
-
52
- Check_Type(filename, T_STRING);
53
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
54
-
55
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
56
- if (rxp->data != NULL)
57
- rb_fatal("crap, this should be null");
58
-
59
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
60
- data = ALLOC(rx_file_data);
61
- rxp->data = data;
62
- } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE) {
63
- return(Qnil);
64
- }
65
-
66
- rxp->ctxt = ruby_xml_parser_context_new3();
67
- data = (rx_file_data *)rxp->data;
68
- data->filename = filename;
69
-
70
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
71
- rxpc->ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename));
72
- if (rxpc->ctxt == NULL)
73
- rb_sys_fail(StringValuePtr(filename));
74
-
75
- return(data->filename);
76
- }
77
- */
78
-
79
- void
80
- ruby_xml_html_parser_free(ruby_xml_html_parser *rxp) {
81
- void *data;
82
-
83
- switch(rxp->data_type) {
84
- case RUBY_LIBXML_SRC_TYPE_NULL:
85
- break;
86
- case RUBY_LIBXML_SRC_TYPE_FILE:
87
- data = (void *)(rx_file_data *)rxp->data;
88
- free((rx_file_data *)data);
89
- break;
90
- case RUBY_LIBXML_SRC_TYPE_STRING:
91
- data = (void *)(rx_string_data *)rxp->data;
92
- free((rx_string_data *)data);
93
- break;
94
- case RUBY_LIBXML_SRC_TYPE_IO:
95
- data = (void *)(rx_io_data *)rxp->data;
96
- free((rx_io_data *)data);
97
- break;
98
- default:
99
- rb_fatal("Unknown data type, %d", rxp->data_type);
100
- }
101
-
102
- free(rxp);
103
- }
104
-
105
-
106
- /*
107
- * call-seq:
108
- * parser.io => IO
109
- *
110
- * Obtain the IO instance this parser works with.
111
- */
112
- /*
113
- VALUE
114
- ruby_xml_html_parser_io_get(VALUE self, VALUE io) {
115
- ruby_xml_html_parser *rxp;
116
- rx_io_data *data;
117
-
118
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
119
-
120
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL ||
121
- rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO ||
122
- rxp->data == NULL)
123
- return(Qnil);
124
-
125
- data = (rx_io_data *)rxp->data;
126
-
127
- return(data->io);
128
- }
129
- */
130
-
131
- /*
132
- * call-seq:
133
- * parser.io = IO
134
- *
135
- * Set the IO instance this parser works with.
136
- */
137
- /*
138
- VALUE
139
- ruby_xml_html_parser_io_set(VALUE self, VALUE io) {
140
- ruby_xml_html_parser *rxp;
141
- ruby_xml_parser_context *rxpc;
142
- rx_io_data *data;
143
- OpenFile *fptr;
144
- FILE *f;
145
-
146
- if (!rb_obj_is_kind_of(io, rb_cIO))
147
- rb_raise(rb_eTypeError, "need an IO object");
148
-
149
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
150
-
151
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
152
- if (rxp->data != NULL)
153
- rb_fatal("crap, this should be null");
154
-
155
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
156
- data = ALLOC(rx_io_data);
157
- rxp->data = data;
158
- } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO) {
159
- return(Qnil);
160
- }
161
-
162
- rxp->ctxt = ruby_xml_parser_context_new3();
163
- data = (rx_io_data *)rxp->data;
164
- data->io = io;
165
-
166
- GetOpenFile(io, fptr);
167
- rb_io_check_readable(fptr);
168
- f = GetWriteFile(fptr);
169
-
170
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
171
- rxpc->ctxt = htmlCreateIOParserCtxt(NULL, NULL,
172
- (xmlInputReadCallback) ctxtRead,
173
- NULL, f, XML_CHAR_ENCODING_NONE);
174
- if (NIL_P(rxpc->ctxt))
175
- rb_sys_fail(0);
176
-
177
- return(data->io);
178
- }
179
- */
180
-
181
- void
182
- ruby_xml_html_parser_mark(ruby_xml_html_parser *rxp) {
183
- if (rxp == NULL) return;
184
- if (!NIL_P(rxp->ctxt)) rb_gc_mark(rxp->ctxt);
185
-
186
- ruby_xml_state_marker();
187
-
188
- switch(rxp->data_type) {
189
- case RUBY_LIBXML_SRC_TYPE_NULL:
190
- break;
191
- case RUBY_LIBXML_SRC_TYPE_FILE:
192
- if (!NIL_P(((rx_file_data *)rxp->data)->filename))
193
- rb_gc_mark(((rx_file_data *)rxp->data)->filename);
194
- break;
195
- case RUBY_LIBXML_SRC_TYPE_STRING:
196
- if (!NIL_P(((rx_string_data *)rxp->data)->str))
197
- rb_gc_mark(((rx_string_data *)rxp->data)->str);
198
- break;
199
- case RUBY_LIBXML_SRC_TYPE_IO:
200
- if (!NIL_P(((rx_io_data *)rxp->data)->io))
201
- rb_gc_mark(((rx_io_data *)rxp->data)->io);
202
- break;
203
- default:
204
- rb_fatal("unknown datatype: %d", rxp->data_type);
205
- }
206
- }
207
-
208
-
209
- /*
210
- * call-seq:
211
- * XML::HTMLParser.new => parser
212
- *
213
- * Create a new parser instance with no pre-determined source.
214
- */
215
- VALUE
216
- ruby_xml_html_parser_new(VALUE class) {
217
- ruby_xml_html_parser *rxp;
218
-
219
- rxp = ALLOC(ruby_xml_html_parser);
220
- rxp->ctxt = Qnil;
221
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
222
- rxp->data = NULL;
223
- rxp->parsed = 0;
224
-
225
- return(Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
226
- ruby_xml_html_parser_free, rxp));
227
- }
228
-
229
-
230
- /*
231
- * call-seq:
232
- * XML::HTMLParser.file => parser
233
- *
234
- * Create a new parser instance that will read the specified file.
235
- */
236
- /*
237
- VALUE
238
- ruby_xml_html_parser_new_file(VALUE class, VALUE filename) {
239
- VALUE obj;
240
- ruby_xml_html_parser *rxp;
241
- rx_file_data *data;
242
-
243
- obj = ruby_xml_html_parser_new(class);
244
- Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
245
-
246
- data = ALLOC(rx_file_data);
247
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
248
- rxp->data = data;
249
-
250
- ruby_xml_html_parser_filename_set(obj, filename);
251
-
252
- return(obj);
253
- }
254
- */
255
-
256
- /*
257
- * call-seq:
258
- * XML::HTMLParser.io => parser
259
- *
260
- * Create a new parser instance that will read from the
261
- * specified IO object.
262
- */
263
- /*
264
- VALUE
265
- ruby_xml_html_parser_new_io(VALUE class, VALUE io) {
266
- VALUE obj;
267
- ruby_xml_html_parser *rxp;
268
- rx_io_data *data;
269
-
270
- obj = ruby_xml_html_parser_new(class);
271
- Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
272
-
273
- data = ALLOC(rx_io_data);
274
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
275
- rxp->data = data;
276
-
277
- ruby_xml_html_parser_io_set(obj, io);
278
-
279
- return(obj);
280
- }
281
- */
282
-
283
- /*
284
- * call-seq:
285
- * XML::HTMLParser.string => parser
286
- *
287
- * Create a new parser instance that will parse the given
288
- * string.
289
- */
290
- VALUE
291
- ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
292
- VALUE obj;
293
- ruby_xml_html_parser *rxp;
294
- rx_string_data *data;
295
-
296
- obj = ruby_xml_html_parser_new(class);
297
- Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
298
-
299
- data = ALLOC(rx_string_data);
300
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
301
- rxp->data = data;
302
-
303
- ruby_xml_html_parser_str_set(obj, str);
304
-
305
- return(obj);
306
- }
307
-
308
-
309
- /*
310
- * call-seq:
311
- * parser.parse => document
312
- *
313
- * Parse the input XML and create an XML::Document with
314
- * it's content. If an error occurs, XML::Parser::ParseError
315
- * is thrown.
316
- */
317
- VALUE
318
- ruby_xml_html_parser_parse(VALUE self) {
319
- ruby_xml_document_t *rxd;
320
- ruby_xml_html_parser *rxp;
321
- ruby_xml_parser_context *rxpc;
322
- htmlDocPtr xdp;
323
- VALUE doc;
324
-
325
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
326
-
327
- switch (rxp->data_type) {
328
- case RUBY_LIBXML_SRC_TYPE_NULL:
329
- return(Qnil);
330
- case RUBY_LIBXML_SRC_TYPE_STRING:
331
- //case RUBY_LIBXML_SRC_TYPE_FILE:
332
- //case RUBY_LIBXML_SRC_TYPE_IO:
333
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
334
-
335
- /* don't check return values here, the HTML parser returns errors
336
- * but still allows the resulting tree to be used.
337
- */
338
- htmlParseDocument(rxpc->ctxt);
339
- xdp = rxpc->ctxt->myDoc;
340
- rxp->parsed = 1;
341
-
342
- doc = ruby_xml_document_wrap(cXMLDocument, xdp);
343
- break;
344
- default:
345
- rb_fatal("Unknown data type, %d", rxp->data_type);
346
- }
347
-
348
- return(doc);
349
- }
350
-
351
-
352
- /*
353
- * call-seq:
354
- * parser.context => context
355
- *
356
- * Obtain the XML::Parser::Context associated with this
357
- * parser.
358
- */
359
- VALUE
360
- ruby_xml_html_parser_context_get(VALUE self) {
361
- ruby_xml_html_parser *rxp;
362
-
363
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
364
- if (rxp->ctxt == Qnil)
365
- return(Qnil);
366
- else
367
- return(rxp->ctxt);
368
- }
369
-
370
-
371
- /*
372
- * call-seq:
373
- * parser.string => "string"
374
- *
375
- * Obtain the string this parser works with.
376
- */
377
- VALUE
378
- ruby_xml_html_parser_str_get(VALUE self) {
379
- ruby_xml_html_parser *rxp;
380
- rx_string_data *data;
381
-
382
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
383
- if (rxp->data == NULL || rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING)
384
- return(Qnil);
385
-
386
- data = (rx_string_data *)rxp->data;
387
- return(data->str);
388
- }
389
-
390
-
391
- /*
392
- * call-seq:
393
- * parser.string = "string"
394
- *
395
- * Set the string this parser works with.
396
- */
397
- VALUE
398
- ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
399
- ruby_xml_html_parser *rxp;
400
- ruby_xml_parser_context *rxpc;
401
- rx_string_data *data;
402
-
403
- Check_Type(str, T_STRING);
404
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
405
-
406
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
407
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
408
- data = ALLOC(rx_string_data);
409
- rxp->data = data;
410
- } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
411
- return(Qnil);
412
- }
413
-
414
- rxp->ctxt = ruby_xml_parser_context_new3();
415
- data = (rx_string_data *)rxp->data;
416
- data->str = str;
417
-
418
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
419
- rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));
420
-
421
- return(data->str);
422
- }
423
-
424
-
425
- // Rdoc needs to know
426
- #ifdef RDOC_NEVER_DEFINED
427
- mXML = rb_define_module("XML");
428
- #endif
429
-
430
- void
431
- ruby_init_html_parser(void) {
432
- cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
433
-
434
- /*
435
- rb_define_singleton_method(cXMLHTMLParser, "file", ruby_xml_html_parser_new_file, 1);
436
- rb_define_singleton_method(cXMLHTMLParser, "io", ruby_xml_html_parser_new_io, 1);
437
- */
438
- rb_define_singleton_method(cXMLHTMLParser, "new", ruby_xml_html_parser_new, 0);
439
- rb_define_singleton_method(cXMLHTMLParser, "string", ruby_xml_html_parser_new_string, 1);
440
- /*
441
- rb_define_method(cXMLHTMLParser, "filename", ruby_xml_html_parser_filename_get, 0);
442
- rb_define_method(cXMLHTMLParser, "filename=", ruby_xml_html_parser_filename_set, 1);
443
- rb_define_method(cXMLHTMLParser, "io", ruby_xml_html_parser_io_get, 0);
444
- rb_define_method(cXMLHTMLParser, "io=", ruby_xml_html_parser_io_set, 1);
445
- */
446
- rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
447
- rb_define_method(cXMLHTMLParser, "parser_context", ruby_xml_html_parser_context_get, 0);
448
- rb_define_method(cXMLHTMLParser, "string", ruby_xml_html_parser_str_get, 0);
449
- rb_define_method(cXMLHTMLParser, "string=", ruby_xml_html_parser_str_set, 1);
450
- }
1
+ /* $Id: ruby_xml_html_parser.c 300 2008-07-01 19:14:15Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #include "ruby_libxml.h"
6
+
7
+ VALUE cXMLHTMLParser;
8
+
9
+ //static int
10
+ //ctxtRead(FILE *f, char * buf, int len) {
11
+ // return(fread(buf, 1, len, f));
12
+ //}
13
+
14
+
15
+ /*
16
+ * call-seq:
17
+ * parser.filename => "filename"
18
+ *
19
+ * Obtain the filename this parser will read from.
20
+ */
21
+ /*
22
+ VALUE
23
+ ruby_xml_html_parser_filename_get(VALUE self) {
24
+ ruby_xml_html_parser *rxp;
25
+ rx_file_data *data;
26
+
27
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
28
+ if (rxp->data == NULL)
29
+ return(Qnil);
30
+
31
+ if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE)
32
+ return(Qnil);
33
+
34
+ data = (rx_file_data *)rxp->data;
35
+ return(data->filename);
36
+ }
37
+ */
38
+
39
+ /*
40
+ * call-seq:
41
+ * parser.filename = "filename"
42
+ *
43
+ * Set the filename this parser will read from.
44
+ */
45
+ /*
46
+ VALUE
47
+ ruby_xml_html_parser_filename_set(VALUE self, VALUE filename) {
48
+ ruby_xml_html_parser *rxp;
49
+ ruby_xml_parser_context *rxpc;
50
+ rx_file_data *data;
51
+
52
+ Check_Type(filename, T_STRING);
53
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
54
+
55
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
56
+ if (rxp->data != NULL)
57
+ rb_fatal("crap, this should be null");
58
+
59
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
60
+ data = ALLOC(rx_file_data);
61
+ rxp->data = data;
62
+ } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE) {
63
+ return(Qnil);
64
+ }
65
+
66
+ rxp->ctxt = ruby_xml_parser_context_new3();
67
+ data = (rx_file_data *)rxp->data;
68
+ data->filename = filename;
69
+
70
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
71
+ rxpc->ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename));
72
+ if (rxpc->ctxt == NULL)
73
+ rb_sys_fail(StringValuePtr(filename));
74
+
75
+ return(data->filename);
76
+ }
77
+ */
78
+
79
+ void
80
+ ruby_xml_html_parser_free(ruby_xml_html_parser *rxp) {
81
+ void *data;
82
+
83
+ switch(rxp->data_type) {
84
+ case RUBY_LIBXML_SRC_TYPE_NULL:
85
+ break;
86
+ case RUBY_LIBXML_SRC_TYPE_FILE:
87
+ data = (void *)(rx_file_data *)rxp->data;
88
+ ruby_xfree((rx_file_data *)data);
89
+ break;
90
+ case RUBY_LIBXML_SRC_TYPE_STRING:
91
+ data = (void *)(rx_string_data *)rxp->data;
92
+ ruby_xfree((rx_string_data *)data);
93
+ break;
94
+ case RUBY_LIBXML_SRC_TYPE_IO:
95
+ data = (void *)(rx_io_data *)rxp->data;
96
+ ruby_xfree((rx_io_data *)data);
97
+ break;
98
+ default:
99
+ rb_fatal("Unknown data type, %d", rxp->data_type);
100
+ }
101
+
102
+ ruby_xfree(rxp);
103
+ }
104
+
105
+
106
+ /*
107
+ * call-seq:
108
+ * parser.io => IO
109
+ *
110
+ * Obtain the IO instance this parser works with.
111
+ */
112
+ /*
113
+ VALUE
114
+ ruby_xml_html_parser_io_get(VALUE self, VALUE io) {
115
+ ruby_xml_html_parser *rxp;
116
+ rx_io_data *data;
117
+
118
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
119
+
120
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL ||
121
+ rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO ||
122
+ rxp->data == NULL)
123
+ return(Qnil);
124
+
125
+ data = (rx_io_data *)rxp->data;
126
+
127
+ return(data->io);
128
+ }
129
+ */
130
+
131
+ /*
132
+ * call-seq:
133
+ * parser.io = IO
134
+ *
135
+ * Set the IO instance this parser works with.
136
+ */
137
+ /*
138
+ VALUE
139
+ ruby_xml_html_parser_io_set(VALUE self, VALUE io) {
140
+ ruby_xml_html_parser *rxp;
141
+ ruby_xml_parser_context *rxpc;
142
+ rx_io_data *data;
143
+ OpenFile *fptr;
144
+ FILE *f;
145
+
146
+ if (!rb_obj_is_kind_of(io, rb_cIO))
147
+ rb_raise(rb_eTypeError, "need an IO object");
148
+
149
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
150
+
151
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
152
+ if (rxp->data != NULL)
153
+ rb_fatal("crap, this should be null");
154
+
155
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
156
+ data = ALLOC(rx_io_data);
157
+ rxp->data = data;
158
+ } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO) {
159
+ return(Qnil);
160
+ }
161
+
162
+ rxp->ctxt = ruby_xml_parser_context_new3();
163
+ data = (rx_io_data *)rxp->data;
164
+ data->io = io;
165
+
166
+ GetOpenFile(io, fptr);
167
+ rb_io_check_readable(fptr);
168
+ f = GetWriteFile(fptr);
169
+
170
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
171
+ rxpc->ctxt = htmlCreateIOParserCtxt(NULL, NULL,
172
+ (xmlInputReadCallback) ctxtRead,
173
+ NULL, f, XML_CHAR_ENCODING_NONE);
174
+ if (NIL_P(rxpc->ctxt))
175
+ rb_sys_fail(0);
176
+
177
+ return(data->io);
178
+ }
179
+ */
180
+
181
+ void
182
+ ruby_xml_html_parser_mark(ruby_xml_html_parser *rxp) {
183
+ if (rxp == NULL) return;
184
+ if (!NIL_P(rxp->ctxt)) rb_gc_mark(rxp->ctxt);
185
+
186
+ ruby_xml_state_marker();
187
+
188
+ switch(rxp->data_type) {
189
+ case RUBY_LIBXML_SRC_TYPE_NULL:
190
+ break;
191
+ case RUBY_LIBXML_SRC_TYPE_FILE:
192
+ if (!NIL_P(((rx_file_data *)rxp->data)->filename))
193
+ rb_gc_mark(((rx_file_data *)rxp->data)->filename);
194
+ break;
195
+ case RUBY_LIBXML_SRC_TYPE_STRING:
196
+ if (!NIL_P(((rx_string_data *)rxp->data)->str))
197
+ rb_gc_mark(((rx_string_data *)rxp->data)->str);
198
+ break;
199
+ case RUBY_LIBXML_SRC_TYPE_IO:
200
+ if (!NIL_P(((rx_io_data *)rxp->data)->io))
201
+ rb_gc_mark(((rx_io_data *)rxp->data)->io);
202
+ break;
203
+ default:
204
+ rb_fatal("unknown datatype: %d", rxp->data_type);
205
+ }
206
+ }
207
+
208
+
209
+ /*
210
+ * call-seq:
211
+ * XML::HTMLParser.new => parser
212
+ *
213
+ * Create a new parser instance with no pre-determined source.
214
+ */
215
+ VALUE
216
+ ruby_xml_html_parser_new(VALUE class) {
217
+ ruby_xml_html_parser *rxp;
218
+
219
+ rxp = ALLOC(ruby_xml_html_parser);
220
+ rxp->ctxt = Qnil;
221
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
222
+ rxp->data = NULL;
223
+ rxp->parsed = 0;
224
+
225
+ return(Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
226
+ ruby_xml_html_parser_free, rxp));
227
+ }
228
+
229
+
230
+ /*
231
+ * call-seq:
232
+ * XML::HTMLParser.file => parser
233
+ *
234
+ * Create a new parser instance that will read the specified file.
235
+ */
236
+ /*
237
+ VALUE
238
+ ruby_xml_html_parser_new_file(VALUE class, VALUE filename) {
239
+ VALUE obj;
240
+ ruby_xml_html_parser *rxp;
241
+ rx_file_data *data;
242
+
243
+ obj = ruby_xml_html_parser_new(class);
244
+ Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
245
+
246
+ data = ALLOC(rx_file_data);
247
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
248
+ rxp->data = data;
249
+
250
+ ruby_xml_html_parser_filename_set(obj, filename);
251
+
252
+ return(obj);
253
+ }
254
+ */
255
+
256
+ /*
257
+ * call-seq:
258
+ * XML::HTMLParser.io => parser
259
+ *
260
+ * Create a new parser instance that will read from the
261
+ * specified IO object.
262
+ */
263
+ /*
264
+ VALUE
265
+ ruby_xml_html_parser_new_io(VALUE class, VALUE io) {
266
+ VALUE obj;
267
+ ruby_xml_html_parser *rxp;
268
+ rx_io_data *data;
269
+
270
+ obj = ruby_xml_html_parser_new(class);
271
+ Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
272
+
273
+ data = ALLOC(rx_io_data);
274
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
275
+ rxp->data = data;
276
+
277
+ ruby_xml_html_parser_io_set(obj, io);
278
+
279
+ return(obj);
280
+ }
281
+ */
282
+
283
+ /*
284
+ * call-seq:
285
+ * XML::HTMLParser.string => parser
286
+ *
287
+ * Create a new parser instance that will parse the given
288
+ * string.
289
+ */
290
+ VALUE
291
+ ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
292
+ VALUE obj;
293
+ ruby_xml_html_parser *rxp;
294
+ rx_string_data *data;
295
+
296
+ obj = ruby_xml_html_parser_new(class);
297
+ Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
298
+
299
+ data = ALLOC(rx_string_data);
300
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
301
+ rxp->data = data;
302
+
303
+ ruby_xml_html_parser_str_set(obj, str);
304
+
305
+ return(obj);
306
+ }
307
+
308
+
309
+ /*
310
+ * call-seq:
311
+ * parser.parse => document
312
+ *
313
+ * Parse the input XML and create an XML::Document with
314
+ * it's content. If an error occurs, XML::Parser::ParseError
315
+ * is thrown.
316
+ */
317
+ VALUE
318
+ ruby_xml_html_parser_parse(VALUE self) {
319
+ ruby_xml_html_parser *rxp;
320
+ ruby_xml_parser_context *rxpc;
321
+ htmlDocPtr xdp;
322
+ VALUE doc;
323
+
324
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
325
+
326
+ switch (rxp->data_type) {
327
+ case RUBY_LIBXML_SRC_TYPE_NULL:
328
+ return(Qnil);
329
+ case RUBY_LIBXML_SRC_TYPE_STRING:
330
+ //case RUBY_LIBXML_SRC_TYPE_FILE:
331
+ //case RUBY_LIBXML_SRC_TYPE_IO:
332
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
333
+
334
+ /* don't check return values here, the HTML parser returns errors
335
+ * but still allows the resulting tree to be used.
336
+ */
337
+ htmlParseDocument(rxpc->ctxt);
338
+ xdp = rxpc->ctxt->myDoc;
339
+ rxp->parsed = 1;
340
+
341
+ doc = ruby_xml_document_wrap(xdp);
342
+ break;
343
+ default:
344
+ rb_fatal("Unknown data type, %d", rxp->data_type);
345
+ }
346
+
347
+ return(doc);
348
+ }
349
+
350
+
351
+ /*
352
+ * call-seq:
353
+ * parser.context => context
354
+ *
355
+ * Obtain the XML::Parser::Context associated with this
356
+ * parser.
357
+ */
358
+ VALUE
359
+ ruby_xml_html_parser_context_get(VALUE self) {
360
+ ruby_xml_html_parser *rxp;
361
+
362
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
363
+ if (rxp->ctxt == Qnil)
364
+ return(Qnil);
365
+ else
366
+ return(rxp->ctxt);
367
+ }
368
+
369
+
370
+ /*
371
+ * call-seq:
372
+ * parser.string => "string"
373
+ *
374
+ * Obtain the string this parser works with.
375
+ */
376
+ VALUE
377
+ ruby_xml_html_parser_str_get(VALUE self) {
378
+ ruby_xml_html_parser *rxp;
379
+ rx_string_data *data;
380
+
381
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
382
+ if (rxp->data == NULL || rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING)
383
+ return(Qnil);
384
+
385
+ data = (rx_string_data *)rxp->data;
386
+ return(data->str);
387
+ }
388
+
389
+
390
+ /*
391
+ * call-seq:
392
+ * parser.string = "string"
393
+ *
394
+ * Set the string this parser works with.
395
+ */
396
+ VALUE
397
+ ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
398
+ ruby_xml_html_parser *rxp;
399
+ ruby_xml_parser_context *rxpc;
400
+ rx_string_data *data;
401
+
402
+ Check_Type(str, T_STRING);
403
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
404
+
405
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
406
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
407
+ data = ALLOC(rx_string_data);
408
+ rxp->data = data;
409
+ } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
410
+ return(Qnil);
411
+ }
412
+
413
+ rxp->ctxt = ruby_xml_parser_context_new3();
414
+ data = (rx_string_data *)rxp->data;
415
+ data->str = str;
416
+
417
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
418
+ rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));
419
+
420
+ return(data->str);
421
+ }
422
+
423
+
424
+ // Rdoc needs to know
425
+ #ifdef RDOC_NEVER_DEFINED
426
+ mXML = rb_define_module("XML");
427
+ #endif
428
+
429
+ void
430
+ ruby_init_html_parser(void) {
431
+ cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
432
+
433
+ /*
434
+ rb_define_singleton_method(cXMLHTMLParser, "file", ruby_xml_html_parser_new_file, 1);
435
+ rb_define_singleton_method(cXMLHTMLParser, "io", ruby_xml_html_parser_new_io, 1);
436
+ */
437
+ rb_define_singleton_method(cXMLHTMLParser, "new", ruby_xml_html_parser_new, 0);
438
+ rb_define_singleton_method(cXMLHTMLParser, "string", ruby_xml_html_parser_new_string, 1);
439
+ /*
440
+ rb_define_method(cXMLHTMLParser, "filename", ruby_xml_html_parser_filename_get, 0);
441
+ rb_define_method(cXMLHTMLParser, "filename=", ruby_xml_html_parser_filename_set, 1);
442
+ rb_define_method(cXMLHTMLParser, "io", ruby_xml_html_parser_io_get, 0);
443
+ rb_define_method(cXMLHTMLParser, "io=", ruby_xml_html_parser_io_set, 1);
444
+ */
445
+ rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
446
+ rb_define_method(cXMLHTMLParser, "context", ruby_xml_html_parser_context_get, 0);
447
+ rb_define_method(cXMLHTMLParser, "string", ruby_xml_html_parser_str_get, 0);
448
+ rb_define_method(cXMLHTMLParser, "string=", ruby_xml_html_parser_str_set, 1);
449
+ }