libxml-ruby 0.8.1 → 0.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (161)
  1. data/CHANGES +21 -0
  2. data/RAKEFILE +213 -0
  3. data/README +32 -11
  4. data/doc/rdoc/classes/LibXML.html +4 -0
  5. data/doc/rdoc/classes/LibXML/XML.html +7 -0
  6. data/doc/rdoc/classes/LibXML/XML/Attr.html +187 -177
  7. data/doc/rdoc/classes/LibXML/XML/Attributes.html +62 -61
  8. data/doc/rdoc/classes/LibXML/XML/Document.html +318 -254
  9. data/doc/rdoc/classes/LibXML/XML/Dtd.html +62 -8
  10. data/doc/rdoc/classes/LibXML/XML/Error.html +6 -0
  11. data/doc/rdoc/classes/LibXML/XML/HTMLParser.html +59 -41
  12. data/doc/rdoc/classes/LibXML/XML/InputCallbacks.html +12 -6
  13. data/doc/rdoc/classes/LibXML/XML/NS.html +65 -50
  14. data/doc/rdoc/classes/LibXML/XML/Node.html +671 -630
  15. data/doc/rdoc/classes/LibXML/XML/Node/FailedModify.html +12 -0
  16. data/doc/rdoc/classes/LibXML/XML/Node/Set.html +64 -60
  17. data/doc/rdoc/classes/LibXML/XML/Node/SetNamespace.html +12 -0
  18. data/doc/rdoc/classes/LibXML/XML/Node/UnknownType.html +12 -0
  19. data/doc/rdoc/classes/LibXML/XML/Parser.html +407 -383
  20. data/doc/rdoc/classes/LibXML/XML/Parser/Context.html +241 -219
  21. data/doc/rdoc/classes/LibXML/XML/Parser/ParseError.html +12 -0
  22. data/doc/rdoc/classes/LibXML/XML/Reader.html +421 -331
  23. data/doc/rdoc/classes/LibXML/XML/RelaxNG.html +237 -0
  24. data/doc/rdoc/classes/LibXML/XML/SaxParser.html +79 -60
  25. data/doc/rdoc/classes/LibXML/XML/Schema.html +109 -22
  26. data/doc/rdoc/classes/LibXML/XML/State.html +6 -0
  27. data/doc/rdoc/classes/LibXML/XML/XInclude.html +6 -0
  28. data/doc/rdoc/classes/LibXML/XML/XInclude/Error.html +6 -0
  29. data/doc/rdoc/classes/LibXML/XML/XPath.html +4 -48
  30. data/doc/rdoc/classes/LibXML/XML/XPath/Context.html +6 -5
  31. data/doc/rdoc/classes/LibXML/XML/XPath/InvalidPath.html +61 -0
  32. data/doc/rdoc/classes/LibXML/XML/XPath/Object.html +12 -8
  33. data/doc/rdoc/classes/LibXML/XML/XPointer.html +14 -8
  34. data/doc/rdoc/classes/LibXML/XML/XPointer/Context.html +6 -0
  35. data/doc/rdoc/classes/LibXML/XML/XPointer/Context/InvalidPath.html +6 -0
  36. data/doc/rdoc/classes/LibXML/XML/XPointer/InvalidExpression.html +13 -0
  37. data/doc/rdoc/created.rid +1 -1
  38. data/doc/rdoc/files/CHANGES.html +38 -10
  39. data/doc/rdoc/files/LICENSE.html +1 -1
  40. data/doc/rdoc/files/README.html +57 -23
  41. data/doc/rdoc/files/VERSION.html +1 -1
  42. data/doc/rdoc/files/ext/libxml/cbg_c.html +1 -1
  43. data/doc/rdoc/files/ext/libxml/libxml_c.html +1 -1
  44. data/doc/rdoc/files/ext/libxml/ruby_xml_attr_c.html +1 -1
  45. data/doc/rdoc/files/ext/libxml/ruby_xml_attributes_c.html +1 -1
  46. data/doc/rdoc/files/ext/libxml/ruby_xml_document_c.html +1 -1
  47. data/doc/rdoc/files/ext/libxml/ruby_xml_dtd_c.html +1 -1
  48. data/doc/rdoc/files/ext/libxml/ruby_xml_html_parser_c.html +1 -1
  49. data/doc/rdoc/files/ext/libxml/ruby_xml_input_cbg_c.html +1 -1
  50. data/doc/rdoc/files/ext/libxml/ruby_xml_node_c.html +1 -1
  51. data/doc/rdoc/files/ext/libxml/ruby_xml_node_set_c.html +1 -1
  52. data/doc/rdoc/files/ext/libxml/ruby_xml_ns_c.html +1 -1
  53. data/doc/rdoc/files/ext/libxml/ruby_xml_parser_c.html +1 -1
  54. data/doc/rdoc/files/ext/libxml/ruby_xml_parser_context_c.html +1 -1
  55. data/doc/rdoc/files/ext/libxml/ruby_xml_reader_c.html +1 -1
  56. data/doc/rdoc/{classes/XML.html → files/ext/libxml/ruby_xml_relaxng_c.html} +16 -35
  57. data/doc/rdoc/files/ext/libxml/ruby_xml_sax_parser_c.html +1 -1
  58. data/doc/rdoc/files/ext/libxml/ruby_xml_schema_c.html +1 -1
  59. data/doc/rdoc/files/ext/libxml/ruby_xml_state_c.html +1 -1
  60. data/doc/rdoc/files/ext/libxml/ruby_xml_xinclude_c.html +1 -1
  61. data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_c.html +1 -1
  62. data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_context_c.html +1 -1
  63. data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_object_c.html +1 -1
  64. data/doc/rdoc/files/ext/libxml/ruby_xml_xpointer_c.html +1 -1
  65. data/doc/rdoc/files/ext/libxml/ruby_xml_xpointer_context_c.html +1 -1
  66. data/doc/rdoc/files/lib/libxml/attr_rb.html +1 -1
  67. data/doc/rdoc/files/lib/libxml/attributes_rb.html +1 -1
  68. data/doc/rdoc/files/lib/libxml/document_rb.html +1 -1
  69. data/doc/rdoc/files/lib/libxml/node_rb.html +1 -1
  70. data/doc/rdoc/files/lib/libxml/node_set_rb.html +1 -1
  71. data/doc/rdoc/files/lib/libxml/parser_options_rb.html +1 -1
  72. data/doc/rdoc/files/lib/libxml/parser_rb.html +1 -1
  73. data/doc/rdoc/files/lib/libxml/properties_rb.html +1 -1
  74. data/doc/rdoc/files/lib/libxml/tree_rb.html +1 -1
  75. data/doc/rdoc/files/lib/libxml_rb.html +2 -2
  76. data/doc/rdoc/files/lib/xml/libxml_rb.html +12 -2
  77. data/doc/rdoc/files/lib/xml_rb.html +134 -0
  78. data/doc/rdoc/fr_class_index.html +1 -1
  79. data/doc/rdoc/fr_file_index.html +2 -0
  80. data/doc/rdoc/fr_method_index.html +349 -344
  81. data/ext/libxml/libxml.c +2 -1
  82. data/ext/libxml/ruby_libxml.h +1 -0
  83. data/ext/libxml/ruby_xml_attr.c +8 -2
  84. data/ext/libxml/ruby_xml_attributes.c +5 -5
  85. data/ext/libxml/ruby_xml_document.c +41 -2
  86. data/ext/libxml/ruby_xml_dtd.c +169 -169
  87. data/ext/libxml/ruby_xml_html_parser.c +449 -449
  88. data/ext/libxml/ruby_xml_input_cbg.c +165 -165
  89. data/ext/libxml/ruby_xml_node.c +1 -1
  90. data/ext/libxml/ruby_xml_node_set.c +172 -172
  91. data/ext/libxml/ruby_xml_ns.c +145 -145
  92. data/ext/libxml/ruby_xml_parser.c +1398 -1398
  93. data/ext/libxml/ruby_xml_parser_context.c +697 -697
  94. data/ext/libxml/ruby_xml_reader.c +44 -0
  95. data/ext/libxml/ruby_xml_reader.h +14 -14
  96. data/ext/libxml/ruby_xml_relaxng.c +97 -0
  97. data/ext/libxml/ruby_xml_relaxng.h +15 -0
  98. data/ext/libxml/ruby_xml_sax_parser.c +486 -486
  99. data/ext/libxml/ruby_xml_schema.c +1 -1
  100. data/ext/libxml/ruby_xml_state.c +4 -4
  101. data/ext/libxml/ruby_xml_xinclude.c +21 -21
  102. data/ext/libxml/ruby_xml_xpath.c +89 -89
  103. data/ext/libxml/ruby_xml_xpath_context.c +283 -283
  104. data/ext/libxml/ruby_xml_xpath_object.c +5 -5
  105. data/ext/libxml/ruby_xml_xpointer.c +104 -104
  106. data/ext/libxml/ruby_xml_xpointer_context.c +22 -22
  107. data/ext/libxml/version.h +2 -2
  108. data/ext/libxml/version.h.rej +17 -0
  109. data/ext/mingw/Rakefile +41 -0
  110. data/lib/libxml.rb +15 -4
  111. data/lib/xml.rb +14 -0
  112. data/lib/xml/libxml.rb +4 -1
  113. data/test/etc_doc_to_s.rb +1 -1
  114. data/test/ets_copy_bug.rb +1 -1
  115. data/test/ets_copy_bug3.rb +1 -1
  116. data/test/ets_doc_file.rb +1 -1
  117. data/test/ets_doc_to_s.rb +1 -1
  118. data/test/ets_node_gc.rb +1 -1
  119. data/test/ets_tsr.rb +1 -1
  120. data/test/model/shiporder.rnc +28 -0
  121. data/test/model/shiporder.rng +86 -0
  122. data/test/{tc_xml_attributes.rb → tc_attributes.rb} +1 -10
  123. data/test/{tc_xml_document.rb → tc_document.rb} +1 -1
  124. data/test/{tc_xml_document_write.rb → tc_document_write.rb} +1 -1
  125. data/test/{tc_xml_dtd.rb → tc_dtd.rb} +1 -1
  126. data/test/{tc_xml_html_parser.rb → tc_html_parser.rb} +1 -1
  127. data/test/{tc_xml_node.rb → tc_node.rb} +1 -1
  128. data/test/{tc_xml_node_attr.rb → tc_node_attr.rb} +1 -1
  129. data/test/{tc_xml_node_cdata.rb → tc_node_cdata.rb} +24 -5
  130. data/test/{tc_xml_node_comment.rb → tc_node_comment.rb} +1 -1
  131. data/test/{tc_xml_node_copy.rb → tc_node_copy.rb} +1 -1
  132. data/test/{tc_xml_node_edit.rb → tc_node_edit.rb} +1 -1
  133. data/test/{tc_xml_node_set.rb → tc_node_set.rb} +1 -1
  134. data/test/{tc_xml_node_set2.rb → tc_node_set2.rb} +1 -1
  135. data/test/{tc_xml_node_text.rb → tc_node_text.rb} +1 -1
  136. data/test/{tc_xml_node_xlink.rb → tc_node_xlink.rb} +2 -2
  137. data/test/{tc_xml_parser.rb → tc_parser.rb} +1 -1
  138. data/test/{tc_xml_parser_context.rb → tc_parser_context.rb} +1 -1
  139. data/test/{tc_xml_properties.rb → tc_properties.rb} +1 -7
  140. data/test/{tc_xml_reader.rb → tc_reader.rb} +1 -1
  141. data/test/tc_relaxng.rb +39 -0
  142. data/test/{tc_xml_sax_parser.rb → tc_sax_parser.rb} +1 -1
  143. data/test/{tc_xml_schema.rb → tc_schema.rb} +1 -1
  144. data/test/{tc_xml_traversal.rb → tc_traversal.rb} +1 -1
  145. data/test/{tc_xml_xinclude.rb → tc_xinclude.rb} +1 -1
  146. data/test/{tc_xml_xpath.rb → tc_xpath.rb} +1 -1
  147. data/test/{tc_xml_xpath_context.rb → tc_xpath_context.rb} +1 -1
  148. data/test/{tc_xml_xpointer.rb → tc_xpointer.rb} +1 -1
  149. data/test/test_suite.rb +26 -25
  150. metadata +100 -100
  151. data/benchmark/depixelate.rb +0 -632
  152. data/benchmark/hamlet.xml +0 -9055
  153. data/benchmark/sock_entries.xml +0 -507
  154. data/benchmark/throughput.rb +0 -39
  155. data/benchmark/xml_benchmarks.rb +0 -227
  156. data/mingw/libiconv-2.dll +0 -0
  157. data/mingw/libxml2-2.dll +0 -0
  158. data/mingw/libxml_ruby.so +0 -0
  159. data/test/gc.log +0 -0
  160. data/vc/libxml_ruby.sln +0 -20
  161. data/vc/libxml_ruby.vcproj +0 -389
@@ -1,449 +1,449 @@
1
- /* $Id: ruby_xml_html_parser.c 461 2008-07-15 21:35:56Z cfis $ */
2
-
3
- /* Please see the LICENSE file for copyright and distribution information */
4
-
5
- #include "ruby_libxml.h"
6
-
7
- VALUE cXMLHTMLParser;
8
-
9
- //static int
10
- //ctxtRead(FILE *f, char * buf, int len) {
11
- // return(fread(buf, 1, len, f));
12
- //}
13
-
14
-
15
- /*
16
- * call-seq:
17
- * parser.filename -> "filename"
18
- *
19
- * Obtain the filename this parser will read from.
20
- */
21
- /*
22
- VALUE
23
- ruby_xml_html_parser_filename_get(VALUE self) {
24
- ruby_xml_html_parser *rxp;
25
- rx_file_data *data;
26
-
27
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
28
- if (rxp->data == NULL)
29
- return(Qnil);
30
-
31
- if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE)
32
- return(Qnil);
33
-
34
- data = (rx_file_data *)rxp->data;
35
- return(data->filename);
36
- }
37
- */
38
-
39
- /*
40
- * call-seq:
41
- * parser.filename = "filename"
42
- *
43
- * Set the filename this parser will read from.
44
- */
45
- /*
46
- VALUE
47
- ruby_xml_html_parser_filename_set(VALUE self, VALUE filename) {
48
- ruby_xml_html_parser *rxp;
49
- ruby_xml_parser_context *rxpc;
50
- rx_file_data *data;
51
-
52
- Check_Type(filename, T_STRING);
53
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
54
-
55
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
56
- if (rxp->data != NULL)
57
- rb_fatal("crap, this should be null");
58
-
59
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
60
- data = ALLOC(rx_file_data);
61
- rxp->data = data;
62
- } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE) {
63
- return(Qnil);
64
- }
65
-
66
- rxp->ctxt = ruby_xml_parser_context_new();
67
- data = (rx_file_data *)rxp->data;
68
- data->filename = filename;
69
-
70
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
71
- rxpc->ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename));
72
- if (rxpc->ctxt == NULL)
73
- rb_sys_fail(StringValuePtr(filename));
74
-
75
- return(data->filename);
76
- }
77
- */
78
-
79
- void
80
- ruby_xml_html_parser_free(ruby_xml_html_parser *rxp) {
81
- void *data;
82
-
83
- switch(rxp->data_type) {
84
- case RUBY_LIBXML_SRC_TYPE_NULL:
85
- break;
86
- case RUBY_LIBXML_SRC_TYPE_FILE:
87
- data = (void *)(rx_file_data *)rxp->data;
88
- ruby_xfree((rx_file_data *)data);
89
- break;
90
- case RUBY_LIBXML_SRC_TYPE_STRING:
91
- data = (void *)(rx_string_data *)rxp->data;
92
- ruby_xfree((rx_string_data *)data);
93
- break;
94
- case RUBY_LIBXML_SRC_TYPE_IO:
95
- data = (void *)(rx_io_data *)rxp->data;
96
- ruby_xfree((rx_io_data *)data);
97
- break;
98
- default:
99
- rb_fatal("Unknown data type, %d", rxp->data_type);
100
- }
101
-
102
- ruby_xfree(rxp);
103
- }
104
-
105
-
106
- /*
107
- * call-seq:
108
- * parser.io -> IO
109
- *
110
- * Obtain the IO instance this parser works with.
111
- */
112
- /*
113
- VALUE
114
- ruby_xml_html_parser_io_get(VALUE self, VALUE io) {
115
- ruby_xml_html_parser *rxp;
116
- rx_io_data *data;
117
-
118
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
119
-
120
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL ||
121
- rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO ||
122
- rxp->data == NULL)
123
- return(Qnil);
124
-
125
- data = (rx_io_data *)rxp->data;
126
-
127
- return(data->io);
128
- }
129
- */
130
-
131
- /*
132
- * call-seq:
133
- * parser.io = IO
134
- *
135
- * Set the IO instance this parser works with.
136
- */
137
- /*
138
- VALUE
139
- ruby_xml_html_parser_io_set(VALUE self, VALUE io) {
140
- ruby_xml_html_parser *rxp;
141
- ruby_xml_parser_context *rxpc;
142
- rx_io_data *data;
143
- OpenFile *fptr;
144
- FILE *f;
145
-
146
- if (!rb_obj_is_kind_of(io, rb_cIO))
147
- rb_raise(rb_eTypeError, "need an IO object");
148
-
149
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
150
-
151
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
152
- if (rxp->data != NULL)
153
- rb_fatal("crap, this should be null");
154
-
155
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
156
- data = ALLOC(rx_io_data);
157
- rxp->data = data;
158
- } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO) {
159
- return(Qnil);
160
- }
161
-
162
- rxp->ctxt = ruby_xml_parser_context_new();
163
- data = (rx_io_data *)rxp->data;
164
- data->io = io;
165
-
166
- GetOpenFile(io, fptr);
167
- rb_io_check_readable(fptr);
168
- f = GetWriteFile(fptr);
169
-
170
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
171
- rxpc->ctxt = htmlCreateIOParserCtxt(NULL, NULL,
172
- (xmlInputReadCallback) ctxtRead,
173
- NULL, f, XML_CHAR_ENCODING_NONE);
174
- if (NIL_P(rxpc->ctxt))
175
- rb_sys_fail(0);
176
-
177
- return(data->io);
178
- }
179
- */
180
-
181
- void
182
- ruby_xml_html_parser_mark(ruby_xml_html_parser *rxp) {
183
- if (rxp == NULL) return;
184
- if (!NIL_P(rxp->ctxt))
185
- rb_gc_mark(rxp->ctxt);
186
-
187
- rb_gc_mark(LIBXML_STATE);
188
-
189
- switch(rxp->data_type) {
190
- case RUBY_LIBXML_SRC_TYPE_NULL:
191
- break;
192
- case RUBY_LIBXML_SRC_TYPE_FILE:
193
- if (!NIL_P(((rx_file_data *)rxp->data)->filename))
194
- rb_gc_mark(((rx_file_data *)rxp->data)->filename);
195
- break;
196
- case RUBY_LIBXML_SRC_TYPE_STRING:
197
- if (!NIL_P(((rx_string_data *)rxp->data)->str))
198
- rb_gc_mark(((rx_string_data *)rxp->data)->str);
199
- break;
200
- case RUBY_LIBXML_SRC_TYPE_IO:
201
- if (!NIL_P(((rx_io_data *)rxp->data)->io))
202
- rb_gc_mark(((rx_io_data *)rxp->data)->io);
203
- break;
204
- default:
205
- rb_fatal("unknown datatype: %d", rxp->data_type);
206
- }
207
- }
208
-
209
-
210
- /*
211
- * call-seq:
212
- * XML::HTMLParser.new -> parser
213
- *
214
- * Create a new parser instance with no pre-determined source.
215
- */
216
- VALUE
217
- ruby_xml_html_parser_new(VALUE class) {
218
- ruby_xml_html_parser *rxp;
219
-
220
- rxp = ALLOC(ruby_xml_html_parser);
221
- rxp->ctxt = Qnil;
222
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
223
- rxp->data = NULL;
224
- rxp->parsed = 0;
225
-
226
- return(Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
227
- ruby_xml_html_parser_free, rxp));
228
- }
229
-
230
-
231
- /*
232
- * call-seq:
233
- * XML::HTMLParser.file -> parser
234
- *
235
- * Create a new parser instance that will read the specified file.
236
- */
237
- /*
238
- VALUE
239
- ruby_xml_html_parser_new_file(VALUE class, VALUE filename) {
240
- VALUE obj;
241
- ruby_xml_html_parser *rxp;
242
- rx_file_data *data;
243
-
244
- obj = ruby_xml_html_parser_new(class);
245
- Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
246
-
247
- data = ALLOC(rx_file_data);
248
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
249
- rxp->data = data;
250
-
251
- ruby_xml_html_parser_filename_set(obj, filename);
252
-
253
- return(obj);
254
- }
255
- */
256
-
257
- /*
258
- * call-seq:
259
- * XML::HTMLParser.io -> parser
260
- *
261
- * Create a new parser instance that will read from the
262
- * specified IO object.
263
- */
264
- /*
265
- VALUE
266
- ruby_xml_html_parser_new_io(VALUE class, VALUE io) {
267
- VALUE obj;
268
- ruby_xml_html_parser *rxp;
269
- rx_io_data *data;
270
-
271
- obj = ruby_xml_html_parser_new(class);
272
- Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
273
-
274
- data = ALLOC(rx_io_data);
275
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
276
- rxp->data = data;
277
-
278
- ruby_xml_html_parser_io_set(obj, io);
279
-
280
- return(obj);
281
- }
282
- */
283
-
284
- /*
285
- * call-seq:
286
- * XML::HTMLParser.string -> parser
287
- *
288
- * Create a new parser instance that will parse the given
289
- * string.
290
- */
291
- VALUE
292
- ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
293
- VALUE obj;
294
- ruby_xml_html_parser *rxp;
295
- rx_string_data *data;
296
-
297
- obj = ruby_xml_html_parser_new(class);
298
- Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
299
-
300
- data = ALLOC(rx_string_data);
301
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
302
- rxp->data = data;
303
-
304
- ruby_xml_html_parser_str_set(obj, str);
305
-
306
- return(obj);
307
- }
308
-
309
-
310
- /*
311
- * call-seq:
312
- * parser.parse -> document
313
- *
314
- * Parse the input XML and create an XML::Document with
315
- * it's content. If an error occurs, XML::Parser::ParseError
316
- * is thrown.
317
- */
318
- VALUE
319
- ruby_xml_html_parser_parse(VALUE self) {
320
- ruby_xml_html_parser *rxp;
321
- ruby_xml_parser_context *rxpc;
322
- htmlDocPtr xdp;
323
- VALUE doc;
324
-
325
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
326
-
327
- switch (rxp->data_type) {
328
- case RUBY_LIBXML_SRC_TYPE_NULL:
329
- return(Qnil);
330
- case RUBY_LIBXML_SRC_TYPE_STRING:
331
- //case RUBY_LIBXML_SRC_TYPE_FILE:
332
- //case RUBY_LIBXML_SRC_TYPE_IO:
333
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
334
-
335
- /* don't check return values here, the HTML parser returns errors
336
- * but still allows the resulting tree to be used.
337
- */
338
- htmlParseDocument(rxpc->ctxt);
339
- xdp = rxpc->ctxt->myDoc;
340
- rxp->parsed = 1;
341
-
342
- doc = ruby_xml_document_wrap(xdp);
343
- break;
344
- default:
345
- rb_fatal("Unknown data type, %d", rxp->data_type);
346
- }
347
-
348
- return(doc);
349
- }
350
-
351
-
352
- /*
353
- * call-seq:
354
- * parser.context -> context
355
- *
356
- * Obtain the XML::Parser::Context associated with this
357
- * parser.
358
- */
359
- VALUE
360
- ruby_xml_html_parser_context_get(VALUE self) {
361
- ruby_xml_html_parser *rxp;
362
-
363
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
364
- if (rxp->ctxt == Qnil)
365
- return(Qnil);
366
- else
367
- return(rxp->ctxt);
368
- }
369
-
370
-
371
- /*
372
- * call-seq:
373
- * parser.string -> "string"
374
- *
375
- * Obtain the string this parser works with.
376
- */
377
- VALUE
378
- ruby_xml_html_parser_str_get(VALUE self) {
379
- ruby_xml_html_parser *rxp;
380
- rx_string_data *data;
381
-
382
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
383
- if (rxp->data == NULL || rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING)
384
- return(Qnil);
385
-
386
- data = (rx_string_data *)rxp->data;
387
- return(data->str);
388
- }
389
-
390
-
391
- /*
392
- * call-seq:
393
- * parser.string = "string"
394
- *
395
- * Set the string this parser works with.
396
- */
397
- VALUE
398
- ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
399
- ruby_xml_html_parser *rxp;
400
- ruby_xml_parser_context *rxpc;
401
- rx_string_data *data;
402
-
403
- Check_Type(str, T_STRING);
404
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
405
-
406
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
407
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
408
- data = ALLOC(rx_string_data);
409
- rxp->data = data;
410
- } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
411
- return(Qnil);
412
- }
413
-
414
- rxp->ctxt = ruby_xml_parser_context_new();
415
- data = (rx_string_data *)rxp->data;
416
- data->str = str;
417
-
418
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
419
- rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));
420
-
421
- return(data->str);
422
- }
423
-
424
-
425
- // Rdoc needs to know
426
- #ifdef RDOC_NEVER_DEFINED
427
- mLibXML = rb_define_module("LibXML");
428
- mXML = rb_define_module_under(mLibXML, "XML");
429
- #endif
430
-
431
- void
432
- ruby_init_html_parser(void) {
433
- cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
434
-
435
- rb_define_singleton_method(cXMLHTMLParser, "new", ruby_xml_html_parser_new, 0);
436
- rb_define_singleton_method(cXMLHTMLParser, "string", ruby_xml_html_parser_new_string, 1);
437
- /*
438
- rb_define_singleton_method(cXMLHTMLParser, "file", ruby_xml_html_parser_new_file, 1);
439
- rb_define_singleton_method(cXMLHTMLParser, "io", ruby_xml_html_parser_new_io, 1);
440
- rb_define_method(cXMLHTMLParser, "filename", ruby_xml_html_parser_filename_get, 0);
441
- rb_define_method(cXMLHTMLParser, "filename=", ruby_xml_html_parser_filename_set, 1);
442
- rb_define_method(cXMLHTMLParser, "io", ruby_xml_html_parser_io_get, 0);
443
- rb_define_method(cXMLHTMLParser, "io=", ruby_xml_html_parser_io_set, 1);
444
- */
445
- rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
446
- rb_define_method(cXMLHTMLParser, "context", ruby_xml_html_parser_context_get, 0);
447
- rb_define_method(cXMLHTMLParser, "string", ruby_xml_html_parser_str_get, 0);
448
- rb_define_method(cXMLHTMLParser, "string=", ruby_xml_html_parser_str_set, 1);
449
- }
1
+ /* $Id: ruby_xml_html_parser.c 461 2008-07-15 21:35:56Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #include "ruby_libxml.h"
6
+
7
+ VALUE cXMLHTMLParser;
8
+
9
+ //static int
10
+ //ctxtRead(FILE *f, char * buf, int len) {
11
+ // return(fread(buf, 1, len, f));
12
+ //}
13
+
14
+
15
+ /*
16
+ * call-seq:
17
+ * parser.filename -> "filename"
18
+ *
19
+ * Obtain the filename this parser will read from.
20
+ */
21
+ /*
22
+ VALUE
23
+ ruby_xml_html_parser_filename_get(VALUE self) {
24
+ ruby_xml_html_parser *rxp;
25
+ rx_file_data *data;
26
+
27
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
28
+ if (rxp->data == NULL)
29
+ return(Qnil);
30
+
31
+ if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE)
32
+ return(Qnil);
33
+
34
+ data = (rx_file_data *)rxp->data;
35
+ return(data->filename);
36
+ }
37
+ */
38
+
39
+ /*
40
+ * call-seq:
41
+ * parser.filename = "filename"
42
+ *
43
+ * Set the filename this parser will read from.
44
+ */
45
+ /*
46
+ VALUE
47
+ ruby_xml_html_parser_filename_set(VALUE self, VALUE filename) {
48
+ ruby_xml_html_parser *rxp;
49
+ ruby_xml_parser_context *rxpc;
50
+ rx_file_data *data;
51
+
52
+ Check_Type(filename, T_STRING);
53
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
54
+
55
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
56
+ if (rxp->data != NULL)
57
+ rb_fatal("crap, this should be null");
58
+
59
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
60
+ data = ALLOC(rx_file_data);
61
+ rxp->data = data;
62
+ } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE) {
63
+ return(Qnil);
64
+ }
65
+
66
+ rxp->ctxt = ruby_xml_parser_context_new();
67
+ data = (rx_file_data *)rxp->data;
68
+ data->filename = filename;
69
+
70
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
71
+ rxpc->ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename));
72
+ if (rxpc->ctxt == NULL)
73
+ rb_sys_fail(StringValuePtr(filename));
74
+
75
+ return(data->filename);
76
+ }
77
+ */
78
+
79
+ void
80
+ ruby_xml_html_parser_free(ruby_xml_html_parser *rxp) {
81
+ void *data;
82
+
83
+ switch(rxp->data_type) {
84
+ case RUBY_LIBXML_SRC_TYPE_NULL:
85
+ break;
86
+ case RUBY_LIBXML_SRC_TYPE_FILE:
87
+ data = (void *)(rx_file_data *)rxp->data;
88
+ ruby_xfree((rx_file_data *)data);
89
+ break;
90
+ case RUBY_LIBXML_SRC_TYPE_STRING:
91
+ data = (void *)(rx_string_data *)rxp->data;
92
+ ruby_xfree((rx_string_data *)data);
93
+ break;
94
+ case RUBY_LIBXML_SRC_TYPE_IO:
95
+ data = (void *)(rx_io_data *)rxp->data;
96
+ ruby_xfree((rx_io_data *)data);
97
+ break;
98
+ default:
99
+ rb_fatal("Unknown data type, %d", rxp->data_type);
100
+ }
101
+
102
+ ruby_xfree(rxp);
103
+ }
104
+
105
+
106
+ /*
107
+ * call-seq:
108
+ * parser.io -> IO
109
+ *
110
+ * Obtain the IO instance this parser works with.
111
+ */
112
+ /*
113
+ VALUE
114
+ ruby_xml_html_parser_io_get(VALUE self, VALUE io) {
115
+ ruby_xml_html_parser *rxp;
116
+ rx_io_data *data;
117
+
118
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
119
+
120
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL ||
121
+ rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO ||
122
+ rxp->data == NULL)
123
+ return(Qnil);
124
+
125
+ data = (rx_io_data *)rxp->data;
126
+
127
+ return(data->io);
128
+ }
129
+ */
130
+
131
+ /*
132
+ * call-seq:
133
+ * parser.io = IO
134
+ *
135
+ * Set the IO instance this parser works with.
136
+ */
137
+ /*
138
+ VALUE
139
+ ruby_xml_html_parser_io_set(VALUE self, VALUE io) {
140
+ ruby_xml_html_parser *rxp;
141
+ ruby_xml_parser_context *rxpc;
142
+ rx_io_data *data;
143
+ OpenFile *fptr;
144
+ FILE *f;
145
+
146
+ if (!rb_obj_is_kind_of(io, rb_cIO))
147
+ rb_raise(rb_eTypeError, "need an IO object");
148
+
149
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
150
+
151
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
152
+ if (rxp->data != NULL)
153
+ rb_fatal("crap, this should be null");
154
+
155
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
156
+ data = ALLOC(rx_io_data);
157
+ rxp->data = data;
158
+ } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO) {
159
+ return(Qnil);
160
+ }
161
+
162
+ rxp->ctxt = ruby_xml_parser_context_new();
163
+ data = (rx_io_data *)rxp->data;
164
+ data->io = io;
165
+
166
+ GetOpenFile(io, fptr);
167
+ rb_io_check_readable(fptr);
168
+ f = GetWriteFile(fptr);
169
+
170
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
171
+ rxpc->ctxt = htmlCreateIOParserCtxt(NULL, NULL,
172
+ (xmlInputReadCallback) ctxtRead,
173
+ NULL, f, XML_CHAR_ENCODING_NONE);
174
+ if (NIL_P(rxpc->ctxt))
175
+ rb_sys_fail(0);
176
+
177
+ return(data->io);
178
+ }
179
+ */
180
+
181
+ void
182
+ ruby_xml_html_parser_mark(ruby_xml_html_parser *rxp) {
183
+ if (rxp == NULL) return;
184
+ if (!NIL_P(rxp->ctxt))
185
+ rb_gc_mark(rxp->ctxt);
186
+
187
+ rb_gc_mark(LIBXML_STATE);
188
+
189
+ switch(rxp->data_type) {
190
+ case RUBY_LIBXML_SRC_TYPE_NULL:
191
+ break;
192
+ case RUBY_LIBXML_SRC_TYPE_FILE:
193
+ if (!NIL_P(((rx_file_data *)rxp->data)->filename))
194
+ rb_gc_mark(((rx_file_data *)rxp->data)->filename);
195
+ break;
196
+ case RUBY_LIBXML_SRC_TYPE_STRING:
197
+ if (!NIL_P(((rx_string_data *)rxp->data)->str))
198
+ rb_gc_mark(((rx_string_data *)rxp->data)->str);
199
+ break;
200
+ case RUBY_LIBXML_SRC_TYPE_IO:
201
+ if (!NIL_P(((rx_io_data *)rxp->data)->io))
202
+ rb_gc_mark(((rx_io_data *)rxp->data)->io);
203
+ break;
204
+ default:
205
+ rb_fatal("unknown datatype: %d", rxp->data_type);
206
+ }
207
+ }
208
+
209
+
210
+ /*
211
+ * call-seq:
212
+ * XML::HTMLParser.new -> parser
213
+ *
214
+ * Create a new parser instance with no pre-determined source.
215
+ */
216
+ VALUE
217
+ ruby_xml_html_parser_new(VALUE class) {
218
+ ruby_xml_html_parser *rxp;
219
+
220
+ rxp = ALLOC(ruby_xml_html_parser);
221
+ rxp->ctxt = Qnil;
222
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
223
+ rxp->data = NULL;
224
+ rxp->parsed = 0;
225
+
226
+ return(Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
227
+ ruby_xml_html_parser_free, rxp));
228
+ }
229
+
230
+
231
+ /*
232
+ * call-seq:
233
+ * XML::HTMLParser.file -> parser
234
+ *
235
+ * Create a new parser instance that will read the specified file.
236
+ */
237
+ /*
238
+ VALUE
239
+ ruby_xml_html_parser_new_file(VALUE class, VALUE filename) {
240
+ VALUE obj;
241
+ ruby_xml_html_parser *rxp;
242
+ rx_file_data *data;
243
+
244
+ obj = ruby_xml_html_parser_new(class);
245
+ Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
246
+
247
+ data = ALLOC(rx_file_data);
248
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
249
+ rxp->data = data;
250
+
251
+ ruby_xml_html_parser_filename_set(obj, filename);
252
+
253
+ return(obj);
254
+ }
255
+ */
256
+
257
+ /*
258
+ * call-seq:
259
+ * XML::HTMLParser.io -> parser
260
+ *
261
+ * Create a new parser instance that will read from the
262
+ * specified IO object.
263
+ */
264
+ /*
265
+ VALUE
266
+ ruby_xml_html_parser_new_io(VALUE class, VALUE io) {
267
+ VALUE obj;
268
+ ruby_xml_html_parser *rxp;
269
+ rx_io_data *data;
270
+
271
+ obj = ruby_xml_html_parser_new(class);
272
+ Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
273
+
274
+ data = ALLOC(rx_io_data);
275
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
276
+ rxp->data = data;
277
+
278
+ ruby_xml_html_parser_io_set(obj, io);
279
+
280
+ return(obj);
281
+ }
282
+ */
283
+
284
+ /*
285
+ * call-seq:
286
+ * XML::HTMLParser.string -> parser
287
+ *
288
+ * Create a new parser instance that will parse the given
289
+ * string.
290
+ */
291
+ VALUE
292
+ ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
293
+ VALUE obj;
294
+ ruby_xml_html_parser *rxp;
295
+ rx_string_data *data;
296
+
297
+ obj = ruby_xml_html_parser_new(class);
298
+ Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
299
+
300
+ data = ALLOC(rx_string_data);
301
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
302
+ rxp->data = data;
303
+
304
+ ruby_xml_html_parser_str_set(obj, str);
305
+
306
+ return(obj);
307
+ }
308
+
309
+
310
+ /*
311
+ * call-seq:
312
+ * parser.parse -> document
313
+ *
314
+ * Parse the input HTML and create an XML::Document with
315
+ * its content. If an error occurs, XML::Parser::ParseError
316
+ * is thrown.
317
+ */
318
+ VALUE
319
+ ruby_xml_html_parser_parse(VALUE self) {
320
+ ruby_xml_html_parser *rxp;
321
+ ruby_xml_parser_context *rxpc;
322
+ htmlDocPtr xdp;
323
+ VALUE doc;
324
+
325
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
326
+
327
+ switch (rxp->data_type) {
328
+ case RUBY_LIBXML_SRC_TYPE_NULL:
329
+ return(Qnil);
330
+ case RUBY_LIBXML_SRC_TYPE_STRING:
331
+ //case RUBY_LIBXML_SRC_TYPE_FILE:
332
+ //case RUBY_LIBXML_SRC_TYPE_IO:
333
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
334
+
335
+ /* don't check return values here, the HTML parser returns errors
336
+ * but still allows the resulting tree to be used.
337
+ */
338
+ htmlParseDocument(rxpc->ctxt);
339
+ xdp = rxpc->ctxt->myDoc;
340
+ rxp->parsed = 1;
341
+
342
+ doc = ruby_xml_document_wrap(xdp);
343
+ break;
344
+ default:
345
+ rb_fatal("Unknown data type, %d", rxp->data_type);
346
+ }
347
+
348
+ return(doc);
349
+ }
350
+
351
+
352
+ /*
353
+ * call-seq:
354
+ * parser.context -> context
355
+ *
356
+ * Obtain the XML::Parser::Context associated with this
357
+ * parser.
358
+ */
359
+ VALUE
360
+ ruby_xml_html_parser_context_get(VALUE self) {
361
+ ruby_xml_html_parser *rxp;
362
+
363
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
364
+ if (rxp->ctxt == Qnil)
365
+ return(Qnil);
366
+ else
367
+ return(rxp->ctxt);
368
+ }
369
+
370
+
371
+ /*
372
+ * call-seq:
373
+ * parser.string -> "string"
374
+ *
375
+ * Obtain the string this parser works with.
376
+ */
377
+ VALUE
378
+ ruby_xml_html_parser_str_get(VALUE self) {
379
+ ruby_xml_html_parser *rxp;
380
+ rx_string_data *data;
381
+
382
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
383
+ if (rxp->data == NULL || rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING)
384
+ return(Qnil);
385
+
386
+ data = (rx_string_data *)rxp->data;
387
+ return(data->str);
388
+ }
389
+
390
+
391
+ /*
392
+ * call-seq:
393
+ * parser.string = "string"
394
+ *
395
+ * Set the string this parser works with.
396
+ */
397
+ VALUE
398
+ ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
399
+ ruby_xml_html_parser *rxp;
400
+ ruby_xml_parser_context *rxpc;
401
+ rx_string_data *data;
402
+
403
+ Check_Type(str, T_STRING);
404
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
405
+
406
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
407
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
408
+ data = ALLOC(rx_string_data);
409
+ rxp->data = data;
410
+ } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
411
+ return(Qnil);
412
+ }
413
+
414
+ rxp->ctxt = ruby_xml_parser_context_new();
415
+ data = (rx_string_data *)rxp->data;
416
+ data->str = str;
417
+
418
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
419
+ rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));
420
+
421
+ return(data->str);
422
+ }
423
+
424
+
425
+ // Rdoc needs to know
426
+ #ifdef RDOC_NEVER_DEFINED
427
+ mLibXML = rb_define_module("LibXML");
428
+ mXML = rb_define_module_under(mLibXML, "XML");
429
+ #endif
430
+
431
+ void
432
+ ruby_init_html_parser(void) {
433
+ cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
434
+
435
+ rb_define_singleton_method(cXMLHTMLParser, "new", ruby_xml_html_parser_new, 0);
436
+ rb_define_singleton_method(cXMLHTMLParser, "string", ruby_xml_html_parser_new_string, 1);
437
+ /*
438
+ rb_define_singleton_method(cXMLHTMLParser, "file", ruby_xml_html_parser_new_file, 1);
439
+ rb_define_singleton_method(cXMLHTMLParser, "io", ruby_xml_html_parser_new_io, 1);
440
+ rb_define_method(cXMLHTMLParser, "filename", ruby_xml_html_parser_filename_get, 0);
441
+ rb_define_method(cXMLHTMLParser, "filename=", ruby_xml_html_parser_filename_set, 1);
442
+ rb_define_method(cXMLHTMLParser, "io", ruby_xml_html_parser_io_get, 0);
443
+ rb_define_method(cXMLHTMLParser, "io=", ruby_xml_html_parser_io_set, 1);
444
+ */
445
+ rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
446
+ rb_define_method(cXMLHTMLParser, "context", ruby_xml_html_parser_context_get, 0);
447
+ rb_define_method(cXMLHTMLParser, "string", ruby_xml_html_parser_str_get, 0);
448
+ rb_define_method(cXMLHTMLParser, "string=", ruby_xml_html_parser_str_set, 1);
449
+ }