libxml-ruby 0.8.1-x86-mswin32-60 → 0.8.2-x86-mswin32-60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164) hide show
  1. data/CHANGES +21 -0
  2. data/RAKEFILE +213 -0
  3. data/README +32 -11
  4. data/doc/rdoc/classes/LibXML.html +4 -0
  5. data/doc/rdoc/classes/LibXML/XML.html +7 -0
  6. data/doc/rdoc/classes/LibXML/XML/Attr.html +187 -177
  7. data/doc/rdoc/classes/LibXML/XML/Attributes.html +62 -61
  8. data/doc/rdoc/classes/LibXML/XML/Document.html +318 -254
  9. data/doc/rdoc/classes/LibXML/XML/Dtd.html +62 -8
  10. data/doc/rdoc/classes/LibXML/XML/Error.html +6 -0
  11. data/doc/rdoc/classes/LibXML/XML/HTMLParser.html +59 -41
  12. data/doc/rdoc/classes/LibXML/XML/InputCallbacks.html +12 -6
  13. data/doc/rdoc/classes/LibXML/XML/NS.html +65 -50
  14. data/doc/rdoc/classes/LibXML/XML/Node.html +671 -630
  15. data/doc/rdoc/classes/LibXML/XML/Node/FailedModify.html +12 -0
  16. data/doc/rdoc/classes/LibXML/XML/Node/Set.html +64 -60
  17. data/doc/rdoc/classes/LibXML/XML/Node/SetNamespace.html +12 -0
  18. data/doc/rdoc/classes/LibXML/XML/Node/UnknownType.html +12 -0
  19. data/doc/rdoc/classes/LibXML/XML/Parser.html +407 -383
  20. data/doc/rdoc/classes/LibXML/XML/Parser/Context.html +241 -219
  21. data/doc/rdoc/classes/LibXML/XML/Parser/ParseError.html +12 -0
  22. data/doc/rdoc/classes/LibXML/XML/Reader.html +421 -331
  23. data/doc/rdoc/classes/LibXML/XML/RelaxNG.html +237 -0
  24. data/doc/rdoc/classes/LibXML/XML/SaxParser.html +79 -60
  25. data/doc/rdoc/classes/LibXML/XML/Schema.html +109 -22
  26. data/doc/rdoc/classes/LibXML/XML/State.html +6 -0
  27. data/doc/rdoc/classes/LibXML/XML/XInclude.html +6 -0
  28. data/doc/rdoc/classes/LibXML/XML/XInclude/Error.html +6 -0
  29. data/doc/rdoc/classes/LibXML/XML/XPath.html +4 -48
  30. data/doc/rdoc/classes/LibXML/XML/XPath/Context.html +6 -5
  31. data/doc/rdoc/classes/LibXML/XML/XPath/InvalidPath.html +61 -0
  32. data/doc/rdoc/classes/LibXML/XML/XPath/Object.html +12 -8
  33. data/doc/rdoc/classes/LibXML/XML/XPointer.html +14 -8
  34. data/doc/rdoc/classes/LibXML/XML/XPointer/Context.html +6 -0
  35. data/doc/rdoc/classes/LibXML/XML/XPointer/Context/InvalidPath.html +6 -0
  36. data/doc/rdoc/classes/LibXML/XML/XPointer/InvalidExpression.html +13 -0
  37. data/doc/rdoc/created.rid +1 -1
  38. data/doc/rdoc/files/CHANGES.html +38 -10
  39. data/doc/rdoc/files/LICENSE.html +1 -1
  40. data/doc/rdoc/files/README.html +57 -23
  41. data/doc/rdoc/files/VERSION.html +1 -1
  42. data/doc/rdoc/files/ext/libxml/cbg_c.html +1 -1
  43. data/doc/rdoc/files/ext/libxml/libxml_c.html +1 -1
  44. data/doc/rdoc/files/ext/libxml/ruby_xml_attr_c.html +1 -1
  45. data/doc/rdoc/files/ext/libxml/ruby_xml_attributes_c.html +1 -1
  46. data/doc/rdoc/files/ext/libxml/ruby_xml_document_c.html +1 -1
  47. data/doc/rdoc/files/ext/libxml/ruby_xml_dtd_c.html +1 -1
  48. data/doc/rdoc/files/ext/libxml/ruby_xml_html_parser_c.html +1 -1
  49. data/doc/rdoc/files/ext/libxml/ruby_xml_input_cbg_c.html +1 -1
  50. data/doc/rdoc/files/ext/libxml/ruby_xml_node_c.html +1 -1
  51. data/doc/rdoc/files/ext/libxml/ruby_xml_node_set_c.html +1 -1
  52. data/doc/rdoc/files/ext/libxml/ruby_xml_ns_c.html +1 -1
  53. data/doc/rdoc/files/ext/libxml/ruby_xml_parser_c.html +1 -1
  54. data/doc/rdoc/files/ext/libxml/ruby_xml_parser_context_c.html +1 -1
  55. data/doc/rdoc/files/ext/libxml/ruby_xml_reader_c.html +1 -1
  56. data/doc/rdoc/{classes/XML.html → files/ext/libxml/ruby_xml_relaxng_c.html} +16 -35
  57. data/doc/rdoc/files/ext/libxml/ruby_xml_sax_parser_c.html +1 -1
  58. data/doc/rdoc/files/ext/libxml/ruby_xml_schema_c.html +1 -1
  59. data/doc/rdoc/files/ext/libxml/ruby_xml_state_c.html +1 -1
  60. data/doc/rdoc/files/ext/libxml/ruby_xml_xinclude_c.html +1 -1
  61. data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_c.html +1 -1
  62. data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_context_c.html +1 -1
  63. data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_object_c.html +1 -1
  64. data/doc/rdoc/files/ext/libxml/ruby_xml_xpointer_c.html +1 -1
  65. data/doc/rdoc/files/ext/libxml/ruby_xml_xpointer_context_c.html +1 -1
  66. data/doc/rdoc/files/lib/libxml/attr_rb.html +1 -1
  67. data/doc/rdoc/files/lib/libxml/attributes_rb.html +1 -1
  68. data/doc/rdoc/files/lib/libxml/document_rb.html +1 -1
  69. data/doc/rdoc/files/lib/libxml/node_rb.html +1 -1
  70. data/doc/rdoc/files/lib/libxml/node_set_rb.html +1 -1
  71. data/doc/rdoc/files/lib/libxml/parser_options_rb.html +1 -1
  72. data/doc/rdoc/files/lib/libxml/parser_rb.html +1 -1
  73. data/doc/rdoc/files/lib/libxml/properties_rb.html +1 -1
  74. data/doc/rdoc/files/lib/libxml/tree_rb.html +1 -1
  75. data/doc/rdoc/files/lib/libxml_rb.html +2 -2
  76. data/doc/rdoc/files/lib/xml/libxml_rb.html +12 -2
  77. data/doc/rdoc/files/lib/xml_rb.html +134 -0
  78. data/doc/rdoc/fr_class_index.html +1 -1
  79. data/doc/rdoc/fr_file_index.html +2 -0
  80. data/doc/rdoc/fr_method_index.html +349 -344
  81. data/ext/libxml/libxml.c +2 -1
  82. data/ext/libxml/ruby_libxml.h +1 -0
  83. data/ext/libxml/ruby_xml_attr.c +8 -2
  84. data/ext/libxml/ruby_xml_attributes.c +5 -5
  85. data/ext/libxml/ruby_xml_document.c +41 -2
  86. data/ext/libxml/ruby_xml_dtd.c +169 -169
  87. data/ext/libxml/ruby_xml_html_parser.c +449 -449
  88. data/ext/libxml/ruby_xml_input_cbg.c +165 -165
  89. data/ext/libxml/ruby_xml_node.c +1 -1
  90. data/ext/libxml/ruby_xml_node_set.c +172 -172
  91. data/ext/libxml/ruby_xml_ns.c +145 -145
  92. data/ext/libxml/ruby_xml_parser.c +1398 -1398
  93. data/ext/libxml/ruby_xml_parser_context.c +697 -697
  94. data/ext/libxml/ruby_xml_reader.c +44 -0
  95. data/ext/libxml/ruby_xml_reader.h +14 -14
  96. data/ext/libxml/ruby_xml_relaxng.c +97 -0
  97. data/ext/libxml/ruby_xml_relaxng.h +15 -0
  98. data/ext/libxml/ruby_xml_sax_parser.c +486 -486
  99. data/ext/libxml/ruby_xml_schema.c +1 -1
  100. data/ext/libxml/ruby_xml_state.c +4 -4
  101. data/ext/libxml/ruby_xml_xinclude.c +21 -21
  102. data/ext/libxml/ruby_xml_xpath.c +89 -89
  103. data/ext/libxml/ruby_xml_xpath_context.c +283 -283
  104. data/ext/libxml/ruby_xml_xpath_object.c +5 -5
  105. data/ext/libxml/ruby_xml_xpointer.c +104 -104
  106. data/ext/libxml/ruby_xml_xpointer_context.c +22 -22
  107. data/ext/libxml/version.h +2 -2
  108. data/ext/libxml/version.h.rej +17 -0
  109. data/ext/mingw/Rakefile +41 -0
  110. data/{mingw → lib}/libiconv-2.dll +0 -0
  111. data/lib/libxml.rb +15 -4
  112. data/lib/libxml2-2.dll +0 -0
  113. data/lib/libxml_ruby.dll.a +0 -0
  114. data/lib/libxml_ruby.so +0 -0
  115. data/lib/xml.rb +14 -0
  116. data/lib/xml/libxml.rb +4 -1
  117. data/test/etc_doc_to_s.rb +1 -1
  118. data/test/ets_copy_bug.rb +1 -1
  119. data/test/ets_copy_bug3.rb +1 -1
  120. data/test/ets_doc_file.rb +1 -1
  121. data/test/ets_doc_to_s.rb +1 -1
  122. data/test/ets_node_gc.rb +1 -1
  123. data/test/ets_tsr.rb +1 -1
  124. data/test/model/shiporder.rnc +28 -0
  125. data/test/model/shiporder.rng +86 -0
  126. data/test/{tc_xml_attributes.rb → tc_attributes.rb} +1 -10
  127. data/test/{tc_xml_document.rb → tc_document.rb} +1 -1
  128. data/test/{tc_xml_document_write.rb → tc_document_write.rb} +1 -1
  129. data/test/{tc_xml_dtd.rb → tc_dtd.rb} +1 -1
  130. data/test/{tc_xml_html_parser.rb → tc_html_parser.rb} +1 -1
  131. data/test/{tc_xml_node.rb → tc_node.rb} +1 -1
  132. data/test/{tc_xml_node_attr.rb → tc_node_attr.rb} +1 -1
  133. data/test/{tc_xml_node_cdata.rb → tc_node_cdata.rb} +24 -5
  134. data/test/{tc_xml_node_comment.rb → tc_node_comment.rb} +1 -1
  135. data/test/{tc_xml_node_copy.rb → tc_node_copy.rb} +1 -1
  136. data/test/{tc_xml_node_edit.rb → tc_node_edit.rb} +1 -1
  137. data/test/{tc_xml_node_set.rb → tc_node_set.rb} +1 -1
  138. data/test/{tc_xml_node_set2.rb → tc_node_set2.rb} +1 -1
  139. data/test/{tc_xml_node_text.rb → tc_node_text.rb} +1 -1
  140. data/test/{tc_xml_node_xlink.rb → tc_node_xlink.rb} +2 -2
  141. data/test/{tc_xml_parser.rb → tc_parser.rb} +1 -1
  142. data/test/{tc_xml_parser_context.rb → tc_parser_context.rb} +1 -1
  143. data/test/{tc_xml_properties.rb → tc_properties.rb} +1 -7
  144. data/test/{tc_xml_reader.rb → tc_reader.rb} +1 -1
  145. data/test/tc_relaxng.rb +39 -0
  146. data/test/{tc_xml_sax_parser.rb → tc_sax_parser.rb} +1 -1
  147. data/test/{tc_xml_schema.rb → tc_schema.rb} +1 -1
  148. data/test/{tc_xml_traversal.rb → tc_traversal.rb} +1 -1
  149. data/test/{tc_xml_xinclude.rb → tc_xinclude.rb} +1 -1
  150. data/test/{tc_xml_xpath.rb → tc_xpath.rb} +1 -1
  151. data/test/{tc_xml_xpath_context.rb → tc_xpath_context.rb} +1 -1
  152. data/test/{tc_xml_xpointer.rb → tc_xpointer.rb} +1 -1
  153. data/test/test_suite.rb +26 -25
  154. metadata +103 -100
  155. data/benchmark/depixelate.rb +0 -632
  156. data/benchmark/hamlet.xml +0 -9055
  157. data/benchmark/sock_entries.xml +0 -507
  158. data/benchmark/throughput.rb +0 -39
  159. data/benchmark/xml_benchmarks.rb +0 -227
  160. data/mingw/libxml2-2.dll +0 -0
  161. data/mingw/libxml_ruby.so +0 -0
  162. data/test/gc.log +0 -0
  163. data/vc/libxml_ruby.sln +0 -20
  164. data/vc/libxml_ruby.vcproj +0 -389
@@ -1,449 +1,449 @@
1
- /* $Id: ruby_xml_html_parser.c 461 2008-07-15 21:35:56Z cfis $ */
2
-
3
- /* Please see the LICENSE file for copyright and distribution information */
4
-
5
- #include "ruby_libxml.h"
6
-
7
- VALUE cXMLHTMLParser;
8
-
9
- //static int
10
- //ctxtRead(FILE *f, char * buf, int len) {
11
- // return(fread(buf, 1, len, f));
12
- //}
13
-
14
-
15
- /*
16
- * call-seq:
17
- * parser.filename -> "filename"
18
- *
19
- * Obtain the filename this parser will read from.
20
- */
21
- /*
22
- VALUE
23
- ruby_xml_html_parser_filename_get(VALUE self) {
24
- ruby_xml_html_parser *rxp;
25
- rx_file_data *data;
26
-
27
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
28
- if (rxp->data == NULL)
29
- return(Qnil);
30
-
31
- if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE)
32
- return(Qnil);
33
-
34
- data = (rx_file_data *)rxp->data;
35
- return(data->filename);
36
- }
37
- */
38
-
39
- /*
40
- * call-seq:
41
- * parser.filename = "filename"
42
- *
43
- * Set the filename this parser will read from.
44
- */
45
- /*
46
- VALUE
47
- ruby_xml_html_parser_filename_set(VALUE self, VALUE filename) {
48
- ruby_xml_html_parser *rxp;
49
- ruby_xml_parser_context *rxpc;
50
- rx_file_data *data;
51
-
52
- Check_Type(filename, T_STRING);
53
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
54
-
55
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
56
- if (rxp->data != NULL)
57
- rb_fatal("crap, this should be null");
58
-
59
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
60
- data = ALLOC(rx_file_data);
61
- rxp->data = data;
62
- } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE) {
63
- return(Qnil);
64
- }
65
-
66
- rxp->ctxt = ruby_xml_parser_context_new();
67
- data = (rx_file_data *)rxp->data;
68
- data->filename = filename;
69
-
70
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
71
- rxpc->ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename));
72
- if (rxpc->ctxt == NULL)
73
- rb_sys_fail(StringValuePtr(filename));
74
-
75
- return(data->filename);
76
- }
77
- */
78
-
79
- void
80
- ruby_xml_html_parser_free(ruby_xml_html_parser *rxp) {
81
- void *data;
82
-
83
- switch(rxp->data_type) {
84
- case RUBY_LIBXML_SRC_TYPE_NULL:
85
- break;
86
- case RUBY_LIBXML_SRC_TYPE_FILE:
87
- data = (void *)(rx_file_data *)rxp->data;
88
- ruby_xfree((rx_file_data *)data);
89
- break;
90
- case RUBY_LIBXML_SRC_TYPE_STRING:
91
- data = (void *)(rx_string_data *)rxp->data;
92
- ruby_xfree((rx_string_data *)data);
93
- break;
94
- case RUBY_LIBXML_SRC_TYPE_IO:
95
- data = (void *)(rx_io_data *)rxp->data;
96
- ruby_xfree((rx_io_data *)data);
97
- break;
98
- default:
99
- rb_fatal("Unknown data type, %d", rxp->data_type);
100
- }
101
-
102
- ruby_xfree(rxp);
103
- }
104
-
105
-
106
- /*
107
- * call-seq:
108
- * parser.io -> IO
109
- *
110
- * Obtain the IO instance this parser works with.
111
- */
112
- /*
113
- VALUE
114
- ruby_xml_html_parser_io_get(VALUE self, VALUE io) {
115
- ruby_xml_html_parser *rxp;
116
- rx_io_data *data;
117
-
118
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
119
-
120
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL ||
121
- rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO ||
122
- rxp->data == NULL)
123
- return(Qnil);
124
-
125
- data = (rx_io_data *)rxp->data;
126
-
127
- return(data->io);
128
- }
129
- */
130
-
131
- /*
132
- * call-seq:
133
- * parser.io = IO
134
- *
135
- * Set the IO instance this parser works with.
136
- */
137
- /*
138
- VALUE
139
- ruby_xml_html_parser_io_set(VALUE self, VALUE io) {
140
- ruby_xml_html_parser *rxp;
141
- ruby_xml_parser_context *rxpc;
142
- rx_io_data *data;
143
- OpenFile *fptr;
144
- FILE *f;
145
-
146
- if (!rb_obj_is_kind_of(io, rb_cIO))
147
- rb_raise(rb_eTypeError, "need an IO object");
148
-
149
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
150
-
151
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
152
- if (rxp->data != NULL)
153
- rb_fatal("crap, this should be null");
154
-
155
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
156
- data = ALLOC(rx_io_data);
157
- rxp->data = data;
158
- } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO) {
159
- return(Qnil);
160
- }
161
-
162
- rxp->ctxt = ruby_xml_parser_context_new();
163
- data = (rx_io_data *)rxp->data;
164
- data->io = io;
165
-
166
- GetOpenFile(io, fptr);
167
- rb_io_check_readable(fptr);
168
- f = GetWriteFile(fptr);
169
-
170
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
171
- rxpc->ctxt = htmlCreateIOParserCtxt(NULL, NULL,
172
- (xmlInputReadCallback) ctxtRead,
173
- NULL, f, XML_CHAR_ENCODING_NONE);
174
- if (NIL_P(rxpc->ctxt))
175
- rb_sys_fail(0);
176
-
177
- return(data->io);
178
- }
179
- */
180
-
181
- void
182
- ruby_xml_html_parser_mark(ruby_xml_html_parser *rxp) {
183
- if (rxp == NULL) return;
184
- if (!NIL_P(rxp->ctxt))
185
- rb_gc_mark(rxp->ctxt);
186
-
187
- rb_gc_mark(LIBXML_STATE);
188
-
189
- switch(rxp->data_type) {
190
- case RUBY_LIBXML_SRC_TYPE_NULL:
191
- break;
192
- case RUBY_LIBXML_SRC_TYPE_FILE:
193
- if (!NIL_P(((rx_file_data *)rxp->data)->filename))
194
- rb_gc_mark(((rx_file_data *)rxp->data)->filename);
195
- break;
196
- case RUBY_LIBXML_SRC_TYPE_STRING:
197
- if (!NIL_P(((rx_string_data *)rxp->data)->str))
198
- rb_gc_mark(((rx_string_data *)rxp->data)->str);
199
- break;
200
- case RUBY_LIBXML_SRC_TYPE_IO:
201
- if (!NIL_P(((rx_io_data *)rxp->data)->io))
202
- rb_gc_mark(((rx_io_data *)rxp->data)->io);
203
- break;
204
- default:
205
- rb_fatal("unknown datatype: %d", rxp->data_type);
206
- }
207
- }
208
-
209
-
210
- /*
211
- * call-seq:
212
- * XML::HTMLParser.new -> parser
213
- *
214
- * Create a new parser instance with no pre-determined source.
215
- */
216
- VALUE
217
- ruby_xml_html_parser_new(VALUE class) {
218
- ruby_xml_html_parser *rxp;
219
-
220
- rxp = ALLOC(ruby_xml_html_parser);
221
- rxp->ctxt = Qnil;
222
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
223
- rxp->data = NULL;
224
- rxp->parsed = 0;
225
-
226
- return(Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
227
- ruby_xml_html_parser_free, rxp));
228
- }
229
-
230
-
231
- /*
232
- * call-seq:
233
- * XML::HTMLParser.file -> parser
234
- *
235
- * Create a new parser instance that will read the specified file.
236
- */
237
- /*
238
- VALUE
239
- ruby_xml_html_parser_new_file(VALUE class, VALUE filename) {
240
- VALUE obj;
241
- ruby_xml_html_parser *rxp;
242
- rx_file_data *data;
243
-
244
- obj = ruby_xml_html_parser_new(class);
245
- Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
246
-
247
- data = ALLOC(rx_file_data);
248
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
249
- rxp->data = data;
250
-
251
- ruby_xml_html_parser_filename_set(obj, filename);
252
-
253
- return(obj);
254
- }
255
- */
256
-
257
- /*
258
- * call-seq:
259
- * XML::HTMLParser.io -> parser
260
- *
261
- * Create a new parser instance that will read from the
262
- * specified IO object.
263
- */
264
- /*
265
- VALUE
266
- ruby_xml_html_parser_new_io(VALUE class, VALUE io) {
267
- VALUE obj;
268
- ruby_xml_html_parser *rxp;
269
- rx_io_data *data;
270
-
271
- obj = ruby_xml_html_parser_new(class);
272
- Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
273
-
274
- data = ALLOC(rx_io_data);
275
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
276
- rxp->data = data;
277
-
278
- ruby_xml_html_parser_io_set(obj, io);
279
-
280
- return(obj);
281
- }
282
- */
283
-
284
- /*
285
- * call-seq:
286
- * XML::HTMLParser.string -> parser
287
- *
288
- * Create a new parser instance that will parse the given
289
- * string.
290
- */
291
- VALUE
292
- ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
293
- VALUE obj;
294
- ruby_xml_html_parser *rxp;
295
- rx_string_data *data;
296
-
297
- obj = ruby_xml_html_parser_new(class);
298
- Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
299
-
300
- data = ALLOC(rx_string_data);
301
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
302
- rxp->data = data;
303
-
304
- ruby_xml_html_parser_str_set(obj, str);
305
-
306
- return(obj);
307
- }
308
-
309
-
310
- /*
311
- * call-seq:
312
- * parser.parse -> document
313
- *
314
- * Parse the input XML and create an XML::Document with
315
- * it's content. If an error occurs, XML::Parser::ParseError
316
- * is thrown.
317
- */
318
- VALUE
319
- ruby_xml_html_parser_parse(VALUE self) {
320
- ruby_xml_html_parser *rxp;
321
- ruby_xml_parser_context *rxpc;
322
- htmlDocPtr xdp;
323
- VALUE doc;
324
-
325
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
326
-
327
- switch (rxp->data_type) {
328
- case RUBY_LIBXML_SRC_TYPE_NULL:
329
- return(Qnil);
330
- case RUBY_LIBXML_SRC_TYPE_STRING:
331
- //case RUBY_LIBXML_SRC_TYPE_FILE:
332
- //case RUBY_LIBXML_SRC_TYPE_IO:
333
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
334
-
335
- /* don't check return values here, the HTML parser returns errors
336
- * but still allows the resulting tree to be used.
337
- */
338
- htmlParseDocument(rxpc->ctxt);
339
- xdp = rxpc->ctxt->myDoc;
340
- rxp->parsed = 1;
341
-
342
- doc = ruby_xml_document_wrap(xdp);
343
- break;
344
- default:
345
- rb_fatal("Unknown data type, %d", rxp->data_type);
346
- }
347
-
348
- return(doc);
349
- }
350
-
351
-
352
- /*
353
- * call-seq:
354
- * parser.context -> context
355
- *
356
- * Obtain the XML::Parser::Context associated with this
357
- * parser.
358
- */
359
- VALUE
360
- ruby_xml_html_parser_context_get(VALUE self) {
361
- ruby_xml_html_parser *rxp;
362
-
363
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
364
- if (rxp->ctxt == Qnil)
365
- return(Qnil);
366
- else
367
- return(rxp->ctxt);
368
- }
369
-
370
-
371
- /*
372
- * call-seq:
373
- * parser.string -> "string"
374
- *
375
- * Obtain the string this parser works with.
376
- */
377
- VALUE
378
- ruby_xml_html_parser_str_get(VALUE self) {
379
- ruby_xml_html_parser *rxp;
380
- rx_string_data *data;
381
-
382
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
383
- if (rxp->data == NULL || rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING)
384
- return(Qnil);
385
-
386
- data = (rx_string_data *)rxp->data;
387
- return(data->str);
388
- }
389
-
390
-
391
- /*
392
- * call-seq:
393
- * parser.string = "string"
394
- *
395
- * Set the string this parser works with.
396
- */
397
- VALUE
398
- ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
399
- ruby_xml_html_parser *rxp;
400
- ruby_xml_parser_context *rxpc;
401
- rx_string_data *data;
402
-
403
- Check_Type(str, T_STRING);
404
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
405
-
406
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
407
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
408
- data = ALLOC(rx_string_data);
409
- rxp->data = data;
410
- } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
411
- return(Qnil);
412
- }
413
-
414
- rxp->ctxt = ruby_xml_parser_context_new();
415
- data = (rx_string_data *)rxp->data;
416
- data->str = str;
417
-
418
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
419
- rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));
420
-
421
- return(data->str);
422
- }
423
-
424
-
425
- // Rdoc needs to know
426
- #ifdef RDOC_NEVER_DEFINED
427
- mLibXML = rb_define_module("LibXML");
428
- mXML = rb_define_module_under(mLibXML, "XML");
429
- #endif
430
-
431
- void
432
- ruby_init_html_parser(void) {
433
- cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
434
-
435
- rb_define_singleton_method(cXMLHTMLParser, "new", ruby_xml_html_parser_new, 0);
436
- rb_define_singleton_method(cXMLHTMLParser, "string", ruby_xml_html_parser_new_string, 1);
437
- /*
438
- rb_define_singleton_method(cXMLHTMLParser, "file", ruby_xml_html_parser_new_file, 1);
439
- rb_define_singleton_method(cXMLHTMLParser, "io", ruby_xml_html_parser_new_io, 1);
440
- rb_define_method(cXMLHTMLParser, "filename", ruby_xml_html_parser_filename_get, 0);
441
- rb_define_method(cXMLHTMLParser, "filename=", ruby_xml_html_parser_filename_set, 1);
442
- rb_define_method(cXMLHTMLParser, "io", ruby_xml_html_parser_io_get, 0);
443
- rb_define_method(cXMLHTMLParser, "io=", ruby_xml_html_parser_io_set, 1);
444
- */
445
- rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
446
- rb_define_method(cXMLHTMLParser, "context", ruby_xml_html_parser_context_get, 0);
447
- rb_define_method(cXMLHTMLParser, "string", ruby_xml_html_parser_str_get, 0);
448
- rb_define_method(cXMLHTMLParser, "string=", ruby_xml_html_parser_str_set, 1);
449
- }
1
+ /* $Id: ruby_xml_html_parser.c 461 2008-07-15 21:35:56Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #include "ruby_libxml.h"
6
+
7
+ VALUE cXMLHTMLParser;
8
+
9
+ //static int
10
+ //ctxtRead(FILE *f, char * buf, int len) {
11
+ // return(fread(buf, 1, len, f));
12
+ //}
13
+
14
+
15
+ /*
16
+ * call-seq:
17
+ * parser.filename -> "filename"
18
+ *
19
+ * Obtain the filename this parser will read from.
20
+ */
21
+ /*
22
+ VALUE
23
+ ruby_xml_html_parser_filename_get(VALUE self) {
24
+ ruby_xml_html_parser *rxp;
25
+ rx_file_data *data;
26
+
27
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
28
+ if (rxp->data == NULL)
29
+ return(Qnil);
30
+
31
+ if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE)
32
+ return(Qnil);
33
+
34
+ data = (rx_file_data *)rxp->data;
35
+ return(data->filename);
36
+ }
37
+ */
38
+
39
+ /*
40
+ * call-seq:
41
+ * parser.filename = "filename"
42
+ *
43
+ * Set the filename this parser will read from.
44
+ */
45
+ /*
46
+ VALUE
47
+ ruby_xml_html_parser_filename_set(VALUE self, VALUE filename) {
48
+ ruby_xml_html_parser *rxp;
49
+ ruby_xml_parser_context *rxpc;
50
+ rx_file_data *data;
51
+
52
+ Check_Type(filename, T_STRING);
53
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
54
+
55
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
56
+ if (rxp->data != NULL)
57
+ rb_fatal("crap, this should be null");
58
+
59
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
60
+ data = ALLOC(rx_file_data);
61
+ rxp->data = data;
62
+ } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE) {
63
+ return(Qnil);
64
+ }
65
+
66
+ rxp->ctxt = ruby_xml_parser_context_new();
67
+ data = (rx_file_data *)rxp->data;
68
+ data->filename = filename;
69
+
70
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
71
+ rxpc->ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename));
72
+ if (rxpc->ctxt == NULL)
73
+ rb_sys_fail(StringValuePtr(filename));
74
+
75
+ return(data->filename);
76
+ }
77
+ */
78
+
79
+ void
80
+ ruby_xml_html_parser_free(ruby_xml_html_parser *rxp) {
81
+ void *data;
82
+
83
+ switch(rxp->data_type) {
84
+ case RUBY_LIBXML_SRC_TYPE_NULL:
85
+ break;
86
+ case RUBY_LIBXML_SRC_TYPE_FILE:
87
+ data = (void *)(rx_file_data *)rxp->data;
88
+ ruby_xfree((rx_file_data *)data);
89
+ break;
90
+ case RUBY_LIBXML_SRC_TYPE_STRING:
91
+ data = (void *)(rx_string_data *)rxp->data;
92
+ ruby_xfree((rx_string_data *)data);
93
+ break;
94
+ case RUBY_LIBXML_SRC_TYPE_IO:
95
+ data = (void *)(rx_io_data *)rxp->data;
96
+ ruby_xfree((rx_io_data *)data);
97
+ break;
98
+ default:
99
+ rb_fatal("Unknown data type, %d", rxp->data_type);
100
+ }
101
+
102
+ ruby_xfree(rxp);
103
+ }
104
+
105
+
106
+ /*
107
+ * call-seq:
108
+ * parser.io -> IO
109
+ *
110
+ * Obtain the IO instance this parser works with.
111
+ */
112
+ /*
113
+ VALUE
114
+ ruby_xml_html_parser_io_get(VALUE self, VALUE io) {
115
+ ruby_xml_html_parser *rxp;
116
+ rx_io_data *data;
117
+
118
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
119
+
120
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL ||
121
+ rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO ||
122
+ rxp->data == NULL)
123
+ return(Qnil);
124
+
125
+ data = (rx_io_data *)rxp->data;
126
+
127
+ return(data->io);
128
+ }
129
+ */
130
+
131
+ /*
132
+ * call-seq:
133
+ * parser.io = IO
134
+ *
135
+ * Set the IO instance this parser works with.
136
+ */
137
+ /*
138
+ VALUE
139
+ ruby_xml_html_parser_io_set(VALUE self, VALUE io) {
140
+ ruby_xml_html_parser *rxp;
141
+ ruby_xml_parser_context *rxpc;
142
+ rx_io_data *data;
143
+ OpenFile *fptr;
144
+ FILE *f;
145
+
146
+ if (!rb_obj_is_kind_of(io, rb_cIO))
147
+ rb_raise(rb_eTypeError, "need an IO object");
148
+
149
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
150
+
151
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
152
+ if (rxp->data != NULL)
153
+ rb_fatal("crap, this should be null");
154
+
155
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
156
+ data = ALLOC(rx_io_data);
157
+ rxp->data = data;
158
+ } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO) {
159
+ return(Qnil);
160
+ }
161
+
162
+ rxp->ctxt = ruby_xml_parser_context_new();
163
+ data = (rx_io_data *)rxp->data;
164
+ data->io = io;
165
+
166
+ GetOpenFile(io, fptr);
167
+ rb_io_check_readable(fptr);
168
+ f = GetWriteFile(fptr);
169
+
170
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
171
+ rxpc->ctxt = htmlCreateIOParserCtxt(NULL, NULL,
172
+ (xmlInputReadCallback) ctxtRead,
173
+ NULL, f, XML_CHAR_ENCODING_NONE);
174
+ if (NIL_P(rxpc->ctxt))
175
+ rb_sys_fail(0);
176
+
177
+ return(data->io);
178
+ }
179
+ */
180
+
181
+ void
182
+ ruby_xml_html_parser_mark(ruby_xml_html_parser *rxp) {
183
+ if (rxp == NULL) return;
184
+ if (!NIL_P(rxp->ctxt))
185
+ rb_gc_mark(rxp->ctxt);
186
+
187
+ rb_gc_mark(LIBXML_STATE);
188
+
189
+ switch(rxp->data_type) {
190
+ case RUBY_LIBXML_SRC_TYPE_NULL:
191
+ break;
192
+ case RUBY_LIBXML_SRC_TYPE_FILE:
193
+ if (!NIL_P(((rx_file_data *)rxp->data)->filename))
194
+ rb_gc_mark(((rx_file_data *)rxp->data)->filename);
195
+ break;
196
+ case RUBY_LIBXML_SRC_TYPE_STRING:
197
+ if (!NIL_P(((rx_string_data *)rxp->data)->str))
198
+ rb_gc_mark(((rx_string_data *)rxp->data)->str);
199
+ break;
200
+ case RUBY_LIBXML_SRC_TYPE_IO:
201
+ if (!NIL_P(((rx_io_data *)rxp->data)->io))
202
+ rb_gc_mark(((rx_io_data *)rxp->data)->io);
203
+ break;
204
+ default:
205
+ rb_fatal("unknown datatype: %d", rxp->data_type);
206
+ }
207
+ }
208
+
209
+
210
+ /*
211
+ * call-seq:
212
+ * XML::HTMLParser.new -> parser
213
+ *
214
+ * Create a new parser instance with no pre-determined source.
215
+ */
216
+ VALUE
217
+ ruby_xml_html_parser_new(VALUE class) {
218
+ ruby_xml_html_parser *rxp;
219
+
220
+ rxp = ALLOC(ruby_xml_html_parser);
221
+ rxp->ctxt = Qnil;
222
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
223
+ rxp->data = NULL;
224
+ rxp->parsed = 0;
225
+
226
+ return(Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
227
+ ruby_xml_html_parser_free, rxp));
228
+ }
229
+
230
+
231
+ /*
232
+ * call-seq:
233
+ * XML::HTMLParser.file -> parser
234
+ *
235
+ * Create a new parser instance that will read the specified file.
236
+ */
237
+ /*
238
+ VALUE
239
+ ruby_xml_html_parser_new_file(VALUE class, VALUE filename) {
240
+ VALUE obj;
241
+ ruby_xml_html_parser *rxp;
242
+ rx_file_data *data;
243
+
244
+ obj = ruby_xml_html_parser_new(class);
245
+ Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
246
+
247
+ data = ALLOC(rx_file_data);
248
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
249
+ rxp->data = data;
250
+
251
+ ruby_xml_html_parser_filename_set(obj, filename);
252
+
253
+ return(obj);
254
+ }
255
+ */
256
+
257
+ /*
258
+ * call-seq:
259
+ * XML::HTMLParser.io -> parser
260
+ *
261
+ * Create a new parser instance that will read from the
262
+ * specified IO object.
263
+ */
264
+ /*
265
+ VALUE
266
+ ruby_xml_html_parser_new_io(VALUE class, VALUE io) {
267
+ VALUE obj;
268
+ ruby_xml_html_parser *rxp;
269
+ rx_io_data *data;
270
+
271
+ obj = ruby_xml_html_parser_new(class);
272
+ Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
273
+
274
+ data = ALLOC(rx_io_data);
275
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
276
+ rxp->data = data;
277
+
278
+ ruby_xml_html_parser_io_set(obj, io);
279
+
280
+ return(obj);
281
+ }
282
+ */
283
+
284
+ /*
285
+ * call-seq:
286
+ * XML::HTMLParser.string -> parser
287
+ *
288
+ * Create a new parser instance that will parse the given
289
+ * string.
290
+ */
291
+ VALUE
292
+ ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
293
+ VALUE obj;
294
+ ruby_xml_html_parser *rxp;
295
+ rx_string_data *data;
296
+
297
+ obj = ruby_xml_html_parser_new(class);
298
+ Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
299
+
300
+ data = ALLOC(rx_string_data);
301
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
302
+ rxp->data = data;
303
+
304
+ ruby_xml_html_parser_str_set(obj, str);
305
+
306
+ return(obj);
307
+ }
308
+
309
+
310
+ /*
311
+ * call-seq:
312
+ * parser.parse -> document
313
+ *
314
+ * Parse the input XML and create an XML::Document with
315
+ * it's content. If an error occurs, XML::Parser::ParseError
316
+ * is thrown.
317
+ */
318
+ VALUE
319
+ ruby_xml_html_parser_parse(VALUE self) {
320
+ ruby_xml_html_parser *rxp;
321
+ ruby_xml_parser_context *rxpc;
322
+ htmlDocPtr xdp;
323
+ VALUE doc;
324
+
325
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
326
+
327
+ switch (rxp->data_type) {
328
+ case RUBY_LIBXML_SRC_TYPE_NULL:
329
+ return(Qnil);
330
+ case RUBY_LIBXML_SRC_TYPE_STRING:
331
+ //case RUBY_LIBXML_SRC_TYPE_FILE:
332
+ //case RUBY_LIBXML_SRC_TYPE_IO:
333
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
334
+
335
+ /* don't check return values here, the HTML parser returns errors
336
+ * but still allows the resulting tree to be used.
337
+ */
338
+ htmlParseDocument(rxpc->ctxt);
339
+ xdp = rxpc->ctxt->myDoc;
340
+ rxp->parsed = 1;
341
+
342
+ doc = ruby_xml_document_wrap(xdp);
343
+ break;
344
+ default:
345
+ rb_fatal("Unknown data type, %d", rxp->data_type);
346
+ }
347
+
348
+ return(doc);
349
+ }
350
+
351
+
352
+ /*
353
+ * call-seq:
354
+ * parser.context -> context
355
+ *
356
+ * Obtain the XML::Parser::Context associated with this
357
+ * parser.
358
+ */
359
+ VALUE
360
+ ruby_xml_html_parser_context_get(VALUE self) {
361
+ ruby_xml_html_parser *rxp;
362
+
363
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
364
+ if (rxp->ctxt == Qnil)
365
+ return(Qnil);
366
+ else
367
+ return(rxp->ctxt);
368
+ }
369
+
370
+
371
+ /*
372
+ * call-seq:
373
+ * parser.string -> "string"
374
+ *
375
+ * Obtain the string this parser works with.
376
+ */
377
+ VALUE
378
+ ruby_xml_html_parser_str_get(VALUE self) {
379
+ ruby_xml_html_parser *rxp;
380
+ rx_string_data *data;
381
+
382
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
383
+ if (rxp->data == NULL || rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING)
384
+ return(Qnil);
385
+
386
+ data = (rx_string_data *)rxp->data;
387
+ return(data->str);
388
+ }
389
+
390
+
391
+ /*
392
+ * call-seq:
393
+ * parser.string = "string"
394
+ *
395
+ * Set the string this parser works with.
396
+ */
397
+ VALUE
398
+ ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
399
+ ruby_xml_html_parser *rxp;
400
+ ruby_xml_parser_context *rxpc;
401
+ rx_string_data *data;
402
+
403
+ Check_Type(str, T_STRING);
404
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
405
+
406
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
407
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
408
+ data = ALLOC(rx_string_data);
409
+ rxp->data = data;
410
+ } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
411
+ return(Qnil);
412
+ }
413
+
414
+ rxp->ctxt = ruby_xml_parser_context_new();
415
+ data = (rx_string_data *)rxp->data;
416
+ data->str = str;
417
+
418
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
419
+ rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));
420
+
421
+ return(data->str);
422
+ }
423
+
424
+
425
+ // Rdoc needs to know
426
+ #ifdef RDOC_NEVER_DEFINED
427
+ mLibXML = rb_define_module("LibXML");
428
+ mXML = rb_define_module_under(mLibXML, "XML");
429
+ #endif
430
+
431
+ void
432
+ ruby_init_html_parser(void) {
433
+ cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
434
+
435
+ rb_define_singleton_method(cXMLHTMLParser, "new", ruby_xml_html_parser_new, 0);
436
+ rb_define_singleton_method(cXMLHTMLParser, "string", ruby_xml_html_parser_new_string, 1);
437
+ /*
438
+ rb_define_singleton_method(cXMLHTMLParser, "file", ruby_xml_html_parser_new_file, 1);
439
+ rb_define_singleton_method(cXMLHTMLParser, "io", ruby_xml_html_parser_new_io, 1);
440
+ rb_define_method(cXMLHTMLParser, "filename", ruby_xml_html_parser_filename_get, 0);
441
+ rb_define_method(cXMLHTMLParser, "filename=", ruby_xml_html_parser_filename_set, 1);
442
+ rb_define_method(cXMLHTMLParser, "io", ruby_xml_html_parser_io_get, 0);
443
+ rb_define_method(cXMLHTMLParser, "io=", ruby_xml_html_parser_io_set, 1);
444
+ */
445
+ rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
446
+ rb_define_method(cXMLHTMLParser, "context", ruby_xml_html_parser_context_get, 0);
447
+ rb_define_method(cXMLHTMLParser, "string", ruby_xml_html_parser_str_get, 0);
448
+ rb_define_method(cXMLHTMLParser, "string=", ruby_xml_html_parser_str_set, 1);
449
+ }