libxml-ruby 0.3.8.4 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +6 -0
- data/LICENSE +1 -1
- data/README +1 -1
- data/Rakefile +8 -5
- data/TODO +1 -1
- data/ext/xml/extconf.rb +4 -5
- data/ext/xml/libxml.c +5 -2
- data/ext/xml/libxml.h +16 -7
- data/ext/xml/libxml.rb +3 -3
- data/ext/xml/ruby_xml_attr.c +118 -99
- data/ext/xml/ruby_xml_attr.h +4 -7
- data/ext/xml/ruby_xml_document.c +131 -170
- data/ext/xml/ruby_xml_document.h +5 -9
- data/ext/xml/ruby_xml_html_parser.c +453 -0
- data/ext/xml/ruby_xml_html_parser.h +29 -0
- data/ext/xml/ruby_xml_node.c +219 -253
- data/ext/xml/ruby_xml_node.h +4 -7
- data/ext/xml/ruby_xml_node_set.c +6 -6
- data/ext/xml/ruby_xml_node_set.h +1 -1
- data/ext/xml/ruby_xml_ns.c +1 -1
- data/ext/xml/ruby_xml_ns.h +1 -1
- data/ext/xml/ruby_xml_parser.c +5 -8
- data/ext/xml/ruby_xml_parser.h +1 -1
- data/ext/xml/ruby_xml_parser_context.c +3 -4
- data/ext/xml/ruby_xml_parser_context.h +1 -1
- data/ext/xml/ruby_xml_reader.c +893 -0
- data/ext/xml/ruby_xml_reader.h +14 -0
- data/ext/xml/ruby_xml_sax_parser.c +255 -204
- data/ext/xml/ruby_xml_sax_parser.h +6 -2
- data/ext/xml/ruby_xml_tree.c +1 -1
- data/ext/xml/ruby_xml_tree.h +1 -1
- data/ext/xml/ruby_xml_xinclude.c +1 -1
- data/ext/xml/ruby_xml_xinclude.h +1 -1
- data/ext/xml/ruby_xml_xpath.c +3 -2
- data/ext/xml/ruby_xml_xpath.h +1 -1
- data/ext/xml/ruby_xml_xpath_context.c +4 -4
- data/ext/xml/ruby_xml_xpath_context.h +1 -1
- data/ext/xml/ruby_xml_xpointer.c +10 -4
- data/ext/xml/ruby_xml_xpointer.h +1 -1
- data/ext/xml/ruby_xml_xpointer_context.c +1 -1
- data/ext/xml/ruby_xml_xpointer_context.h +1 -1
- data/ext/xml/sax_parser_callbacks.inc +55 -54
- data/tests/model/rubynet_project +1 -1
- data/tests/model/simple.xml +7 -0
- data/tests/tc_xml_document.rb +1 -1
- data/tests/tc_xml_document_write.rb +1 -1
- data/tests/tc_xml_document_write2.rb +1 -1
- data/tests/tc_xml_document_write3.rb +1 -1
- data/tests/tc_xml_html_parser.rb +60 -0
- data/tests/tc_xml_node.rb +1 -1
- data/tests/tc_xml_node2.rb +1 -1
- data/tests/tc_xml_node3.rb +1 -1
- data/tests/tc_xml_node4.rb +8 -5
- data/tests/tc_xml_node5.rb +1 -1
- data/tests/tc_xml_node6.rb +1 -1
- data/tests/tc_xml_node7.rb +1 -1
- data/tests/tc_xml_node_set.rb +1 -1
- data/tests/tc_xml_node_set2.rb +1 -1
- data/tests/tc_xml_node_xlink.rb +1 -1
- data/tests/tc_xml_parser.rb +5 -1
- data/tests/tc_xml_parser2.rb +1 -1
- data/tests/tc_xml_parser3.rb +1 -1
- data/tests/tc_xml_parser4.rb +1 -1
- data/tests/tc_xml_parser5.rb +1 -1
- data/tests/tc_xml_parser6.rb +1 -1
- data/tests/tc_xml_parser7.rb +1 -1
- data/tests/tc_xml_parser8.rb +1 -1
- data/tests/tc_xml_parser_context.rb +1 -1
- data/tests/tc_xml_reader.rb +101 -0
- data/tests/tc_xml_sax_parser.rb +95 -0
- data/tests/tc_xml_xinclude.rb +1 -1
- data/tests/tc_xml_xpath.rb +1 -1
- data/tests/tc_xml_xpointer.rb +1 -1
- metadata +79 -73
- data/ext/xml/ruby_xml_attribute.c +0 -224
- data/ext/xml/ruby_xml_attribute.h +0 -21
- data/tests/test_xml_sax_parser.rb +0 -64
|
@@ -0,0 +1,453 @@
|
|
|
1
|
+
/* $Id: ruby_xml_html_parser.c 138 2007-08-29 18:00:35Z danj $ */
|
|
2
|
+
|
|
3
|
+
/* Please see the LICENSE file for copyright and distribution information */
|
|
4
|
+
|
|
5
|
+
#include "libxml.h"
|
|
6
|
+
|
|
7
|
+
VALUE cXMLHTMLParser;
|
|
8
|
+
|
|
9
|
+
//static int
|
|
10
|
+
//ctxtRead(FILE *f, char * buf, int len) {
|
|
11
|
+
// return(fread(buf, 1, len, f));
|
|
12
|
+
//}
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
/*
|
|
16
|
+
* call-seq:
|
|
17
|
+
* parser.filename => "filename"
|
|
18
|
+
*
|
|
19
|
+
* Obtain the filename this parser will read from.
|
|
20
|
+
*/
|
|
21
|
+
/*
|
|
22
|
+
VALUE
|
|
23
|
+
ruby_xml_html_parser_filename_get(VALUE self) {
|
|
24
|
+
ruby_xml_html_parser *rxp;
|
|
25
|
+
rx_file_data *data;
|
|
26
|
+
|
|
27
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
|
28
|
+
if (rxp->data == NULL)
|
|
29
|
+
return(Qnil);
|
|
30
|
+
|
|
31
|
+
if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE)
|
|
32
|
+
return(Qnil);
|
|
33
|
+
|
|
34
|
+
data = (rx_file_data *)rxp->data;
|
|
35
|
+
return(data->filename);
|
|
36
|
+
}
|
|
37
|
+
*/
|
|
38
|
+
|
|
39
|
+
/*
|
|
40
|
+
* call-seq:
|
|
41
|
+
* parser.filename = "filename"
|
|
42
|
+
*
|
|
43
|
+
* Set the filename this parser will read from.
|
|
44
|
+
*/
|
|
45
|
+
/*
|
|
46
|
+
VALUE
|
|
47
|
+
ruby_xml_html_parser_filename_set(VALUE self, VALUE filename) {
|
|
48
|
+
ruby_xml_html_parser *rxp;
|
|
49
|
+
ruby_xml_parser_context *rxpc;
|
|
50
|
+
rx_file_data *data;
|
|
51
|
+
|
|
52
|
+
Check_Type(filename, T_STRING);
|
|
53
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
|
54
|
+
|
|
55
|
+
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
|
|
56
|
+
if (rxp->data != NULL)
|
|
57
|
+
rb_fatal("crap, this should be null");
|
|
58
|
+
|
|
59
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
|
|
60
|
+
data = ALLOC(rx_file_data);
|
|
61
|
+
rxp->data = data;
|
|
62
|
+
} else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE) {
|
|
63
|
+
return(Qnil);
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
rxp->ctxt = ruby_xml_parser_context_new3();
|
|
67
|
+
data = (rx_file_data *)rxp->data;
|
|
68
|
+
data->filename = filename;
|
|
69
|
+
|
|
70
|
+
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
|
71
|
+
rxpc->ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename));
|
|
72
|
+
if (rxpc->ctxt == NULL)
|
|
73
|
+
rb_sys_fail(StringValuePtr(filename));
|
|
74
|
+
|
|
75
|
+
return(data->filename);
|
|
76
|
+
}
|
|
77
|
+
*/
|
|
78
|
+
|
|
79
|
+
void
|
|
80
|
+
ruby_xml_html_parser_free(ruby_xml_html_parser *rxp) {
|
|
81
|
+
void *data;
|
|
82
|
+
|
|
83
|
+
ruby_xml_parser_count--;
|
|
84
|
+
if (ruby_xml_parser_count == 0)
|
|
85
|
+
xmlCleanupParser();
|
|
86
|
+
|
|
87
|
+
switch(rxp->data_type) {
|
|
88
|
+
case RUBY_LIBXML_SRC_TYPE_NULL:
|
|
89
|
+
break;
|
|
90
|
+
case RUBY_LIBXML_SRC_TYPE_FILE:
|
|
91
|
+
data = (void *)(rx_file_data *)rxp->data;
|
|
92
|
+
free((rx_file_data *)data);
|
|
93
|
+
break;
|
|
94
|
+
case RUBY_LIBXML_SRC_TYPE_STRING:
|
|
95
|
+
data = (void *)(rx_string_data *)rxp->data;
|
|
96
|
+
free((rx_string_data *)data);
|
|
97
|
+
break;
|
|
98
|
+
case RUBY_LIBXML_SRC_TYPE_IO:
|
|
99
|
+
data = (void *)(rx_io_data *)rxp->data;
|
|
100
|
+
free((rx_io_data *)data);
|
|
101
|
+
break;
|
|
102
|
+
default:
|
|
103
|
+
rb_fatal("Unknown data type, %d", rxp->data_type);
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
free(rxp);
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
/*
|
|
111
|
+
* call-seq:
|
|
112
|
+
* parser.io => IO
|
|
113
|
+
*
|
|
114
|
+
* Obtain the IO instance this parser works with.
|
|
115
|
+
*/
|
|
116
|
+
/*
|
|
117
|
+
VALUE
|
|
118
|
+
ruby_xml_html_parser_io_get(VALUE self, VALUE io) {
|
|
119
|
+
ruby_xml_html_parser *rxp;
|
|
120
|
+
rx_io_data *data;
|
|
121
|
+
|
|
122
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
|
123
|
+
|
|
124
|
+
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL ||
|
|
125
|
+
rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO ||
|
|
126
|
+
rxp->data == NULL)
|
|
127
|
+
return(Qnil);
|
|
128
|
+
|
|
129
|
+
data = (rx_io_data *)rxp->data;
|
|
130
|
+
|
|
131
|
+
return(data->io);
|
|
132
|
+
}
|
|
133
|
+
*/
|
|
134
|
+
|
|
135
|
+
/*
|
|
136
|
+
* call-seq:
|
|
137
|
+
* parser.io = IO
|
|
138
|
+
*
|
|
139
|
+
* Set the IO instance this parser works with.
|
|
140
|
+
*/
|
|
141
|
+
/*
|
|
142
|
+
VALUE
|
|
143
|
+
ruby_xml_html_parser_io_set(VALUE self, VALUE io) {
|
|
144
|
+
ruby_xml_html_parser *rxp;
|
|
145
|
+
ruby_xml_parser_context *rxpc;
|
|
146
|
+
rx_io_data *data;
|
|
147
|
+
OpenFile *fptr;
|
|
148
|
+
FILE *f;
|
|
149
|
+
|
|
150
|
+
if (!rb_obj_is_kind_of(io, rb_cIO))
|
|
151
|
+
rb_raise(rb_eTypeError, "need an IO object");
|
|
152
|
+
|
|
153
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
|
154
|
+
|
|
155
|
+
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
|
|
156
|
+
if (rxp->data != NULL)
|
|
157
|
+
rb_fatal("crap, this should be null");
|
|
158
|
+
|
|
159
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
|
|
160
|
+
data = ALLOC(rx_io_data);
|
|
161
|
+
rxp->data = data;
|
|
162
|
+
} else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO) {
|
|
163
|
+
return(Qnil);
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
rxp->ctxt = ruby_xml_parser_context_new3();
|
|
167
|
+
data = (rx_io_data *)rxp->data;
|
|
168
|
+
data->io = io;
|
|
169
|
+
|
|
170
|
+
GetOpenFile(io, fptr);
|
|
171
|
+
rb_io_check_readable(fptr);
|
|
172
|
+
f = GetWriteFile(fptr);
|
|
173
|
+
|
|
174
|
+
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
|
175
|
+
rxpc->ctxt = htmlCreateIOParserCtxt(NULL, NULL,
|
|
176
|
+
(xmlInputReadCallback) ctxtRead,
|
|
177
|
+
NULL, f, XML_CHAR_ENCODING_NONE);
|
|
178
|
+
if (NIL_P(rxpc->ctxt))
|
|
179
|
+
rb_sys_fail(0);
|
|
180
|
+
|
|
181
|
+
return(data->io);
|
|
182
|
+
}
|
|
183
|
+
*/
|
|
184
|
+
|
|
185
|
+
void
|
|
186
|
+
ruby_xml_html_parser_mark(ruby_xml_html_parser *rxp) {
|
|
187
|
+
if (rxp == NULL) return;
|
|
188
|
+
if (!NIL_P(rxp->ctxt)) rb_gc_mark(rxp->ctxt);
|
|
189
|
+
|
|
190
|
+
switch(rxp->data_type) {
|
|
191
|
+
case RUBY_LIBXML_SRC_TYPE_NULL:
|
|
192
|
+
break;
|
|
193
|
+
case RUBY_LIBXML_SRC_TYPE_FILE:
|
|
194
|
+
if (!NIL_P(((rx_file_data *)rxp->data)->filename))
|
|
195
|
+
rb_gc_mark(((rx_file_data *)rxp->data)->filename);
|
|
196
|
+
break;
|
|
197
|
+
case RUBY_LIBXML_SRC_TYPE_STRING:
|
|
198
|
+
if (!NIL_P(((rx_string_data *)rxp->data)->str))
|
|
199
|
+
rb_gc_mark(((rx_string_data *)rxp->data)->str);
|
|
200
|
+
break;
|
|
201
|
+
case RUBY_LIBXML_SRC_TYPE_IO:
|
|
202
|
+
if (!NIL_P(((rx_io_data *)rxp->data)->io))
|
|
203
|
+
rb_gc_mark(((rx_io_data *)rxp->data)->io);
|
|
204
|
+
break;
|
|
205
|
+
default:
|
|
206
|
+
rb_fatal("unknown datatype: %d", rxp->data_type);
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
/*
|
|
212
|
+
* call-seq:
|
|
213
|
+
* XML::HTMLParser.new => parser
|
|
214
|
+
*
|
|
215
|
+
* Create a new parser instance with no pre-determined source.
|
|
216
|
+
*/
|
|
217
|
+
VALUE
|
|
218
|
+
ruby_xml_html_parser_new(VALUE class) {
|
|
219
|
+
ruby_xml_html_parser *rxp;
|
|
220
|
+
|
|
221
|
+
ruby_xml_parser_count++;
|
|
222
|
+
rxp = ALLOC(ruby_xml_html_parser);
|
|
223
|
+
rxp->ctxt = Qnil;
|
|
224
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
|
|
225
|
+
rxp->data = NULL;
|
|
226
|
+
rxp->parsed = 0;
|
|
227
|
+
|
|
228
|
+
return(Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
|
|
229
|
+
ruby_xml_html_parser_free, rxp));
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
/*
|
|
234
|
+
* call-seq:
|
|
235
|
+
* XML::HTMLParser.file => parser
|
|
236
|
+
*
|
|
237
|
+
* Create a new parser instance that will read the specified file.
|
|
238
|
+
*/
|
|
239
|
+
/*
|
|
240
|
+
VALUE
|
|
241
|
+
ruby_xml_html_parser_new_file(VALUE class, VALUE filename) {
|
|
242
|
+
VALUE obj;
|
|
243
|
+
ruby_xml_html_parser *rxp;
|
|
244
|
+
rx_file_data *data;
|
|
245
|
+
|
|
246
|
+
obj = ruby_xml_html_parser_new(class);
|
|
247
|
+
Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
|
|
248
|
+
|
|
249
|
+
data = ALLOC(rx_file_data);
|
|
250
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
|
|
251
|
+
rxp->data = data;
|
|
252
|
+
|
|
253
|
+
ruby_xml_html_parser_filename_set(obj, filename);
|
|
254
|
+
|
|
255
|
+
return(obj);
|
|
256
|
+
}
|
|
257
|
+
*/
|
|
258
|
+
|
|
259
|
+
/*
|
|
260
|
+
* call-seq:
|
|
261
|
+
* XML::HTMLParser.io => parser
|
|
262
|
+
*
|
|
263
|
+
* Create a new parser instance that will read from the
|
|
264
|
+
* specified IO object.
|
|
265
|
+
*/
|
|
266
|
+
/*
|
|
267
|
+
VALUE
|
|
268
|
+
ruby_xml_html_parser_new_io(VALUE class, VALUE io) {
|
|
269
|
+
VALUE obj;
|
|
270
|
+
ruby_xml_html_parser *rxp;
|
|
271
|
+
rx_io_data *data;
|
|
272
|
+
|
|
273
|
+
obj = ruby_xml_html_parser_new(class);
|
|
274
|
+
Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
|
|
275
|
+
|
|
276
|
+
data = ALLOC(rx_io_data);
|
|
277
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
|
|
278
|
+
rxp->data = data;
|
|
279
|
+
|
|
280
|
+
ruby_xml_html_parser_io_set(obj, io);
|
|
281
|
+
|
|
282
|
+
return(obj);
|
|
283
|
+
}
|
|
284
|
+
*/
|
|
285
|
+
|
|
286
|
+
/*
|
|
287
|
+
* call-seq:
|
|
288
|
+
* XML::HTMLParser.string => parser
|
|
289
|
+
*
|
|
290
|
+
* Create a new parser instance that will parse the given
|
|
291
|
+
* string.
|
|
292
|
+
*/
|
|
293
|
+
VALUE
|
|
294
|
+
ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
|
|
295
|
+
VALUE obj;
|
|
296
|
+
ruby_xml_html_parser *rxp;
|
|
297
|
+
rx_string_data *data;
|
|
298
|
+
|
|
299
|
+
obj = ruby_xml_html_parser_new(class);
|
|
300
|
+
Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
|
|
301
|
+
|
|
302
|
+
data = ALLOC(rx_string_data);
|
|
303
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
|
|
304
|
+
rxp->data = data;
|
|
305
|
+
|
|
306
|
+
ruby_xml_html_parser_str_set(obj, str);
|
|
307
|
+
|
|
308
|
+
return(obj);
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
/*
|
|
313
|
+
* call-seq:
|
|
314
|
+
* parser.parse => document
|
|
315
|
+
*
|
|
316
|
+
* Parse the input XML and create an XML::Document with
|
|
317
|
+
* it's content. If an error occurs, XML::Parser::ParseError
|
|
318
|
+
* is thrown.
|
|
319
|
+
*/
|
|
320
|
+
VALUE
|
|
321
|
+
ruby_xml_html_parser_parse(VALUE self) {
|
|
322
|
+
ruby_xml_document_t *rxd;
|
|
323
|
+
ruby_xml_html_parser *rxp;
|
|
324
|
+
ruby_xml_parser_context *rxpc;
|
|
325
|
+
htmlDocPtr xdp;
|
|
326
|
+
VALUE doc;
|
|
327
|
+
|
|
328
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
|
329
|
+
|
|
330
|
+
switch (rxp->data_type) {
|
|
331
|
+
case RUBY_LIBXML_SRC_TYPE_NULL:
|
|
332
|
+
return(Qnil);
|
|
333
|
+
case RUBY_LIBXML_SRC_TYPE_STRING:
|
|
334
|
+
//case RUBY_LIBXML_SRC_TYPE_FILE:
|
|
335
|
+
//case RUBY_LIBXML_SRC_TYPE_IO:
|
|
336
|
+
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
|
337
|
+
|
|
338
|
+
/* don't check return values here, the HTML parser returns errors
|
|
339
|
+
* but still allows the resulting tree to be used.
|
|
340
|
+
*/
|
|
341
|
+
htmlParseDocument(rxpc->ctxt);
|
|
342
|
+
xdp = rxpc->ctxt->myDoc;
|
|
343
|
+
rxp->parsed = 1;
|
|
344
|
+
|
|
345
|
+
doc = ruby_xml_document_wrap(cXMLDocument, xdp);
|
|
346
|
+
break;
|
|
347
|
+
default:
|
|
348
|
+
rb_fatal("Unknown data type, %d", rxp->data_type);
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
return(doc);
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
/*
|
|
356
|
+
* call-seq:
|
|
357
|
+
* parser.context => context
|
|
358
|
+
*
|
|
359
|
+
* Obtain the XML::Parser::Context associated with this
|
|
360
|
+
* parser.
|
|
361
|
+
*/
|
|
362
|
+
VALUE
|
|
363
|
+
ruby_xml_html_parser_context_get(VALUE self) {
|
|
364
|
+
ruby_xml_html_parser *rxp;
|
|
365
|
+
|
|
366
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
|
367
|
+
if (rxp->ctxt == Qnil)
|
|
368
|
+
return(Qnil);
|
|
369
|
+
else
|
|
370
|
+
return(rxp->ctxt);
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
/*
|
|
375
|
+
* call-seq:
|
|
376
|
+
* parser.string => "string"
|
|
377
|
+
*
|
|
378
|
+
* Obtain the string this parser works with.
|
|
379
|
+
*/
|
|
380
|
+
VALUE
|
|
381
|
+
ruby_xml_html_parser_str_get(VALUE self) {
|
|
382
|
+
ruby_xml_html_parser *rxp;
|
|
383
|
+
rx_string_data *data;
|
|
384
|
+
|
|
385
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
|
386
|
+
if (rxp->data == NULL || rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING)
|
|
387
|
+
return(Qnil);
|
|
388
|
+
|
|
389
|
+
data = (rx_string_data *)rxp->data;
|
|
390
|
+
return(data->str);
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
/*
|
|
395
|
+
* call-seq:
|
|
396
|
+
* parser.string = "string"
|
|
397
|
+
*
|
|
398
|
+
* Set the string this parser works with.
|
|
399
|
+
*/
|
|
400
|
+
VALUE
|
|
401
|
+
ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
|
|
402
|
+
ruby_xml_html_parser *rxp;
|
|
403
|
+
ruby_xml_parser_context *rxpc;
|
|
404
|
+
rx_string_data *data;
|
|
405
|
+
|
|
406
|
+
Check_Type(str, T_STRING);
|
|
407
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
|
408
|
+
|
|
409
|
+
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
|
|
410
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
|
|
411
|
+
data = ALLOC(rx_string_data);
|
|
412
|
+
rxp->data = data;
|
|
413
|
+
} else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
|
|
414
|
+
return(Qnil);
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
rxp->ctxt = ruby_xml_parser_context_new3();
|
|
418
|
+
data = (rx_string_data *)rxp->data;
|
|
419
|
+
data->str = str;
|
|
420
|
+
|
|
421
|
+
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
|
422
|
+
rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));
|
|
423
|
+
|
|
424
|
+
return(data->str);
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
// Rdoc needs to know
|
|
429
|
+
#ifdef RDOC_NEVER_DEFINED
|
|
430
|
+
mXML = rb_define_module("XML");
|
|
431
|
+
#endif
|
|
432
|
+
|
|
433
|
+
void
|
|
434
|
+
ruby_init_html_parser(void) {
|
|
435
|
+
cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
|
|
436
|
+
|
|
437
|
+
/*
|
|
438
|
+
rb_define_singleton_method(cXMLHTMLParser, "file", ruby_xml_html_parser_new_file, 1);
|
|
439
|
+
rb_define_singleton_method(cXMLHTMLParser, "io", ruby_xml_html_parser_new_io, 1);
|
|
440
|
+
*/
|
|
441
|
+
rb_define_singleton_method(cXMLHTMLParser, "new", ruby_xml_html_parser_new, 0);
|
|
442
|
+
rb_define_singleton_method(cXMLHTMLParser, "string", ruby_xml_html_parser_new_string, 1);
|
|
443
|
+
/*
|
|
444
|
+
rb_define_method(cXMLHTMLParser, "filename", ruby_xml_html_parser_filename_get, 0);
|
|
445
|
+
rb_define_method(cXMLHTMLParser, "filename=", ruby_xml_html_parser_filename_set, 1);
|
|
446
|
+
rb_define_method(cXMLHTMLParser, "io", ruby_xml_html_parser_io_get, 0);
|
|
447
|
+
rb_define_method(cXMLHTMLParser, "io=", ruby_xml_html_parser_io_set, 1);
|
|
448
|
+
*/
|
|
449
|
+
rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
|
|
450
|
+
rb_define_method(cXMLHTMLParser, "parser_context", ruby_xml_html_parser_context_get, 0);
|
|
451
|
+
rb_define_method(cXMLHTMLParser, "string", ruby_xml_html_parser_str_get, 0);
|
|
452
|
+
rb_define_method(cXMLHTMLParser, "string=", ruby_xml_html_parser_str_set, 1);
|
|
453
|
+
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/* $Id: ruby_xml_html_parser.h 111 2006-11-20 01:39:14Z roscopeco $ */
|
|
2
|
+
|
|
3
|
+
/* Please see the LICENSE file for copyright and distribution information */
|
|
4
|
+
|
|
5
|
+
#ifndef __RUBY_XML_HTML_PARSER__
|
|
6
|
+
#define __RUBY_XML_HTML_PARSER__
|
|
7
|
+
|
|
8
|
+
extern int ruby_xml_html_parser_count;
|
|
9
|
+
extern VALUE cXMLHTMLParser;
|
|
10
|
+
|
|
11
|
+
typedef struct ruby_xml_html_parser {
|
|
12
|
+
VALUE ctxt;
|
|
13
|
+
int parsed;
|
|
14
|
+
void *data;
|
|
15
|
+
int data_type;
|
|
16
|
+
} ruby_xml_html_parser;
|
|
17
|
+
|
|
18
|
+
/*
|
|
19
|
+
* VALUE ruby_xml_html_parser_filename_get(VALUE self);
|
|
20
|
+
VALUE ruby_xml_html_parser_filename_set(VALUE self, VALUE filename);
|
|
21
|
+
VALUE ruby_xml_html_parser_new(VALUE class);
|
|
22
|
+
*/
|
|
23
|
+
VALUE ruby_xml_html_parser_parse(VALUE self);
|
|
24
|
+
VALUE ruby_xml_html_parser_str_get(VALUE self);
|
|
25
|
+
VALUE ruby_xml_html_parser_str_set(VALUE self, VALUE str);
|
|
26
|
+
|
|
27
|
+
void ruby_init_html_parser(void);
|
|
28
|
+
|
|
29
|
+
#endif
|