libxml-ruby 0.8.1 → 0.8.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES +21 -0
- data/RAKEFILE +213 -0
- data/README +32 -11
- data/doc/rdoc/classes/LibXML.html +4 -0
- data/doc/rdoc/classes/LibXML/XML.html +7 -0
- data/doc/rdoc/classes/LibXML/XML/Attr.html +187 -177
- data/doc/rdoc/classes/LibXML/XML/Attributes.html +62 -61
- data/doc/rdoc/classes/LibXML/XML/Document.html +318 -254
- data/doc/rdoc/classes/LibXML/XML/Dtd.html +62 -8
- data/doc/rdoc/classes/LibXML/XML/Error.html +6 -0
- data/doc/rdoc/classes/LibXML/XML/HTMLParser.html +59 -41
- data/doc/rdoc/classes/LibXML/XML/InputCallbacks.html +12 -6
- data/doc/rdoc/classes/LibXML/XML/NS.html +65 -50
- data/doc/rdoc/classes/LibXML/XML/Node.html +671 -630
- data/doc/rdoc/classes/LibXML/XML/Node/FailedModify.html +12 -0
- data/doc/rdoc/classes/LibXML/XML/Node/Set.html +64 -60
- data/doc/rdoc/classes/LibXML/XML/Node/SetNamespace.html +12 -0
- data/doc/rdoc/classes/LibXML/XML/Node/UnknownType.html +12 -0
- data/doc/rdoc/classes/LibXML/XML/Parser.html +407 -383
- data/doc/rdoc/classes/LibXML/XML/Parser/Context.html +241 -219
- data/doc/rdoc/classes/LibXML/XML/Parser/ParseError.html +12 -0
- data/doc/rdoc/classes/LibXML/XML/Reader.html +421 -331
- data/doc/rdoc/classes/LibXML/XML/RelaxNG.html +237 -0
- data/doc/rdoc/classes/LibXML/XML/SaxParser.html +79 -60
- data/doc/rdoc/classes/LibXML/XML/Schema.html +109 -22
- data/doc/rdoc/classes/LibXML/XML/State.html +6 -0
- data/doc/rdoc/classes/LibXML/XML/XInclude.html +6 -0
- data/doc/rdoc/classes/LibXML/XML/XInclude/Error.html +6 -0
- data/doc/rdoc/classes/LibXML/XML/XPath.html +4 -48
- data/doc/rdoc/classes/LibXML/XML/XPath/Context.html +6 -5
- data/doc/rdoc/classes/LibXML/XML/XPath/InvalidPath.html +61 -0
- data/doc/rdoc/classes/LibXML/XML/XPath/Object.html +12 -8
- data/doc/rdoc/classes/LibXML/XML/XPointer.html +14 -8
- data/doc/rdoc/classes/LibXML/XML/XPointer/Context.html +6 -0
- data/doc/rdoc/classes/LibXML/XML/XPointer/Context/InvalidPath.html +6 -0
- data/doc/rdoc/classes/LibXML/XML/XPointer/InvalidExpression.html +13 -0
- data/doc/rdoc/created.rid +1 -1
- data/doc/rdoc/files/CHANGES.html +38 -10
- data/doc/rdoc/files/LICENSE.html +1 -1
- data/doc/rdoc/files/README.html +57 -23
- data/doc/rdoc/files/VERSION.html +1 -1
- data/doc/rdoc/files/ext/libxml/cbg_c.html +1 -1
- data/doc/rdoc/files/ext/libxml/libxml_c.html +1 -1
- data/doc/rdoc/files/ext/libxml/ruby_xml_attr_c.html +1 -1
- data/doc/rdoc/files/ext/libxml/ruby_xml_attributes_c.html +1 -1
- data/doc/rdoc/files/ext/libxml/ruby_xml_document_c.html +1 -1
- data/doc/rdoc/files/ext/libxml/ruby_xml_dtd_c.html +1 -1
- data/doc/rdoc/files/ext/libxml/ruby_xml_html_parser_c.html +1 -1
- data/doc/rdoc/files/ext/libxml/ruby_xml_input_cbg_c.html +1 -1
- data/doc/rdoc/files/ext/libxml/ruby_xml_node_c.html +1 -1
- data/doc/rdoc/files/ext/libxml/ruby_xml_node_set_c.html +1 -1
- data/doc/rdoc/files/ext/libxml/ruby_xml_ns_c.html +1 -1
- data/doc/rdoc/files/ext/libxml/ruby_xml_parser_c.html +1 -1
- data/doc/rdoc/files/ext/libxml/ruby_xml_parser_context_c.html +1 -1
- data/doc/rdoc/files/ext/libxml/ruby_xml_reader_c.html +1 -1
- data/doc/rdoc/{classes/XML.html → files/ext/libxml/ruby_xml_relaxng_c.html} +16 -35
- data/doc/rdoc/files/ext/libxml/ruby_xml_sax_parser_c.html +1 -1
- data/doc/rdoc/files/ext/libxml/ruby_xml_schema_c.html +1 -1
- data/doc/rdoc/files/ext/libxml/ruby_xml_state_c.html +1 -1
- data/doc/rdoc/files/ext/libxml/ruby_xml_xinclude_c.html +1 -1
- data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_c.html +1 -1
- data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_context_c.html +1 -1
- data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_object_c.html +1 -1
- data/doc/rdoc/files/ext/libxml/ruby_xml_xpointer_c.html +1 -1
- data/doc/rdoc/files/ext/libxml/ruby_xml_xpointer_context_c.html +1 -1
- data/doc/rdoc/files/lib/libxml/attr_rb.html +1 -1
- data/doc/rdoc/files/lib/libxml/attributes_rb.html +1 -1
- data/doc/rdoc/files/lib/libxml/document_rb.html +1 -1
- data/doc/rdoc/files/lib/libxml/node_rb.html +1 -1
- data/doc/rdoc/files/lib/libxml/node_set_rb.html +1 -1
- data/doc/rdoc/files/lib/libxml/parser_options_rb.html +1 -1
- data/doc/rdoc/files/lib/libxml/parser_rb.html +1 -1
- data/doc/rdoc/files/lib/libxml/properties_rb.html +1 -1
- data/doc/rdoc/files/lib/libxml/tree_rb.html +1 -1
- data/doc/rdoc/files/lib/libxml_rb.html +2 -2
- data/doc/rdoc/files/lib/xml/libxml_rb.html +12 -2
- data/doc/rdoc/files/lib/xml_rb.html +134 -0
- data/doc/rdoc/fr_class_index.html +1 -1
- data/doc/rdoc/fr_file_index.html +2 -0
- data/doc/rdoc/fr_method_index.html +349 -344
- data/ext/libxml/libxml.c +2 -1
- data/ext/libxml/ruby_libxml.h +1 -0
- data/ext/libxml/ruby_xml_attr.c +8 -2
- data/ext/libxml/ruby_xml_attributes.c +5 -5
- data/ext/libxml/ruby_xml_document.c +41 -2
- data/ext/libxml/ruby_xml_dtd.c +169 -169
- data/ext/libxml/ruby_xml_html_parser.c +449 -449
- data/ext/libxml/ruby_xml_input_cbg.c +165 -165
- data/ext/libxml/ruby_xml_node.c +1 -1
- data/ext/libxml/ruby_xml_node_set.c +172 -172
- data/ext/libxml/ruby_xml_ns.c +145 -145
- data/ext/libxml/ruby_xml_parser.c +1398 -1398
- data/ext/libxml/ruby_xml_parser_context.c +697 -697
- data/ext/libxml/ruby_xml_reader.c +44 -0
- data/ext/libxml/ruby_xml_reader.h +14 -14
- data/ext/libxml/ruby_xml_relaxng.c +97 -0
- data/ext/libxml/ruby_xml_relaxng.h +15 -0
- data/ext/libxml/ruby_xml_sax_parser.c +486 -486
- data/ext/libxml/ruby_xml_schema.c +1 -1
- data/ext/libxml/ruby_xml_state.c +4 -4
- data/ext/libxml/ruby_xml_xinclude.c +21 -21
- data/ext/libxml/ruby_xml_xpath.c +89 -89
- data/ext/libxml/ruby_xml_xpath_context.c +283 -283
- data/ext/libxml/ruby_xml_xpath_object.c +5 -5
- data/ext/libxml/ruby_xml_xpointer.c +104 -104
- data/ext/libxml/ruby_xml_xpointer_context.c +22 -22
- data/ext/libxml/version.h +2 -2
- data/ext/libxml/version.h.rej +17 -0
- data/ext/mingw/Rakefile +41 -0
- data/lib/libxml.rb +15 -4
- data/lib/xml.rb +14 -0
- data/lib/xml/libxml.rb +4 -1
- data/test/etc_doc_to_s.rb +1 -1
- data/test/ets_copy_bug.rb +1 -1
- data/test/ets_copy_bug3.rb +1 -1
- data/test/ets_doc_file.rb +1 -1
- data/test/ets_doc_to_s.rb +1 -1
- data/test/ets_node_gc.rb +1 -1
- data/test/ets_tsr.rb +1 -1
- data/test/model/shiporder.rnc +28 -0
- data/test/model/shiporder.rng +86 -0
- data/test/{tc_xml_attributes.rb → tc_attributes.rb} +1 -10
- data/test/{tc_xml_document.rb → tc_document.rb} +1 -1
- data/test/{tc_xml_document_write.rb → tc_document_write.rb} +1 -1
- data/test/{tc_xml_dtd.rb → tc_dtd.rb} +1 -1
- data/test/{tc_xml_html_parser.rb → tc_html_parser.rb} +1 -1
- data/test/{tc_xml_node.rb → tc_node.rb} +1 -1
- data/test/{tc_xml_node_attr.rb → tc_node_attr.rb} +1 -1
- data/test/{tc_xml_node_cdata.rb → tc_node_cdata.rb} +24 -5
- data/test/{tc_xml_node_comment.rb → tc_node_comment.rb} +1 -1
- data/test/{tc_xml_node_copy.rb → tc_node_copy.rb} +1 -1
- data/test/{tc_xml_node_edit.rb → tc_node_edit.rb} +1 -1
- data/test/{tc_xml_node_set.rb → tc_node_set.rb} +1 -1
- data/test/{tc_xml_node_set2.rb → tc_node_set2.rb} +1 -1
- data/test/{tc_xml_node_text.rb → tc_node_text.rb} +1 -1
- data/test/{tc_xml_node_xlink.rb → tc_node_xlink.rb} +2 -2
- data/test/{tc_xml_parser.rb → tc_parser.rb} +1 -1
- data/test/{tc_xml_parser_context.rb → tc_parser_context.rb} +1 -1
- data/test/{tc_xml_properties.rb → tc_properties.rb} +1 -7
- data/test/{tc_xml_reader.rb → tc_reader.rb} +1 -1
- data/test/tc_relaxng.rb +39 -0
- data/test/{tc_xml_sax_parser.rb → tc_sax_parser.rb} +1 -1
- data/test/{tc_xml_schema.rb → tc_schema.rb} +1 -1
- data/test/{tc_xml_traversal.rb → tc_traversal.rb} +1 -1
- data/test/{tc_xml_xinclude.rb → tc_xinclude.rb} +1 -1
- data/test/{tc_xml_xpath.rb → tc_xpath.rb} +1 -1
- data/test/{tc_xml_xpath_context.rb → tc_xpath_context.rb} +1 -1
- data/test/{tc_xml_xpointer.rb → tc_xpointer.rb} +1 -1
- data/test/test_suite.rb +26 -25
- metadata +100 -100
- data/benchmark/depixelate.rb +0 -632
- data/benchmark/hamlet.xml +0 -9055
- data/benchmark/sock_entries.xml +0 -507
- data/benchmark/throughput.rb +0 -39
- data/benchmark/xml_benchmarks.rb +0 -227
- data/mingw/libiconv-2.dll +0 -0
- data/mingw/libxml2-2.dll +0 -0
- data/mingw/libxml_ruby.so +0 -0
- data/test/gc.log +0 -0
- data/vc/libxml_ruby.sln +0 -20
- data/vc/libxml_ruby.vcproj +0 -389
@@ -1,449 +1,449 @@
|
|
1
|
-
/* $Id: ruby_xml_html_parser.c 461 2008-07-15 21:35:56Z cfis $ */
|
2
|
-
|
3
|
-
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
-
|
5
|
-
#include "ruby_libxml.h"
|
6
|
-
|
7
|
-
VALUE cXMLHTMLParser;
|
8
|
-
|
9
|
-
//static int
|
10
|
-
//ctxtRead(FILE *f, char * buf, int len) {
|
11
|
-
// return(fread(buf, 1, len, f));
|
12
|
-
//}
|
13
|
-
|
14
|
-
|
15
|
-
/*
|
16
|
-
* call-seq:
|
17
|
-
* parser.filename -> "filename"
|
18
|
-
*
|
19
|
-
* Obtain the filename this parser will read from.
|
20
|
-
*/
|
21
|
-
/*
|
22
|
-
VALUE
|
23
|
-
ruby_xml_html_parser_filename_get(VALUE self) {
|
24
|
-
ruby_xml_html_parser *rxp;
|
25
|
-
rx_file_data *data;
|
26
|
-
|
27
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
28
|
-
if (rxp->data == NULL)
|
29
|
-
return(Qnil);
|
30
|
-
|
31
|
-
if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE)
|
32
|
-
return(Qnil);
|
33
|
-
|
34
|
-
data = (rx_file_data *)rxp->data;
|
35
|
-
return(data->filename);
|
36
|
-
}
|
37
|
-
*/
|
38
|
-
|
39
|
-
/*
|
40
|
-
* call-seq:
|
41
|
-
* parser.filename = "filename"
|
42
|
-
*
|
43
|
-
* Set the filename this parser will read from.
|
44
|
-
*/
|
45
|
-
/*
|
46
|
-
VALUE
|
47
|
-
ruby_xml_html_parser_filename_set(VALUE self, VALUE filename) {
|
48
|
-
ruby_xml_html_parser *rxp;
|
49
|
-
ruby_xml_parser_context *rxpc;
|
50
|
-
rx_file_data *data;
|
51
|
-
|
52
|
-
Check_Type(filename, T_STRING);
|
53
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
54
|
-
|
55
|
-
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
|
56
|
-
if (rxp->data != NULL)
|
57
|
-
rb_fatal("crap, this should be null");
|
58
|
-
|
59
|
-
rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
|
60
|
-
data = ALLOC(rx_file_data);
|
61
|
-
rxp->data = data;
|
62
|
-
} else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE) {
|
63
|
-
return(Qnil);
|
64
|
-
}
|
65
|
-
|
66
|
-
rxp->ctxt = ruby_xml_parser_context_new();
|
67
|
-
data = (rx_file_data *)rxp->data;
|
68
|
-
data->filename = filename;
|
69
|
-
|
70
|
-
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
71
|
-
rxpc->ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename));
|
72
|
-
if (rxpc->ctxt == NULL)
|
73
|
-
rb_sys_fail(StringValuePtr(filename));
|
74
|
-
|
75
|
-
return(data->filename);
|
76
|
-
}
|
77
|
-
*/
|
78
|
-
|
79
|
-
void
|
80
|
-
ruby_xml_html_parser_free(ruby_xml_html_parser *rxp) {
|
81
|
-
void *data;
|
82
|
-
|
83
|
-
switch(rxp->data_type) {
|
84
|
-
case RUBY_LIBXML_SRC_TYPE_NULL:
|
85
|
-
break;
|
86
|
-
case RUBY_LIBXML_SRC_TYPE_FILE:
|
87
|
-
data = (void *)(rx_file_data *)rxp->data;
|
88
|
-
ruby_xfree((rx_file_data *)data);
|
89
|
-
break;
|
90
|
-
case RUBY_LIBXML_SRC_TYPE_STRING:
|
91
|
-
data = (void *)(rx_string_data *)rxp->data;
|
92
|
-
ruby_xfree((rx_string_data *)data);
|
93
|
-
break;
|
94
|
-
case RUBY_LIBXML_SRC_TYPE_IO:
|
95
|
-
data = (void *)(rx_io_data *)rxp->data;
|
96
|
-
ruby_xfree((rx_io_data *)data);
|
97
|
-
break;
|
98
|
-
default:
|
99
|
-
rb_fatal("Unknown data type, %d", rxp->data_type);
|
100
|
-
}
|
101
|
-
|
102
|
-
ruby_xfree(rxp);
|
103
|
-
}
|
104
|
-
|
105
|
-
|
106
|
-
/*
|
107
|
-
* call-seq:
|
108
|
-
* parser.io -> IO
|
109
|
-
*
|
110
|
-
* Obtain the IO instance this parser works with.
|
111
|
-
*/
|
112
|
-
/*
|
113
|
-
VALUE
|
114
|
-
ruby_xml_html_parser_io_get(VALUE self, VALUE io) {
|
115
|
-
ruby_xml_html_parser *rxp;
|
116
|
-
rx_io_data *data;
|
117
|
-
|
118
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
119
|
-
|
120
|
-
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL ||
|
121
|
-
rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO ||
|
122
|
-
rxp->data == NULL)
|
123
|
-
return(Qnil);
|
124
|
-
|
125
|
-
data = (rx_io_data *)rxp->data;
|
126
|
-
|
127
|
-
return(data->io);
|
128
|
-
}
|
129
|
-
*/
|
130
|
-
|
131
|
-
/*
|
132
|
-
* call-seq:
|
133
|
-
* parser.io = IO
|
134
|
-
*
|
135
|
-
* Set the IO instance this parser works with.
|
136
|
-
*/
|
137
|
-
/*
|
138
|
-
VALUE
|
139
|
-
ruby_xml_html_parser_io_set(VALUE self, VALUE io) {
|
140
|
-
ruby_xml_html_parser *rxp;
|
141
|
-
ruby_xml_parser_context *rxpc;
|
142
|
-
rx_io_data *data;
|
143
|
-
OpenFile *fptr;
|
144
|
-
FILE *f;
|
145
|
-
|
146
|
-
if (!rb_obj_is_kind_of(io, rb_cIO))
|
147
|
-
rb_raise(rb_eTypeError, "need an IO object");
|
148
|
-
|
149
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
150
|
-
|
151
|
-
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
|
152
|
-
if (rxp->data != NULL)
|
153
|
-
rb_fatal("crap, this should be null");
|
154
|
-
|
155
|
-
rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
|
156
|
-
data = ALLOC(rx_io_data);
|
157
|
-
rxp->data = data;
|
158
|
-
} else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO) {
|
159
|
-
return(Qnil);
|
160
|
-
}
|
161
|
-
|
162
|
-
rxp->ctxt = ruby_xml_parser_context_new();
|
163
|
-
data = (rx_io_data *)rxp->data;
|
164
|
-
data->io = io;
|
165
|
-
|
166
|
-
GetOpenFile(io, fptr);
|
167
|
-
rb_io_check_readable(fptr);
|
168
|
-
f = GetWriteFile(fptr);
|
169
|
-
|
170
|
-
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
171
|
-
rxpc->ctxt = htmlCreateIOParserCtxt(NULL, NULL,
|
172
|
-
(xmlInputReadCallback) ctxtRead,
|
173
|
-
NULL, f, XML_CHAR_ENCODING_NONE);
|
174
|
-
if (NIL_P(rxpc->ctxt))
|
175
|
-
rb_sys_fail(0);
|
176
|
-
|
177
|
-
return(data->io);
|
178
|
-
}
|
179
|
-
*/
|
180
|
-
|
181
|
-
void
|
182
|
-
ruby_xml_html_parser_mark(ruby_xml_html_parser *rxp) {
|
183
|
-
if (rxp == NULL) return;
|
184
|
-
if (!NIL_P(rxp->ctxt))
|
185
|
-
rb_gc_mark(rxp->ctxt);
|
186
|
-
|
187
|
-
rb_gc_mark(LIBXML_STATE);
|
188
|
-
|
189
|
-
switch(rxp->data_type) {
|
190
|
-
case RUBY_LIBXML_SRC_TYPE_NULL:
|
191
|
-
break;
|
192
|
-
case RUBY_LIBXML_SRC_TYPE_FILE:
|
193
|
-
if (!NIL_P(((rx_file_data *)rxp->data)->filename))
|
194
|
-
rb_gc_mark(((rx_file_data *)rxp->data)->filename);
|
195
|
-
break;
|
196
|
-
case RUBY_LIBXML_SRC_TYPE_STRING:
|
197
|
-
if (!NIL_P(((rx_string_data *)rxp->data)->str))
|
198
|
-
rb_gc_mark(((rx_string_data *)rxp->data)->str);
|
199
|
-
break;
|
200
|
-
case RUBY_LIBXML_SRC_TYPE_IO:
|
201
|
-
if (!NIL_P(((rx_io_data *)rxp->data)->io))
|
202
|
-
rb_gc_mark(((rx_io_data *)rxp->data)->io);
|
203
|
-
break;
|
204
|
-
default:
|
205
|
-
rb_fatal("unknown datatype: %d", rxp->data_type);
|
206
|
-
}
|
207
|
-
}
|
208
|
-
|
209
|
-
|
210
|
-
/*
|
211
|
-
* call-seq:
|
212
|
-
* XML::HTMLParser.new -> parser
|
213
|
-
*
|
214
|
-
* Create a new parser instance with no pre-determined source.
|
215
|
-
*/
|
216
|
-
VALUE
|
217
|
-
ruby_xml_html_parser_new(VALUE class) {
|
218
|
-
ruby_xml_html_parser *rxp;
|
219
|
-
|
220
|
-
rxp = ALLOC(ruby_xml_html_parser);
|
221
|
-
rxp->ctxt = Qnil;
|
222
|
-
rxp->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
|
223
|
-
rxp->data = NULL;
|
224
|
-
rxp->parsed = 0;
|
225
|
-
|
226
|
-
return(Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
|
227
|
-
ruby_xml_html_parser_free, rxp));
|
228
|
-
}
|
229
|
-
|
230
|
-
|
231
|
-
/*
|
232
|
-
* call-seq:
|
233
|
-
* XML::HTMLParser.file -> parser
|
234
|
-
*
|
235
|
-
* Create a new parser instance that will read the specified file.
|
236
|
-
*/
|
237
|
-
/*
|
238
|
-
VALUE
|
239
|
-
ruby_xml_html_parser_new_file(VALUE class, VALUE filename) {
|
240
|
-
VALUE obj;
|
241
|
-
ruby_xml_html_parser *rxp;
|
242
|
-
rx_file_data *data;
|
243
|
-
|
244
|
-
obj = ruby_xml_html_parser_new(class);
|
245
|
-
Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
|
246
|
-
|
247
|
-
data = ALLOC(rx_file_data);
|
248
|
-
rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
|
249
|
-
rxp->data = data;
|
250
|
-
|
251
|
-
ruby_xml_html_parser_filename_set(obj, filename);
|
252
|
-
|
253
|
-
return(obj);
|
254
|
-
}
|
255
|
-
*/
|
256
|
-
|
257
|
-
/*
|
258
|
-
* call-seq:
|
259
|
-
* XML::HTMLParser.io -> parser
|
260
|
-
*
|
261
|
-
* Create a new parser instance that will read from the
|
262
|
-
* specified IO object.
|
263
|
-
*/
|
264
|
-
/*
|
265
|
-
VALUE
|
266
|
-
ruby_xml_html_parser_new_io(VALUE class, VALUE io) {
|
267
|
-
VALUE obj;
|
268
|
-
ruby_xml_html_parser *rxp;
|
269
|
-
rx_io_data *data;
|
270
|
-
|
271
|
-
obj = ruby_xml_html_parser_new(class);
|
272
|
-
Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
|
273
|
-
|
274
|
-
data = ALLOC(rx_io_data);
|
275
|
-
rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
|
276
|
-
rxp->data = data;
|
277
|
-
|
278
|
-
ruby_xml_html_parser_io_set(obj, io);
|
279
|
-
|
280
|
-
return(obj);
|
281
|
-
}
|
282
|
-
*/
|
283
|
-
|
284
|
-
/*
|
285
|
-
* call-seq:
|
286
|
-
* XML::HTMLParser.string -> parser
|
287
|
-
*
|
288
|
-
* Create a new parser instance that will parse the given
|
289
|
-
* string.
|
290
|
-
*/
|
291
|
-
VALUE
|
292
|
-
ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
|
293
|
-
VALUE obj;
|
294
|
-
ruby_xml_html_parser *rxp;
|
295
|
-
rx_string_data *data;
|
296
|
-
|
297
|
-
obj = ruby_xml_html_parser_new(class);
|
298
|
-
Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
|
299
|
-
|
300
|
-
data = ALLOC(rx_string_data);
|
301
|
-
rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
|
302
|
-
rxp->data = data;
|
303
|
-
|
304
|
-
ruby_xml_html_parser_str_set(obj, str);
|
305
|
-
|
306
|
-
return(obj);
|
307
|
-
}
|
308
|
-
|
309
|
-
|
310
|
-
/*
|
311
|
-
* call-seq:
|
312
|
-
* parser.parse -> document
|
313
|
-
*
|
314
|
-
* Parse the input XML and create an XML::Document with
|
315
|
-
* it's content. If an error occurs, XML::Parser::ParseError
|
316
|
-
* is thrown.
|
317
|
-
*/
|
318
|
-
VALUE
|
319
|
-
ruby_xml_html_parser_parse(VALUE self) {
|
320
|
-
ruby_xml_html_parser *rxp;
|
321
|
-
ruby_xml_parser_context *rxpc;
|
322
|
-
htmlDocPtr xdp;
|
323
|
-
VALUE doc;
|
324
|
-
|
325
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
326
|
-
|
327
|
-
switch (rxp->data_type) {
|
328
|
-
case RUBY_LIBXML_SRC_TYPE_NULL:
|
329
|
-
return(Qnil);
|
330
|
-
case RUBY_LIBXML_SRC_TYPE_STRING:
|
331
|
-
//case RUBY_LIBXML_SRC_TYPE_FILE:
|
332
|
-
//case RUBY_LIBXML_SRC_TYPE_IO:
|
333
|
-
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
334
|
-
|
335
|
-
/* don't check return values here, the HTML parser returns errors
|
336
|
-
* but still allows the resulting tree to be used.
|
337
|
-
*/
|
338
|
-
htmlParseDocument(rxpc->ctxt);
|
339
|
-
xdp = rxpc->ctxt->myDoc;
|
340
|
-
rxp->parsed = 1;
|
341
|
-
|
342
|
-
doc = ruby_xml_document_wrap(xdp);
|
343
|
-
break;
|
344
|
-
default:
|
345
|
-
rb_fatal("Unknown data type, %d", rxp->data_type);
|
346
|
-
}
|
347
|
-
|
348
|
-
return(doc);
|
349
|
-
}
|
350
|
-
|
351
|
-
|
352
|
-
/*
|
353
|
-
* call-seq:
|
354
|
-
* parser.context -> context
|
355
|
-
*
|
356
|
-
* Obtain the XML::Parser::Context associated with this
|
357
|
-
* parser.
|
358
|
-
*/
|
359
|
-
VALUE
|
360
|
-
ruby_xml_html_parser_context_get(VALUE self) {
|
361
|
-
ruby_xml_html_parser *rxp;
|
362
|
-
|
363
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
364
|
-
if (rxp->ctxt == Qnil)
|
365
|
-
return(Qnil);
|
366
|
-
else
|
367
|
-
return(rxp->ctxt);
|
368
|
-
}
|
369
|
-
|
370
|
-
|
371
|
-
/*
|
372
|
-
* call-seq:
|
373
|
-
* parser.string -> "string"
|
374
|
-
*
|
375
|
-
* Obtain the string this parser works with.
|
376
|
-
*/
|
377
|
-
VALUE
|
378
|
-
ruby_xml_html_parser_str_get(VALUE self) {
|
379
|
-
ruby_xml_html_parser *rxp;
|
380
|
-
rx_string_data *data;
|
381
|
-
|
382
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
383
|
-
if (rxp->data == NULL || rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING)
|
384
|
-
return(Qnil);
|
385
|
-
|
386
|
-
data = (rx_string_data *)rxp->data;
|
387
|
-
return(data->str);
|
388
|
-
}
|
389
|
-
|
390
|
-
|
391
|
-
/*
|
392
|
-
* call-seq:
|
393
|
-
* parser.string = "string"
|
394
|
-
*
|
395
|
-
* Set the string this parser works with.
|
396
|
-
*/
|
397
|
-
VALUE
|
398
|
-
ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
|
399
|
-
ruby_xml_html_parser *rxp;
|
400
|
-
ruby_xml_parser_context *rxpc;
|
401
|
-
rx_string_data *data;
|
402
|
-
|
403
|
-
Check_Type(str, T_STRING);
|
404
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
405
|
-
|
406
|
-
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
|
407
|
-
rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
|
408
|
-
data = ALLOC(rx_string_data);
|
409
|
-
rxp->data = data;
|
410
|
-
} else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
|
411
|
-
return(Qnil);
|
412
|
-
}
|
413
|
-
|
414
|
-
rxp->ctxt = ruby_xml_parser_context_new();
|
415
|
-
data = (rx_string_data *)rxp->data;
|
416
|
-
data->str = str;
|
417
|
-
|
418
|
-
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
419
|
-
rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));
|
420
|
-
|
421
|
-
return(data->str);
|
422
|
-
}
|
423
|
-
|
424
|
-
|
425
|
-
// Rdoc needs to know
|
426
|
-
#ifdef RDOC_NEVER_DEFINED
|
427
|
-
mLibXML = rb_define_module("LibXML");
|
428
|
-
mXML = rb_define_module_under(mLibXML, "XML");
|
429
|
-
#endif
|
430
|
-
|
431
|
-
void
|
432
|
-
ruby_init_html_parser(void) {
|
433
|
-
cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
|
434
|
-
|
435
|
-
rb_define_singleton_method(cXMLHTMLParser, "new", ruby_xml_html_parser_new, 0);
|
436
|
-
rb_define_singleton_method(cXMLHTMLParser, "string", ruby_xml_html_parser_new_string, 1);
|
437
|
-
/*
|
438
|
-
rb_define_singleton_method(cXMLHTMLParser, "file", ruby_xml_html_parser_new_file, 1);
|
439
|
-
rb_define_singleton_method(cXMLHTMLParser, "io", ruby_xml_html_parser_new_io, 1);
|
440
|
-
rb_define_method(cXMLHTMLParser, "filename", ruby_xml_html_parser_filename_get, 0);
|
441
|
-
rb_define_method(cXMLHTMLParser, "filename=", ruby_xml_html_parser_filename_set, 1);
|
442
|
-
rb_define_method(cXMLHTMLParser, "io", ruby_xml_html_parser_io_get, 0);
|
443
|
-
rb_define_method(cXMLHTMLParser, "io=", ruby_xml_html_parser_io_set, 1);
|
444
|
-
*/
|
445
|
-
rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
|
446
|
-
rb_define_method(cXMLHTMLParser, "context", ruby_xml_html_parser_context_get, 0);
|
447
|
-
rb_define_method(cXMLHTMLParser, "string", ruby_xml_html_parser_str_get, 0);
|
448
|
-
rb_define_method(cXMLHTMLParser, "string=", ruby_xml_html_parser_str_set, 1);
|
449
|
-
}
|
1
|
+
/* $Id: ruby_xml_html_parser.c 461 2008-07-15 21:35:56Z cfis $ */
|
2
|
+
|
3
|
+
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
+
|
5
|
+
#include "ruby_libxml.h"
|
6
|
+
|
7
|
+
VALUE cXMLHTMLParser;
|
8
|
+
|
9
|
+
//static int
|
10
|
+
//ctxtRead(FILE *f, char * buf, int len) {
|
11
|
+
// return(fread(buf, 1, len, f));
|
12
|
+
//}
|
13
|
+
|
14
|
+
|
15
|
+
/*
|
16
|
+
* call-seq:
|
17
|
+
* parser.filename -> "filename"
|
18
|
+
*
|
19
|
+
* Obtain the filename this parser will read from.
|
20
|
+
*/
|
21
|
+
/*
|
22
|
+
VALUE
|
23
|
+
ruby_xml_html_parser_filename_get(VALUE self) {
|
24
|
+
ruby_xml_html_parser *rxp;
|
25
|
+
rx_file_data *data;
|
26
|
+
|
27
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
28
|
+
if (rxp->data == NULL)
|
29
|
+
return(Qnil);
|
30
|
+
|
31
|
+
if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE)
|
32
|
+
return(Qnil);
|
33
|
+
|
34
|
+
data = (rx_file_data *)rxp->data;
|
35
|
+
return(data->filename);
|
36
|
+
}
|
37
|
+
*/
|
38
|
+
|
39
|
+
/*
|
40
|
+
* call-seq:
|
41
|
+
* parser.filename = "filename"
|
42
|
+
*
|
43
|
+
* Set the filename this parser will read from.
|
44
|
+
*/
|
45
|
+
/*
|
46
|
+
VALUE
|
47
|
+
ruby_xml_html_parser_filename_set(VALUE self, VALUE filename) {
|
48
|
+
ruby_xml_html_parser *rxp;
|
49
|
+
ruby_xml_parser_context *rxpc;
|
50
|
+
rx_file_data *data;
|
51
|
+
|
52
|
+
Check_Type(filename, T_STRING);
|
53
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
54
|
+
|
55
|
+
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
|
56
|
+
if (rxp->data != NULL)
|
57
|
+
rb_fatal("crap, this should be null");
|
58
|
+
|
59
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
|
60
|
+
data = ALLOC(rx_file_data);
|
61
|
+
rxp->data = data;
|
62
|
+
} else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE) {
|
63
|
+
return(Qnil);
|
64
|
+
}
|
65
|
+
|
66
|
+
rxp->ctxt = ruby_xml_parser_context_new();
|
67
|
+
data = (rx_file_data *)rxp->data;
|
68
|
+
data->filename = filename;
|
69
|
+
|
70
|
+
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
71
|
+
rxpc->ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename));
|
72
|
+
if (rxpc->ctxt == NULL)
|
73
|
+
rb_sys_fail(StringValuePtr(filename));
|
74
|
+
|
75
|
+
return(data->filename);
|
76
|
+
}
|
77
|
+
*/
|
78
|
+
|
79
|
+
void
|
80
|
+
ruby_xml_html_parser_free(ruby_xml_html_parser *rxp) {
|
81
|
+
void *data;
|
82
|
+
|
83
|
+
switch(rxp->data_type) {
|
84
|
+
case RUBY_LIBXML_SRC_TYPE_NULL:
|
85
|
+
break;
|
86
|
+
case RUBY_LIBXML_SRC_TYPE_FILE:
|
87
|
+
data = (void *)(rx_file_data *)rxp->data;
|
88
|
+
ruby_xfree((rx_file_data *)data);
|
89
|
+
break;
|
90
|
+
case RUBY_LIBXML_SRC_TYPE_STRING:
|
91
|
+
data = (void *)(rx_string_data *)rxp->data;
|
92
|
+
ruby_xfree((rx_string_data *)data);
|
93
|
+
break;
|
94
|
+
case RUBY_LIBXML_SRC_TYPE_IO:
|
95
|
+
data = (void *)(rx_io_data *)rxp->data;
|
96
|
+
ruby_xfree((rx_io_data *)data);
|
97
|
+
break;
|
98
|
+
default:
|
99
|
+
rb_fatal("Unknown data type, %d", rxp->data_type);
|
100
|
+
}
|
101
|
+
|
102
|
+
ruby_xfree(rxp);
|
103
|
+
}
|
104
|
+
|
105
|
+
|
106
|
+
/*
|
107
|
+
* call-seq:
|
108
|
+
* parser.io -> IO
|
109
|
+
*
|
110
|
+
* Obtain the IO instance this parser works with.
|
111
|
+
*/
|
112
|
+
/*
|
113
|
+
VALUE
|
114
|
+
ruby_xml_html_parser_io_get(VALUE self, VALUE io) {
|
115
|
+
ruby_xml_html_parser *rxp;
|
116
|
+
rx_io_data *data;
|
117
|
+
|
118
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
119
|
+
|
120
|
+
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL ||
|
121
|
+
rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO ||
|
122
|
+
rxp->data == NULL)
|
123
|
+
return(Qnil);
|
124
|
+
|
125
|
+
data = (rx_io_data *)rxp->data;
|
126
|
+
|
127
|
+
return(data->io);
|
128
|
+
}
|
129
|
+
*/
|
130
|
+
|
131
|
+
/*
|
132
|
+
* call-seq:
|
133
|
+
* parser.io = IO
|
134
|
+
*
|
135
|
+
* Set the IO instance this parser works with.
|
136
|
+
*/
|
137
|
+
/*
|
138
|
+
VALUE
|
139
|
+
ruby_xml_html_parser_io_set(VALUE self, VALUE io) {
|
140
|
+
ruby_xml_html_parser *rxp;
|
141
|
+
ruby_xml_parser_context *rxpc;
|
142
|
+
rx_io_data *data;
|
143
|
+
OpenFile *fptr;
|
144
|
+
FILE *f;
|
145
|
+
|
146
|
+
if (!rb_obj_is_kind_of(io, rb_cIO))
|
147
|
+
rb_raise(rb_eTypeError, "need an IO object");
|
148
|
+
|
149
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
150
|
+
|
151
|
+
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
|
152
|
+
if (rxp->data != NULL)
|
153
|
+
rb_fatal("crap, this should be null");
|
154
|
+
|
155
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
|
156
|
+
data = ALLOC(rx_io_data);
|
157
|
+
rxp->data = data;
|
158
|
+
} else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO) {
|
159
|
+
return(Qnil);
|
160
|
+
}
|
161
|
+
|
162
|
+
rxp->ctxt = ruby_xml_parser_context_new();
|
163
|
+
data = (rx_io_data *)rxp->data;
|
164
|
+
data->io = io;
|
165
|
+
|
166
|
+
GetOpenFile(io, fptr);
|
167
|
+
rb_io_check_readable(fptr);
|
168
|
+
f = GetWriteFile(fptr);
|
169
|
+
|
170
|
+
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
171
|
+
rxpc->ctxt = htmlCreateIOParserCtxt(NULL, NULL,
|
172
|
+
(xmlInputReadCallback) ctxtRead,
|
173
|
+
NULL, f, XML_CHAR_ENCODING_NONE);
|
174
|
+
if (NIL_P(rxpc->ctxt))
|
175
|
+
rb_sys_fail(0);
|
176
|
+
|
177
|
+
return(data->io);
|
178
|
+
}
|
179
|
+
*/
|
180
|
+
|
181
|
+
void
|
182
|
+
ruby_xml_html_parser_mark(ruby_xml_html_parser *rxp) {
|
183
|
+
if (rxp == NULL) return;
|
184
|
+
if (!NIL_P(rxp->ctxt))
|
185
|
+
rb_gc_mark(rxp->ctxt);
|
186
|
+
|
187
|
+
rb_gc_mark(LIBXML_STATE);
|
188
|
+
|
189
|
+
switch(rxp->data_type) {
|
190
|
+
case RUBY_LIBXML_SRC_TYPE_NULL:
|
191
|
+
break;
|
192
|
+
case RUBY_LIBXML_SRC_TYPE_FILE:
|
193
|
+
if (!NIL_P(((rx_file_data *)rxp->data)->filename))
|
194
|
+
rb_gc_mark(((rx_file_data *)rxp->data)->filename);
|
195
|
+
break;
|
196
|
+
case RUBY_LIBXML_SRC_TYPE_STRING:
|
197
|
+
if (!NIL_P(((rx_string_data *)rxp->data)->str))
|
198
|
+
rb_gc_mark(((rx_string_data *)rxp->data)->str);
|
199
|
+
break;
|
200
|
+
case RUBY_LIBXML_SRC_TYPE_IO:
|
201
|
+
if (!NIL_P(((rx_io_data *)rxp->data)->io))
|
202
|
+
rb_gc_mark(((rx_io_data *)rxp->data)->io);
|
203
|
+
break;
|
204
|
+
default:
|
205
|
+
rb_fatal("unknown datatype: %d", rxp->data_type);
|
206
|
+
}
|
207
|
+
}
|
208
|
+
|
209
|
+
|
210
|
+
/*
|
211
|
+
* call-seq:
|
212
|
+
* XML::HTMLParser.new -> parser
|
213
|
+
*
|
214
|
+
* Create a new parser instance with no pre-determined source.
|
215
|
+
*/
|
216
|
+
VALUE
|
217
|
+
ruby_xml_html_parser_new(VALUE class) {
|
218
|
+
ruby_xml_html_parser *rxp;
|
219
|
+
|
220
|
+
rxp = ALLOC(ruby_xml_html_parser);
|
221
|
+
rxp->ctxt = Qnil;
|
222
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
|
223
|
+
rxp->data = NULL;
|
224
|
+
rxp->parsed = 0;
|
225
|
+
|
226
|
+
return(Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
|
227
|
+
ruby_xml_html_parser_free, rxp));
|
228
|
+
}
|
229
|
+
|
230
|
+
|
231
|
+
/*
|
232
|
+
* call-seq:
|
233
|
+
* XML::HTMLParser.file -> parser
|
234
|
+
*
|
235
|
+
* Create a new parser instance that will read the specified file.
|
236
|
+
*/
|
237
|
+
/*
|
238
|
+
VALUE
|
239
|
+
ruby_xml_html_parser_new_file(VALUE class, VALUE filename) {
|
240
|
+
VALUE obj;
|
241
|
+
ruby_xml_html_parser *rxp;
|
242
|
+
rx_file_data *data;
|
243
|
+
|
244
|
+
obj = ruby_xml_html_parser_new(class);
|
245
|
+
Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
|
246
|
+
|
247
|
+
data = ALLOC(rx_file_data);
|
248
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
|
249
|
+
rxp->data = data;
|
250
|
+
|
251
|
+
ruby_xml_html_parser_filename_set(obj, filename);
|
252
|
+
|
253
|
+
return(obj);
|
254
|
+
}
|
255
|
+
*/
|
256
|
+
|
257
|
+
/*
|
258
|
+
* call-seq:
|
259
|
+
* XML::HTMLParser.io -> parser
|
260
|
+
*
|
261
|
+
* Create a new parser instance that will read from the
|
262
|
+
* specified IO object.
|
263
|
+
*/
|
264
|
+
/*
|
265
|
+
VALUE
|
266
|
+
ruby_xml_html_parser_new_io(VALUE class, VALUE io) {
|
267
|
+
VALUE obj;
|
268
|
+
ruby_xml_html_parser *rxp;
|
269
|
+
rx_io_data *data;
|
270
|
+
|
271
|
+
obj = ruby_xml_html_parser_new(class);
|
272
|
+
Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
|
273
|
+
|
274
|
+
data = ALLOC(rx_io_data);
|
275
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
|
276
|
+
rxp->data = data;
|
277
|
+
|
278
|
+
ruby_xml_html_parser_io_set(obj, io);
|
279
|
+
|
280
|
+
return(obj);
|
281
|
+
}
|
282
|
+
*/
|
283
|
+
|
284
|
+
/*
|
285
|
+
* call-seq:
|
286
|
+
* XML::HTMLParser.string -> parser
|
287
|
+
*
|
288
|
+
* Create a new parser instance that will parse the given
|
289
|
+
* string.
|
290
|
+
*/
|
291
|
+
VALUE
|
292
|
+
ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
|
293
|
+
VALUE obj;
|
294
|
+
ruby_xml_html_parser *rxp;
|
295
|
+
rx_string_data *data;
|
296
|
+
|
297
|
+
obj = ruby_xml_html_parser_new(class);
|
298
|
+
Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
|
299
|
+
|
300
|
+
data = ALLOC(rx_string_data);
|
301
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
|
302
|
+
rxp->data = data;
|
303
|
+
|
304
|
+
ruby_xml_html_parser_str_set(obj, str);
|
305
|
+
|
306
|
+
return(obj);
|
307
|
+
}
|
308
|
+
|
309
|
+
|
310
|
+
/*
|
311
|
+
* call-seq:
|
312
|
+
* parser.parse -> document
|
313
|
+
*
|
314
|
+
* Parse the input XML and create an XML::Document with
|
315
|
+
* it's content. If an error occurs, XML::Parser::ParseError
|
316
|
+
* is thrown.
|
317
|
+
*/
|
318
|
+
VALUE
|
319
|
+
ruby_xml_html_parser_parse(VALUE self) {
|
320
|
+
ruby_xml_html_parser *rxp;
|
321
|
+
ruby_xml_parser_context *rxpc;
|
322
|
+
htmlDocPtr xdp;
|
323
|
+
VALUE doc;
|
324
|
+
|
325
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
326
|
+
|
327
|
+
switch (rxp->data_type) {
|
328
|
+
case RUBY_LIBXML_SRC_TYPE_NULL:
|
329
|
+
return(Qnil);
|
330
|
+
case RUBY_LIBXML_SRC_TYPE_STRING:
|
331
|
+
//case RUBY_LIBXML_SRC_TYPE_FILE:
|
332
|
+
//case RUBY_LIBXML_SRC_TYPE_IO:
|
333
|
+
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
334
|
+
|
335
|
+
/* don't check return values here, the HTML parser returns errors
|
336
|
+
* but still allows the resulting tree to be used.
|
337
|
+
*/
|
338
|
+
htmlParseDocument(rxpc->ctxt);
|
339
|
+
xdp = rxpc->ctxt->myDoc;
|
340
|
+
rxp->parsed = 1;
|
341
|
+
|
342
|
+
doc = ruby_xml_document_wrap(xdp);
|
343
|
+
break;
|
344
|
+
default:
|
345
|
+
rb_fatal("Unknown data type, %d", rxp->data_type);
|
346
|
+
}
|
347
|
+
|
348
|
+
return(doc);
|
349
|
+
}
|
350
|
+
|
351
|
+
|
352
|
+
/*
|
353
|
+
* call-seq:
|
354
|
+
* parser.context -> context
|
355
|
+
*
|
356
|
+
* Obtain the XML::Parser::Context associated with this
|
357
|
+
* parser.
|
358
|
+
*/
|
359
|
+
VALUE
|
360
|
+
ruby_xml_html_parser_context_get(VALUE self) {
|
361
|
+
ruby_xml_html_parser *rxp;
|
362
|
+
|
363
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
364
|
+
if (rxp->ctxt == Qnil)
|
365
|
+
return(Qnil);
|
366
|
+
else
|
367
|
+
return(rxp->ctxt);
|
368
|
+
}
|
369
|
+
|
370
|
+
|
371
|
+
/*
|
372
|
+
* call-seq:
|
373
|
+
* parser.string -> "string"
|
374
|
+
*
|
375
|
+
* Obtain the string this parser works with.
|
376
|
+
*/
|
377
|
+
VALUE
|
378
|
+
ruby_xml_html_parser_str_get(VALUE self) {
|
379
|
+
ruby_xml_html_parser *rxp;
|
380
|
+
rx_string_data *data;
|
381
|
+
|
382
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
383
|
+
if (rxp->data == NULL || rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING)
|
384
|
+
return(Qnil);
|
385
|
+
|
386
|
+
data = (rx_string_data *)rxp->data;
|
387
|
+
return(data->str);
|
388
|
+
}
|
389
|
+
|
390
|
+
|
391
|
+
/*
|
392
|
+
* call-seq:
|
393
|
+
* parser.string = "string"
|
394
|
+
*
|
395
|
+
* Set the string this parser works with.
|
396
|
+
*/
|
397
|
+
VALUE
|
398
|
+
ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
|
399
|
+
ruby_xml_html_parser *rxp;
|
400
|
+
ruby_xml_parser_context *rxpc;
|
401
|
+
rx_string_data *data;
|
402
|
+
|
403
|
+
Check_Type(str, T_STRING);
|
404
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
405
|
+
|
406
|
+
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
|
407
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
|
408
|
+
data = ALLOC(rx_string_data);
|
409
|
+
rxp->data = data;
|
410
|
+
} else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
|
411
|
+
return(Qnil);
|
412
|
+
}
|
413
|
+
|
414
|
+
rxp->ctxt = ruby_xml_parser_context_new();
|
415
|
+
data = (rx_string_data *)rxp->data;
|
416
|
+
data->str = str;
|
417
|
+
|
418
|
+
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
419
|
+
rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));
|
420
|
+
|
421
|
+
return(data->str);
|
422
|
+
}
|
423
|
+
|
424
|
+
|
425
|
+
// Rdoc needs to know
|
426
|
+
#ifdef RDOC_NEVER_DEFINED
|
427
|
+
mLibXML = rb_define_module("LibXML");
|
428
|
+
mXML = rb_define_module_under(mLibXML, "XML");
|
429
|
+
#endif
|
430
|
+
|
431
|
+
void
|
432
|
+
ruby_init_html_parser(void) {
|
433
|
+
cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
|
434
|
+
|
435
|
+
rb_define_singleton_method(cXMLHTMLParser, "new", ruby_xml_html_parser_new, 0);
|
436
|
+
rb_define_singleton_method(cXMLHTMLParser, "string", ruby_xml_html_parser_new_string, 1);
|
437
|
+
/*
|
438
|
+
rb_define_singleton_method(cXMLHTMLParser, "file", ruby_xml_html_parser_new_file, 1);
|
439
|
+
rb_define_singleton_method(cXMLHTMLParser, "io", ruby_xml_html_parser_new_io, 1);
|
440
|
+
rb_define_method(cXMLHTMLParser, "filename", ruby_xml_html_parser_filename_get, 0);
|
441
|
+
rb_define_method(cXMLHTMLParser, "filename=", ruby_xml_html_parser_filename_set, 1);
|
442
|
+
rb_define_method(cXMLHTMLParser, "io", ruby_xml_html_parser_io_get, 0);
|
443
|
+
rb_define_method(cXMLHTMLParser, "io=", ruby_xml_html_parser_io_set, 1);
|
444
|
+
*/
|
445
|
+
rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
|
446
|
+
rb_define_method(cXMLHTMLParser, "context", ruby_xml_html_parser_context_get, 0);
|
447
|
+
rb_define_method(cXMLHTMLParser, "string", ruby_xml_html_parser_str_get, 0);
|
448
|
+
rb_define_method(cXMLHTMLParser, "string=", ruby_xml_html_parser_str_set, 1);
|
449
|
+
}
|