libxml-ruby 0.9.9-x86-mswin32-60 → 1.0.0-x86-mswin32-60

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,175 +1,290 @@
1
- /* $Id: ruby_xml_parser_context.c 710 2009-01-20 05:30:51Z cfis $ */
2
-
3
- /* Please see the LICENSE file for copyright and distribution information */
4
-
5
- #include "ruby_libxml.h"
6
- #include "ruby_xml_html_parser_context.h"
7
-
8
- /*
9
- * Document-class: LibXML::XML::HTMLParser::Context
10
- *
11
- * The XML::HTMLParser::Context class provides in-depth control over how
12
- * a document is parsed.
13
- */
14
-
15
- VALUE cXMLHtmlParserContext;
16
- ID IO_ATTR;
17
-
18
- static void rxml_html_parser_context_free(xmlParserCtxtPtr ctxt)
19
- {
20
- xmlFreeParserCtxt(ctxt);
21
- }
22
-
23
- static VALUE rxml_html_parser_context_wrap(xmlParserCtxtPtr ctxt)
24
- {
25
- return Data_Wrap_Struct(cXMLHtmlParserContext, NULL, rxml_html_parser_context_free, ctxt);
26
- }
27
-
28
- /* call-seq:
29
- * XML::HTMLParser::Context.file(file) -> XML::HTMLParser::Context
30
- *
31
- * Creates a new parser context based on the specified file or uri.
32
- *
33
- * Parameters:
34
- *
35
- * file - A filename or uri.
36
- */
37
- static VALUE rxml_html_parser_context_file(VALUE klass, VALUE file)
38
- {
39
- xmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(StringValuePtr(file), NULL);
40
- if (!ctxt)
41
- rxml_raise(&xmlLastError);
42
-
43
- return rxml_html_parser_context_wrap(ctxt);
44
- }
45
-
46
- /* call-seq:
47
- * XML::HTMLParser::Context.io(io) -> XML::HTMLParser::Context
48
- *
49
- * Creates a new parser context based on the specified io object.
50
- *
51
- * Parameters:
52
- *
53
- * io - A ruby IO object.
54
- */
55
- static VALUE rxml_html_parser_context_io(VALUE klass, VALUE io)
56
- {
57
- VALUE result;
58
- htmlParserCtxtPtr ctxt;
59
- xmlParserInputBufferPtr input;
60
- xmlParserInputPtr stream;
61
-
62
- if (NIL_P(io))
63
- rb_raise(rb_eTypeError, "Must pass in an IO object");
64
-
65
- input = xmlParserInputBufferCreateIO((xmlInputReadCallback) rxml_read_callback, NULL,
66
- (void*)io, XML_CHAR_ENCODING_NONE);
67
-
68
- ctxt = htmlNewParserCtxt();
69
- if (!ctxt)
70
- {
71
- xmlFreeParserInputBuffer(input);
72
- rxml_raise(&xmlLastError);
73
- }
74
-
75
- stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
76
-
77
- if (!stream)
78
- {
79
- xmlFreeParserInputBuffer(input);
80
- xmlFreeParserCtxt(ctxt);
81
- rxml_raise(&xmlLastError);
82
- }
83
- inputPush(ctxt, stream);
84
- result = rxml_html_parser_context_wrap(ctxt);
85
-
86
- /* Attach io object to parser so it won't get freed.*/
87
- rb_ivar_set(result, IO_ATTR, io);
88
-
89
- return result;
90
- }
91
-
92
- /* call-seq:
93
- * XML::HTMLParser::Context.string(string) -> XML::HTMLParser::Context
94
- *
95
- * Creates a new parser context based on the specified string.
96
- *
97
- * Parameters:
98
- *
99
- * string - A string that contains the data to parse.
100
- */
101
- static VALUE rxml_html_parser_context_string(VALUE klass, VALUE string)
102
- {
103
- xmlParserCtxtPtr ctxt;
104
- Check_Type(string, T_STRING);
105
-
106
- if (RSTRING_LEN(string) == 0)
107
- rb_raise(rb_eArgError, "Must specify a string with one or more characters");
108
-
109
- ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(string),
110
- RSTRING_LEN(string));
111
- if (!ctxt)
112
- rxml_raise(&xmlLastError);
113
-
114
- htmlDefaultSAXHandlerInit();
115
- if (ctxt->sax != NULL)
116
- memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
117
-
118
- return rxml_html_parser_context_wrap(ctxt);
119
- }
120
-
121
- /*
122
- * call-seq:
123
- * context.disable_cdata = (true|false)
124
- *
125
- * Control whether the CDATA nodes will be created in this context.
126
- */
127
- static VALUE rxml_html_parser_context_disable_cdata_set(VALUE self, VALUE bool)
128
- {
129
- xmlParserCtxtPtr ctxt;
130
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
131
-
132
- if (ctxt->sax == NULL)
133
- rb_raise(rb_eRuntimeError, "Sax handler is not yet set");
134
-
135
- /* LibXML controls this internally with the default SAX handler. */
136
- if (bool)
137
- ctxt->sax->cdataBlock = NULL;
138
- else
139
- ctxt->sax->cdataBlock = htmlDefaultSAXHandler.cdataBlock;
140
-
141
- return bool;
142
- }
143
-
144
- /*
145
- * call-seq:
146
- * context.options = XML::Parser::Options::NOENT |
147
- XML::Parser::Options::NOCDATA
148
- *
149
- * Provides control over the execution of a parser. Valid values
150
- * are the constants defined on XML::Parser::Options. Multiple
151
- * options can be combined by using Bitwise OR (|).
152
- */
153
- static VALUE rxml_html_parser_context_options_set(VALUE self, VALUE options)
154
- {
155
- int result;
156
- xmlParserCtxtPtr ctxt;
157
- Check_Type(options, T_FIXNUM);
158
-
159
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
160
- result = htmlCtxtUseOptions(ctxt, NUM2INT(options));
161
-
162
- return self;
163
- }
164
-
165
- void rxml_init_html_parser_context(void)
166
- {
167
- IO_ATTR = ID2SYM(rb_intern("@io"));
168
- cXMLHtmlParserContext = rb_define_class_under(cXMLHtmlParser, "Context", cXMLParserContext);
169
-
170
- rb_define_singleton_method(cXMLHtmlParserContext, "file", rxml_html_parser_context_file, 1);
171
- rb_define_singleton_method(cXMLHtmlParserContext, "io", rxml_html_parser_context_io, 1);
172
- rb_define_singleton_method(cXMLHtmlParserContext, "string", rxml_html_parser_context_string, 1);
173
- rb_define_method(cXMLHtmlParserContext, "disable_cdata=", rxml_html_parser_context_disable_cdata_set, 1);
174
- rb_define_method(cXMLHtmlParserContext, "options=", rxml_html_parser_context_options_set, 1);
175
- }
1
+ /* $Id: ruby_xml_parser_context.c 710 2009-01-20 05:30:51Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #include "ruby_libxml.h"
6
+ #include "ruby_xml_html_parser_context.h"
7
+
8
+
9
+ /*
10
+ * Document-class: LibXML::XML::HTMLParser::Context
11
+ *
12
+ * The XML::HTMLParser::Context class provides in-depth control over how
13
+ * a document is parsed.
14
+ */
15
+
16
+ VALUE cXMLHtmlParserContext;
17
+ static ID IO_ATTR;
18
+
19
+ /* OS X 10.5 ships with libxml2 version 2.6.16 which does not expose the
20
+ htmlNewParserCtxt (or htmlInitParserCtxt which it uses) method. htmlNewParserCtxt
21
+ wasn't added to the libxml2 header files until 2.6.27. So the next two
22
+ methods are simply copied from a newer version of libxml2 (2.7.2). */
23
+ #if LIBXML_VERSION <= 20627
24
+ #define XML_CTXT_FINISH_DTD_0 0xabcd1234
25
+ static int htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
26
+ {
27
+ htmlSAXHandler *sax;
28
+ if (ctxt == NULL) return(-1);
29
+
30
+ memset(ctxt, 0, sizeof(htmlParserCtxt));
31
+ ctxt->dict = xmlDictCreate();
32
+ if (ctxt->dict == NULL) {
33
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
34
+ return(-1);
35
+ }
36
+ sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler));
37
+ if (sax == NULL) {
38
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
39
+ return(-1);
40
+ }
41
+ else
42
+ memset(sax, 0, sizeof(htmlSAXHandler));
43
+
44
+ ctxt->inputTab = (htmlParserInputPtr *) xmlMalloc(5 * sizeof(htmlParserInputPtr));
45
+ if (ctxt->inputTab == NULL) {
46
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
47
+ ctxt->inputNr = 0;
48
+ ctxt->inputMax = 0;
49
+ ctxt->input = NULL;
50
+ return(-1);
51
+ }
52
+ ctxt->inputNr = 0;
53
+ ctxt->inputMax = 5;
54
+ ctxt->input = NULL;
55
+ ctxt->version = NULL;
56
+ ctxt->encoding = NULL;
57
+ ctxt->standalone = -1;
58
+ ctxt->instate = XML_PARSER_START;
59
+
60
+ ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr));
61
+ if (ctxt->nodeTab == NULL) {
62
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
63
+ ctxt->nodeNr = 0;
64
+ ctxt->nodeMax = 0;
65
+ ctxt->node = NULL;
66
+ ctxt->inputNr = 0;
67
+ ctxt->inputMax = 0;
68
+ ctxt->input = NULL;
69
+ return(-1);
70
+ }
71
+ ctxt->nodeNr = 0;
72
+ ctxt->nodeMax = 10;
73
+ ctxt->node = NULL;
74
+
75
+ ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
76
+ if (ctxt->nameTab == NULL) {
77
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
78
+ ctxt->nameNr = 0;
79
+ ctxt->nameMax = 10;
80
+ ctxt->name = NULL;
81
+ ctxt->nodeNr = 0;
82
+ ctxt->nodeMax = 0;
83
+ ctxt->node = NULL;
84
+ ctxt->inputNr = 0;
85
+ ctxt->inputMax = 0;
86
+ ctxt->input = NULL;
87
+ return(-1);
88
+ }
89
+ ctxt->nameNr = 0;
90
+ ctxt->nameMax = 10;
91
+ ctxt->name = NULL;
92
+
93
+ if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
94
+ else {
95
+ ctxt->sax = sax;
96
+ memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
97
+ }
98
+ ctxt->userData = ctxt;
99
+ ctxt->myDoc = NULL;
100
+ ctxt->wellFormed = 1;
101
+ ctxt->replaceEntities = 0;
102
+ ctxt->linenumbers = xmlLineNumbersDefaultValue;
103
+ ctxt->html = 1;
104
+ ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
105
+ ctxt->vctxt.userData = ctxt;
106
+ ctxt->vctxt.error = xmlParserValidityError;
107
+ ctxt->vctxt.warning = xmlParserValidityWarning;
108
+ ctxt->record_info = 0;
109
+ ctxt->validate = 0;
110
+ ctxt->nbChars = 0;
111
+ ctxt->checkIndex = 0;
112
+ ctxt->catalogs = NULL;
113
+ xmlInitNodeInfoSeq(&ctxt->node_seq);
114
+ return(0);
115
+ }
116
+
117
+ static htmlParserCtxtPtr htmlNewParserCtxt(void)
118
+ {
119
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
120
+ if (ctxt == NULL) {
121
+ rb_raise(rb_eNoMemError, "NewParserCtxt: out of memory\n");
122
+ return(NULL);
123
+ }
124
+ memset(ctxt, 0, sizeof(xmlParserCtxt));
125
+ if (htmlInitParserCtxt(ctxt) < 0) {
126
+ htmlFreeParserCtxt(ctxt);
127
+ return(NULL);
128
+ }
129
+ return(ctxt);
130
+ }
131
+ #endif
132
+
133
+ static void rxml_html_parser_context_free(htmlParserCtxtPtr ctxt)
134
+ {
135
+ htmlFreeParserCtxt(ctxt);
136
+ }
137
+
138
+ static VALUE rxml_html_parser_context_wrap(htmlParserCtxtPtr ctxt)
139
+ {
140
+ return Data_Wrap_Struct(cXMLHtmlParserContext, NULL, rxml_html_parser_context_free, ctxt);
141
+ }
142
+
143
+ /* call-seq:
144
+ * XML::HTMLParser::Context.file(file) -> XML::HTMLParser::Context
145
+ *
146
+ * Creates a new parser context based on the specified file or uri.
147
+ *
148
+ * Parameters:
149
+ *
150
+ * file - A filename or uri.
151
+ */
152
+ static VALUE rxml_html_parser_context_file(VALUE klass, VALUE file)
153
+ {
154
+ htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(StringValuePtr(file), NULL);
155
+ if (!ctxt)
156
+ rxml_raise(&xmlLastError);
157
+
158
+ return rxml_html_parser_context_wrap(ctxt);
159
+ }
160
+
161
+ /* call-seq:
162
+ * XML::HTMLParser::Context.io(io) -> XML::HTMLParser::Context
163
+ *
164
+ * Creates a new parser context based on the specified io object.
165
+ *
166
+ * Parameters:
167
+ *
168
+ * io - A ruby IO object.
169
+ */
170
+ static VALUE rxml_html_parser_context_io(VALUE klass, VALUE io)
171
+ {
172
+ VALUE result;
173
+ htmlParserCtxtPtr ctxt;
174
+ xmlParserInputBufferPtr input;
175
+ xmlParserInputPtr stream;
176
+
177
+ if (NIL_P(io))
178
+ rb_raise(rb_eTypeError, "Must pass in an IO object");
179
+
180
+ input = xmlParserInputBufferCreateIO((xmlInputReadCallback) rxml_read_callback, NULL,
181
+ (void*)io, XML_CHAR_ENCODING_NONE);
182
+
183
+ ctxt = htmlNewParserCtxt();
184
+ if (!ctxt)
185
+ {
186
+ xmlFreeParserInputBuffer(input);
187
+ rxml_raise(&xmlLastError);
188
+ }
189
+
190
+ stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
191
+
192
+ if (!stream)
193
+ {
194
+ xmlFreeParserInputBuffer(input);
195
+ xmlFreeParserCtxt(ctxt);
196
+ rxml_raise(&xmlLastError);
197
+ }
198
+ inputPush(ctxt, stream);
199
+ result = rxml_html_parser_context_wrap(ctxt);
200
+
201
+ /* Attach io object to parser so it won't get freed.*/
202
+ rb_ivar_set(result, IO_ATTR, io);
203
+
204
+ return result;
205
+ }
206
+
207
+ /* call-seq:
208
+ * XML::HTMLParser::Context.string(string) -> XML::HTMLParser::Context
209
+ *
210
+ * Creates a new parser context based on the specified string.
211
+ *
212
+ * Parameters:
213
+ *
214
+ * string - A string that contains the data to parse.
215
+ */
216
+ static VALUE rxml_html_parser_context_string(VALUE klass, VALUE string)
217
+ {
218
+ htmlParserCtxtPtr ctxt;
219
+ Check_Type(string, T_STRING);
220
+
221
+ if (RSTRING_LEN(string) == 0)
222
+ rb_raise(rb_eArgError, "Must specify a string with one or more characters");
223
+
224
+ ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(string),
225
+ RSTRING_LEN(string));
226
+ if (!ctxt)
227
+ rxml_raise(&xmlLastError);
228
+
229
+ htmlDefaultSAXHandlerInit();
230
+ if (ctxt->sax != NULL)
231
+ memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
232
+
233
+ return rxml_html_parser_context_wrap(ctxt);
234
+ }
235
+
236
+ /*
237
+ * call-seq:
238
+ * context.disable_cdata = (true|false)
239
+ *
240
+ * Control whether the CDATA nodes will be created in this context.
241
+ */
242
+ static VALUE rxml_html_parser_context_disable_cdata_set(VALUE self, VALUE bool)
243
+ {
244
+ htmlParserCtxtPtr ctxt;
245
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
246
+
247
+ if (ctxt->sax == NULL)
248
+ rb_raise(rb_eRuntimeError, "Sax handler is not yet set");
249
+
250
+ /* LibXML controls this internally with the default SAX handler. */
251
+ if (bool)
252
+ ctxt->sax->cdataBlock = NULL;
253
+ else
254
+ ctxt->sax->cdataBlock = htmlDefaultSAXHandler.cdataBlock;
255
+
256
+ return bool;
257
+ }
258
+
259
+ /*
260
+ * call-seq:
261
+ * context.options = XML::Parser::Options::NOENT |
262
+ XML::Parser::Options::NOCDATA
263
+ *
264
+ * Provides control over the execution of a parser. Valid values
265
+ * are the constants defined on XML::Parser::Options. Multiple
266
+ * options can be combined by using Bitwise OR (|).
267
+ */
268
+ static VALUE rxml_html_parser_context_options_set(VALUE self, VALUE options)
269
+ {
270
+ int result;
271
+ htmlParserCtxtPtr ctxt;
272
+ Check_Type(options, T_FIXNUM);
273
+
274
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
275
+ result = htmlCtxtUseOptions(ctxt, NUM2INT(options));
276
+
277
+ return self;
278
+ }
279
+
280
+ void rxml_init_html_parser_context(void)
281
+ {
282
+ IO_ATTR = ID2SYM(rb_intern("@io"));
283
+ cXMLHtmlParserContext = rb_define_class_under(cXMLHtmlParser, "Context", cXMLParserContext);
284
+
285
+ rb_define_singleton_method(cXMLHtmlParserContext, "file", rxml_html_parser_context_file, 1);
286
+ rb_define_singleton_method(cXMLHtmlParserContext, "io", rxml_html_parser_context_io, 1);
287
+ rb_define_singleton_method(cXMLHtmlParserContext, "string", rxml_html_parser_context_string, 1);
288
+ rb_define_method(cXMLHtmlParserContext, "disable_cdata=", rxml_html_parser_context_disable_cdata_set, 1);
289
+ rb_define_method(cXMLHtmlParserContext, "options=", rxml_html_parser_context_options_set, 1);
290
+ }