libxml-ruby 0.9.3 → 0.9.4
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES +9 -0
- data/README +61 -128
- data/doc/css/normal.css +182 -0
- data/doc/img/raze-tiny.png +0 -0
- data/doc/img/red-cube.jpg +0 -0
- data/doc/img/xml-ruby.png +0 -0
- data/doc/index.xml +43 -0
- data/doc/install.xml +77 -0
- data/doc/layout.rhtml +38 -0
- data/doc/layout.xsl +67 -0
- data/doc/license.xml +32 -0
- data/doc/log/changelog.xml +1324 -0
- data/doc/log/changelog.xsl +42 -0
- data/ext/libxml/ruby_xml_document.c +1084 -1057
- data/ext/libxml/ruby_xml_html_parser.c +37 -40
- data/ext/libxml/ruby_xml_input.c +17 -40
- data/ext/libxml/ruby_xml_input.h +2 -2
- data/ext/libxml/ruby_xml_parser.c +151 -151
- data/ext/libxml/ruby_xml_reader.c +910 -893
- data/ext/libxml/ruby_xml_sax_parser.c +174 -174
- data/ext/libxml/ruby_xml_sax_parser.h +12 -12
- data/ext/libxml/ruby_xml_xpointer.h +13 -25
- data/ext/libxml/version.h +2 -2
- data/ext/vc/libxml_ruby.vcproj +1 -1
- data/test/model/ruby-lang.html +238 -0
- data/test/tc_html_parser.rb +2 -12
- data/test/tc_reader.rb +87 -87
- metadata +17 -3
- data/test/test.rb +0 -8
@@ -1,4 +1,4 @@
|
|
1
|
-
/* $Id: ruby_xml_html_parser.c
|
1
|
+
/* $Id: ruby_xml_html_parser.c 630 2008-11-24 06:53:01Z cfis $ */
|
2
2
|
|
3
3
|
/* Please see the LICENSE file for copyright and distribution information */
|
4
4
|
|
@@ -6,8 +6,6 @@
|
|
6
6
|
|
7
7
|
VALUE cXMLHTMLParser;
|
8
8
|
static ID INPUT_ATTR;
|
9
|
-
static ID CONTEXT_ATTR;
|
10
|
-
|
11
9
|
|
12
10
|
/*
|
13
11
|
* Document-class: LibXML::XML::HTMLParser
|
@@ -28,34 +26,48 @@ static VALUE
|
|
28
26
|
rxml_html_parser_initialize(VALUE self) {
|
29
27
|
VALUE input = rb_class_new_instance(0, NULL, cXMLInput);
|
30
28
|
rb_iv_set(self, "@input", input);
|
31
|
-
rb_iv_set(self, "@context", Qnil);
|
32
29
|
return self;
|
33
30
|
}
|
34
31
|
|
35
|
-
static
|
36
|
-
|
32
|
+
static htmlDocPtr
|
33
|
+
rxml_html_parser_read_file(VALUE input) {
|
37
34
|
VALUE file = rb_ivar_get(input, FILE_ATTR);
|
38
35
|
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
39
|
-
VALUE
|
36
|
+
VALUE encoding_str = rxml_input_encoding_to_s(Qnil, encoding);
|
37
|
+
char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(encoding_str));
|
38
|
+
int options = 0;
|
40
39
|
|
41
|
-
return
|
40
|
+
return htmlReadFile(StringValuePtr(file), xencoding_str, options);
|
42
41
|
}
|
43
42
|
|
44
|
-
static
|
45
|
-
|
46
|
-
VALUE
|
47
|
-
|
43
|
+
static htmlDocPtr
|
44
|
+
rxml_html_parser_read_string(VALUE input) {
|
45
|
+
VALUE string = rb_ivar_get(input, STRING_ATTR);
|
46
|
+
VALUE base_url = rb_ivar_get(input, BASE_URL_ATTR);
|
47
|
+
char *xbase_url = (base_url == Qnil ? NULL : StringValuePtr(base_url));
|
48
|
+
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
49
|
+
VALUE encoding_str = rxml_input_encoding_to_s(Qnil, encoding);
|
50
|
+
char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(encoding_str));
|
51
|
+
int options = 0;
|
52
|
+
|
53
|
+
return htmlReadMemory(StringValuePtr(string), RSTRING_LEN(string),
|
54
|
+
xbase_url, xencoding_str, options);
|
48
55
|
}
|
49
56
|
|
50
|
-
|
51
|
-
|
57
|
+
static htmlDocPtr
|
58
|
+
rxml_html_parser_read_io(VALUE input) {
|
52
59
|
VALUE io = rb_ivar_get(input, IO_ATTR);
|
60
|
+
VALUE base_url = rb_ivar_get(input, BASE_URL_ATTR);
|
61
|
+
char *xbase_url = (base_url == Qnil ? NULL : StringValuePtr(base_url));
|
53
62
|
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
54
|
-
|
55
|
-
|
63
|
+
VALUE encoding_str = rxml_input_encoding_to_s(Qnil, encoding);
|
64
|
+
char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(encoding_str));
|
65
|
+
int options = 0;
|
66
|
+
|
56
67
|
return htmlReadIO((xmlInputReadCallback) rxml_read_callback, NULL,
|
57
|
-
io,
|
58
|
-
|
68
|
+
(void *)io,
|
69
|
+
xbase_url, xencoding_str, options);
|
70
|
+
}
|
59
71
|
|
60
72
|
/*
|
61
73
|
* call-seq:
|
@@ -67,35 +79,22 @@ rxml_html_parser_io_ctxt(VALUE input) {
|
|
67
79
|
*/
|
68
80
|
static VALUE
|
69
81
|
rxml_html_parser_parse(VALUE self) {
|
70
|
-
xmlParserCtxtPtr ctxt;
|
71
|
-
VALUE context;
|
72
82
|
VALUE input = rb_ivar_get(self, INPUT_ATTR);
|
73
|
-
|
74
|
-
context = rb_ivar_get(self, CONTEXT_ATTR);
|
75
|
-
if (context != Qnil)
|
76
|
-
rb_raise(rb_eRuntimeError, "You cannot parse a data source twice");
|
83
|
+
htmlDocPtr xdoc;
|
77
84
|
|
78
85
|
if (rb_ivar_get(input, FILE_ATTR) != Qnil)
|
79
|
-
|
86
|
+
xdoc = rxml_html_parser_read_file(input);
|
80
87
|
else if (rb_ivar_get(input, STRING_ATTR) != Qnil)
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
else if (rb_ivar_get(input, DOCUMENT_ATTR) != Qnil)
|
85
|
-
ctxt = rxml_html_parser_parse_document(input);*/
|
88
|
+
xdoc = rxml_html_parser_read_string(input);
|
89
|
+
else if (rb_ivar_get(input, IO_ATTR) != Qnil)
|
90
|
+
xdoc = rxml_html_parser_read_io(input);
|
86
91
|
else
|
87
92
|
rb_raise(rb_eArgError, "You must specify a parser data source");
|
88
93
|
|
89
|
-
if (!
|
94
|
+
if (!xdoc)
|
90
95
|
rxml_raise(&xmlLastError);
|
91
96
|
|
92
|
-
|
93
|
-
rb_ivar_set(self, CONTEXT_ATTR, context);
|
94
|
-
|
95
|
-
if (htmlParseDocument(ctxt) == -1)
|
96
|
-
rxml_raise(&ctxt->lastError);
|
97
|
-
|
98
|
-
return rxml_document_wrap(ctxt->myDoc);
|
97
|
+
return rxml_document_wrap(xdoc);
|
99
98
|
}
|
100
99
|
|
101
100
|
// Rdoc needs to know
|
@@ -107,13 +106,11 @@ rxml_html_parser_parse(VALUE self) {
|
|
107
106
|
void
|
108
107
|
ruby_init_html_parser(void) {
|
109
108
|
INPUT_ATTR = rb_intern("@input");
|
110
|
-
CONTEXT_ATTR = rb_intern("@context");
|
111
109
|
|
112
110
|
cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
|
113
111
|
|
114
112
|
/* Attributes */
|
115
113
|
rb_define_attr(cXMLHTMLParser, "input", 1, 0);
|
116
|
-
rb_define_attr(cXMLHTMLParser, "context", 1, 0);
|
117
114
|
|
118
115
|
/* Instance methods */
|
119
116
|
rb_define_method(cXMLHTMLParser, "initialize", rxml_html_parser_initialize, 0);
|
data/ext/libxml/ruby_xml_input.c
CHANGED
@@ -20,7 +20,12 @@ VALUE cXMLInput;
|
|
20
20
|
* but instead will use the various Document and Parser apis.
|
21
21
|
* For example:
|
22
22
|
*
|
23
|
-
*
|
23
|
+
* parser = XML::Parser.file('my_file')
|
24
|
+
* parser = XML::Parser.string('<myxml/>')
|
25
|
+
* parser = XML::Parser.io(File.open('my_file'))
|
26
|
+
*
|
27
|
+
* XML::HTMLParser, XML::Reader, XML::SaxParser and
|
28
|
+
* XML::Document work in the same way.
|
24
29
|
*
|
25
30
|
* LibXML converts all data sources to UTF8 internally before
|
26
31
|
* processing them. By default, LibXML will determine a data
|
@@ -52,11 +57,10 @@ VALUE cXMLInput;
|
|
52
57
|
* document.encoding = document
|
53
58
|
* doc << XML::Node.new */
|
54
59
|
|
55
|
-
|
60
|
+
ID BASE_URL_ATTR;
|
56
61
|
ID ENCODING_ATTR;
|
57
62
|
ID FILE_ATTR;
|
58
63
|
ID STRING_ATTR;
|
59
|
-
ID DOCUMENT_ATTR;
|
60
64
|
ID IO_ATTR;
|
61
65
|
|
62
66
|
static ID READ_METHOD;
|
@@ -204,9 +208,8 @@ static VALUE
|
|
204
208
|
rxml_input_file_set(VALUE self, VALUE FILE) {
|
205
209
|
Check_Type(FILE, T_STRING);
|
206
210
|
rb_ivar_set(self, FILE_ATTR, FILE);
|
207
|
-
rb_ivar_set(self, STRING_ATTR, Qnil);
|
208
|
-
rb_ivar_set(self, DOCUMENT_ATTR, Qnil);
|
209
211
|
rb_ivar_set(self, IO_ATTR, Qnil);
|
212
|
+
rb_ivar_set(self, STRING_ATTR, Qnil);
|
210
213
|
return self;
|
211
214
|
}
|
212
215
|
|
@@ -233,35 +236,8 @@ static VALUE
|
|
233
236
|
rxml_input_string_set(VALUE self, VALUE string) {
|
234
237
|
Check_Type(string, T_STRING);
|
235
238
|
rb_ivar_set(self, FILE_ATTR, Qnil);
|
236
|
-
rb_ivar_set(self, STRING_ATTR, string);
|
237
|
-
rb_ivar_set(self, DOCUMENT_ATTR, Qnil);
|
238
|
-
rb_ivar_set(self, IO_ATTR, Qnil);
|
239
|
-
return self;
|
240
|
-
}
|
241
|
-
|
242
|
-
/*
|
243
|
-
* call-seq:
|
244
|
-
* input.document -> "document"
|
245
|
-
*
|
246
|
-
* Obtain the document this parser will read from.
|
247
|
-
*/
|
248
|
-
static VALUE
|
249
|
-
rxml_input_document_get(VALUE self) {
|
250
|
-
return rb_ivar_get(self, DOCUMENT_ATTR);
|
251
|
-
}
|
252
|
-
|
253
|
-
/*
|
254
|
-
* call-seq:
|
255
|
-
* input.document = LibXML::XML::Document.new
|
256
|
-
*
|
257
|
-
* Set the document this parser will read from.
|
258
|
-
*/
|
259
|
-
static VALUE
|
260
|
-
rxml_input_document_set(VALUE self, VALUE document) {
|
261
|
-
rb_ivar_set(self, FILE_ATTR, Qnil);
|
262
|
-
rb_ivar_set(self, STRING_ATTR, Qnil);
|
263
|
-
rb_ivar_set(self, DOCUMENT_ATTR, document);
|
264
239
|
rb_ivar_set(self, IO_ATTR, Qnil);
|
240
|
+
rb_ivar_set(self, STRING_ATTR, string);
|
265
241
|
return self;
|
266
242
|
}
|
267
243
|
|
@@ -285,9 +261,8 @@ rxml_input_io_get(VALUE self) {
|
|
285
261
|
static VALUE
|
286
262
|
rxml_input_io_set(VALUE self, VALUE io) {
|
287
263
|
rb_ivar_set(self, FILE_ATTR, Qnil);
|
288
|
-
rb_ivar_set(self, STRING_ATTR, Qnil);
|
289
|
-
rb_ivar_set(self, DOCUMENT_ATTR, Qnil);
|
290
264
|
rb_ivar_set(self, IO_ATTR, io);
|
265
|
+
rb_ivar_set(self, STRING_ATTR, Qnil);
|
291
266
|
return self;
|
292
267
|
}
|
293
268
|
|
@@ -307,10 +282,12 @@ rxml_input_io_set(VALUE self, VALUE io) {
|
|
307
282
|
|
308
283
|
void
|
309
284
|
ruby_init_xml_input(void) {
|
310
|
-
|
311
|
-
|
312
|
-
|
285
|
+
BASE_URL_ATTR = rb_intern("@base_url");
|
286
|
+
ENCODING_ATTR = rb_intern("@encoding");
|
287
|
+
FILE_ATTR = rb_intern("@file");
|
313
288
|
IO_ATTR = rb_intern("@io");
|
289
|
+
STRING_ATTR = rb_intern("@string");
|
290
|
+
|
314
291
|
READ_METHOD = rb_intern("read");
|
315
292
|
|
316
293
|
cXMLInput = rb_define_class_under(mXML, "Input", rb_cObject);
|
@@ -342,14 +319,14 @@ ruby_init_xml_input(void) {
|
|
342
319
|
rb_define_const(cXMLInput, "EUC_JP", INT2NUM(XML_CHAR_ENCODING_EUC_JP)); /* EUC-JP */
|
343
320
|
rb_define_const(cXMLInput, "ASCII", INT2NUM(XML_CHAR_ENCODING_ASCII)); /* pure ASCII */
|
344
321
|
|
322
|
+
rb_define_attr(cXMLInput, "base_url", 1, 1);
|
345
323
|
rb_define_attr(cXMLInput, "encoding", 1, 1);
|
324
|
+
|
346
325
|
rb_define_method(cXMLInput, "initialize", rxml_input_initialize, 0);
|
347
326
|
rb_define_method(cXMLInput, "file", rxml_input_file_get, 0);
|
348
327
|
rb_define_method(cXMLInput, "file=", rxml_input_file_set, 1);
|
349
328
|
rb_define_method(cXMLInput, "string", rxml_input_string_get, 0);
|
350
329
|
rb_define_method(cXMLInput, "string=", rxml_input_string_set, 1);
|
351
|
-
rb_define_method(cXMLInput, "document", rxml_input_document_get, 0);
|
352
|
-
rb_define_method(cXMLInput, "document=", rxml_input_document_set, 1);
|
353
330
|
rb_define_method(cXMLInput, "io", rxml_input_io_get, 0);
|
354
331
|
rb_define_method(cXMLInput, "io=", rxml_input_io_set, 1);
|
355
332
|
}
|
data/ext/libxml/ruby_xml_input.h
CHANGED
@@ -7,11 +7,11 @@
|
|
7
7
|
|
8
8
|
extern VALUE cXMLInput;
|
9
9
|
|
10
|
+
extern ID BASE_URL_ATTR;
|
10
11
|
extern ID ENCODING_ATTR;
|
11
12
|
extern ID FILE_ATTR;
|
12
|
-
extern ID STRING_ATTR;
|
13
|
-
extern ID DOCUMENT_ATTR;
|
14
13
|
extern ID IO_ATTR;
|
14
|
+
extern ID STRING_ATTR;
|
15
15
|
|
16
16
|
void ruby_init_xml_input();
|
17
17
|
int rxml_read_callback(void *context, char *buffer, int len);
|
@@ -1,151 +1,151 @@
|
|
1
|
-
/* $Id: ruby_xml_parser.c
|
2
|
-
|
3
|
-
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
-
|
5
|
-
#include <stdarg.h>
|
6
|
-
#include "ruby_libxml.h"
|
7
|
-
|
8
|
-
VALUE cXMLParser;
|
9
|
-
static ID INPUT_ATTR;
|
10
|
-
static ID CONTEXT_ATTR;
|
11
|
-
|
12
|
-
/*
|
13
|
-
* Document-class: LibXML::XML::Parser
|
14
|
-
*
|
15
|
-
* The XML::Parser provides a tree based API for processing
|
16
|
-
* xml documents, in contract to XML::Reader's stream
|
17
|
-
* based api and XML::SaxParser callback based API.
|
18
|
-
*
|
19
|
-
* As a result, parsing a document creates an in-memory document object
|
20
|
-
* that consist of any number of XML::Node instances. This is simple
|
21
|
-
* and powerful model, but has the major limitation that the size of
|
22
|
-
* the document that can be processed is limited by the amount of
|
23
|
-
* memory available. In such cases, it is better to use the XML::Reader.
|
24
|
-
*
|
25
|
-
* Using the parser is simple:
|
26
|
-
*
|
27
|
-
* parser = XML::Parser.new
|
28
|
-
* parser.file = 'my_file'
|
29
|
-
* doc = parser.parse
|
30
|
-
*
|
31
|
-
* You can also parse strings (see XML::Parser.string) and io objects (see
|
32
|
-
* XML::Parser.io).
|
33
|
-
*/
|
34
|
-
|
35
|
-
/*
|
36
|
-
* call-seq:
|
37
|
-
* parser.initialize -> parser
|
38
|
-
*
|
39
|
-
* Initiliazes instance of parser.
|
40
|
-
*/
|
41
|
-
static VALUE
|
42
|
-
rxml_parser_initialize(VALUE self) {
|
43
|
-
VALUE input = rb_class_new_instance(0, NULL, cXMLInput);
|
44
|
-
rb_iv_set(self, "@input", input);
|
45
|
-
rb_iv_set(self, "@context", Qnil);
|
46
|
-
return self;
|
47
|
-
}
|
48
|
-
|
49
|
-
static xmlParserCtxtPtr
|
50
|
-
rxml_parser_filename_ctxt(VALUE input) {
|
51
|
-
xmlParserCtxtPtr ctxt;
|
52
|
-
int retry_count = 0;
|
53
|
-
VALUE filename = rb_ivar_get(input, FILE_ATTR);
|
54
|
-
|
55
|
-
retry:
|
56
|
-
ctxt = xmlCreateFileParserCtxt(StringValuePtr(filename));
|
57
|
-
if (ctxt == NULL) {
|
58
|
-
if ((errno == EMFILE || errno == ENFILE) && retry_count == 0) {
|
59
|
-
retry_count++;
|
60
|
-
rb_gc();
|
61
|
-
goto retry;
|
62
|
-
} else {
|
63
|
-
rb_raise(rb_eIOError, StringValuePtr(filename));
|
64
|
-
}
|
65
|
-
}
|
66
|
-
|
67
|
-
return ctxt;
|
68
|
-
}
|
69
|
-
|
70
|
-
static xmlParserCtxtPtr
|
71
|
-
rxml_parser_str_ctxt(VALUE input) {
|
72
|
-
VALUE str = rb_ivar_get(input, STRING_ATTR);
|
73
|
-
return xmlCreateMemoryParserCtxt(StringValuePtr(str), RSTRING_LEN(str));
|
74
|
-
}
|
75
|
-
|
76
|
-
static xmlParserCtxtPtr
|
77
|
-
rxml_parser_io_ctxt(VALUE input) {
|
78
|
-
VALUE io = rb_ivar_get(input, IO_ATTR);
|
79
|
-
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
80
|
-
xmlCharEncoding xmlEncoding = NUM2INT(encoding);
|
81
|
-
|
82
|
-
return xmlCreateIOParserCtxt(NULL, NULL,
|
83
|
-
(xmlInputReadCallback) rxml_read_callback,
|
84
|
-
NULL, io, xmlEncoding);
|
85
|
-
}
|
86
|
-
|
87
|
-
|
88
|
-
/*
|
89
|
-
* call-seq:
|
90
|
-
* parser.parse -> document
|
91
|
-
*
|
92
|
-
* Parse the input XML and create an XML::Document with
|
93
|
-
* it's content. If an error occurs, XML::Parser::ParseError
|
94
|
-
* is thrown.
|
95
|
-
*/
|
96
|
-
static VALUE
|
97
|
-
rxml_parser_parse(VALUE self) {
|
98
|
-
xmlParserCtxtPtr ctxt;
|
99
|
-
VALUE context;
|
100
|
-
VALUE input = rb_ivar_get(self, INPUT_ATTR);
|
101
|
-
|
102
|
-
context = rb_ivar_get(self, CONTEXT_ATTR);
|
103
|
-
if (context != Qnil)
|
104
|
-
rb_raise(rb_eRuntimeError, "You cannot parse a data source twice");
|
105
|
-
|
106
|
-
if (rb_ivar_get(input, FILE_ATTR) != Qnil)
|
107
|
-
ctxt = rxml_parser_filename_ctxt(input);
|
108
|
-
else if (rb_ivar_get(input, STRING_ATTR) != Qnil)
|
109
|
-
ctxt = rxml_parser_str_ctxt(input);
|
110
|
-
/*else if (rb_ivar_get(input, DOCUMENT_ATTR) != Qnil)
|
111
|
-
ctxt = rxml_parser_parse_document(input);*/
|
112
|
-
else if (rb_ivar_get(input, IO_ATTR) != Qnil)
|
113
|
-
ctxt = rxml_parser_io_ctxt(input);
|
114
|
-
else
|
115
|
-
rb_raise(rb_eArgError, "You must specify a parser data source");
|
116
|
-
|
117
|
-
if (!ctxt)
|
118
|
-
rxml_raise(&xmlLastError);
|
119
|
-
|
120
|
-
context = rxml_parser_context_wrap(ctxt);
|
121
|
-
rb_ivar_set(self, CONTEXT_ATTR, context);
|
122
|
-
|
123
|
-
if (xmlParseDocument(ctxt) == -1 || !ctxt->wellFormed) {
|
124
|
-
xmlFreeDoc(ctxt->myDoc);
|
125
|
-
rxml_raise(&ctxt->lastError);
|
126
|
-
}
|
127
|
-
|
128
|
-
return rxml_document_wrap(ctxt->myDoc);
|
129
|
-
}
|
130
|
-
|
131
|
-
|
132
|
-
// Rdoc needs to know
|
133
|
-
#ifdef RDOC_NEVER_DEFINED
|
134
|
-
mLibXML = rb_define_module("LibXML");
|
135
|
-
mXML = rb_define_module_under(mLibXML, "XML");
|
136
|
-
#endif
|
137
|
-
|
138
|
-
void
|
139
|
-
ruby_init_parser(void) {
|
140
|
-
cXMLParser = rb_define_class_under(mXML, "Parser", rb_cObject);
|
141
|
-
|
142
|
-
/* Atributes */
|
143
|
-
INPUT_ATTR = rb_intern("@input");
|
144
|
-
CONTEXT_ATTR = rb_intern("@context");
|
145
|
-
rb_define_attr(cXMLParser, "input", 1, 0);
|
146
|
-
rb_define_attr(cXMLParser, "context", 1, 0);
|
147
|
-
|
148
|
-
/* Instance Methods */
|
149
|
-
rb_define_method(cXMLParser, "initialize", rxml_parser_initialize, 0);
|
150
|
-
rb_define_method(cXMLParser, "parse", rxml_parser_parse, 0);
|
151
|
-
}
|
1
|
+
/* $Id: ruby_xml_parser.c 630 2008-11-24 06:53:01Z cfis $ */
|
2
|
+
|
3
|
+
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
+
|
5
|
+
#include <stdarg.h>
|
6
|
+
#include "ruby_libxml.h"
|
7
|
+
|
8
|
+
VALUE cXMLParser;
|
9
|
+
static ID INPUT_ATTR;
|
10
|
+
static ID CONTEXT_ATTR;
|
11
|
+
|
12
|
+
/*
|
13
|
+
* Document-class: LibXML::XML::Parser
|
14
|
+
*
|
15
|
+
* The XML::Parser provides a tree based API for processing
|
16
|
+
* xml documents, in contrast to XML::Reader's stream
|
17
|
+
* based api and XML::SaxParser callback based API.
|
18
|
+
*
|
19
|
+
* As a result, parsing a document creates an in-memory document object
|
20
|
+
* that consists of any number of XML::Node instances. This is a simple
|
21
|
+
* and powerful model, but has the major limitation that the size of
|
22
|
+
* the document that can be processed is limited by the amount of
|
23
|
+
* memory available. In such cases, it is better to use the XML::Reader.
|
24
|
+
*
|
25
|
+
* Using the parser is simple:
|
26
|
+
*
|
27
|
+
* parser = XML::Parser.new
|
28
|
+
* parser.file = 'my_file'
|
29
|
+
* doc = parser.parse
|
30
|
+
*
|
31
|
+
* You can also parse strings (see XML::Parser.string) and io objects (see
|
32
|
+
* XML::Parser.io).
|
33
|
+
*/
|
34
|
+
|
35
|
+
/*
|
36
|
+
* call-seq:
|
37
|
+
* parser.initialize -> parser
|
38
|
+
*
|
39
|
+
* Initializes an instance of the parser.
|
40
|
+
*/
|
41
|
+
static VALUE
|
42
|
+
rxml_parser_initialize(VALUE self) {
|
43
|
+
VALUE input = rb_class_new_instance(0, NULL, cXMLInput);
|
44
|
+
rb_iv_set(self, "@input", input);
|
45
|
+
rb_iv_set(self, "@context", Qnil);
|
46
|
+
return self;
|
47
|
+
}
|
48
|
+
|
49
|
+
static xmlParserCtxtPtr
|
50
|
+
rxml_parser_filename_ctxt(VALUE input) {
|
51
|
+
xmlParserCtxtPtr ctxt;
|
52
|
+
int retry_count = 0;
|
53
|
+
VALUE filename = rb_ivar_get(input, FILE_ATTR);
|
54
|
+
|
55
|
+
retry:
|
56
|
+
ctxt = xmlCreateFileParserCtxt(StringValuePtr(filename));
|
57
|
+
if (ctxt == NULL) {
|
58
|
+
if ((errno == EMFILE || errno == ENFILE) && retry_count == 0) {
|
59
|
+
retry_count++;
|
60
|
+
rb_gc();
|
61
|
+
goto retry;
|
62
|
+
} else {
|
63
|
+
rb_raise(rb_eIOError, StringValuePtr(filename));
|
64
|
+
}
|
65
|
+
}
|
66
|
+
|
67
|
+
return ctxt;
|
68
|
+
}
|
69
|
+
|
70
|
+
static xmlParserCtxtPtr
|
71
|
+
rxml_parser_str_ctxt(VALUE input) {
|
72
|
+
VALUE str = rb_ivar_get(input, STRING_ATTR);
|
73
|
+
return xmlCreateMemoryParserCtxt(StringValuePtr(str), RSTRING_LEN(str));
|
74
|
+
}
|
75
|
+
|
76
|
+
static xmlParserCtxtPtr
|
77
|
+
rxml_parser_io_ctxt(VALUE input) {
|
78
|
+
VALUE io = rb_ivar_get(input, IO_ATTR);
|
79
|
+
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
80
|
+
xmlCharEncoding xmlEncoding = NUM2INT(encoding);
|
81
|
+
|
82
|
+
return xmlCreateIOParserCtxt(NULL, NULL,
|
83
|
+
(xmlInputReadCallback) rxml_read_callback,
|
84
|
+
NULL, (void *)io, xmlEncoding);
|
85
|
+
}
|
86
|
+
|
87
|
+
|
88
|
+
/*
|
89
|
+
* call-seq:
|
90
|
+
* parser.parse -> document
|
91
|
+
*
|
92
|
+
* Parse the input XML and create an XML::Document with
|
93
|
+
* its content. If an error occurs, XML::Parser::ParseError
|
94
|
+
* is thrown.
|
95
|
+
*/
|
96
|
+
static VALUE
|
97
|
+
rxml_parser_parse(VALUE self) {
|
98
|
+
xmlParserCtxtPtr ctxt;
|
99
|
+
VALUE context;
|
100
|
+
VALUE input = rb_ivar_get(self, INPUT_ATTR);
|
101
|
+
|
102
|
+
context = rb_ivar_get(self, CONTEXT_ATTR);
|
103
|
+
if (context != Qnil)
|
104
|
+
rb_raise(rb_eRuntimeError, "You cannot parse a data source twice");
|
105
|
+
|
106
|
+
if (rb_ivar_get(input, FILE_ATTR) != Qnil)
|
107
|
+
ctxt = rxml_parser_filename_ctxt(input);
|
108
|
+
else if (rb_ivar_get(input, STRING_ATTR) != Qnil)
|
109
|
+
ctxt = rxml_parser_str_ctxt(input);
|
110
|
+
/*else if (rb_ivar_get(input, DOCUMENT_ATTR) != Qnil)
|
111
|
+
ctxt = rxml_parser_parse_document(input);*/
|
112
|
+
else if (rb_ivar_get(input, IO_ATTR) != Qnil)
|
113
|
+
ctxt = rxml_parser_io_ctxt(input);
|
114
|
+
else
|
115
|
+
rb_raise(rb_eArgError, "You must specify a parser data source");
|
116
|
+
|
117
|
+
if (!ctxt)
|
118
|
+
rxml_raise(&xmlLastError);
|
119
|
+
|
120
|
+
context = rxml_parser_context_wrap(ctxt);
|
121
|
+
rb_ivar_set(self, CONTEXT_ATTR, context);
|
122
|
+
|
123
|
+
if (xmlParseDocument(ctxt) == -1 || !ctxt->wellFormed) {
|
124
|
+
xmlFreeDoc(ctxt->myDoc);
|
125
|
+
rxml_raise(&ctxt->lastError);
|
126
|
+
}
|
127
|
+
|
128
|
+
return rxml_document_wrap(ctxt->myDoc);
|
129
|
+
}
|
130
|
+
|
131
|
+
|
132
|
+
// Rdoc needs to know
|
133
|
+
#ifdef RDOC_NEVER_DEFINED
|
134
|
+
mLibXML = rb_define_module("LibXML");
|
135
|
+
mXML = rb_define_module_under(mLibXML, "XML");
|
136
|
+
#endif
|
137
|
+
|
138
|
+
void
|
139
|
+
ruby_init_parser(void) {
|
140
|
+
cXMLParser = rb_define_class_under(mXML, "Parser", rb_cObject);
|
141
|
+
|
142
|
+
/* Attributes */
|
143
|
+
INPUT_ATTR = rb_intern("@input");
|
144
|
+
CONTEXT_ATTR = rb_intern("@context");
|
145
|
+
rb_define_attr(cXMLParser, "input", 1, 0);
|
146
|
+
rb_define_attr(cXMLParser, "context", 1, 0);
|
147
|
+
|
148
|
+
/* Instance Methods */
|
149
|
+
rb_define_method(cXMLParser, "initialize", rxml_parser_initialize, 0);
|
150
|
+
rb_define_method(cXMLParser, "parse", rxml_parser_parse, 0);
|
151
|
+
}
|