libxml-ruby 0.9.3-x86-mswin32-60 → 0.9.4-x86-mswin32-60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES +9 -0
- data/README +61 -128
- data/doc/css/normal.css +182 -0
- data/doc/img/raze-tiny.png +0 -0
- data/doc/img/red-cube.jpg +0 -0
- data/doc/img/xml-ruby.png +0 -0
- data/doc/index.xml +43 -0
- data/doc/install.xml +77 -0
- data/doc/layout.rhtml +38 -0
- data/doc/layout.xsl +67 -0
- data/doc/license.xml +32 -0
- data/doc/log/changelog.xml +1324 -0
- data/doc/log/changelog.xsl +42 -0
- data/ext/libxml/ruby_xml_document.c +1084 -1057
- data/ext/libxml/ruby_xml_html_parser.c +37 -40
- data/ext/libxml/ruby_xml_input.c +17 -40
- data/ext/libxml/ruby_xml_input.h +2 -2
- data/ext/libxml/ruby_xml_parser.c +151 -151
- data/ext/libxml/ruby_xml_reader.c +910 -893
- data/ext/libxml/ruby_xml_sax_parser.c +174 -174
- data/ext/libxml/ruby_xml_sax_parser.h +12 -12
- data/ext/libxml/ruby_xml_xpointer.h +13 -25
- data/ext/libxml/version.h +2 -2
- data/ext/mingw/libxml_ruby.dll.a +0 -0
- data/ext/mingw/libxml_ruby.so +0 -0
- data/ext/vc/libxml_ruby.vcproj +1 -1
- data/test/model/ruby-lang.html +238 -0
- data/test/tc_html_parser.rb +2 -12
- data/test/tc_reader.rb +87 -87
- metadata +17 -3
- data/test/test.rb +0 -8
@@ -1,4 +1,4 @@
|
|
1
|
-
/* $Id: ruby_xml_html_parser.c
|
1
|
+
/* $Id: ruby_xml_html_parser.c 630 2008-11-24 06:53:01Z cfis $ */
|
2
2
|
|
3
3
|
/* Please see the LICENSE file for copyright and distribution information */
|
4
4
|
|
@@ -6,8 +6,6 @@
|
|
6
6
|
|
7
7
|
VALUE cXMLHTMLParser;
|
8
8
|
static ID INPUT_ATTR;
|
9
|
-
static ID CONTEXT_ATTR;
|
10
|
-
|
11
9
|
|
12
10
|
/*
|
13
11
|
* Document-class: LibXML::XML::HTMLParser
|
@@ -28,34 +26,48 @@ static VALUE
|
|
28
26
|
rxml_html_parser_initialize(VALUE self) {
|
29
27
|
VALUE input = rb_class_new_instance(0, NULL, cXMLInput);
|
30
28
|
rb_iv_set(self, "@input", input);
|
31
|
-
rb_iv_set(self, "@context", Qnil);
|
32
29
|
return self;
|
33
30
|
}
|
34
31
|
|
35
|
-
static
|
36
|
-
|
32
|
+
static htmlDocPtr
|
33
|
+
rxml_html_parser_read_file(VALUE input) {
|
37
34
|
VALUE file = rb_ivar_get(input, FILE_ATTR);
|
38
35
|
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
39
|
-
VALUE
|
36
|
+
VALUE encoding_str = rxml_input_encoding_to_s(Qnil, encoding);
|
37
|
+
char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(encoding_str));
|
38
|
+
int options = 0;
|
40
39
|
|
41
|
-
return
|
40
|
+
return htmlReadFile(StringValuePtr(file), xencoding_str, options);
|
42
41
|
}
|
43
42
|
|
44
|
-
static
|
45
|
-
|
46
|
-
VALUE
|
47
|
-
|
43
|
+
static htmlDocPtr
|
44
|
+
rxml_html_parser_read_string(VALUE input) {
|
45
|
+
VALUE string = rb_ivar_get(input, STRING_ATTR);
|
46
|
+
VALUE base_url = rb_ivar_get(input, BASE_URL_ATTR);
|
47
|
+
char *xbase_url = (base_url == Qnil ? NULL : StringValuePtr(base_url));
|
48
|
+
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
49
|
+
VALUE encoding_str = rxml_input_encoding_to_s(Qnil, encoding);
|
50
|
+
char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(encoding_str));
|
51
|
+
int options = 0;
|
52
|
+
|
53
|
+
return htmlReadMemory(StringValuePtr(string), RSTRING_LEN(string),
|
54
|
+
xbase_url, xencoding_str, options);
|
48
55
|
}
|
49
56
|
|
50
|
-
|
51
|
-
|
57
|
+
static htmlDocPtr
|
58
|
+
rxml_html_parser_read_io(VALUE input) {
|
52
59
|
VALUE io = rb_ivar_get(input, IO_ATTR);
|
60
|
+
VALUE base_url = rb_ivar_get(input, BASE_URL_ATTR);
|
61
|
+
char *xbase_url = (base_url == Qnil ? NULL : StringValuePtr(base_url));
|
53
62
|
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
54
|
-
|
55
|
-
|
63
|
+
VALUE encoding_str = rxml_input_encoding_to_s(Qnil, encoding);
|
64
|
+
char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(encoding_str));
|
65
|
+
int options = 0;
|
66
|
+
|
56
67
|
return htmlReadIO((xmlInputReadCallback) rxml_read_callback, NULL,
|
57
|
-
io,
|
58
|
-
|
68
|
+
(void *)io,
|
69
|
+
xbase_url, xencoding_str, options);
|
70
|
+
}
|
59
71
|
|
60
72
|
/*
|
61
73
|
* call-seq:
|
@@ -67,35 +79,22 @@ rxml_html_parser_io_ctxt(VALUE input) {
|
|
67
79
|
*/
|
68
80
|
static VALUE
|
69
81
|
rxml_html_parser_parse(VALUE self) {
|
70
|
-
xmlParserCtxtPtr ctxt;
|
71
|
-
VALUE context;
|
72
82
|
VALUE input = rb_ivar_get(self, INPUT_ATTR);
|
73
|
-
|
74
|
-
context = rb_ivar_get(self, CONTEXT_ATTR);
|
75
|
-
if (context != Qnil)
|
76
|
-
rb_raise(rb_eRuntimeError, "You cannot parse a data source twice");
|
83
|
+
htmlDocPtr xdoc;
|
77
84
|
|
78
85
|
if (rb_ivar_get(input, FILE_ATTR) != Qnil)
|
79
|
-
|
86
|
+
xdoc = rxml_html_parser_read_file(input);
|
80
87
|
else if (rb_ivar_get(input, STRING_ATTR) != Qnil)
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
else if (rb_ivar_get(input, DOCUMENT_ATTR) != Qnil)
|
85
|
-
ctxt = rxml_html_parser_parse_document(input);*/
|
88
|
+
xdoc = rxml_html_parser_read_string(input);
|
89
|
+
else if (rb_ivar_get(input, IO_ATTR) != Qnil)
|
90
|
+
xdoc = rxml_html_parser_read_io(input);
|
86
91
|
else
|
87
92
|
rb_raise(rb_eArgError, "You must specify a parser data source");
|
88
93
|
|
89
|
-
if (!
|
94
|
+
if (!xdoc)
|
90
95
|
rxml_raise(&xmlLastError);
|
91
96
|
|
92
|
-
|
93
|
-
rb_ivar_set(self, CONTEXT_ATTR, context);
|
94
|
-
|
95
|
-
if (htmlParseDocument(ctxt) == -1)
|
96
|
-
rxml_raise(&ctxt->lastError);
|
97
|
-
|
98
|
-
return rxml_document_wrap(ctxt->myDoc);
|
97
|
+
return rxml_document_wrap(xdoc);
|
99
98
|
}
|
100
99
|
|
101
100
|
// Rdoc needs to know
|
@@ -107,13 +106,11 @@ rxml_html_parser_parse(VALUE self) {
|
|
107
106
|
void
|
108
107
|
ruby_init_html_parser(void) {
|
109
108
|
INPUT_ATTR = rb_intern("@input");
|
110
|
-
CONTEXT_ATTR = rb_intern("@context");
|
111
109
|
|
112
110
|
cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
|
113
111
|
|
114
112
|
/* Atributes */
|
115
113
|
rb_define_attr(cXMLHTMLParser, "input", 1, 0);
|
116
|
-
rb_define_attr(cXMLHTMLParser, "context", 1, 0);
|
117
114
|
|
118
115
|
/* Instance methods */
|
119
116
|
rb_define_method(cXMLHTMLParser, "initialize", rxml_html_parser_initialize, 0);
|
data/ext/libxml/ruby_xml_input.c
CHANGED
@@ -20,7 +20,12 @@ VALUE cXMLInput;
|
|
20
20
|
* but instead will use the various Document and Parser apis.
|
21
21
|
* For example:
|
22
22
|
*
|
23
|
-
*
|
23
|
+
* parser = XML::Parser.file('my_file')
|
24
|
+
* parser = XML::Parser.string('<myxml/>')
|
25
|
+
* parser = XML::Parser.io(File.open('my_file'))
|
26
|
+
*
|
27
|
+
* XML::HTMLParser, XML::Reader, XML::SaxParser and
|
28
|
+
* XML::Document work in the same way.
|
24
29
|
*
|
25
30
|
* LibXML converts all data sources to UTF8 internally before
|
26
31
|
* processing them. By default, LibXML will determine a data
|
@@ -52,11 +57,10 @@ VALUE cXMLInput;
|
|
52
57
|
* document.encoding = document
|
53
58
|
* doc << XML::Node.new */
|
54
59
|
|
55
|
-
|
60
|
+
ID BASE_URL_ATTR;
|
56
61
|
ID ENCODING_ATTR;
|
57
62
|
ID FILE_ATTR;
|
58
63
|
ID STRING_ATTR;
|
59
|
-
ID DOCUMENT_ATTR;
|
60
64
|
ID IO_ATTR;
|
61
65
|
|
62
66
|
static ID READ_METHOD;
|
@@ -204,9 +208,8 @@ static VALUE
|
|
204
208
|
rxml_input_file_set(VALUE self, VALUE FILE) {
|
205
209
|
Check_Type(FILE, T_STRING);
|
206
210
|
rb_ivar_set(self, FILE_ATTR, FILE);
|
207
|
-
rb_ivar_set(self, STRING_ATTR, Qnil);
|
208
|
-
rb_ivar_set(self, DOCUMENT_ATTR, Qnil);
|
209
211
|
rb_ivar_set(self, IO_ATTR, Qnil);
|
212
|
+
rb_ivar_set(self, STRING_ATTR, Qnil);
|
210
213
|
return self;
|
211
214
|
}
|
212
215
|
|
@@ -233,35 +236,8 @@ static VALUE
|
|
233
236
|
rxml_input_string_set(VALUE self, VALUE string) {
|
234
237
|
Check_Type(string, T_STRING);
|
235
238
|
rb_ivar_set(self, FILE_ATTR, Qnil);
|
236
|
-
rb_ivar_set(self, STRING_ATTR, string);
|
237
|
-
rb_ivar_set(self, DOCUMENT_ATTR, Qnil);
|
238
|
-
rb_ivar_set(self, IO_ATTR, Qnil);
|
239
|
-
return self;
|
240
|
-
}
|
241
|
-
|
242
|
-
/*
|
243
|
-
* call-seq:
|
244
|
-
* input.document -> "document"
|
245
|
-
*
|
246
|
-
* Obtain the document this parser will read from.
|
247
|
-
*/
|
248
|
-
static VALUE
|
249
|
-
rxml_input_document_get(VALUE self) {
|
250
|
-
return rb_ivar_get(self, DOCUMENT_ATTR);
|
251
|
-
}
|
252
|
-
|
253
|
-
/*
|
254
|
-
* call-seq:
|
255
|
-
* input.document = LibXML::XML::Document.new
|
256
|
-
*
|
257
|
-
* Set the document this parser will read from.
|
258
|
-
*/
|
259
|
-
static VALUE
|
260
|
-
rxml_input_document_set(VALUE self, VALUE document) {
|
261
|
-
rb_ivar_set(self, FILE_ATTR, Qnil);
|
262
|
-
rb_ivar_set(self, STRING_ATTR, Qnil);
|
263
|
-
rb_ivar_set(self, DOCUMENT_ATTR, document);
|
264
239
|
rb_ivar_set(self, IO_ATTR, Qnil);
|
240
|
+
rb_ivar_set(self, STRING_ATTR, string);
|
265
241
|
return self;
|
266
242
|
}
|
267
243
|
|
@@ -285,9 +261,8 @@ rxml_input_io_get(VALUE self) {
|
|
285
261
|
static VALUE
|
286
262
|
rxml_input_io_set(VALUE self, VALUE io) {
|
287
263
|
rb_ivar_set(self, FILE_ATTR, Qnil);
|
288
|
-
rb_ivar_set(self, STRING_ATTR, Qnil);
|
289
|
-
rb_ivar_set(self, DOCUMENT_ATTR, Qnil);
|
290
264
|
rb_ivar_set(self, IO_ATTR, io);
|
265
|
+
rb_ivar_set(self, STRING_ATTR, Qnil);
|
291
266
|
return self;
|
292
267
|
}
|
293
268
|
|
@@ -307,10 +282,12 @@ rxml_input_io_set(VALUE self, VALUE io) {
|
|
307
282
|
|
308
283
|
void
|
309
284
|
ruby_init_xml_input(void) {
|
310
|
-
|
311
|
-
|
312
|
-
|
285
|
+
BASE_URL_ATTR = rb_intern("@base_url");
|
286
|
+
ENCODING_ATTR = rb_intern("@encoding");
|
287
|
+
FILE_ATTR = rb_intern("@file");
|
313
288
|
IO_ATTR = rb_intern("@io");
|
289
|
+
STRING_ATTR = rb_intern("@string");
|
290
|
+
|
314
291
|
READ_METHOD = rb_intern("read");
|
315
292
|
|
316
293
|
cXMLInput = rb_define_class_under(mXML, "Input", rb_cObject);
|
@@ -342,14 +319,14 @@ ruby_init_xml_input(void) {
|
|
342
319
|
rb_define_const(cXMLInput, "EUC_JP", INT2NUM(XML_CHAR_ENCODING_EUC_JP)); /* EUC-JP */
|
343
320
|
rb_define_const(cXMLInput, "ASCII", INT2NUM(XML_CHAR_ENCODING_ASCII)); /* pure ASCII */
|
344
321
|
|
322
|
+
rb_define_attr(cXMLInput, "base_url", 1, 1);
|
345
323
|
rb_define_attr(cXMLInput, "encoding", 1, 1);
|
324
|
+
|
346
325
|
rb_define_method(cXMLInput, "initialize", rxml_input_initialize, 0);
|
347
326
|
rb_define_method(cXMLInput, "file", rxml_input_file_get, 0);
|
348
327
|
rb_define_method(cXMLInput, "file=", rxml_input_file_set, 1);
|
349
328
|
rb_define_method(cXMLInput, "string", rxml_input_string_get, 0);
|
350
329
|
rb_define_method(cXMLInput, "string=", rxml_input_string_set, 1);
|
351
|
-
rb_define_method(cXMLInput, "document", rxml_input_document_get, 0);
|
352
|
-
rb_define_method(cXMLInput, "document=", rxml_input_document_set, 1);
|
353
330
|
rb_define_method(cXMLInput, "io", rxml_input_io_get, 0);
|
354
331
|
rb_define_method(cXMLInput, "io=", rxml_input_io_set, 1);
|
355
332
|
}
|
data/ext/libxml/ruby_xml_input.h
CHANGED
@@ -7,11 +7,11 @@
|
|
7
7
|
|
8
8
|
extern VALUE cXMLInput;
|
9
9
|
|
10
|
+
extern ID BASE_URL_ATTR;
|
10
11
|
extern ID ENCODING_ATTR;
|
11
12
|
extern ID FILE_ATTR;
|
12
|
-
extern ID STRING_ATTR;
|
13
|
-
extern ID DOCUMENT_ATTR;
|
14
13
|
extern ID IO_ATTR;
|
14
|
+
extern ID STRING_ATTR;
|
15
15
|
|
16
16
|
void ruby_init_xml_input();
|
17
17
|
int rxml_read_callback(void *context, char *buffer, int len);
|
@@ -1,151 +1,151 @@
|
|
1
|
-
/* $Id: ruby_xml_parser.c
|
2
|
-
|
3
|
-
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
-
|
5
|
-
#include <stdarg.h>
|
6
|
-
#include "ruby_libxml.h"
|
7
|
-
|
8
|
-
VALUE cXMLParser;
|
9
|
-
static ID INPUT_ATTR;
|
10
|
-
static ID CONTEXT_ATTR;
|
11
|
-
|
12
|
-
/*
|
13
|
-
* Document-class: LibXML::XML::Parser
|
14
|
-
*
|
15
|
-
* The XML::Parser provides a tree based API for processing
|
16
|
-
* xml documents, in contract to XML::Reader's stream
|
17
|
-
* based api and XML::SaxParser callback based API.
|
18
|
-
*
|
19
|
-
* As a result, parsing a document creates an in-memory document object
|
20
|
-
* that consist of any number of XML::Node instances. This is simple
|
21
|
-
* and powerful model, but has the major limitation that the size of
|
22
|
-
* the document that can be processed is limited by the amount of
|
23
|
-
* memory available. In such cases, it is better to use the XML::Reader.
|
24
|
-
*
|
25
|
-
* Using the parser is simple:
|
26
|
-
*
|
27
|
-
* parser = XML::Parser.new
|
28
|
-
* parser.file = 'my_file'
|
29
|
-
* doc = parser.parse
|
30
|
-
*
|
31
|
-
* You can also parse strings (see XML::Parser.string) and io objects (see
|
32
|
-
* XML::Parser.io).
|
33
|
-
*/
|
34
|
-
|
35
|
-
/*
|
36
|
-
* call-seq:
|
37
|
-
* parser.initialize -> parser
|
38
|
-
*
|
39
|
-
* Initiliazes instance of parser.
|
40
|
-
*/
|
41
|
-
static VALUE
|
42
|
-
rxml_parser_initialize(VALUE self) {
|
43
|
-
VALUE input = rb_class_new_instance(0, NULL, cXMLInput);
|
44
|
-
rb_iv_set(self, "@input", input);
|
45
|
-
rb_iv_set(self, "@context", Qnil);
|
46
|
-
return self;
|
47
|
-
}
|
48
|
-
|
49
|
-
static xmlParserCtxtPtr
|
50
|
-
rxml_parser_filename_ctxt(VALUE input) {
|
51
|
-
xmlParserCtxtPtr ctxt;
|
52
|
-
int retry_count = 0;
|
53
|
-
VALUE filename = rb_ivar_get(input, FILE_ATTR);
|
54
|
-
|
55
|
-
retry:
|
56
|
-
ctxt = xmlCreateFileParserCtxt(StringValuePtr(filename));
|
57
|
-
if (ctxt == NULL) {
|
58
|
-
if ((errno == EMFILE || errno == ENFILE) && retry_count == 0) {
|
59
|
-
retry_count++;
|
60
|
-
rb_gc();
|
61
|
-
goto retry;
|
62
|
-
} else {
|
63
|
-
rb_raise(rb_eIOError, StringValuePtr(filename));
|
64
|
-
}
|
65
|
-
}
|
66
|
-
|
67
|
-
return ctxt;
|
68
|
-
}
|
69
|
-
|
70
|
-
static xmlParserCtxtPtr
|
71
|
-
rxml_parser_str_ctxt(VALUE input) {
|
72
|
-
VALUE str = rb_ivar_get(input, STRING_ATTR);
|
73
|
-
return xmlCreateMemoryParserCtxt(StringValuePtr(str), RSTRING_LEN(str));
|
74
|
-
}
|
75
|
-
|
76
|
-
static xmlParserCtxtPtr
|
77
|
-
rxml_parser_io_ctxt(VALUE input) {
|
78
|
-
VALUE io = rb_ivar_get(input, IO_ATTR);
|
79
|
-
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
80
|
-
xmlCharEncoding xmlEncoding = NUM2INT(encoding);
|
81
|
-
|
82
|
-
return xmlCreateIOParserCtxt(NULL, NULL,
|
83
|
-
(xmlInputReadCallback) rxml_read_callback,
|
84
|
-
NULL, io, xmlEncoding);
|
85
|
-
}
|
86
|
-
|
87
|
-
|
88
|
-
/*
|
89
|
-
* call-seq:
|
90
|
-
* parser.parse -> document
|
91
|
-
*
|
92
|
-
* Parse the input XML and create an XML::Document with
|
93
|
-
* it's content. If an error occurs, XML::Parser::ParseError
|
94
|
-
* is thrown.
|
95
|
-
*/
|
96
|
-
static VALUE
|
97
|
-
rxml_parser_parse(VALUE self) {
|
98
|
-
xmlParserCtxtPtr ctxt;
|
99
|
-
VALUE context;
|
100
|
-
VALUE input = rb_ivar_get(self, INPUT_ATTR);
|
101
|
-
|
102
|
-
context = rb_ivar_get(self, CONTEXT_ATTR);
|
103
|
-
if (context != Qnil)
|
104
|
-
rb_raise(rb_eRuntimeError, "You cannot parse a data source twice");
|
105
|
-
|
106
|
-
if (rb_ivar_get(input, FILE_ATTR) != Qnil)
|
107
|
-
ctxt = rxml_parser_filename_ctxt(input);
|
108
|
-
else if (rb_ivar_get(input, STRING_ATTR) != Qnil)
|
109
|
-
ctxt = rxml_parser_str_ctxt(input);
|
110
|
-
/*else if (rb_ivar_get(input, DOCUMENT_ATTR) != Qnil)
|
111
|
-
ctxt = rxml_parser_parse_document(input);*/
|
112
|
-
else if (rb_ivar_get(input, IO_ATTR) != Qnil)
|
113
|
-
ctxt = rxml_parser_io_ctxt(input);
|
114
|
-
else
|
115
|
-
rb_raise(rb_eArgError, "You must specify a parser data source");
|
116
|
-
|
117
|
-
if (!ctxt)
|
118
|
-
rxml_raise(&xmlLastError);
|
119
|
-
|
120
|
-
context = rxml_parser_context_wrap(ctxt);
|
121
|
-
rb_ivar_set(self, CONTEXT_ATTR, context);
|
122
|
-
|
123
|
-
if (xmlParseDocument(ctxt) == -1 || !ctxt->wellFormed) {
|
124
|
-
xmlFreeDoc(ctxt->myDoc);
|
125
|
-
rxml_raise(&ctxt->lastError);
|
126
|
-
}
|
127
|
-
|
128
|
-
return rxml_document_wrap(ctxt->myDoc);
|
129
|
-
}
|
130
|
-
|
131
|
-
|
132
|
-
// Rdoc needs to know
|
133
|
-
#ifdef RDOC_NEVER_DEFINED
|
134
|
-
mLibXML = rb_define_module("LibXML");
|
135
|
-
mXML = rb_define_module_under(mLibXML, "XML");
|
136
|
-
#endif
|
137
|
-
|
138
|
-
void
|
139
|
-
ruby_init_parser(void) {
|
140
|
-
cXMLParser = rb_define_class_under(mXML, "Parser", rb_cObject);
|
141
|
-
|
142
|
-
/* Atributes */
|
143
|
-
INPUT_ATTR = rb_intern("@input");
|
144
|
-
CONTEXT_ATTR = rb_intern("@context");
|
145
|
-
rb_define_attr(cXMLParser, "input", 1, 0);
|
146
|
-
rb_define_attr(cXMLParser, "context", 1, 0);
|
147
|
-
|
148
|
-
/* Instance Methods */
|
149
|
-
rb_define_method(cXMLParser, "initialize", rxml_parser_initialize, 0);
|
150
|
-
rb_define_method(cXMLParser, "parse", rxml_parser_parse, 0);
|
151
|
-
}
|
1
|
+
/* $Id: ruby_xml_parser.c 630 2008-11-24 06:53:01Z cfis $ */
|
2
|
+
|
3
|
+
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
+
|
5
|
+
#include <stdarg.h>
|
6
|
+
#include "ruby_libxml.h"
|
7
|
+
|
8
|
+
VALUE cXMLParser;
|
9
|
+
static ID INPUT_ATTR;
|
10
|
+
static ID CONTEXT_ATTR;
|
11
|
+
|
12
|
+
/*
|
13
|
+
* Document-class: LibXML::XML::Parser
|
14
|
+
*
|
15
|
+
* The XML::Parser provides a tree based API for processing
|
16
|
+
* xml documents, in contract to XML::Reader's stream
|
17
|
+
* based api and XML::SaxParser callback based API.
|
18
|
+
*
|
19
|
+
* As a result, parsing a document creates an in-memory document object
|
20
|
+
* that consist of any number of XML::Node instances. This is simple
|
21
|
+
* and powerful model, but has the major limitation that the size of
|
22
|
+
* the document that can be processed is limited by the amount of
|
23
|
+
* memory available. In such cases, it is better to use the XML::Reader.
|
24
|
+
*
|
25
|
+
* Using the parser is simple:
|
26
|
+
*
|
27
|
+
* parser = XML::Parser.new
|
28
|
+
* parser.file = 'my_file'
|
29
|
+
* doc = parser.parse
|
30
|
+
*
|
31
|
+
* You can also parse strings (see XML::Parser.string) and io objects (see
|
32
|
+
* XML::Parser.io).
|
33
|
+
*/
|
34
|
+
|
35
|
+
/*
|
36
|
+
* call-seq:
|
37
|
+
* parser.initialize -> parser
|
38
|
+
*
|
39
|
+
* Initiliazes instance of parser.
|
40
|
+
*/
|
41
|
+
static VALUE
|
42
|
+
rxml_parser_initialize(VALUE self) {
|
43
|
+
VALUE input = rb_class_new_instance(0, NULL, cXMLInput);
|
44
|
+
rb_iv_set(self, "@input", input);
|
45
|
+
rb_iv_set(self, "@context", Qnil);
|
46
|
+
return self;
|
47
|
+
}
|
48
|
+
|
49
|
+
static xmlParserCtxtPtr
|
50
|
+
rxml_parser_filename_ctxt(VALUE input) {
|
51
|
+
xmlParserCtxtPtr ctxt;
|
52
|
+
int retry_count = 0;
|
53
|
+
VALUE filename = rb_ivar_get(input, FILE_ATTR);
|
54
|
+
|
55
|
+
retry:
|
56
|
+
ctxt = xmlCreateFileParserCtxt(StringValuePtr(filename));
|
57
|
+
if (ctxt == NULL) {
|
58
|
+
if ((errno == EMFILE || errno == ENFILE) && retry_count == 0) {
|
59
|
+
retry_count++;
|
60
|
+
rb_gc();
|
61
|
+
goto retry;
|
62
|
+
} else {
|
63
|
+
rb_raise(rb_eIOError, StringValuePtr(filename));
|
64
|
+
}
|
65
|
+
}
|
66
|
+
|
67
|
+
return ctxt;
|
68
|
+
}
|
69
|
+
|
70
|
+
static xmlParserCtxtPtr
|
71
|
+
rxml_parser_str_ctxt(VALUE input) {
|
72
|
+
VALUE str = rb_ivar_get(input, STRING_ATTR);
|
73
|
+
return xmlCreateMemoryParserCtxt(StringValuePtr(str), RSTRING_LEN(str));
|
74
|
+
}
|
75
|
+
|
76
|
+
static xmlParserCtxtPtr
|
77
|
+
rxml_parser_io_ctxt(VALUE input) {
|
78
|
+
VALUE io = rb_ivar_get(input, IO_ATTR);
|
79
|
+
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
80
|
+
xmlCharEncoding xmlEncoding = NUM2INT(encoding);
|
81
|
+
|
82
|
+
return xmlCreateIOParserCtxt(NULL, NULL,
|
83
|
+
(xmlInputReadCallback) rxml_read_callback,
|
84
|
+
NULL, (void *)io, xmlEncoding);
|
85
|
+
}
|
86
|
+
|
87
|
+
|
88
|
+
/*
|
89
|
+
* call-seq:
|
90
|
+
* parser.parse -> document
|
91
|
+
*
|
92
|
+
* Parse the input XML and create an XML::Document with
|
93
|
+
* it's content. If an error occurs, XML::Parser::ParseError
|
94
|
+
* is thrown.
|
95
|
+
*/
|
96
|
+
static VALUE
|
97
|
+
rxml_parser_parse(VALUE self) {
|
98
|
+
xmlParserCtxtPtr ctxt;
|
99
|
+
VALUE context;
|
100
|
+
VALUE input = rb_ivar_get(self, INPUT_ATTR);
|
101
|
+
|
102
|
+
context = rb_ivar_get(self, CONTEXT_ATTR);
|
103
|
+
if (context != Qnil)
|
104
|
+
rb_raise(rb_eRuntimeError, "You cannot parse a data source twice");
|
105
|
+
|
106
|
+
if (rb_ivar_get(input, FILE_ATTR) != Qnil)
|
107
|
+
ctxt = rxml_parser_filename_ctxt(input);
|
108
|
+
else if (rb_ivar_get(input, STRING_ATTR) != Qnil)
|
109
|
+
ctxt = rxml_parser_str_ctxt(input);
|
110
|
+
/*else if (rb_ivar_get(input, DOCUMENT_ATTR) != Qnil)
|
111
|
+
ctxt = rxml_parser_parse_document(input);*/
|
112
|
+
else if (rb_ivar_get(input, IO_ATTR) != Qnil)
|
113
|
+
ctxt = rxml_parser_io_ctxt(input);
|
114
|
+
else
|
115
|
+
rb_raise(rb_eArgError, "You must specify a parser data source");
|
116
|
+
|
117
|
+
if (!ctxt)
|
118
|
+
rxml_raise(&xmlLastError);
|
119
|
+
|
120
|
+
context = rxml_parser_context_wrap(ctxt);
|
121
|
+
rb_ivar_set(self, CONTEXT_ATTR, context);
|
122
|
+
|
123
|
+
if (xmlParseDocument(ctxt) == -1 || !ctxt->wellFormed) {
|
124
|
+
xmlFreeDoc(ctxt->myDoc);
|
125
|
+
rxml_raise(&ctxt->lastError);
|
126
|
+
}
|
127
|
+
|
128
|
+
return rxml_document_wrap(ctxt->myDoc);
|
129
|
+
}
|
130
|
+
|
131
|
+
|
132
|
+
// Rdoc needs to know
|
133
|
+
#ifdef RDOC_NEVER_DEFINED
|
134
|
+
mLibXML = rb_define_module("LibXML");
|
135
|
+
mXML = rb_define_module_under(mLibXML, "XML");
|
136
|
+
#endif
|
137
|
+
|
138
|
+
void
|
139
|
+
ruby_init_parser(void) {
|
140
|
+
cXMLParser = rb_define_class_under(mXML, "Parser", rb_cObject);
|
141
|
+
|
142
|
+
/* Atributes */
|
143
|
+
INPUT_ATTR = rb_intern("@input");
|
144
|
+
CONTEXT_ATTR = rb_intern("@context");
|
145
|
+
rb_define_attr(cXMLParser, "input", 1, 0);
|
146
|
+
rb_define_attr(cXMLParser, "context", 1, 0);
|
147
|
+
|
148
|
+
/* Instance Methods */
|
149
|
+
rb_define_method(cXMLParser, "initialize", rxml_parser_initialize, 0);
|
150
|
+
rb_define_method(cXMLParser, "parse", rxml_parser_parse, 0);
|
151
|
+
}
|