libxml-ruby 0.6.0-x86-mswin32-60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES +122 -0
- data/LICENSE +23 -0
- data/README +144 -0
- data/ext/libxml/cbg.c +76 -0
- data/ext/libxml/extconf.rb +308 -0
- data/ext/libxml/libxml.c +62 -0
- data/ext/libxml/ruby_libxml.h +93 -0
- data/ext/libxml/ruby_xml_attr.c +405 -0
- data/ext/libxml/ruby_xml_attr.h +19 -0
- data/ext/libxml/ruby_xml_document.c +1111 -0
- data/ext/libxml/ruby_xml_document.h +27 -0
- data/ext/libxml/ruby_xml_dtd.c +168 -0
- data/ext/libxml/ruby_xml_dtd.h +17 -0
- data/ext/libxml/ruby_xml_html_parser.c +449 -0
- data/ext/libxml/ruby_xml_html_parser.h +29 -0
- data/ext/libxml/ruby_xml_input_cbg.c +158 -0
- data/ext/libxml/ruby_xml_input_cbg.h +20 -0
- data/ext/libxml/ruby_xml_node.c +2410 -0
- data/ext/libxml/ruby_xml_node.h +27 -0
- data/ext/libxml/ruby_xml_node_set.c +170 -0
- data/ext/libxml/ruby_xml_node_set.h +20 -0
- data/ext/libxml/ruby_xml_ns.c +153 -0
- data/ext/libxml/ruby_xml_ns.h +21 -0
- data/ext/libxml/ruby_xml_parser.c +1425 -0
- data/ext/libxml/ruby_xml_parser.h +31 -0
- data/ext/libxml/ruby_xml_parser_context.c +750 -0
- data/ext/libxml/ruby_xml_parser_context.h +22 -0
- data/ext/libxml/ruby_xml_reader.c +900 -0
- data/ext/libxml/ruby_xml_reader.h +14 -0
- data/ext/libxml/ruby_xml_sax_parser.c +485 -0
- data/ext/libxml/ruby_xml_sax_parser.h +56 -0
- data/ext/libxml/ruby_xml_schema.c +146 -0
- data/ext/libxml/ruby_xml_schema.h +16 -0
- data/ext/libxml/ruby_xml_state.c +113 -0
- data/ext/libxml/ruby_xml_state.h +12 -0
- data/ext/libxml/ruby_xml_tree.c +43 -0
- data/ext/libxml/ruby_xml_tree.h +12 -0
- data/ext/libxml/ruby_xml_xinclude.c +20 -0
- data/ext/libxml/ruby_xml_xinclude.h +13 -0
- data/ext/libxml/ruby_xml_xpath.c +243 -0
- data/ext/libxml/ruby_xml_xpath.h +23 -0
- data/ext/libxml/ruby_xml_xpath_context.c +118 -0
- data/ext/libxml/ruby_xml_xpath_context.h +20 -0
- data/ext/libxml/ruby_xml_xpath_object.c +293 -0
- data/ext/libxml/ruby_xml_xpath_object.h +28 -0
- data/ext/libxml/ruby_xml_xpointer.c +100 -0
- data/ext/libxml/ruby_xml_xpointer.h +27 -0
- data/ext/libxml/ruby_xml_xpointer_context.c +21 -0
- data/ext/libxml/ruby_xml_xpointer_context.h +18 -0
- data/ext/libxml/sax_parser_callbacks.inc +213 -0
- data/ext/libxml/version.h +9 -0
- data/lib/libxml.rb +125 -0
- data/lib/libxml_ruby.so +0 -0
- data/lib/xml/libxml.rb +5 -0
- data/mingw/libiconv-2.dll +0 -0
- data/mingw/libxml2-2.dll +0 -0
- data/mingw/libxml_ruby.so +0 -0
- data/mingw/mingw.rake +36 -0
- data/test/dtd-test.rb +24 -0
- data/test/etc_doc_to_s.rb +19 -0
- data/test/ets_copy_bug.rb +21 -0
- data/test/ets_copy_bug2.rb +32 -0
- data/test/ets_copy_bug3.rb +38 -0
- data/test/ets_doc_file.rb +15 -0
- data/test/ets_doc_to_s.rb +21 -0
- data/test/ets_gpx.rb +26 -0
- data/test/ets_node_gc.rb +21 -0
- data/test/ets_test.xml +2 -0
- data/test/ets_tsr.rb +9 -0
- data/test/gc.log +0 -0
- data/test/merge_bug.rb +55 -0
- data/test/schema-test.rb +74 -0
- data/test/tc_well_formed.rb +11 -0
- data/test/tc_xml_document.rb +52 -0
- data/test/tc_xml_document_write.rb +24 -0
- data/test/tc_xml_document_write2.rb +54 -0
- data/test/tc_xml_document_write3.rb +96 -0
- data/test/tc_xml_html_parser.rb +63 -0
- data/test/tc_xml_node.rb +59 -0
- data/test/tc_xml_node2.rb +25 -0
- data/test/tc_xml_node3.rb +27 -0
- data/test/tc_xml_node4.rb +86 -0
- data/test/tc_xml_node5.rb +52 -0
- data/test/tc_xml_node6.rb +27 -0
- data/test/tc_xml_node7.rb +35 -0
- data/test/tc_xml_node8.rb +32 -0
- data/test/tc_xml_node9.rb +32 -0
- data/test/tc_xml_node_copy.rb +40 -0
- data/test/tc_xml_node_set.rb +24 -0
- data/test/tc_xml_node_set2.rb +37 -0
- data/test/tc_xml_node_text.rb +17 -0
- data/test/tc_xml_node_xlink.rb +28 -0
- data/test/tc_xml_parser.rb +190 -0
- data/test/tc_xml_parser2.rb +16 -0
- data/test/tc_xml_parser3.rb +23 -0
- data/test/tc_xml_parser4.rb +33 -0
- data/test/tc_xml_parser5.rb +27 -0
- data/test/tc_xml_parser6.rb +23 -0
- data/test/tc_xml_parser7.rb +28 -0
- data/test/tc_xml_parser8.rb +32 -0
- data/test/tc_xml_parser9.rb +11 -0
- data/test/tc_xml_parser_context.rb +88 -0
- data/test/tc_xml_reader.rb +112 -0
- data/test/tc_xml_sax_parser.rb +104 -0
- data/test/tc_xml_sax_parser2.rb +51 -0
- data/test/tc_xml_xinclude.rb +30 -0
- data/test/tc_xml_xpath.rb +38 -0
- data/test/tc_xml_xpath2.rb +14 -0
- data/test/tc_xml_xpointer.rb +78 -0
- data/vc/libxml.sln +20 -0
- data/vc/libxml.vcproj +389 -0
- data/work/Rakefile +247 -0
- data/work/task/make +26 -0
- data/work/task/memory +37 -0
- data/work/task/rdoc +39 -0
- data/work/task/setup +1616 -0
- data/work/task/test +29 -0
- data/work/test/ets_runner.rb +33 -0
- data/work/test/libxml_test.rb +3 -0
- data/work/test/runner.rb +0 -0
- data/work/test/runner_ets.rb +33 -0
- data/work/vc/debug/libxml.exp +0 -0
- data/work/vc/debug/libxml.ilk +0 -0
- data/work/vc/debug/libxml.lib +0 -0
- data/work/vc/debug/libxml.pdb +0 -0
- data/work/vc/debug/libxml.so +0 -0
- metadata +224 -0
@@ -0,0 +1,27 @@
|
|
1
|
+
/* $Id: ruby_xml_document.h 304 2008-07-02 17:42:41Z cfis $ */
|
2
|
+
|
3
|
+
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
+
|
5
|
+
#ifndef __RUBY_XML_DOCUMENT__
|
6
|
+
#define __RUBY_XML_DOCUMENT__
|
7
|
+
|
8
|
+
extern VALUE cXMLDocument;
|
9
|
+
|
10
|
+
/*
 * Native state carried by a document object: the libxml2 document handle
 * plus an opaque, type-tagged data pointer.
 * NOTE(review): the set of valid data_type values and the layout behind
 * *data are not visible in this header -- confirm against the users of
 * this struct.
 */
typedef struct rxp_document {
|
11
|
+
xmlDocPtr doc; /* libxml2 document handle (tree/DOM interface) */
|
12
|
+
int data_type; /* Tag identifying the kind of structure *data points to */
|
13
|
+
void *data; /* Pointer to an external structure of options; interpret according to data_type */
|
14
|
+
} ruby_xml_document_t;
|
15
|
+
|
16
|
+
VALUE ruby_xml_document_filename_get(VALUE self);
|
17
|
+
VALUE ruby_xml_document_new_native(VALUE class, VALUE xmlver);
|
18
|
+
void ruby_xml_document_free(ruby_xml_document_t *rxd);
|
19
|
+
VALUE ruby_xml_document_root_get(VALUE self);
|
20
|
+
void ruby_init_xml_document(void);
|
21
|
+
|
22
|
+
#if defined(_WIN32)
|
23
|
+
__declspec(dllexport)
|
24
|
+
#endif
|
25
|
+
VALUE ruby_xml_document_wrap(xmlDocPtr xnode);
|
26
|
+
|
27
|
+
#endif
|
@@ -0,0 +1,168 @@
|
|
1
|
+
#include "ruby_libxml.h"
|
2
|
+
#include "ruby_xml_dtd.h"
|
3
|
+
|
4
|
+
VALUE cXMLDtd;
|
5
|
+
|
6
|
+
void
|
7
|
+
ruby_xml_dtd_free(ruby_xml_dtd *rxdtd) {
|
8
|
+
if (rxdtd->dtd != NULL) {
|
9
|
+
xmlFreeDtd(rxdtd->dtd);
|
10
|
+
rxdtd->dtd = NULL;
|
11
|
+
}
|
12
|
+
|
13
|
+
ruby_xfree(rxdtd);
|
14
|
+
}
|
15
|
+
|
16
|
+
static void
|
17
|
+
ruby_xml_dtd_mark(ruby_xml_dtd *rxdtd) {
|
18
|
+
return;
|
19
|
+
//if (rxdtd == NULL) return;
|
20
|
+
//if (!NIL_P(rxd->xmlver)) rb_gc_mark(rxd->xmlver);
|
21
|
+
}
|
22
|
+
|
23
|
+
/*
|
24
|
+
* call-seq:
|
25
|
+
* XML::Dtd.new("public system") => dtd
|
26
|
+
* XML::Dtd.new("public", "system") => dtd
|
27
|
+
*
|
28
|
+
* Create a new Dtd from the specified public and system
|
29
|
+
* identifiers.
|
30
|
+
*/
|
31
|
+
VALUE
|
32
|
+
ruby_xml_dtd_initialize(int argc, VALUE *argv, VALUE class) {
|
33
|
+
ruby_xml_dtd *rxdtd;
|
34
|
+
VALUE external, system, dtd_string;
|
35
|
+
xmlParserInputBufferPtr buffer;
|
36
|
+
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
|
37
|
+
xmlChar *new_string;
|
38
|
+
|
39
|
+
// 1 argument -- string --> parsujeme jako dtd
|
40
|
+
// 2 argumenty -- public, system --> bude se hledat
|
41
|
+
switch (argc) {
|
42
|
+
case 2:
|
43
|
+
rb_scan_args(argc, argv, "20", &external, &system);
|
44
|
+
|
45
|
+
Check_Type(external, T_STRING);
|
46
|
+
Check_Type(system, T_STRING);
|
47
|
+
rxdtd = ALLOC(ruby_xml_dtd);
|
48
|
+
rxdtd->dtd = xmlParseDTD( (xmlChar*)StringValuePtr(external),
|
49
|
+
(xmlChar*)StringValuePtr(system) );
|
50
|
+
if (rxdtd->dtd == NULL) {
|
51
|
+
ruby_xfree(rxdtd);
|
52
|
+
return(Qfalse);
|
53
|
+
}
|
54
|
+
|
55
|
+
xmlSetTreeDoc( (xmlNodePtr)rxdtd->dtd, NULL );
|
56
|
+
return( Data_Wrap_Struct(cXMLDtd, ruby_xml_dtd_mark, ruby_xml_dtd_free, rxdtd) );
|
57
|
+
break;
|
58
|
+
|
59
|
+
/*
|
60
|
+
SV *
|
61
|
+
new(CLASS, external, system)
|
62
|
+
char * CLASS
|
63
|
+
char * external
|
64
|
+
char * system
|
65
|
+
ALIAS:
|
66
|
+
parse_uri = 1
|
67
|
+
PREINIT:
|
68
|
+
xmlDtdPtr dtd = NULL;
|
69
|
+
CODE:
|
70
|
+
LibXML_error = sv_2mortal(newSVpv("", 0));
|
71
|
+
dtd = xmlParseDTD((const xmlChar*)external, (const xmlChar*)system);
|
72
|
+
if ( dtd == NULL ) {
|
73
|
+
XSRETURN_UNDEF;
|
74
|
+
}
|
75
|
+
xmlSetTreeDoc((xmlNodePtr)dtd, NULL);
|
76
|
+
RETVAL = PmmNodeToSv( (xmlNodePtr) dtd, NULL );
|
77
|
+
OUTPUT:
|
78
|
+
RETVAL
|
79
|
+
*/
|
80
|
+
|
81
|
+
case 1:
|
82
|
+
|
83
|
+
rb_scan_args(argc, argv, "10", &dtd_string);
|
84
|
+
buffer = xmlAllocParserInputBuffer(enc);
|
85
|
+
//if ( !buffer) return Qnil
|
86
|
+
new_string = xmlStrdup((xmlChar*)StringValuePtr(dtd_string));
|
87
|
+
xmlParserInputBufferPush(buffer, xmlStrlen(new_string), (const char*)new_string);
|
88
|
+
|
89
|
+
rxdtd = ALLOC(ruby_xml_dtd);
|
90
|
+
rxdtd->dtd = xmlIOParseDTD(NULL, buffer, enc);
|
91
|
+
|
92
|
+
// NOTE: For some reason freeing this InputBuffer causes a segfault!
|
93
|
+
// xmlFreeParserInputBuffer(buffer);
|
94
|
+
xmlFree(new_string);
|
95
|
+
|
96
|
+
return( Data_Wrap_Struct(cXMLDtd, ruby_xml_dtd_mark, ruby_xml_dtd_free, rxdtd) );
|
97
|
+
|
98
|
+
break;
|
99
|
+
/*
|
100
|
+
SV * parse_string(CLASS, str, ...)
|
101
|
+
char * CLASS
|
102
|
+
char * str
|
103
|
+
PREINIT:
|
104
|
+
STRLEN n_a;
|
105
|
+
xmlDtdPtr res;
|
106
|
+
SV * encoding_sv;
|
107
|
+
xmlParserInputBufferPtr buffer;
|
108
|
+
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
|
109
|
+
xmlChar * new_string;
|
110
|
+
STRLEN len;
|
111
|
+
CODE:
|
112
|
+
LibXML_init_error();
|
113
|
+
if (items > 2) {
|
114
|
+
encoding_sv = ST(2);
|
115
|
+
if (items > 3) {
|
116
|
+
croak("parse_string: too many parameters");
|
117
|
+
}
|
118
|
+
// warn("getting encoding...\n");
|
119
|
+
enc = xmlParseCharEncoding(SvPV(encoding_sv, n_a));
|
120
|
+
if (enc == XML_CHAR_ENCODING_ERROR) {
|
121
|
+
croak("Parse of encoding %s failed: %s", SvPV(encoding_sv, n_a), SvPV(LibXML_error, n_a));
|
122
|
+
}
|
123
|
+
}
|
124
|
+
buffer = xmlAllocParserInputBuffer(enc);
|
125
|
+
// buffer = xmlParserInputBufferCreateMem(str, xmlStrlen(str), enc);
|
126
|
+
if ( !buffer)
|
127
|
+
croak("cant create buffer!\n" );
|
128
|
+
|
129
|
+
new_string = xmlStrdup((const xmlChar*)str);
|
130
|
+
xmlParserInputBufferPush(buffer, xmlStrlen(new_string), (const char*)new_string);
|
131
|
+
|
132
|
+
res = xmlIOParseDTD(NULL, buffer, enc);
|
133
|
+
|
134
|
+
// NOTE: For some reason freeing this InputBuffer causes a segfault!
|
135
|
+
// xmlFreeParserInputBuffer(buffer);
|
136
|
+
xmlFree(new_string);
|
137
|
+
|
138
|
+
sv_2mortal( LibXML_error );
|
139
|
+
LibXML_croak_error();
|
140
|
+
|
141
|
+
if (res == NULL) {
|
142
|
+
croak("no DTD parsed!");
|
143
|
+
}
|
144
|
+
RETVAL = PmmNodeToSv((xmlNodePtr)res, NULL);
|
145
|
+
OUTPUT:
|
146
|
+
RETVAL
|
147
|
+
*/
|
148
|
+
|
149
|
+
default:
|
150
|
+
rb_raise(rb_eArgError, "wrong number of arguments (need 1 or 2)");
|
151
|
+
}
|
152
|
+
|
153
|
+
//docobj = ruby_xml_document_new2(cXMLDocument, xmlver);
|
154
|
+
return Qnil;
|
155
|
+
}
|
156
|
+
|
157
|
+
// Rdoc needs to know
|
158
|
+
#ifdef RDOC_NEVER_DEFINED
|
159
|
+
mXML = rb_define_module("XML");
|
160
|
+
#endif
|
161
|
+
|
162
|
+
/*
 * Register the XML::Dtd class under the XML module and expose its
 * singleton "new" method (variable arity, handled by
 * ruby_xml_dtd_initialize).
 */
void
|
163
|
+
ruby_init_xml_dtd(void) {
|
164
|
+
cXMLDtd = rb_define_class_under(mXML, "Dtd", rb_cObject);
|
165
|
+
/* -1: the implementation receives (argc, argv, class) and checks the count itself */
rb_define_singleton_method(cXMLDtd, "new", ruby_xml_dtd_initialize, -1);
|
166
|
+
//rb_define_method(cXMLDocument, "xinclude", ruby_xml_document_xinclude, 0);
|
167
|
+
}
|
168
|
+
|
@@ -0,0 +1,17 @@
|
|
1
|
+
#ifndef __RUBY_XML_DTD__
|
2
|
+
#define __RUBY_XML_DTD__
|
3
|
+
|
4
|
+
extern VALUE cXMLDtd;
|
5
|
+
|
6
|
+
/*
 * Native state carried by an XML::Dtd object.  Only the libxml2 DTD
 * pointer is active; the remaining fields are disabled.
 */
typedef struct rxp_dtd {
|
7
|
+
xmlDtdPtr dtd; /* libxml2 DTD handle */
|
8
|
+
//int data_type; /* The data type referenced by *data */
|
9
|
+
//void *data; /* Pointer to an external structure of options */
|
10
|
+
//int is_ptr; /* Determines if this object owns its data or points to it someplace else */
|
11
|
+
//VALUE xmlver; /* T_STRING with the xml version */
|
12
|
+
} ruby_xml_dtd;
|
13
|
+
|
14
|
+
void ruby_init_xml_dtd(void);
|
15
|
+
/* Legacy declaration kept for compatibility.
 * NOTE(review): no function with this name is defined in ruby_xml_dtd.c. */
void ruby_dtd_free(ruby_xml_dtd *rxdtd);
/* Free function for XML::Dtd wrappers, as defined in ruby_xml_dtd.c. */
void ruby_xml_dtd_free(ruby_xml_dtd *rxdtd);
|
16
|
+
|
17
|
+
#endif
|
@@ -0,0 +1,449 @@
|
|
1
|
+
/* $Id: ruby_xml_html_parser.c 300 2008-07-01 19:14:15Z cfis $ */
|
2
|
+
|
3
|
+
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
+
|
5
|
+
#include "ruby_libxml.h"
|
6
|
+
|
7
|
+
VALUE cXMLHTMLParser;
|
8
|
+
|
9
|
+
//static int
|
10
|
+
//ctxtRead(FILE *f, char * buf, int len) {
|
11
|
+
// return(fread(buf, 1, len, f));
|
12
|
+
//}
|
13
|
+
|
14
|
+
|
15
|
+
/*
|
16
|
+
* call-seq:
|
17
|
+
* parser.filename => "filename"
|
18
|
+
*
|
19
|
+
* Obtain the filename this parser will read from.
|
20
|
+
*/
|
21
|
+
/*
|
22
|
+
VALUE
|
23
|
+
ruby_xml_html_parser_filename_get(VALUE self) {
|
24
|
+
ruby_xml_html_parser *rxp;
|
25
|
+
rx_file_data *data;
|
26
|
+
|
27
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
28
|
+
if (rxp->data == NULL)
|
29
|
+
return(Qnil);
|
30
|
+
|
31
|
+
if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE)
|
32
|
+
return(Qnil);
|
33
|
+
|
34
|
+
data = (rx_file_data *)rxp->data;
|
35
|
+
return(data->filename);
|
36
|
+
}
|
37
|
+
*/
|
38
|
+
|
39
|
+
/*
|
40
|
+
* call-seq:
|
41
|
+
* parser.filename = "filename"
|
42
|
+
*
|
43
|
+
* Set the filename this parser will read from.
|
44
|
+
*/
|
45
|
+
/*
|
46
|
+
VALUE
|
47
|
+
ruby_xml_html_parser_filename_set(VALUE self, VALUE filename) {
|
48
|
+
ruby_xml_html_parser *rxp;
|
49
|
+
ruby_xml_parser_context *rxpc;
|
50
|
+
rx_file_data *data;
|
51
|
+
|
52
|
+
Check_Type(filename, T_STRING);
|
53
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
54
|
+
|
55
|
+
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
|
56
|
+
if (rxp->data != NULL)
|
57
|
+
rb_fatal("crap, this should be null");
|
58
|
+
|
59
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
|
60
|
+
data = ALLOC(rx_file_data);
|
61
|
+
rxp->data = data;
|
62
|
+
} else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE) {
|
63
|
+
return(Qnil);
|
64
|
+
}
|
65
|
+
|
66
|
+
rxp->ctxt = ruby_xml_parser_context_new3();
|
67
|
+
data = (rx_file_data *)rxp->data;
|
68
|
+
data->filename = filename;
|
69
|
+
|
70
|
+
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
71
|
+
rxpc->ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename));
|
72
|
+
if (rxpc->ctxt == NULL)
|
73
|
+
rb_sys_fail(StringValuePtr(filename));
|
74
|
+
|
75
|
+
return(data->filename);
|
76
|
+
}
|
77
|
+
*/
|
78
|
+
|
79
|
+
void
|
80
|
+
ruby_xml_html_parser_free(ruby_xml_html_parser *rxp) {
|
81
|
+
void *data;
|
82
|
+
|
83
|
+
switch(rxp->data_type) {
|
84
|
+
case RUBY_LIBXML_SRC_TYPE_NULL:
|
85
|
+
break;
|
86
|
+
case RUBY_LIBXML_SRC_TYPE_FILE:
|
87
|
+
data = (void *)(rx_file_data *)rxp->data;
|
88
|
+
ruby_xfree((rx_file_data *)data);
|
89
|
+
break;
|
90
|
+
case RUBY_LIBXML_SRC_TYPE_STRING:
|
91
|
+
data = (void *)(rx_string_data *)rxp->data;
|
92
|
+
ruby_xfree((rx_string_data *)data);
|
93
|
+
break;
|
94
|
+
case RUBY_LIBXML_SRC_TYPE_IO:
|
95
|
+
data = (void *)(rx_io_data *)rxp->data;
|
96
|
+
ruby_xfree((rx_io_data *)data);
|
97
|
+
break;
|
98
|
+
default:
|
99
|
+
rb_fatal("Unknown data type, %d", rxp->data_type);
|
100
|
+
}
|
101
|
+
|
102
|
+
ruby_xfree(rxp);
|
103
|
+
}
|
104
|
+
|
105
|
+
|
106
|
+
/*
|
107
|
+
* call-seq:
|
108
|
+
* parser.io => IO
|
109
|
+
*
|
110
|
+
* Obtain the IO instance this parser works with.
|
111
|
+
*/
|
112
|
+
/*
|
113
|
+
VALUE
|
114
|
+
ruby_xml_html_parser_io_get(VALUE self, VALUE io) {
|
115
|
+
ruby_xml_html_parser *rxp;
|
116
|
+
rx_io_data *data;
|
117
|
+
|
118
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
119
|
+
|
120
|
+
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL ||
|
121
|
+
rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO ||
|
122
|
+
rxp->data == NULL)
|
123
|
+
return(Qnil);
|
124
|
+
|
125
|
+
data = (rx_io_data *)rxp->data;
|
126
|
+
|
127
|
+
return(data->io);
|
128
|
+
}
|
129
|
+
*/
|
130
|
+
|
131
|
+
/*
|
132
|
+
* call-seq:
|
133
|
+
* parser.io = IO
|
134
|
+
*
|
135
|
+
* Set the IO instance this parser works with.
|
136
|
+
*/
|
137
|
+
/*
|
138
|
+
VALUE
|
139
|
+
ruby_xml_html_parser_io_set(VALUE self, VALUE io) {
|
140
|
+
ruby_xml_html_parser *rxp;
|
141
|
+
ruby_xml_parser_context *rxpc;
|
142
|
+
rx_io_data *data;
|
143
|
+
OpenFile *fptr;
|
144
|
+
FILE *f;
|
145
|
+
|
146
|
+
if (!rb_obj_is_kind_of(io, rb_cIO))
|
147
|
+
rb_raise(rb_eTypeError, "need an IO object");
|
148
|
+
|
149
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
150
|
+
|
151
|
+
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
|
152
|
+
if (rxp->data != NULL)
|
153
|
+
rb_fatal("crap, this should be null");
|
154
|
+
|
155
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
|
156
|
+
data = ALLOC(rx_io_data);
|
157
|
+
rxp->data = data;
|
158
|
+
} else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO) {
|
159
|
+
return(Qnil);
|
160
|
+
}
|
161
|
+
|
162
|
+
rxp->ctxt = ruby_xml_parser_context_new3();
|
163
|
+
data = (rx_io_data *)rxp->data;
|
164
|
+
data->io = io;
|
165
|
+
|
166
|
+
GetOpenFile(io, fptr);
|
167
|
+
rb_io_check_readable(fptr);
|
168
|
+
f = GetWriteFile(fptr);
|
169
|
+
|
170
|
+
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
171
|
+
rxpc->ctxt = htmlCreateIOParserCtxt(NULL, NULL,
|
172
|
+
(xmlInputReadCallback) ctxtRead,
|
173
|
+
NULL, f, XML_CHAR_ENCODING_NONE);
|
174
|
+
if (NIL_P(rxpc->ctxt))
|
175
|
+
rb_sys_fail(0);
|
176
|
+
|
177
|
+
return(data->io);
|
178
|
+
}
|
179
|
+
*/
|
180
|
+
|
181
|
+
void
|
182
|
+
ruby_xml_html_parser_mark(ruby_xml_html_parser *rxp) {
|
183
|
+
if (rxp == NULL) return;
|
184
|
+
if (!NIL_P(rxp->ctxt)) rb_gc_mark(rxp->ctxt);
|
185
|
+
|
186
|
+
ruby_xml_state_marker();
|
187
|
+
|
188
|
+
switch(rxp->data_type) {
|
189
|
+
case RUBY_LIBXML_SRC_TYPE_NULL:
|
190
|
+
break;
|
191
|
+
case RUBY_LIBXML_SRC_TYPE_FILE:
|
192
|
+
if (!NIL_P(((rx_file_data *)rxp->data)->filename))
|
193
|
+
rb_gc_mark(((rx_file_data *)rxp->data)->filename);
|
194
|
+
break;
|
195
|
+
case RUBY_LIBXML_SRC_TYPE_STRING:
|
196
|
+
if (!NIL_P(((rx_string_data *)rxp->data)->str))
|
197
|
+
rb_gc_mark(((rx_string_data *)rxp->data)->str);
|
198
|
+
break;
|
199
|
+
case RUBY_LIBXML_SRC_TYPE_IO:
|
200
|
+
if (!NIL_P(((rx_io_data *)rxp->data)->io))
|
201
|
+
rb_gc_mark(((rx_io_data *)rxp->data)->io);
|
202
|
+
break;
|
203
|
+
default:
|
204
|
+
rb_fatal("unknown datatype: %d", rxp->data_type);
|
205
|
+
}
|
206
|
+
}
|
207
|
+
|
208
|
+
|
209
|
+
/*
|
210
|
+
* call-seq:
|
211
|
+
* XML::HTMLParser.new => parser
|
212
|
+
*
|
213
|
+
* Create a new parser instance with no pre-determined source.
|
214
|
+
*/
|
215
|
+
VALUE
|
216
|
+
ruby_xml_html_parser_new(VALUE class) {
|
217
|
+
ruby_xml_html_parser *rxp;
|
218
|
+
|
219
|
+
rxp = ALLOC(ruby_xml_html_parser);
|
220
|
+
rxp->ctxt = Qnil;
|
221
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
|
222
|
+
rxp->data = NULL;
|
223
|
+
rxp->parsed = 0;
|
224
|
+
|
225
|
+
return(Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
|
226
|
+
ruby_xml_html_parser_free, rxp));
|
227
|
+
}
|
228
|
+
|
229
|
+
|
230
|
+
/*
|
231
|
+
* call-seq:
|
232
|
+
* XML::HTMLParser.file => parser
|
233
|
+
*
|
234
|
+
* Create a new parser instance that will read the specified file.
|
235
|
+
*/
|
236
|
+
/*
|
237
|
+
VALUE
|
238
|
+
ruby_xml_html_parser_new_file(VALUE class, VALUE filename) {
|
239
|
+
VALUE obj;
|
240
|
+
ruby_xml_html_parser *rxp;
|
241
|
+
rx_file_data *data;
|
242
|
+
|
243
|
+
obj = ruby_xml_html_parser_new(class);
|
244
|
+
Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
|
245
|
+
|
246
|
+
data = ALLOC(rx_file_data);
|
247
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
|
248
|
+
rxp->data = data;
|
249
|
+
|
250
|
+
ruby_xml_html_parser_filename_set(obj, filename);
|
251
|
+
|
252
|
+
return(obj);
|
253
|
+
}
|
254
|
+
*/
|
255
|
+
|
256
|
+
/*
|
257
|
+
* call-seq:
|
258
|
+
* XML::HTMLParser.io => parser
|
259
|
+
*
|
260
|
+
* Create a new parser instance that will read from the
|
261
|
+
* specified IO object.
|
262
|
+
*/
|
263
|
+
/*
|
264
|
+
VALUE
|
265
|
+
ruby_xml_html_parser_new_io(VALUE class, VALUE io) {
|
266
|
+
VALUE obj;
|
267
|
+
ruby_xml_html_parser *rxp;
|
268
|
+
rx_io_data *data;
|
269
|
+
|
270
|
+
obj = ruby_xml_html_parser_new(class);
|
271
|
+
Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
|
272
|
+
|
273
|
+
data = ALLOC(rx_io_data);
|
274
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
|
275
|
+
rxp->data = data;
|
276
|
+
|
277
|
+
ruby_xml_html_parser_io_set(obj, io);
|
278
|
+
|
279
|
+
return(obj);
|
280
|
+
}
|
281
|
+
*/
|
282
|
+
|
283
|
+
/*
|
284
|
+
* call-seq:
|
285
|
+
* XML::HTMLParser.string => parser
|
286
|
+
*
|
287
|
+
* Create a new parser instance that will parse the given
|
288
|
+
* string.
|
289
|
+
*/
|
290
|
+
VALUE
|
291
|
+
ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
|
292
|
+
VALUE obj;
|
293
|
+
ruby_xml_html_parser *rxp;
|
294
|
+
rx_string_data *data;
|
295
|
+
|
296
|
+
obj = ruby_xml_html_parser_new(class);
|
297
|
+
Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
|
298
|
+
|
299
|
+
data = ALLOC(rx_string_data);
|
300
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
|
301
|
+
rxp->data = data;
|
302
|
+
|
303
|
+
ruby_xml_html_parser_str_set(obj, str);
|
304
|
+
|
305
|
+
return(obj);
|
306
|
+
}
|
307
|
+
|
308
|
+
|
309
|
+
/*
|
310
|
+
* call-seq:
|
311
|
+
* parser.parse => document
|
312
|
+
*
|
313
|
+
* Parse the input XML and create an XML::Document with
|
314
|
+
* it's content. If an error occurs, XML::Parser::ParseError
|
315
|
+
* is thrown.
|
316
|
+
*/
|
317
|
+
VALUE
|
318
|
+
ruby_xml_html_parser_parse(VALUE self) {
|
319
|
+
ruby_xml_html_parser *rxp;
|
320
|
+
ruby_xml_parser_context *rxpc;
|
321
|
+
htmlDocPtr xdp;
|
322
|
+
VALUE doc;
|
323
|
+
|
324
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
325
|
+
|
326
|
+
switch (rxp->data_type) {
|
327
|
+
case RUBY_LIBXML_SRC_TYPE_NULL:
|
328
|
+
return(Qnil);
|
329
|
+
case RUBY_LIBXML_SRC_TYPE_STRING:
|
330
|
+
//case RUBY_LIBXML_SRC_TYPE_FILE:
|
331
|
+
//case RUBY_LIBXML_SRC_TYPE_IO:
|
332
|
+
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
333
|
+
|
334
|
+
/* don't check return values here, the HTML parser returns errors
|
335
|
+
* but still allows the resulting tree to be used.
|
336
|
+
*/
|
337
|
+
htmlParseDocument(rxpc->ctxt);
|
338
|
+
xdp = rxpc->ctxt->myDoc;
|
339
|
+
rxp->parsed = 1;
|
340
|
+
|
341
|
+
doc = ruby_xml_document_wrap(xdp);
|
342
|
+
break;
|
343
|
+
default:
|
344
|
+
rb_fatal("Unknown data type, %d", rxp->data_type);
|
345
|
+
}
|
346
|
+
|
347
|
+
return(doc);
|
348
|
+
}
|
349
|
+
|
350
|
+
|
351
|
+
/*
|
352
|
+
* call-seq:
|
353
|
+
* parser.context => context
|
354
|
+
*
|
355
|
+
* Obtain the XML::Parser::Context associated with this
|
356
|
+
* parser.
|
357
|
+
*/
|
358
|
+
VALUE
|
359
|
+
ruby_xml_html_parser_context_get(VALUE self) {
|
360
|
+
ruby_xml_html_parser *rxp;
|
361
|
+
|
362
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
363
|
+
if (rxp->ctxt == Qnil)
|
364
|
+
return(Qnil);
|
365
|
+
else
|
366
|
+
return(rxp->ctxt);
|
367
|
+
}
|
368
|
+
|
369
|
+
|
370
|
+
/*
|
371
|
+
* call-seq:
|
372
|
+
* parser.string => "string"
|
373
|
+
*
|
374
|
+
* Obtain the string this parser works with.
|
375
|
+
*/
|
376
|
+
VALUE
|
377
|
+
ruby_xml_html_parser_str_get(VALUE self) {
|
378
|
+
ruby_xml_html_parser *rxp;
|
379
|
+
rx_string_data *data;
|
380
|
+
|
381
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
382
|
+
if (rxp->data == NULL || rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING)
|
383
|
+
return(Qnil);
|
384
|
+
|
385
|
+
data = (rx_string_data *)rxp->data;
|
386
|
+
return(data->str);
|
387
|
+
}
|
388
|
+
|
389
|
+
|
390
|
+
/*
|
391
|
+
* call-seq:
|
392
|
+
* parser.string = "string"
|
393
|
+
*
|
394
|
+
* Set the string this parser works with.
|
395
|
+
*/
|
396
|
+
VALUE
|
397
|
+
ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
|
398
|
+
ruby_xml_html_parser *rxp;
|
399
|
+
ruby_xml_parser_context *rxpc;
|
400
|
+
rx_string_data *data;
|
401
|
+
|
402
|
+
Check_Type(str, T_STRING);
|
403
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
404
|
+
|
405
|
+
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
|
406
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
|
407
|
+
data = ALLOC(rx_string_data);
|
408
|
+
rxp->data = data;
|
409
|
+
} else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
|
410
|
+
return(Qnil);
|
411
|
+
}
|
412
|
+
|
413
|
+
rxp->ctxt = ruby_xml_parser_context_new3();
|
414
|
+
data = (rx_string_data *)rxp->data;
|
415
|
+
data->str = str;
|
416
|
+
|
417
|
+
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
418
|
+
rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));
|
419
|
+
|
420
|
+
return(data->str);
|
421
|
+
}
|
422
|
+
|
423
|
+
|
424
|
+
// Rdoc needs to know
|
425
|
+
#ifdef RDOC_NEVER_DEFINED
|
426
|
+
mXML = rb_define_module("XML");
|
427
|
+
#endif
|
428
|
+
|
429
|
+
/*
 * Register the XML::HTMLParser class under the XML module together with
 * its singleton constructors and instance methods.  The file- and
 * IO-based entry points stay disabled; only the string source is exposed.
 */
void
|
430
|
+
ruby_init_html_parser(void) {
|
431
|
+
cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
|
432
|
+
|
433
|
+
/* Disabled constructors: their implementations are commented out in this file.
|
434
|
+
rb_define_singleton_method(cXMLHTMLParser, "file", ruby_xml_html_parser_new_file, 1);
|
435
|
+
rb_define_singleton_method(cXMLHTMLParser, "io", ruby_xml_html_parser_new_io, 1);
|
436
|
+
*/
|
437
|
+
/* Constructors: an empty parser, or one bound to a string. */
rb_define_singleton_method(cXMLHTMLParser, "new", ruby_xml_html_parser_new, 0);
|
438
|
+
rb_define_singleton_method(cXMLHTMLParser, "string", ruby_xml_html_parser_new_string, 1);
|
439
|
+
/* Disabled accessors: their implementations are commented out in this file.
|
440
|
+
rb_define_method(cXMLHTMLParser, "filename", ruby_xml_html_parser_filename_get, 0);
|
441
|
+
rb_define_method(cXMLHTMLParser, "filename=", ruby_xml_html_parser_filename_set, 1);
|
442
|
+
rb_define_method(cXMLHTMLParser, "io", ruby_xml_html_parser_io_get, 0);
|
443
|
+
rb_define_method(cXMLHTMLParser, "io=", ruby_xml_html_parser_io_set, 1);
|
444
|
+
*/
|
445
|
+
/* Instance methods available on every parser. */
rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
|
446
|
+
rb_define_method(cXMLHTMLParser, "context", ruby_xml_html_parser_context_get, 0);
|
447
|
+
rb_define_method(cXMLHTMLParser, "string", ruby_xml_html_parser_str_get, 0);
|
448
|
+
rb_define_method(cXMLHTMLParser, "string=", ruby_xml_html_parser_str_set, 1);
|
449
|
+
}
|