libxml-ruby 0.6.0-x86-mswin32-60
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES +122 -0
- data/LICENSE +23 -0
- data/README +144 -0
- data/ext/libxml/cbg.c +76 -0
- data/ext/libxml/extconf.rb +308 -0
- data/ext/libxml/libxml.c +62 -0
- data/ext/libxml/ruby_libxml.h +93 -0
- data/ext/libxml/ruby_xml_attr.c +405 -0
- data/ext/libxml/ruby_xml_attr.h +19 -0
- data/ext/libxml/ruby_xml_document.c +1111 -0
- data/ext/libxml/ruby_xml_document.h +27 -0
- data/ext/libxml/ruby_xml_dtd.c +168 -0
- data/ext/libxml/ruby_xml_dtd.h +17 -0
- data/ext/libxml/ruby_xml_html_parser.c +449 -0
- data/ext/libxml/ruby_xml_html_parser.h +29 -0
- data/ext/libxml/ruby_xml_input_cbg.c +158 -0
- data/ext/libxml/ruby_xml_input_cbg.h +20 -0
- data/ext/libxml/ruby_xml_node.c +2410 -0
- data/ext/libxml/ruby_xml_node.h +27 -0
- data/ext/libxml/ruby_xml_node_set.c +170 -0
- data/ext/libxml/ruby_xml_node_set.h +20 -0
- data/ext/libxml/ruby_xml_ns.c +153 -0
- data/ext/libxml/ruby_xml_ns.h +21 -0
- data/ext/libxml/ruby_xml_parser.c +1425 -0
- data/ext/libxml/ruby_xml_parser.h +31 -0
- data/ext/libxml/ruby_xml_parser_context.c +750 -0
- data/ext/libxml/ruby_xml_parser_context.h +22 -0
- data/ext/libxml/ruby_xml_reader.c +900 -0
- data/ext/libxml/ruby_xml_reader.h +14 -0
- data/ext/libxml/ruby_xml_sax_parser.c +485 -0
- data/ext/libxml/ruby_xml_sax_parser.h +56 -0
- data/ext/libxml/ruby_xml_schema.c +146 -0
- data/ext/libxml/ruby_xml_schema.h +16 -0
- data/ext/libxml/ruby_xml_state.c +113 -0
- data/ext/libxml/ruby_xml_state.h +12 -0
- data/ext/libxml/ruby_xml_tree.c +43 -0
- data/ext/libxml/ruby_xml_tree.h +12 -0
- data/ext/libxml/ruby_xml_xinclude.c +20 -0
- data/ext/libxml/ruby_xml_xinclude.h +13 -0
- data/ext/libxml/ruby_xml_xpath.c +243 -0
- data/ext/libxml/ruby_xml_xpath.h +23 -0
- data/ext/libxml/ruby_xml_xpath_context.c +118 -0
- data/ext/libxml/ruby_xml_xpath_context.h +20 -0
- data/ext/libxml/ruby_xml_xpath_object.c +293 -0
- data/ext/libxml/ruby_xml_xpath_object.h +28 -0
- data/ext/libxml/ruby_xml_xpointer.c +100 -0
- data/ext/libxml/ruby_xml_xpointer.h +27 -0
- data/ext/libxml/ruby_xml_xpointer_context.c +21 -0
- data/ext/libxml/ruby_xml_xpointer_context.h +18 -0
- data/ext/libxml/sax_parser_callbacks.inc +213 -0
- data/ext/libxml/version.h +9 -0
- data/lib/libxml.rb +125 -0
- data/lib/libxml_ruby.so +0 -0
- data/lib/xml/libxml.rb +5 -0
- data/mingw/libiconv-2.dll +0 -0
- data/mingw/libxml2-2.dll +0 -0
- data/mingw/libxml_ruby.so +0 -0
- data/mingw/mingw.rake +36 -0
- data/test/dtd-test.rb +24 -0
- data/test/etc_doc_to_s.rb +19 -0
- data/test/ets_copy_bug.rb +21 -0
- data/test/ets_copy_bug2.rb +32 -0
- data/test/ets_copy_bug3.rb +38 -0
- data/test/ets_doc_file.rb +15 -0
- data/test/ets_doc_to_s.rb +21 -0
- data/test/ets_gpx.rb +26 -0
- data/test/ets_node_gc.rb +21 -0
- data/test/ets_test.xml +2 -0
- data/test/ets_tsr.rb +9 -0
- data/test/gc.log +0 -0
- data/test/merge_bug.rb +55 -0
- data/test/schema-test.rb +74 -0
- data/test/tc_well_formed.rb +11 -0
- data/test/tc_xml_document.rb +52 -0
- data/test/tc_xml_document_write.rb +24 -0
- data/test/tc_xml_document_write2.rb +54 -0
- data/test/tc_xml_document_write3.rb +96 -0
- data/test/tc_xml_html_parser.rb +63 -0
- data/test/tc_xml_node.rb +59 -0
- data/test/tc_xml_node2.rb +25 -0
- data/test/tc_xml_node3.rb +27 -0
- data/test/tc_xml_node4.rb +86 -0
- data/test/tc_xml_node5.rb +52 -0
- data/test/tc_xml_node6.rb +27 -0
- data/test/tc_xml_node7.rb +35 -0
- data/test/tc_xml_node8.rb +32 -0
- data/test/tc_xml_node9.rb +32 -0
- data/test/tc_xml_node_copy.rb +40 -0
- data/test/tc_xml_node_set.rb +24 -0
- data/test/tc_xml_node_set2.rb +37 -0
- data/test/tc_xml_node_text.rb +17 -0
- data/test/tc_xml_node_xlink.rb +28 -0
- data/test/tc_xml_parser.rb +190 -0
- data/test/tc_xml_parser2.rb +16 -0
- data/test/tc_xml_parser3.rb +23 -0
- data/test/tc_xml_parser4.rb +33 -0
- data/test/tc_xml_parser5.rb +27 -0
- data/test/tc_xml_parser6.rb +23 -0
- data/test/tc_xml_parser7.rb +28 -0
- data/test/tc_xml_parser8.rb +32 -0
- data/test/tc_xml_parser9.rb +11 -0
- data/test/tc_xml_parser_context.rb +88 -0
- data/test/tc_xml_reader.rb +112 -0
- data/test/tc_xml_sax_parser.rb +104 -0
- data/test/tc_xml_sax_parser2.rb +51 -0
- data/test/tc_xml_xinclude.rb +30 -0
- data/test/tc_xml_xpath.rb +38 -0
- data/test/tc_xml_xpath2.rb +14 -0
- data/test/tc_xml_xpointer.rb +78 -0
- data/vc/libxml.sln +20 -0
- data/vc/libxml.vcproj +389 -0
- data/work/Rakefile +247 -0
- data/work/task/make +26 -0
- data/work/task/memory +37 -0
- data/work/task/rdoc +39 -0
- data/work/task/setup +1616 -0
- data/work/task/test +29 -0
- data/work/test/ets_runner.rb +33 -0
- data/work/test/libxml_test.rb +3 -0
- data/work/test/runner.rb +0 -0
- data/work/test/runner_ets.rb +33 -0
- data/work/vc/debug/libxml.exp +0 -0
- data/work/vc/debug/libxml.ilk +0 -0
- data/work/vc/debug/libxml.lib +0 -0
- data/work/vc/debug/libxml.pdb +0 -0
- data/work/vc/debug/libxml.so +0 -0
- metadata +224 -0
@@ -0,0 +1,27 @@
|
|
1
|
+
/* $Id: ruby_xml_document.h 304 2008-07-02 17:42:41Z cfis $ */
|
2
|
+
|
3
|
+
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
+
|
5
|
+
#ifndef __RUBY_XML_DOCUMENT__
|
6
|
+
#define __RUBY_XML_DOCUMENT__
|
7
|
+
|
8
|
+
extern VALUE cXMLDocument;
|
9
|
+
|
10
|
+
typedef struct rxp_document {
|
11
|
+
xmlDocPtr doc; /* Tree/DOM interface */
|
12
|
+
int data_type; /* The data type referenced by *data */
|
13
|
+
void *data; /* Pointer to an external structure of options */
|
14
|
+
} ruby_xml_document_t;
|
15
|
+
|
16
|
+
VALUE ruby_xml_document_filename_get(VALUE self);
|
17
|
+
VALUE ruby_xml_document_new_native(VALUE class, VALUE xmlver);
|
18
|
+
void ruby_xml_document_free(ruby_xml_document_t *rxd);
|
19
|
+
VALUE ruby_xml_document_root_get(VALUE self);
|
20
|
+
void ruby_init_xml_document(void);
|
21
|
+
|
22
|
+
#if defined(_WIN32)
|
23
|
+
__declspec(dllexport)
|
24
|
+
#endif
|
25
|
+
VALUE ruby_xml_document_wrap(xmlDocPtr xnode);
|
26
|
+
|
27
|
+
#endif
|
@@ -0,0 +1,168 @@
|
|
1
|
+
#include "ruby_libxml.h"
|
2
|
+
#include "ruby_xml_dtd.h"
|
3
|
+
|
4
|
+
VALUE cXMLDtd;
|
5
|
+
|
6
|
+
void
|
7
|
+
ruby_xml_dtd_free(ruby_xml_dtd *rxdtd) {
|
8
|
+
if (rxdtd->dtd != NULL) {
|
9
|
+
xmlFreeDtd(rxdtd->dtd);
|
10
|
+
rxdtd->dtd = NULL;
|
11
|
+
}
|
12
|
+
|
13
|
+
ruby_xfree(rxdtd);
|
14
|
+
}
|
15
|
+
|
16
|
+
static void
|
17
|
+
ruby_xml_dtd_mark(ruby_xml_dtd *rxdtd) {
|
18
|
+
return;
|
19
|
+
//if (rxdtd == NULL) return;
|
20
|
+
//if (!NIL_P(rxd->xmlver)) rb_gc_mark(rxd->xmlver);
|
21
|
+
}
|
22
|
+
|
23
|
+
/*
|
24
|
+
* call-seq:
|
25
|
+
* XML::Dtd.new("public system") => dtd
|
26
|
+
* XML::Dtd.new("public", "system") => dtd
|
27
|
+
*
|
28
|
+
* Create a new Dtd from the specified public and system
|
29
|
+
* identifiers.
|
30
|
+
*/
|
31
|
+
VALUE
|
32
|
+
ruby_xml_dtd_initialize(int argc, VALUE *argv, VALUE class) {
|
33
|
+
ruby_xml_dtd *rxdtd;
|
34
|
+
VALUE external, system, dtd_string;
|
35
|
+
xmlParserInputBufferPtr buffer;
|
36
|
+
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
|
37
|
+
xmlChar *new_string;
|
38
|
+
|
39
|
+
// 1 argument -- string --> parsujeme jako dtd
|
40
|
+
// 2 argumenty -- public, system --> bude se hledat
|
41
|
+
switch (argc) {
|
42
|
+
case 2:
|
43
|
+
rb_scan_args(argc, argv, "20", &external, &system);
|
44
|
+
|
45
|
+
Check_Type(external, T_STRING);
|
46
|
+
Check_Type(system, T_STRING);
|
47
|
+
rxdtd = ALLOC(ruby_xml_dtd);
|
48
|
+
rxdtd->dtd = xmlParseDTD( (xmlChar*)StringValuePtr(external),
|
49
|
+
(xmlChar*)StringValuePtr(system) );
|
50
|
+
if (rxdtd->dtd == NULL) {
|
51
|
+
ruby_xfree(rxdtd);
|
52
|
+
return(Qfalse);
|
53
|
+
}
|
54
|
+
|
55
|
+
xmlSetTreeDoc( (xmlNodePtr)rxdtd->dtd, NULL );
|
56
|
+
return( Data_Wrap_Struct(cXMLDtd, ruby_xml_dtd_mark, ruby_xml_dtd_free, rxdtd) );
|
57
|
+
break;
|
58
|
+
|
59
|
+
/*
|
60
|
+
SV *
|
61
|
+
new(CLASS, external, system)
|
62
|
+
char * CLASS
|
63
|
+
char * external
|
64
|
+
char * system
|
65
|
+
ALIAS:
|
66
|
+
parse_uri = 1
|
67
|
+
PREINIT:
|
68
|
+
xmlDtdPtr dtd = NULL;
|
69
|
+
CODE:
|
70
|
+
LibXML_error = sv_2mortal(newSVpv("", 0));
|
71
|
+
dtd = xmlParseDTD((const xmlChar*)external, (const xmlChar*)system);
|
72
|
+
if ( dtd == NULL ) {
|
73
|
+
XSRETURN_UNDEF;
|
74
|
+
}
|
75
|
+
xmlSetTreeDoc((xmlNodePtr)dtd, NULL);
|
76
|
+
RETVAL = PmmNodeToSv( (xmlNodePtr) dtd, NULL );
|
77
|
+
OUTPUT:
|
78
|
+
RETVAL
|
79
|
+
*/
|
80
|
+
|
81
|
+
case 1:
|
82
|
+
|
83
|
+
rb_scan_args(argc, argv, "10", &dtd_string);
|
84
|
+
buffer = xmlAllocParserInputBuffer(enc);
|
85
|
+
//if ( !buffer) return Qnil
|
86
|
+
new_string = xmlStrdup((xmlChar*)StringValuePtr(dtd_string));
|
87
|
+
xmlParserInputBufferPush(buffer, xmlStrlen(new_string), (const char*)new_string);
|
88
|
+
|
89
|
+
rxdtd = ALLOC(ruby_xml_dtd);
|
90
|
+
rxdtd->dtd = xmlIOParseDTD(NULL, buffer, enc);
|
91
|
+
|
92
|
+
// NOTE: For some reason freeing this InputBuffer causes a segfault!
|
93
|
+
// xmlFreeParserInputBuffer(buffer);
|
94
|
+
xmlFree(new_string);
|
95
|
+
|
96
|
+
return( Data_Wrap_Struct(cXMLDtd, ruby_xml_dtd_mark, ruby_xml_dtd_free, rxdtd) );
|
97
|
+
|
98
|
+
break;
|
99
|
+
/*
|
100
|
+
SV * parse_string(CLASS, str, ...)
|
101
|
+
char * CLASS
|
102
|
+
char * str
|
103
|
+
PREINIT:
|
104
|
+
STRLEN n_a;
|
105
|
+
xmlDtdPtr res;
|
106
|
+
SV * encoding_sv;
|
107
|
+
xmlParserInputBufferPtr buffer;
|
108
|
+
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
|
109
|
+
xmlChar * new_string;
|
110
|
+
STRLEN len;
|
111
|
+
CODE:
|
112
|
+
LibXML_init_error();
|
113
|
+
if (items > 2) {
|
114
|
+
encoding_sv = ST(2);
|
115
|
+
if (items > 3) {
|
116
|
+
croak("parse_string: too many parameters");
|
117
|
+
}
|
118
|
+
// warn("getting encoding...\n");
|
119
|
+
enc = xmlParseCharEncoding(SvPV(encoding_sv, n_a));
|
120
|
+
if (enc == XML_CHAR_ENCODING_ERROR) {
|
121
|
+
croak("Parse of encoding %s failed: %s", SvPV(encoding_sv, n_a), SvPV(LibXML_error, n_a));
|
122
|
+
}
|
123
|
+
}
|
124
|
+
buffer = xmlAllocParserInputBuffer(enc);
|
125
|
+
// buffer = xmlParserInputBufferCreateMem(str, xmlStrlen(str), enc);
|
126
|
+
if ( !buffer)
|
127
|
+
croak("cant create buffer!\n" );
|
128
|
+
|
129
|
+
new_string = xmlStrdup((const xmlChar*)str);
|
130
|
+
xmlParserInputBufferPush(buffer, xmlStrlen(new_string), (const char*)new_string);
|
131
|
+
|
132
|
+
res = xmlIOParseDTD(NULL, buffer, enc);
|
133
|
+
|
134
|
+
// NOTE: For some reason freeing this InputBuffer causes a segfault!
|
135
|
+
// xmlFreeParserInputBuffer(buffer);
|
136
|
+
xmlFree(new_string);
|
137
|
+
|
138
|
+
sv_2mortal( LibXML_error );
|
139
|
+
LibXML_croak_error();
|
140
|
+
|
141
|
+
if (res == NULL) {
|
142
|
+
croak("no DTD parsed!");
|
143
|
+
}
|
144
|
+
RETVAL = PmmNodeToSv((xmlNodePtr)res, NULL);
|
145
|
+
OUTPUT:
|
146
|
+
RETVAL
|
147
|
+
*/
|
148
|
+
|
149
|
+
default:
|
150
|
+
rb_raise(rb_eArgError, "wrong number of arguments (need 1 or 2)");
|
151
|
+
}
|
152
|
+
|
153
|
+
//docobj = ruby_xml_document_new2(cXMLDocument, xmlver);
|
154
|
+
return Qnil;
|
155
|
+
}
|
156
|
+
|
157
|
+
// Rdoc needs to know
|
158
|
+
#ifdef RDOC_NEVER_DEFINED
|
159
|
+
mXML = rb_define_module("XML");
|
160
|
+
#endif
|
161
|
+
|
162
|
+
void
|
163
|
+
ruby_init_xml_dtd(void) {
|
164
|
+
cXMLDtd = rb_define_class_under(mXML, "Dtd", rb_cObject);
|
165
|
+
rb_define_singleton_method(cXMLDtd, "new", ruby_xml_dtd_initialize, -1);
|
166
|
+
//rb_define_method(cXMLDocument, "xinclude", ruby_xml_document_xinclude, 0);
|
167
|
+
}
|
168
|
+
|
@@ -0,0 +1,17 @@
|
|
1
|
+
#ifndef __RUBY_XML_DTD__
|
2
|
+
#define __RUBY_XML_DTD__
|
3
|
+
|
4
|
+
extern VALUE cXMLDtd;
|
5
|
+
|
6
|
+
typedef struct rxp_dtd {
|
7
|
+
xmlDtdPtr dtd; /* DTD interface */
|
8
|
+
//int data_type; /* The data type referenced by *data */
|
9
|
+
//void *data; /* Pointer to an external structure of options */
|
10
|
+
//int is_ptr; /* Determines if this object owns its data or points to it someplace else */
|
11
|
+
//VALUE xmlver; /* T_STRING with the xml version */
|
12
|
+
} ruby_xml_dtd;
|
13
|
+
|
14
|
+
void ruby_init_xml_dtd(void);
|
15
|
+
void ruby_dtd_free(ruby_xml_dtd *rxdtd);
|
16
|
+
|
17
|
+
#endif
|
@@ -0,0 +1,449 @@
|
|
1
|
+
/* $Id: ruby_xml_html_parser.c 300 2008-07-01 19:14:15Z cfis $ */
|
2
|
+
|
3
|
+
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
+
|
5
|
+
#include "ruby_libxml.h"
|
6
|
+
|
7
|
+
VALUE cXMLHTMLParser;
|
8
|
+
|
9
|
+
//static int
|
10
|
+
//ctxtRead(FILE *f, char * buf, int len) {
|
11
|
+
// return(fread(buf, 1, len, f));
|
12
|
+
//}
|
13
|
+
|
14
|
+
|
15
|
+
/*
|
16
|
+
* call-seq:
|
17
|
+
* parser.filename => "filename"
|
18
|
+
*
|
19
|
+
* Obtain the filename this parser will read from.
|
20
|
+
*/
|
21
|
+
/*
|
22
|
+
VALUE
|
23
|
+
ruby_xml_html_parser_filename_get(VALUE self) {
|
24
|
+
ruby_xml_html_parser *rxp;
|
25
|
+
rx_file_data *data;
|
26
|
+
|
27
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
28
|
+
if (rxp->data == NULL)
|
29
|
+
return(Qnil);
|
30
|
+
|
31
|
+
if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE)
|
32
|
+
return(Qnil);
|
33
|
+
|
34
|
+
data = (rx_file_data *)rxp->data;
|
35
|
+
return(data->filename);
|
36
|
+
}
|
37
|
+
*/
|
38
|
+
|
39
|
+
/*
|
40
|
+
* call-seq:
|
41
|
+
* parser.filename = "filename"
|
42
|
+
*
|
43
|
+
* Set the filename this parser will read from.
|
44
|
+
*/
|
45
|
+
/*
|
46
|
+
VALUE
|
47
|
+
ruby_xml_html_parser_filename_set(VALUE self, VALUE filename) {
|
48
|
+
ruby_xml_html_parser *rxp;
|
49
|
+
ruby_xml_parser_context *rxpc;
|
50
|
+
rx_file_data *data;
|
51
|
+
|
52
|
+
Check_Type(filename, T_STRING);
|
53
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
54
|
+
|
55
|
+
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
|
56
|
+
if (rxp->data != NULL)
|
57
|
+
rb_fatal("crap, this should be null");
|
58
|
+
|
59
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
|
60
|
+
data = ALLOC(rx_file_data);
|
61
|
+
rxp->data = data;
|
62
|
+
} else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE) {
|
63
|
+
return(Qnil);
|
64
|
+
}
|
65
|
+
|
66
|
+
rxp->ctxt = ruby_xml_parser_context_new3();
|
67
|
+
data = (rx_file_data *)rxp->data;
|
68
|
+
data->filename = filename;
|
69
|
+
|
70
|
+
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
71
|
+
rxpc->ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename));
|
72
|
+
if (rxpc->ctxt == NULL)
|
73
|
+
rb_sys_fail(StringValuePtr(filename));
|
74
|
+
|
75
|
+
return(data->filename);
|
76
|
+
}
|
77
|
+
*/
|
78
|
+
|
79
|
+
void
|
80
|
+
ruby_xml_html_parser_free(ruby_xml_html_parser *rxp) {
|
81
|
+
void *data;
|
82
|
+
|
83
|
+
switch(rxp->data_type) {
|
84
|
+
case RUBY_LIBXML_SRC_TYPE_NULL:
|
85
|
+
break;
|
86
|
+
case RUBY_LIBXML_SRC_TYPE_FILE:
|
87
|
+
data = (void *)(rx_file_data *)rxp->data;
|
88
|
+
ruby_xfree((rx_file_data *)data);
|
89
|
+
break;
|
90
|
+
case RUBY_LIBXML_SRC_TYPE_STRING:
|
91
|
+
data = (void *)(rx_string_data *)rxp->data;
|
92
|
+
ruby_xfree((rx_string_data *)data);
|
93
|
+
break;
|
94
|
+
case RUBY_LIBXML_SRC_TYPE_IO:
|
95
|
+
data = (void *)(rx_io_data *)rxp->data;
|
96
|
+
ruby_xfree((rx_io_data *)data);
|
97
|
+
break;
|
98
|
+
default:
|
99
|
+
rb_fatal("Unknown data type, %d", rxp->data_type);
|
100
|
+
}
|
101
|
+
|
102
|
+
ruby_xfree(rxp);
|
103
|
+
}
|
104
|
+
|
105
|
+
|
106
|
+
/*
|
107
|
+
* call-seq:
|
108
|
+
* parser.io => IO
|
109
|
+
*
|
110
|
+
* Obtain the IO instance this parser works with.
|
111
|
+
*/
|
112
|
+
/*
|
113
|
+
VALUE
|
114
|
+
ruby_xml_html_parser_io_get(VALUE self, VALUE io) {
|
115
|
+
ruby_xml_html_parser *rxp;
|
116
|
+
rx_io_data *data;
|
117
|
+
|
118
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
119
|
+
|
120
|
+
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL ||
|
121
|
+
rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO ||
|
122
|
+
rxp->data == NULL)
|
123
|
+
return(Qnil);
|
124
|
+
|
125
|
+
data = (rx_io_data *)rxp->data;
|
126
|
+
|
127
|
+
return(data->io);
|
128
|
+
}
|
129
|
+
*/
|
130
|
+
|
131
|
+
/*
|
132
|
+
* call-seq:
|
133
|
+
* parser.io = IO
|
134
|
+
*
|
135
|
+
* Set the IO instance this parser works with.
|
136
|
+
*/
|
137
|
+
/*
|
138
|
+
VALUE
|
139
|
+
ruby_xml_html_parser_io_set(VALUE self, VALUE io) {
|
140
|
+
ruby_xml_html_parser *rxp;
|
141
|
+
ruby_xml_parser_context *rxpc;
|
142
|
+
rx_io_data *data;
|
143
|
+
OpenFile *fptr;
|
144
|
+
FILE *f;
|
145
|
+
|
146
|
+
if (!rb_obj_is_kind_of(io, rb_cIO))
|
147
|
+
rb_raise(rb_eTypeError, "need an IO object");
|
148
|
+
|
149
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
150
|
+
|
151
|
+
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
|
152
|
+
if (rxp->data != NULL)
|
153
|
+
rb_fatal("crap, this should be null");
|
154
|
+
|
155
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
|
156
|
+
data = ALLOC(rx_io_data);
|
157
|
+
rxp->data = data;
|
158
|
+
} else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO) {
|
159
|
+
return(Qnil);
|
160
|
+
}
|
161
|
+
|
162
|
+
rxp->ctxt = ruby_xml_parser_context_new3();
|
163
|
+
data = (rx_io_data *)rxp->data;
|
164
|
+
data->io = io;
|
165
|
+
|
166
|
+
GetOpenFile(io, fptr);
|
167
|
+
rb_io_check_readable(fptr);
|
168
|
+
f = GetWriteFile(fptr);
|
169
|
+
|
170
|
+
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
171
|
+
rxpc->ctxt = htmlCreateIOParserCtxt(NULL, NULL,
|
172
|
+
(xmlInputReadCallback) ctxtRead,
|
173
|
+
NULL, f, XML_CHAR_ENCODING_NONE);
|
174
|
+
if (NIL_P(rxpc->ctxt))
|
175
|
+
rb_sys_fail(0);
|
176
|
+
|
177
|
+
return(data->io);
|
178
|
+
}
|
179
|
+
*/
|
180
|
+
|
181
|
+
void
|
182
|
+
ruby_xml_html_parser_mark(ruby_xml_html_parser *rxp) {
|
183
|
+
if (rxp == NULL) return;
|
184
|
+
if (!NIL_P(rxp->ctxt)) rb_gc_mark(rxp->ctxt);
|
185
|
+
|
186
|
+
ruby_xml_state_marker();
|
187
|
+
|
188
|
+
switch(rxp->data_type) {
|
189
|
+
case RUBY_LIBXML_SRC_TYPE_NULL:
|
190
|
+
break;
|
191
|
+
case RUBY_LIBXML_SRC_TYPE_FILE:
|
192
|
+
if (!NIL_P(((rx_file_data *)rxp->data)->filename))
|
193
|
+
rb_gc_mark(((rx_file_data *)rxp->data)->filename);
|
194
|
+
break;
|
195
|
+
case RUBY_LIBXML_SRC_TYPE_STRING:
|
196
|
+
if (!NIL_P(((rx_string_data *)rxp->data)->str))
|
197
|
+
rb_gc_mark(((rx_string_data *)rxp->data)->str);
|
198
|
+
break;
|
199
|
+
case RUBY_LIBXML_SRC_TYPE_IO:
|
200
|
+
if (!NIL_P(((rx_io_data *)rxp->data)->io))
|
201
|
+
rb_gc_mark(((rx_io_data *)rxp->data)->io);
|
202
|
+
break;
|
203
|
+
default:
|
204
|
+
rb_fatal("unknown datatype: %d", rxp->data_type);
|
205
|
+
}
|
206
|
+
}
|
207
|
+
|
208
|
+
|
209
|
+
/*
|
210
|
+
* call-seq:
|
211
|
+
* XML::HTMLParser.new => parser
|
212
|
+
*
|
213
|
+
* Create a new parser instance with no pre-determined source.
|
214
|
+
*/
|
215
|
+
VALUE
|
216
|
+
ruby_xml_html_parser_new(VALUE class) {
|
217
|
+
ruby_xml_html_parser *rxp;
|
218
|
+
|
219
|
+
rxp = ALLOC(ruby_xml_html_parser);
|
220
|
+
rxp->ctxt = Qnil;
|
221
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
|
222
|
+
rxp->data = NULL;
|
223
|
+
rxp->parsed = 0;
|
224
|
+
|
225
|
+
return(Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
|
226
|
+
ruby_xml_html_parser_free, rxp));
|
227
|
+
}
|
228
|
+
|
229
|
+
|
230
|
+
/*
|
231
|
+
* call-seq:
|
232
|
+
* XML::HTMLParser.file => parser
|
233
|
+
*
|
234
|
+
* Create a new parser instance that will read the specified file.
|
235
|
+
*/
|
236
|
+
/*
|
237
|
+
VALUE
|
238
|
+
ruby_xml_html_parser_new_file(VALUE class, VALUE filename) {
|
239
|
+
VALUE obj;
|
240
|
+
ruby_xml_html_parser *rxp;
|
241
|
+
rx_file_data *data;
|
242
|
+
|
243
|
+
obj = ruby_xml_html_parser_new(class);
|
244
|
+
Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
|
245
|
+
|
246
|
+
data = ALLOC(rx_file_data);
|
247
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
|
248
|
+
rxp->data = data;
|
249
|
+
|
250
|
+
ruby_xml_html_parser_filename_set(obj, filename);
|
251
|
+
|
252
|
+
return(obj);
|
253
|
+
}
|
254
|
+
*/
|
255
|
+
|
256
|
+
/*
|
257
|
+
* call-seq:
|
258
|
+
* XML::HTMLParser.io => parser
|
259
|
+
*
|
260
|
+
* Create a new parser instance that will read from the
|
261
|
+
* specified IO object.
|
262
|
+
*/
|
263
|
+
/*
|
264
|
+
VALUE
|
265
|
+
ruby_xml_html_parser_new_io(VALUE class, VALUE io) {
|
266
|
+
VALUE obj;
|
267
|
+
ruby_xml_html_parser *rxp;
|
268
|
+
rx_io_data *data;
|
269
|
+
|
270
|
+
obj = ruby_xml_html_parser_new(class);
|
271
|
+
Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
|
272
|
+
|
273
|
+
data = ALLOC(rx_io_data);
|
274
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
|
275
|
+
rxp->data = data;
|
276
|
+
|
277
|
+
ruby_xml_html_parser_io_set(obj, io);
|
278
|
+
|
279
|
+
return(obj);
|
280
|
+
}
|
281
|
+
*/
|
282
|
+
|
283
|
+
/*
|
284
|
+
* call-seq:
|
285
|
+
* XML::HTMLParser.string => parser
|
286
|
+
*
|
287
|
+
* Create a new parser instance that will parse the given
|
288
|
+
* string.
|
289
|
+
*/
|
290
|
+
VALUE
|
291
|
+
ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
|
292
|
+
VALUE obj;
|
293
|
+
ruby_xml_html_parser *rxp;
|
294
|
+
rx_string_data *data;
|
295
|
+
|
296
|
+
obj = ruby_xml_html_parser_new(class);
|
297
|
+
Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
|
298
|
+
|
299
|
+
data = ALLOC(rx_string_data);
|
300
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
|
301
|
+
rxp->data = data;
|
302
|
+
|
303
|
+
ruby_xml_html_parser_str_set(obj, str);
|
304
|
+
|
305
|
+
return(obj);
|
306
|
+
}
|
307
|
+
|
308
|
+
|
309
|
+
/*
|
310
|
+
* call-seq:
|
311
|
+
* parser.parse => document
|
312
|
+
*
|
313
|
+
* Parse the input XML and create an XML::Document with
|
314
|
+
* it's content. If an error occurs, XML::Parser::ParseError
|
315
|
+
* is thrown.
|
316
|
+
*/
|
317
|
+
VALUE
|
318
|
+
ruby_xml_html_parser_parse(VALUE self) {
|
319
|
+
ruby_xml_html_parser *rxp;
|
320
|
+
ruby_xml_parser_context *rxpc;
|
321
|
+
htmlDocPtr xdp;
|
322
|
+
VALUE doc;
|
323
|
+
|
324
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
325
|
+
|
326
|
+
switch (rxp->data_type) {
|
327
|
+
case RUBY_LIBXML_SRC_TYPE_NULL:
|
328
|
+
return(Qnil);
|
329
|
+
case RUBY_LIBXML_SRC_TYPE_STRING:
|
330
|
+
//case RUBY_LIBXML_SRC_TYPE_FILE:
|
331
|
+
//case RUBY_LIBXML_SRC_TYPE_IO:
|
332
|
+
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
333
|
+
|
334
|
+
/* don't check return values here, the HTML parser returns errors
|
335
|
+
* but still allows the resulting tree to be used.
|
336
|
+
*/
|
337
|
+
htmlParseDocument(rxpc->ctxt);
|
338
|
+
xdp = rxpc->ctxt->myDoc;
|
339
|
+
rxp->parsed = 1;
|
340
|
+
|
341
|
+
doc = ruby_xml_document_wrap(xdp);
|
342
|
+
break;
|
343
|
+
default:
|
344
|
+
rb_fatal("Unknown data type, %d", rxp->data_type);
|
345
|
+
}
|
346
|
+
|
347
|
+
return(doc);
|
348
|
+
}
|
349
|
+
|
350
|
+
|
351
|
+
/*
|
352
|
+
* call-seq:
|
353
|
+
* parser.context => context
|
354
|
+
*
|
355
|
+
* Obtain the XML::Parser::Context associated with this
|
356
|
+
* parser.
|
357
|
+
*/
|
358
|
+
VALUE
|
359
|
+
ruby_xml_html_parser_context_get(VALUE self) {
|
360
|
+
ruby_xml_html_parser *rxp;
|
361
|
+
|
362
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
363
|
+
if (rxp->ctxt == Qnil)
|
364
|
+
return(Qnil);
|
365
|
+
else
|
366
|
+
return(rxp->ctxt);
|
367
|
+
}
|
368
|
+
|
369
|
+
|
370
|
+
/*
|
371
|
+
* call-seq:
|
372
|
+
* parser.string => "string"
|
373
|
+
*
|
374
|
+
* Obtain the string this parser works with.
|
375
|
+
*/
|
376
|
+
VALUE
|
377
|
+
ruby_xml_html_parser_str_get(VALUE self) {
|
378
|
+
ruby_xml_html_parser *rxp;
|
379
|
+
rx_string_data *data;
|
380
|
+
|
381
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
382
|
+
if (rxp->data == NULL || rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING)
|
383
|
+
return(Qnil);
|
384
|
+
|
385
|
+
data = (rx_string_data *)rxp->data;
|
386
|
+
return(data->str);
|
387
|
+
}
|
388
|
+
|
389
|
+
|
390
|
+
/*
|
391
|
+
* call-seq:
|
392
|
+
* parser.string = "string"
|
393
|
+
*
|
394
|
+
* Set the string this parser works with.
|
395
|
+
*/
|
396
|
+
VALUE
|
397
|
+
ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
|
398
|
+
ruby_xml_html_parser *rxp;
|
399
|
+
ruby_xml_parser_context *rxpc;
|
400
|
+
rx_string_data *data;
|
401
|
+
|
402
|
+
Check_Type(str, T_STRING);
|
403
|
+
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
404
|
+
|
405
|
+
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
|
406
|
+
rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
|
407
|
+
data = ALLOC(rx_string_data);
|
408
|
+
rxp->data = data;
|
409
|
+
} else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
|
410
|
+
return(Qnil);
|
411
|
+
}
|
412
|
+
|
413
|
+
rxp->ctxt = ruby_xml_parser_context_new3();
|
414
|
+
data = (rx_string_data *)rxp->data;
|
415
|
+
data->str = str;
|
416
|
+
|
417
|
+
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
418
|
+
rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));
|
419
|
+
|
420
|
+
return(data->str);
|
421
|
+
}
|
422
|
+
|
423
|
+
|
424
|
+
// Rdoc needs to know
|
425
|
+
#ifdef RDOC_NEVER_DEFINED
|
426
|
+
mXML = rb_define_module("XML");
|
427
|
+
#endif
|
428
|
+
|
429
|
+
void
|
430
|
+
ruby_init_html_parser(void) {
|
431
|
+
cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
|
432
|
+
|
433
|
+
/*
|
434
|
+
rb_define_singleton_method(cXMLHTMLParser, "file", ruby_xml_html_parser_new_file, 1);
|
435
|
+
rb_define_singleton_method(cXMLHTMLParser, "io", ruby_xml_html_parser_new_io, 1);
|
436
|
+
*/
|
437
|
+
rb_define_singleton_method(cXMLHTMLParser, "new", ruby_xml_html_parser_new, 0);
|
438
|
+
rb_define_singleton_method(cXMLHTMLParser, "string", ruby_xml_html_parser_new_string, 1);
|
439
|
+
/*
|
440
|
+
rb_define_method(cXMLHTMLParser, "filename", ruby_xml_html_parser_filename_get, 0);
|
441
|
+
rb_define_method(cXMLHTMLParser, "filename=", ruby_xml_html_parser_filename_set, 1);
|
442
|
+
rb_define_method(cXMLHTMLParser, "io", ruby_xml_html_parser_io_get, 0);
|
443
|
+
rb_define_method(cXMLHTMLParser, "io=", ruby_xml_html_parser_io_set, 1);
|
444
|
+
*/
|
445
|
+
rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
|
446
|
+
rb_define_method(cXMLHTMLParser, "context", ruby_xml_html_parser_context_get, 0);
|
447
|
+
rb_define_method(cXMLHTMLParser, "string", ruby_xml_html_parser_str_get, 0);
|
448
|
+
rb_define_method(cXMLHTMLParser, "string=", ruby_xml_html_parser_str_set, 1);
|
449
|
+
}
|