libxml-ruby 0.6.0-x86-mswin32-60

Sign up to get free protection for your applications and to get access to all the features.
Files changed (127) hide show
  1. data/CHANGES +122 -0
  2. data/LICENSE +23 -0
  3. data/README +144 -0
  4. data/ext/libxml/cbg.c +76 -0
  5. data/ext/libxml/extconf.rb +308 -0
  6. data/ext/libxml/libxml.c +62 -0
  7. data/ext/libxml/ruby_libxml.h +93 -0
  8. data/ext/libxml/ruby_xml_attr.c +405 -0
  9. data/ext/libxml/ruby_xml_attr.h +19 -0
  10. data/ext/libxml/ruby_xml_document.c +1111 -0
  11. data/ext/libxml/ruby_xml_document.h +27 -0
  12. data/ext/libxml/ruby_xml_dtd.c +168 -0
  13. data/ext/libxml/ruby_xml_dtd.h +17 -0
  14. data/ext/libxml/ruby_xml_html_parser.c +449 -0
  15. data/ext/libxml/ruby_xml_html_parser.h +29 -0
  16. data/ext/libxml/ruby_xml_input_cbg.c +158 -0
  17. data/ext/libxml/ruby_xml_input_cbg.h +20 -0
  18. data/ext/libxml/ruby_xml_node.c +2410 -0
  19. data/ext/libxml/ruby_xml_node.h +27 -0
  20. data/ext/libxml/ruby_xml_node_set.c +170 -0
  21. data/ext/libxml/ruby_xml_node_set.h +20 -0
  22. data/ext/libxml/ruby_xml_ns.c +153 -0
  23. data/ext/libxml/ruby_xml_ns.h +21 -0
  24. data/ext/libxml/ruby_xml_parser.c +1425 -0
  25. data/ext/libxml/ruby_xml_parser.h +31 -0
  26. data/ext/libxml/ruby_xml_parser_context.c +750 -0
  27. data/ext/libxml/ruby_xml_parser_context.h +22 -0
  28. data/ext/libxml/ruby_xml_reader.c +900 -0
  29. data/ext/libxml/ruby_xml_reader.h +14 -0
  30. data/ext/libxml/ruby_xml_sax_parser.c +485 -0
  31. data/ext/libxml/ruby_xml_sax_parser.h +56 -0
  32. data/ext/libxml/ruby_xml_schema.c +146 -0
  33. data/ext/libxml/ruby_xml_schema.h +16 -0
  34. data/ext/libxml/ruby_xml_state.c +113 -0
  35. data/ext/libxml/ruby_xml_state.h +12 -0
  36. data/ext/libxml/ruby_xml_tree.c +43 -0
  37. data/ext/libxml/ruby_xml_tree.h +12 -0
  38. data/ext/libxml/ruby_xml_xinclude.c +20 -0
  39. data/ext/libxml/ruby_xml_xinclude.h +13 -0
  40. data/ext/libxml/ruby_xml_xpath.c +243 -0
  41. data/ext/libxml/ruby_xml_xpath.h +23 -0
  42. data/ext/libxml/ruby_xml_xpath_context.c +118 -0
  43. data/ext/libxml/ruby_xml_xpath_context.h +20 -0
  44. data/ext/libxml/ruby_xml_xpath_object.c +293 -0
  45. data/ext/libxml/ruby_xml_xpath_object.h +28 -0
  46. data/ext/libxml/ruby_xml_xpointer.c +100 -0
  47. data/ext/libxml/ruby_xml_xpointer.h +27 -0
  48. data/ext/libxml/ruby_xml_xpointer_context.c +21 -0
  49. data/ext/libxml/ruby_xml_xpointer_context.h +18 -0
  50. data/ext/libxml/sax_parser_callbacks.inc +213 -0
  51. data/ext/libxml/version.h +9 -0
  52. data/lib/libxml.rb +125 -0
  53. data/lib/libxml_ruby.so +0 -0
  54. data/lib/xml/libxml.rb +5 -0
  55. data/mingw/libiconv-2.dll +0 -0
  56. data/mingw/libxml2-2.dll +0 -0
  57. data/mingw/libxml_ruby.so +0 -0
  58. data/mingw/mingw.rake +36 -0
  59. data/test/dtd-test.rb +24 -0
  60. data/test/etc_doc_to_s.rb +19 -0
  61. data/test/ets_copy_bug.rb +21 -0
  62. data/test/ets_copy_bug2.rb +32 -0
  63. data/test/ets_copy_bug3.rb +38 -0
  64. data/test/ets_doc_file.rb +15 -0
  65. data/test/ets_doc_to_s.rb +21 -0
  66. data/test/ets_gpx.rb +26 -0
  67. data/test/ets_node_gc.rb +21 -0
  68. data/test/ets_test.xml +2 -0
  69. data/test/ets_tsr.rb +9 -0
  70. data/test/gc.log +0 -0
  71. data/test/merge_bug.rb +55 -0
  72. data/test/schema-test.rb +74 -0
  73. data/test/tc_well_formed.rb +11 -0
  74. data/test/tc_xml_document.rb +52 -0
  75. data/test/tc_xml_document_write.rb +24 -0
  76. data/test/tc_xml_document_write2.rb +54 -0
  77. data/test/tc_xml_document_write3.rb +96 -0
  78. data/test/tc_xml_html_parser.rb +63 -0
  79. data/test/tc_xml_node.rb +59 -0
  80. data/test/tc_xml_node2.rb +25 -0
  81. data/test/tc_xml_node3.rb +27 -0
  82. data/test/tc_xml_node4.rb +86 -0
  83. data/test/tc_xml_node5.rb +52 -0
  84. data/test/tc_xml_node6.rb +27 -0
  85. data/test/tc_xml_node7.rb +35 -0
  86. data/test/tc_xml_node8.rb +32 -0
  87. data/test/tc_xml_node9.rb +32 -0
  88. data/test/tc_xml_node_copy.rb +40 -0
  89. data/test/tc_xml_node_set.rb +24 -0
  90. data/test/tc_xml_node_set2.rb +37 -0
  91. data/test/tc_xml_node_text.rb +17 -0
  92. data/test/tc_xml_node_xlink.rb +28 -0
  93. data/test/tc_xml_parser.rb +190 -0
  94. data/test/tc_xml_parser2.rb +16 -0
  95. data/test/tc_xml_parser3.rb +23 -0
  96. data/test/tc_xml_parser4.rb +33 -0
  97. data/test/tc_xml_parser5.rb +27 -0
  98. data/test/tc_xml_parser6.rb +23 -0
  99. data/test/tc_xml_parser7.rb +28 -0
  100. data/test/tc_xml_parser8.rb +32 -0
  101. data/test/tc_xml_parser9.rb +11 -0
  102. data/test/tc_xml_parser_context.rb +88 -0
  103. data/test/tc_xml_reader.rb +112 -0
  104. data/test/tc_xml_sax_parser.rb +104 -0
  105. data/test/tc_xml_sax_parser2.rb +51 -0
  106. data/test/tc_xml_xinclude.rb +30 -0
  107. data/test/tc_xml_xpath.rb +38 -0
  108. data/test/tc_xml_xpath2.rb +14 -0
  109. data/test/tc_xml_xpointer.rb +78 -0
  110. data/vc/libxml.sln +20 -0
  111. data/vc/libxml.vcproj +389 -0
  112. data/work/Rakefile +247 -0
  113. data/work/task/make +26 -0
  114. data/work/task/memory +37 -0
  115. data/work/task/rdoc +39 -0
  116. data/work/task/setup +1616 -0
  117. data/work/task/test +29 -0
  118. data/work/test/ets_runner.rb +33 -0
  119. data/work/test/libxml_test.rb +3 -0
  120. data/work/test/runner.rb +0 -0
  121. data/work/test/runner_ets.rb +33 -0
  122. data/work/vc/debug/libxml.exp +0 -0
  123. data/work/vc/debug/libxml.ilk +0 -0
  124. data/work/vc/debug/libxml.lib +0 -0
  125. data/work/vc/debug/libxml.pdb +0 -0
  126. data/work/vc/debug/libxml.so +0 -0
  127. metadata +224 -0
@@ -0,0 +1,27 @@
1
+ /* $Id: ruby_xml_document.h 304 2008-07-02 17:42:41Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #ifndef __RUBY_XML_DOCUMENT__
6
+ #define __RUBY_XML_DOCUMENT__
7
+
8
+ extern VALUE cXMLDocument;
9
+
10
+ typedef struct rxp_document {
11
+ xmlDocPtr doc; /* libxml2 document (tree/DOM interface) held by this wrapper */
12
+ int data_type; /* Tag describing which kind of structure *data refers to */
13
+ void *data; /* Pointer to an external structure of options; meaning depends on data_type */
14
+ } ruby_xml_document_t;
15
+
16
+ VALUE ruby_xml_document_filename_get(VALUE self);
17
+ VALUE ruby_xml_document_new_native(VALUE class, VALUE xmlver);
18
+ void ruby_xml_document_free(ruby_xml_document_t *rxd);
19
+ VALUE ruby_xml_document_root_get(VALUE self);
20
+ void ruby_init_xml_document(void);
21
+
22
+ #if defined(_WIN32)
23
+ __declspec(dllexport)
24
+ #endif
25
+ VALUE ruby_xml_document_wrap(xmlDocPtr xnode);
26
+
27
+ #endif
@@ -0,0 +1,168 @@
1
+ #include "ruby_libxml.h"
2
+ #include "ruby_xml_dtd.h"
3
+
4
+ VALUE cXMLDtd;
5
+
6
+ void
7
+ ruby_xml_dtd_free(ruby_xml_dtd *rxdtd) {
8
+ if (rxdtd->dtd != NULL) {
9
+ xmlFreeDtd(rxdtd->dtd);
10
+ rxdtd->dtd = NULL;
11
+ }
12
+
13
+ ruby_xfree(rxdtd);
14
+ }
15
+
16
+ static void
17
+ ruby_xml_dtd_mark(ruby_xml_dtd *rxdtd) {
18
+ return;
19
+ //if (rxdtd == NULL) return;
20
+ //if (!NIL_P(rxd->xmlver)) rb_gc_mark(rxd->xmlver);
21
+ }
22
+
23
+ /*
24
+ * call-seq:
25
+ * XML::Dtd.new("public system") => dtd
26
+ * XML::Dtd.new("public", "system") => dtd
27
+ *
28
+ * Create a new Dtd from the specified public and system
29
+ * identifiers.
30
+ */
31
+ VALUE
32
+ ruby_xml_dtd_initialize(int argc, VALUE *argv, VALUE class) {
33
+ ruby_xml_dtd *rxdtd;
34
+ VALUE external, system, dtd_string;
35
+ xmlParserInputBufferPtr buffer;
36
+ xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
37
+ xmlChar *new_string;
38
+
39
+ // 1 argument -- string --> parsujeme jako dtd
40
+ // 2 argumenty -- public, system --> bude se hledat
41
+ switch (argc) {
42
+ case 2:
43
+ rb_scan_args(argc, argv, "20", &external, &system);
44
+
45
+ Check_Type(external, T_STRING);
46
+ Check_Type(system, T_STRING);
47
+ rxdtd = ALLOC(ruby_xml_dtd);
48
+ rxdtd->dtd = xmlParseDTD( (xmlChar*)StringValuePtr(external),
49
+ (xmlChar*)StringValuePtr(system) );
50
+ if (rxdtd->dtd == NULL) {
51
+ ruby_xfree(rxdtd);
52
+ return(Qfalse);
53
+ }
54
+
55
+ xmlSetTreeDoc( (xmlNodePtr)rxdtd->dtd, NULL );
56
+ return( Data_Wrap_Struct(cXMLDtd, ruby_xml_dtd_mark, ruby_xml_dtd_free, rxdtd) );
57
+ break;
58
+
59
+ /*
60
+ SV *
61
+ new(CLASS, external, system)
62
+ char * CLASS
63
+ char * external
64
+ char * system
65
+ ALIAS:
66
+ parse_uri = 1
67
+ PREINIT:
68
+ xmlDtdPtr dtd = NULL;
69
+ CODE:
70
+ LibXML_error = sv_2mortal(newSVpv("", 0));
71
+ dtd = xmlParseDTD((const xmlChar*)external, (const xmlChar*)system);
72
+ if ( dtd == NULL ) {
73
+ XSRETURN_UNDEF;
74
+ }
75
+ xmlSetTreeDoc((xmlNodePtr)dtd, NULL);
76
+ RETVAL = PmmNodeToSv( (xmlNodePtr) dtd, NULL );
77
+ OUTPUT:
78
+ RETVAL
79
+ */
80
+
81
+ case 1:
82
+
83
+ rb_scan_args(argc, argv, "10", &dtd_string);
84
+ buffer = xmlAllocParserInputBuffer(enc);
85
+ //if ( !buffer) return Qnil
86
+ new_string = xmlStrdup((xmlChar*)StringValuePtr(dtd_string));
87
+ xmlParserInputBufferPush(buffer, xmlStrlen(new_string), (const char*)new_string);
88
+
89
+ rxdtd = ALLOC(ruby_xml_dtd);
90
+ rxdtd->dtd = xmlIOParseDTD(NULL, buffer, enc);
91
+
92
+ // NOTE: For some reason freeing this InputBuffer causes a segfault!
93
+ // xmlFreeParserInputBuffer(buffer);
94
+ xmlFree(new_string);
95
+
96
+ return( Data_Wrap_Struct(cXMLDtd, ruby_xml_dtd_mark, ruby_xml_dtd_free, rxdtd) );
97
+
98
+ break;
99
+ /*
100
+ SV * parse_string(CLASS, str, ...)
101
+ char * CLASS
102
+ char * str
103
+ PREINIT:
104
+ STRLEN n_a;
105
+ xmlDtdPtr res;
106
+ SV * encoding_sv;
107
+ xmlParserInputBufferPtr buffer;
108
+ xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
109
+ xmlChar * new_string;
110
+ STRLEN len;
111
+ CODE:
112
+ LibXML_init_error();
113
+ if (items > 2) {
114
+ encoding_sv = ST(2);
115
+ if (items > 3) {
116
+ croak("parse_string: too many parameters");
117
+ }
118
+ // warn("getting encoding...\n");
119
+ enc = xmlParseCharEncoding(SvPV(encoding_sv, n_a));
120
+ if (enc == XML_CHAR_ENCODING_ERROR) {
121
+ croak("Parse of encoding %s failed: %s", SvPV(encoding_sv, n_a), SvPV(LibXML_error, n_a));
122
+ }
123
+ }
124
+ buffer = xmlAllocParserInputBuffer(enc);
125
+ // buffer = xmlParserInputBufferCreateMem(str, xmlStrlen(str), enc);
126
+ if ( !buffer)
127
+ croak("cant create buffer!\n" );
128
+
129
+ new_string = xmlStrdup((const xmlChar*)str);
130
+ xmlParserInputBufferPush(buffer, xmlStrlen(new_string), (const char*)new_string);
131
+
132
+ res = xmlIOParseDTD(NULL, buffer, enc);
133
+
134
+ // NOTE: For some reason freeing this InputBuffer causes a segfault!
135
+ // xmlFreeParserInputBuffer(buffer);
136
+ xmlFree(new_string);
137
+
138
+ sv_2mortal( LibXML_error );
139
+ LibXML_croak_error();
140
+
141
+ if (res == NULL) {
142
+ croak("no DTD parsed!");
143
+ }
144
+ RETVAL = PmmNodeToSv((xmlNodePtr)res, NULL);
145
+ OUTPUT:
146
+ RETVAL
147
+ */
148
+
149
+ default:
150
+ rb_raise(rb_eArgError, "wrong number of arguments (need 1 or 2)");
151
+ }
152
+
153
+ //docobj = ruby_xml_document_new2(cXMLDocument, xmlver);
154
+ return Qnil;
155
+ }
156
+
157
+ // Rdoc needs to know
158
+ #ifdef RDOC_NEVER_DEFINED
159
+ mXML = rb_define_module("XML");
160
+ #endif
161
+
162
+ void
163
+ ruby_init_xml_dtd(void) {
164
+ cXMLDtd = rb_define_class_under(mXML, "Dtd", rb_cObject);
165
+ rb_define_singleton_method(cXMLDtd, "new", ruby_xml_dtd_initialize, -1);
166
+ //rb_define_method(cXMLDocument, "xinclude", ruby_xml_document_xinclude, 0);
167
+ }
168
+
@@ -0,0 +1,17 @@
1
+ #ifndef __RUBY_XML_DTD__
2
+ #define __RUBY_XML_DTD__
3
+
4
+ extern VALUE cXMLDtd;
5
+
6
+ typedef struct rxp_dtd {
7
+ xmlDtdPtr dtd; /* libxml2 DTD held by this wrapper; released with xmlFreeDtd in ruby_xml_dtd_free */
8
+ //int data_type; /* The data type referenced by *data */
9
+ //void *data; /* Pointer to an external structure of options */
10
+ //int is_ptr; /* Determines if this object owns its data or points to it someplace else */
11
+ //VALUE xmlver; /* T_STRING with the xml version */
12
+ } ruby_xml_dtd;
13
+
14
+ void ruby_init_xml_dtd(void);
15
+ void ruby_dtd_free(ruby_xml_dtd *rxdtd);
16
+
17
+ #endif
@@ -0,0 +1,449 @@
1
+ /* $Id: ruby_xml_html_parser.c 300 2008-07-01 19:14:15Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #include "ruby_libxml.h"
6
+
7
+ VALUE cXMLHTMLParser;
8
+
9
+ //static int
10
+ //ctxtRead(FILE *f, char * buf, int len) {
11
+ // return(fread(buf, 1, len, f));
12
+ //}
13
+
14
+
15
+ /*
16
+ * call-seq:
17
+ * parser.filename => "filename"
18
+ *
19
+ * Obtain the filename this parser will read from.
20
+ */
21
+ /*
22
+ VALUE
23
+ ruby_xml_html_parser_filename_get(VALUE self) {
24
+ ruby_xml_html_parser *rxp;
25
+ rx_file_data *data;
26
+
27
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
28
+ if (rxp->data == NULL)
29
+ return(Qnil);
30
+
31
+ if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE)
32
+ return(Qnil);
33
+
34
+ data = (rx_file_data *)rxp->data;
35
+ return(data->filename);
36
+ }
37
+ */
38
+
39
+ /*
40
+ * call-seq:
41
+ * parser.filename = "filename"
42
+ *
43
+ * Set the filename this parser will read from.
44
+ */
45
+ /*
46
+ VALUE
47
+ ruby_xml_html_parser_filename_set(VALUE self, VALUE filename) {
48
+ ruby_xml_html_parser *rxp;
49
+ ruby_xml_parser_context *rxpc;
50
+ rx_file_data *data;
51
+
52
+ Check_Type(filename, T_STRING);
53
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
54
+
55
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
56
+ if (rxp->data != NULL)
57
+ rb_fatal("crap, this should be null");
58
+
59
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
60
+ data = ALLOC(rx_file_data);
61
+ rxp->data = data;
62
+ } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE) {
63
+ return(Qnil);
64
+ }
65
+
66
+ rxp->ctxt = ruby_xml_parser_context_new3();
67
+ data = (rx_file_data *)rxp->data;
68
+ data->filename = filename;
69
+
70
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
71
+ rxpc->ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename));
72
+ if (rxpc->ctxt == NULL)
73
+ rb_sys_fail(StringValuePtr(filename));
74
+
75
+ return(data->filename);
76
+ }
77
+ */
78
+
79
+ void
80
+ ruby_xml_html_parser_free(ruby_xml_html_parser *rxp) {
81
+ void *data;
82
+
83
+ switch(rxp->data_type) {
84
+ case RUBY_LIBXML_SRC_TYPE_NULL:
85
+ break;
86
+ case RUBY_LIBXML_SRC_TYPE_FILE:
87
+ data = (void *)(rx_file_data *)rxp->data;
88
+ ruby_xfree((rx_file_data *)data);
89
+ break;
90
+ case RUBY_LIBXML_SRC_TYPE_STRING:
91
+ data = (void *)(rx_string_data *)rxp->data;
92
+ ruby_xfree((rx_string_data *)data);
93
+ break;
94
+ case RUBY_LIBXML_SRC_TYPE_IO:
95
+ data = (void *)(rx_io_data *)rxp->data;
96
+ ruby_xfree((rx_io_data *)data);
97
+ break;
98
+ default:
99
+ rb_fatal("Unknown data type, %d", rxp->data_type);
100
+ }
101
+
102
+ ruby_xfree(rxp);
103
+ }
104
+
105
+
106
+ /*
107
+ * call-seq:
108
+ * parser.io => IO
109
+ *
110
+ * Obtain the IO instance this parser works with.
111
+ */
112
+ /*
113
+ VALUE
114
+ ruby_xml_html_parser_io_get(VALUE self, VALUE io) {
115
+ ruby_xml_html_parser *rxp;
116
+ rx_io_data *data;
117
+
118
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
119
+
120
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL ||
121
+ rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO ||
122
+ rxp->data == NULL)
123
+ return(Qnil);
124
+
125
+ data = (rx_io_data *)rxp->data;
126
+
127
+ return(data->io);
128
+ }
129
+ */
130
+
131
+ /*
132
+ * call-seq:
133
+ * parser.io = IO
134
+ *
135
+ * Set the IO instance this parser works with.
136
+ */
137
+ /*
138
+ VALUE
139
+ ruby_xml_html_parser_io_set(VALUE self, VALUE io) {
140
+ ruby_xml_html_parser *rxp;
141
+ ruby_xml_parser_context *rxpc;
142
+ rx_io_data *data;
143
+ OpenFile *fptr;
144
+ FILE *f;
145
+
146
+ if (!rb_obj_is_kind_of(io, rb_cIO))
147
+ rb_raise(rb_eTypeError, "need an IO object");
148
+
149
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
150
+
151
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
152
+ if (rxp->data != NULL)
153
+ rb_fatal("crap, this should be null");
154
+
155
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
156
+ data = ALLOC(rx_io_data);
157
+ rxp->data = data;
158
+ } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO) {
159
+ return(Qnil);
160
+ }
161
+
162
+ rxp->ctxt = ruby_xml_parser_context_new3();
163
+ data = (rx_io_data *)rxp->data;
164
+ data->io = io;
165
+
166
+ GetOpenFile(io, fptr);
167
+ rb_io_check_readable(fptr);
168
+ f = GetWriteFile(fptr);
169
+
170
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
171
+ rxpc->ctxt = htmlCreateIOParserCtxt(NULL, NULL,
172
+ (xmlInputReadCallback) ctxtRead,
173
+ NULL, f, XML_CHAR_ENCODING_NONE);
174
+ if (NIL_P(rxpc->ctxt))
175
+ rb_sys_fail(0);
176
+
177
+ return(data->io);
178
+ }
179
+ */
180
+
181
+ void
182
+ ruby_xml_html_parser_mark(ruby_xml_html_parser *rxp) {
183
+ if (rxp == NULL) return;
184
+ if (!NIL_P(rxp->ctxt)) rb_gc_mark(rxp->ctxt);
185
+
186
+ ruby_xml_state_marker();
187
+
188
+ switch(rxp->data_type) {
189
+ case RUBY_LIBXML_SRC_TYPE_NULL:
190
+ break;
191
+ case RUBY_LIBXML_SRC_TYPE_FILE:
192
+ if (!NIL_P(((rx_file_data *)rxp->data)->filename))
193
+ rb_gc_mark(((rx_file_data *)rxp->data)->filename);
194
+ break;
195
+ case RUBY_LIBXML_SRC_TYPE_STRING:
196
+ if (!NIL_P(((rx_string_data *)rxp->data)->str))
197
+ rb_gc_mark(((rx_string_data *)rxp->data)->str);
198
+ break;
199
+ case RUBY_LIBXML_SRC_TYPE_IO:
200
+ if (!NIL_P(((rx_io_data *)rxp->data)->io))
201
+ rb_gc_mark(((rx_io_data *)rxp->data)->io);
202
+ break;
203
+ default:
204
+ rb_fatal("unknown datatype: %d", rxp->data_type);
205
+ }
206
+ }
207
+
208
+
209
+ /*
210
+ * call-seq:
211
+ * XML::HTMLParser.new => parser
212
+ *
213
+ * Create a new parser instance with no pre-determined source.
214
+ */
215
+ VALUE
216
+ ruby_xml_html_parser_new(VALUE class) {
217
+ ruby_xml_html_parser *rxp;
218
+
219
+ rxp = ALLOC(ruby_xml_html_parser);
220
+ rxp->ctxt = Qnil;
221
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
222
+ rxp->data = NULL;
223
+ rxp->parsed = 0;
224
+
225
+ return(Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
226
+ ruby_xml_html_parser_free, rxp));
227
+ }
228
+
229
+
230
+ /*
231
+ * call-seq:
232
+ * XML::HTMLParser.file => parser
233
+ *
234
+ * Create a new parser instance that will read the specified file.
235
+ */
236
+ /*
237
+ VALUE
238
+ ruby_xml_html_parser_new_file(VALUE class, VALUE filename) {
239
+ VALUE obj;
240
+ ruby_xml_html_parser *rxp;
241
+ rx_file_data *data;
242
+
243
+ obj = ruby_xml_html_parser_new(class);
244
+ Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
245
+
246
+ data = ALLOC(rx_file_data);
247
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
248
+ rxp->data = data;
249
+
250
+ ruby_xml_html_parser_filename_set(obj, filename);
251
+
252
+ return(obj);
253
+ }
254
+ */
255
+
256
+ /*
257
+ * call-seq:
258
+ * XML::HTMLParser.io => parser
259
+ *
260
+ * Create a new parser instance that will read from the
261
+ * specified IO object.
262
+ */
263
+ /*
264
+ VALUE
265
+ ruby_xml_html_parser_new_io(VALUE class, VALUE io) {
266
+ VALUE obj;
267
+ ruby_xml_html_parser *rxp;
268
+ rx_io_data *data;
269
+
270
+ obj = ruby_xml_html_parser_new(class);
271
+ Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
272
+
273
+ data = ALLOC(rx_io_data);
274
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
275
+ rxp->data = data;
276
+
277
+ ruby_xml_html_parser_io_set(obj, io);
278
+
279
+ return(obj);
280
+ }
281
+ */
282
+
283
+ /*
284
+ * call-seq:
285
+ * XML::HTMLParser.string => parser
286
+ *
287
+ * Create a new parser instance that will parse the given
288
+ * string.
289
+ */
290
+ VALUE
291
+ ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
292
+ VALUE obj;
293
+ ruby_xml_html_parser *rxp;
294
+ rx_string_data *data;
295
+
296
+ obj = ruby_xml_html_parser_new(class);
297
+ Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
298
+
299
+ data = ALLOC(rx_string_data);
300
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
301
+ rxp->data = data;
302
+
303
+ ruby_xml_html_parser_str_set(obj, str);
304
+
305
+ return(obj);
306
+ }
307
+
308
+
309
+ /*
310
+ * call-seq:
311
+ * parser.parse => document
312
+ *
313
+ * Parse the input XML and create an XML::Document with
314
+ * it's content. If an error occurs, XML::Parser::ParseError
315
+ * is thrown.
316
+ */
317
+ VALUE
318
+ ruby_xml_html_parser_parse(VALUE self) {
319
+ ruby_xml_html_parser *rxp;
320
+ ruby_xml_parser_context *rxpc;
321
+ htmlDocPtr xdp;
322
+ VALUE doc;
323
+
324
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
325
+
326
+ switch (rxp->data_type) {
327
+ case RUBY_LIBXML_SRC_TYPE_NULL:
328
+ return(Qnil);
329
+ case RUBY_LIBXML_SRC_TYPE_STRING:
330
+ //case RUBY_LIBXML_SRC_TYPE_FILE:
331
+ //case RUBY_LIBXML_SRC_TYPE_IO:
332
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
333
+
334
+ /* don't check return values here, the HTML parser returns errors
335
+ * but still allows the resulting tree to be used.
336
+ */
337
+ htmlParseDocument(rxpc->ctxt);
338
+ xdp = rxpc->ctxt->myDoc;
339
+ rxp->parsed = 1;
340
+
341
+ doc = ruby_xml_document_wrap(xdp);
342
+ break;
343
+ default:
344
+ rb_fatal("Unknown data type, %d", rxp->data_type);
345
+ }
346
+
347
+ return(doc);
348
+ }
349
+
350
+
351
+ /*
352
+ * call-seq:
353
+ * parser.context => context
354
+ *
355
+ * Obtain the XML::Parser::Context associated with this
356
+ * parser.
357
+ */
358
+ VALUE
359
+ ruby_xml_html_parser_context_get(VALUE self) {
360
+ ruby_xml_html_parser *rxp;
361
+
362
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
363
+ if (rxp->ctxt == Qnil)
364
+ return(Qnil);
365
+ else
366
+ return(rxp->ctxt);
367
+ }
368
+
369
+
370
+ /*
371
+ * call-seq:
372
+ * parser.string => "string"
373
+ *
374
+ * Obtain the string this parser works with.
375
+ */
376
+ VALUE
377
+ ruby_xml_html_parser_str_get(VALUE self) {
378
+ ruby_xml_html_parser *rxp;
379
+ rx_string_data *data;
380
+
381
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
382
+ if (rxp->data == NULL || rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING)
383
+ return(Qnil);
384
+
385
+ data = (rx_string_data *)rxp->data;
386
+ return(data->str);
387
+ }
388
+
389
+
390
+ /*
391
+ * call-seq:
392
+ * parser.string = "string"
393
+ *
394
+ * Set the string this parser works with.
395
+ */
396
+ VALUE
397
+ ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
398
+ ruby_xml_html_parser *rxp;
399
+ ruby_xml_parser_context *rxpc;
400
+ rx_string_data *data;
401
+
402
+ Check_Type(str, T_STRING);
403
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
404
+
405
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
406
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
407
+ data = ALLOC(rx_string_data);
408
+ rxp->data = data;
409
+ } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
410
+ return(Qnil);
411
+ }
412
+
413
+ rxp->ctxt = ruby_xml_parser_context_new3();
414
+ data = (rx_string_data *)rxp->data;
415
+ data->str = str;
416
+
417
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
418
+ rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));
419
+
420
+ return(data->str);
421
+ }
422
+
423
+
424
+ // Rdoc needs to know
425
+ #ifdef RDOC_NEVER_DEFINED
426
+ mXML = rb_define_module("XML");
427
+ #endif
428
+
429
+ void
430
+ ruby_init_html_parser(void) {
431
+ cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
432
+
433
+ /*
434
+ rb_define_singleton_method(cXMLHTMLParser, "file", ruby_xml_html_parser_new_file, 1);
435
+ rb_define_singleton_method(cXMLHTMLParser, "io", ruby_xml_html_parser_new_io, 1);
436
+ */
437
+ rb_define_singleton_method(cXMLHTMLParser, "new", ruby_xml_html_parser_new, 0);
438
+ rb_define_singleton_method(cXMLHTMLParser, "string", ruby_xml_html_parser_new_string, 1);
439
+ /*
440
+ rb_define_method(cXMLHTMLParser, "filename", ruby_xml_html_parser_filename_get, 0);
441
+ rb_define_method(cXMLHTMLParser, "filename=", ruby_xml_html_parser_filename_set, 1);
442
+ rb_define_method(cXMLHTMLParser, "io", ruby_xml_html_parser_io_get, 0);
443
+ rb_define_method(cXMLHTMLParser, "io=", ruby_xml_html_parser_io_set, 1);
444
+ */
445
+ rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
446
+ rb_define_method(cXMLHTMLParser, "context", ruby_xml_html_parser_context_get, 0);
447
+ rb_define_method(cXMLHTMLParser, "string", ruby_xml_html_parser_str_get, 0);
448
+ rb_define_method(cXMLHTMLParser, "string=", ruby_xml_html_parser_str_set, 1);
449
+ }