libxml-ruby 0.6.0-x86-mswin32-60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. data/CHANGES +122 -0
  2. data/LICENSE +23 -0
  3. data/README +144 -0
  4. data/ext/libxml/cbg.c +76 -0
  5. data/ext/libxml/extconf.rb +308 -0
  6. data/ext/libxml/libxml.c +62 -0
  7. data/ext/libxml/ruby_libxml.h +93 -0
  8. data/ext/libxml/ruby_xml_attr.c +405 -0
  9. data/ext/libxml/ruby_xml_attr.h +19 -0
  10. data/ext/libxml/ruby_xml_document.c +1111 -0
  11. data/ext/libxml/ruby_xml_document.h +27 -0
  12. data/ext/libxml/ruby_xml_dtd.c +168 -0
  13. data/ext/libxml/ruby_xml_dtd.h +17 -0
  14. data/ext/libxml/ruby_xml_html_parser.c +449 -0
  15. data/ext/libxml/ruby_xml_html_parser.h +29 -0
  16. data/ext/libxml/ruby_xml_input_cbg.c +158 -0
  17. data/ext/libxml/ruby_xml_input_cbg.h +20 -0
  18. data/ext/libxml/ruby_xml_node.c +2410 -0
  19. data/ext/libxml/ruby_xml_node.h +27 -0
  20. data/ext/libxml/ruby_xml_node_set.c +170 -0
  21. data/ext/libxml/ruby_xml_node_set.h +20 -0
  22. data/ext/libxml/ruby_xml_ns.c +153 -0
  23. data/ext/libxml/ruby_xml_ns.h +21 -0
  24. data/ext/libxml/ruby_xml_parser.c +1425 -0
  25. data/ext/libxml/ruby_xml_parser.h +31 -0
  26. data/ext/libxml/ruby_xml_parser_context.c +750 -0
  27. data/ext/libxml/ruby_xml_parser_context.h +22 -0
  28. data/ext/libxml/ruby_xml_reader.c +900 -0
  29. data/ext/libxml/ruby_xml_reader.h +14 -0
  30. data/ext/libxml/ruby_xml_sax_parser.c +485 -0
  31. data/ext/libxml/ruby_xml_sax_parser.h +56 -0
  32. data/ext/libxml/ruby_xml_schema.c +146 -0
  33. data/ext/libxml/ruby_xml_schema.h +16 -0
  34. data/ext/libxml/ruby_xml_state.c +113 -0
  35. data/ext/libxml/ruby_xml_state.h +12 -0
  36. data/ext/libxml/ruby_xml_tree.c +43 -0
  37. data/ext/libxml/ruby_xml_tree.h +12 -0
  38. data/ext/libxml/ruby_xml_xinclude.c +20 -0
  39. data/ext/libxml/ruby_xml_xinclude.h +13 -0
  40. data/ext/libxml/ruby_xml_xpath.c +243 -0
  41. data/ext/libxml/ruby_xml_xpath.h +23 -0
  42. data/ext/libxml/ruby_xml_xpath_context.c +118 -0
  43. data/ext/libxml/ruby_xml_xpath_context.h +20 -0
  44. data/ext/libxml/ruby_xml_xpath_object.c +293 -0
  45. data/ext/libxml/ruby_xml_xpath_object.h +28 -0
  46. data/ext/libxml/ruby_xml_xpointer.c +100 -0
  47. data/ext/libxml/ruby_xml_xpointer.h +27 -0
  48. data/ext/libxml/ruby_xml_xpointer_context.c +21 -0
  49. data/ext/libxml/ruby_xml_xpointer_context.h +18 -0
  50. data/ext/libxml/sax_parser_callbacks.inc +213 -0
  51. data/ext/libxml/version.h +9 -0
  52. data/lib/libxml.rb +125 -0
  53. data/lib/libxml_ruby.so +0 -0
  54. data/lib/xml/libxml.rb +5 -0
  55. data/mingw/libiconv-2.dll +0 -0
  56. data/mingw/libxml2-2.dll +0 -0
  57. data/mingw/libxml_ruby.so +0 -0
  58. data/mingw/mingw.rake +36 -0
  59. data/test/dtd-test.rb +24 -0
  60. data/test/etc_doc_to_s.rb +19 -0
  61. data/test/ets_copy_bug.rb +21 -0
  62. data/test/ets_copy_bug2.rb +32 -0
  63. data/test/ets_copy_bug3.rb +38 -0
  64. data/test/ets_doc_file.rb +15 -0
  65. data/test/ets_doc_to_s.rb +21 -0
  66. data/test/ets_gpx.rb +26 -0
  67. data/test/ets_node_gc.rb +21 -0
  68. data/test/ets_test.xml +2 -0
  69. data/test/ets_tsr.rb +9 -0
  70. data/test/gc.log +0 -0
  71. data/test/merge_bug.rb +55 -0
  72. data/test/schema-test.rb +74 -0
  73. data/test/tc_well_formed.rb +11 -0
  74. data/test/tc_xml_document.rb +52 -0
  75. data/test/tc_xml_document_write.rb +24 -0
  76. data/test/tc_xml_document_write2.rb +54 -0
  77. data/test/tc_xml_document_write3.rb +96 -0
  78. data/test/tc_xml_html_parser.rb +63 -0
  79. data/test/tc_xml_node.rb +59 -0
  80. data/test/tc_xml_node2.rb +25 -0
  81. data/test/tc_xml_node3.rb +27 -0
  82. data/test/tc_xml_node4.rb +86 -0
  83. data/test/tc_xml_node5.rb +52 -0
  84. data/test/tc_xml_node6.rb +27 -0
  85. data/test/tc_xml_node7.rb +35 -0
  86. data/test/tc_xml_node8.rb +32 -0
  87. data/test/tc_xml_node9.rb +32 -0
  88. data/test/tc_xml_node_copy.rb +40 -0
  89. data/test/tc_xml_node_set.rb +24 -0
  90. data/test/tc_xml_node_set2.rb +37 -0
  91. data/test/tc_xml_node_text.rb +17 -0
  92. data/test/tc_xml_node_xlink.rb +28 -0
  93. data/test/tc_xml_parser.rb +190 -0
  94. data/test/tc_xml_parser2.rb +16 -0
  95. data/test/tc_xml_parser3.rb +23 -0
  96. data/test/tc_xml_parser4.rb +33 -0
  97. data/test/tc_xml_parser5.rb +27 -0
  98. data/test/tc_xml_parser6.rb +23 -0
  99. data/test/tc_xml_parser7.rb +28 -0
  100. data/test/tc_xml_parser8.rb +32 -0
  101. data/test/tc_xml_parser9.rb +11 -0
  102. data/test/tc_xml_parser_context.rb +88 -0
  103. data/test/tc_xml_reader.rb +112 -0
  104. data/test/tc_xml_sax_parser.rb +104 -0
  105. data/test/tc_xml_sax_parser2.rb +51 -0
  106. data/test/tc_xml_xinclude.rb +30 -0
  107. data/test/tc_xml_xpath.rb +38 -0
  108. data/test/tc_xml_xpath2.rb +14 -0
  109. data/test/tc_xml_xpointer.rb +78 -0
  110. data/vc/libxml.sln +20 -0
  111. data/vc/libxml.vcproj +389 -0
  112. data/work/Rakefile +247 -0
  113. data/work/task/make +26 -0
  114. data/work/task/memory +37 -0
  115. data/work/task/rdoc +39 -0
  116. data/work/task/setup +1616 -0
  117. data/work/task/test +29 -0
  118. data/work/test/ets_runner.rb +33 -0
  119. data/work/test/libxml_test.rb +3 -0
  120. data/work/test/runner.rb +0 -0
  121. data/work/test/runner_ets.rb +33 -0
  122. data/work/vc/debug/libxml.exp +0 -0
  123. data/work/vc/debug/libxml.ilk +0 -0
  124. data/work/vc/debug/libxml.lib +0 -0
  125. data/work/vc/debug/libxml.pdb +0 -0
  126. data/work/vc/debug/libxml.so +0 -0
  127. metadata +224 -0
@@ -0,0 +1,27 @@
1
+ /* $Id: ruby_xml_document.h 304 2008-07-02 17:42:41Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #ifndef __RUBY_XML_DOCUMENT__
6
+ #define __RUBY_XML_DOCUMENT__
7
+
8
+ extern VALUE cXMLDocument;
9
+
10
+ typedef struct rxp_document {
11
+ xmlDocPtr doc; /* Tree/DOM interface */
12
+ int data_type; /* The data type referenced by *data */
13
+ void *data; /* Pointer to an external structure of options */
14
+ } ruby_xml_document_t;
15
+
16
+ VALUE ruby_xml_document_filename_get(VALUE self);
17
+ VALUE ruby_xml_document_new_native(VALUE class, VALUE xmlver);
18
+ void ruby_xml_document_free(ruby_xml_document_t *rxd);
19
+ VALUE ruby_xml_document_root_get(VALUE self);
20
+ void ruby_init_xml_document(void);
21
+
22
+ #if defined(_WIN32)
23
+ __declspec(dllexport)
24
+ #endif
25
+ VALUE ruby_xml_document_wrap(xmlDocPtr xnode);
26
+
27
+ #endif
@@ -0,0 +1,168 @@
1
+ #include "ruby_libxml.h"
2
+ #include "ruby_xml_dtd.h"
3
+
4
+ VALUE cXMLDtd;
5
+
6
+ void
7
+ ruby_xml_dtd_free(ruby_xml_dtd *rxdtd) {
8
+ if (rxdtd->dtd != NULL) {
9
+ xmlFreeDtd(rxdtd->dtd);
10
+ rxdtd->dtd = NULL;
11
+ }
12
+
13
+ ruby_xfree(rxdtd);
14
+ }
15
+
16
+ static void
17
+ ruby_xml_dtd_mark(ruby_xml_dtd *rxdtd) {
18
+ return;
19
+ //if (rxdtd == NULL) return;
20
+ //if (!NIL_P(rxd->xmlver)) rb_gc_mark(rxd->xmlver);
21
+ }
22
+
23
+ /*
24
+ * call-seq:
25
+ * XML::Dtd.new("public system") => dtd
26
+ * XML::Dtd.new("public", "system") => dtd
27
+ *
28
+ * Create a new Dtd from the specified public and system
29
+ * identifiers.
30
+ */
31
+ VALUE
32
+ ruby_xml_dtd_initialize(int argc, VALUE *argv, VALUE class) {
33
+ ruby_xml_dtd *rxdtd;
34
+ VALUE external, system, dtd_string;
35
+ xmlParserInputBufferPtr buffer;
36
+ xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
37
+ xmlChar *new_string;
38
+
39
+ // 1 argument -- string --> parsujeme jako dtd
40
+ // 2 argumenty -- public, system --> bude se hledat
41
+ switch (argc) {
42
+ case 2:
43
+ rb_scan_args(argc, argv, "20", &external, &system);
44
+
45
+ Check_Type(external, T_STRING);
46
+ Check_Type(system, T_STRING);
47
+ rxdtd = ALLOC(ruby_xml_dtd);
48
+ rxdtd->dtd = xmlParseDTD( (xmlChar*)StringValuePtr(external),
49
+ (xmlChar*)StringValuePtr(system) );
50
+ if (rxdtd->dtd == NULL) {
51
+ ruby_xfree(rxdtd);
52
+ return(Qfalse);
53
+ }
54
+
55
+ xmlSetTreeDoc( (xmlNodePtr)rxdtd->dtd, NULL );
56
+ return( Data_Wrap_Struct(cXMLDtd, ruby_xml_dtd_mark, ruby_xml_dtd_free, rxdtd) );
57
+ break;
58
+
59
+ /*
60
+ SV *
61
+ new(CLASS, external, system)
62
+ char * CLASS
63
+ char * external
64
+ char * system
65
+ ALIAS:
66
+ parse_uri = 1
67
+ PREINIT:
68
+ xmlDtdPtr dtd = NULL;
69
+ CODE:
70
+ LibXML_error = sv_2mortal(newSVpv("", 0));
71
+ dtd = xmlParseDTD((const xmlChar*)external, (const xmlChar*)system);
72
+ if ( dtd == NULL ) {
73
+ XSRETURN_UNDEF;
74
+ }
75
+ xmlSetTreeDoc((xmlNodePtr)dtd, NULL);
76
+ RETVAL = PmmNodeToSv( (xmlNodePtr) dtd, NULL );
77
+ OUTPUT:
78
+ RETVAL
79
+ */
80
+
81
+ case 1:
82
+
83
+ rb_scan_args(argc, argv, "10", &dtd_string);
84
+ buffer = xmlAllocParserInputBuffer(enc);
85
+ //if ( !buffer) return Qnil
86
+ new_string = xmlStrdup((xmlChar*)StringValuePtr(dtd_string));
87
+ xmlParserInputBufferPush(buffer, xmlStrlen(new_string), (const char*)new_string);
88
+
89
+ rxdtd = ALLOC(ruby_xml_dtd);
90
+ rxdtd->dtd = xmlIOParseDTD(NULL, buffer, enc);
91
+
92
+ // NOTE: For some reason freeing this InputBuffer causes a segfault!
93
+ // xmlFreeParserInputBuffer(buffer);
94
+ xmlFree(new_string);
95
+
96
+ return( Data_Wrap_Struct(cXMLDtd, ruby_xml_dtd_mark, ruby_xml_dtd_free, rxdtd) );
97
+
98
+ break;
99
+ /*
100
+ SV * parse_string(CLASS, str, ...)
101
+ char * CLASS
102
+ char * str
103
+ PREINIT:
104
+ STRLEN n_a;
105
+ xmlDtdPtr res;
106
+ SV * encoding_sv;
107
+ xmlParserInputBufferPtr buffer;
108
+ xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
109
+ xmlChar * new_string;
110
+ STRLEN len;
111
+ CODE:
112
+ LibXML_init_error();
113
+ if (items > 2) {
114
+ encoding_sv = ST(2);
115
+ if (items > 3) {
116
+ croak("parse_string: too many parameters");
117
+ }
118
+ // warn("getting encoding...\n");
119
+ enc = xmlParseCharEncoding(SvPV(encoding_sv, n_a));
120
+ if (enc == XML_CHAR_ENCODING_ERROR) {
121
+ croak("Parse of encoding %s failed: %s", SvPV(encoding_sv, n_a), SvPV(LibXML_error, n_a));
122
+ }
123
+ }
124
+ buffer = xmlAllocParserInputBuffer(enc);
125
+ // buffer = xmlParserInputBufferCreateMem(str, xmlStrlen(str), enc);
126
+ if ( !buffer)
127
+ croak("cant create buffer!\n" );
128
+
129
+ new_string = xmlStrdup((const xmlChar*)str);
130
+ xmlParserInputBufferPush(buffer, xmlStrlen(new_string), (const char*)new_string);
131
+
132
+ res = xmlIOParseDTD(NULL, buffer, enc);
133
+
134
+ // NOTE: For some reason freeing this InputBuffer causes a segfault!
135
+ // xmlFreeParserInputBuffer(buffer);
136
+ xmlFree(new_string);
137
+
138
+ sv_2mortal( LibXML_error );
139
+ LibXML_croak_error();
140
+
141
+ if (res == NULL) {
142
+ croak("no DTD parsed!");
143
+ }
144
+ RETVAL = PmmNodeToSv((xmlNodePtr)res, NULL);
145
+ OUTPUT:
146
+ RETVAL
147
+ */
148
+
149
+ default:
150
+ rb_raise(rb_eArgError, "wrong number of arguments (need 1 or 2)");
151
+ }
152
+
153
+ //docobj = ruby_xml_document_new2(cXMLDocument, xmlver);
154
+ return Qnil;
155
+ }
156
+
157
+ // Rdoc needs to know
158
+ #ifdef RDOC_NEVER_DEFINED
159
+ mXML = rb_define_module("XML");
160
+ #endif
161
+
162
+ void
163
+ ruby_init_xml_dtd(void) {
164
+ cXMLDtd = rb_define_class_under(mXML, "Dtd", rb_cObject);
165
+ rb_define_singleton_method(cXMLDtd, "new", ruby_xml_dtd_initialize, -1);
166
+ //rb_define_method(cXMLDocument, "xinclude", ruby_xml_document_xinclude, 0);
167
+ }
168
+
@@ -0,0 +1,17 @@
1
+ #ifndef __RUBY_XML_DTD__
2
+ #define __RUBY_XML_DTD__
3
+
4
+ extern VALUE cXMLDtd;
5
+
6
+ typedef struct rxp_dtd {
7
+ xmlDtdPtr dtd; /* DTD interface */
8
+ //int data_type; /* The data type referenced by *data */
9
+ //void *data; /* Pointer to an external structure of options */
10
+ //int is_ptr; /* Determines if this object owns its data or points to it someplace else */
11
+ //VALUE xmlver; /* T_STRING with the xml version */
12
+ } ruby_xml_dtd;
13
+
14
+ void ruby_init_xml_dtd(void);
15
+ void ruby_dtd_free(ruby_xml_dtd *rxdtd);
16
+
17
+ #endif
@@ -0,0 +1,449 @@
1
+ /* $Id: ruby_xml_html_parser.c 300 2008-07-01 19:14:15Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #include "ruby_libxml.h"
6
+
7
+ VALUE cXMLHTMLParser;
8
+
9
+ //static int
10
+ //ctxtRead(FILE *f, char * buf, int len) {
11
+ // return(fread(buf, 1, len, f));
12
+ //}
13
+
14
+
15
+ /*
16
+ * call-seq:
17
+ * parser.filename => "filename"
18
+ *
19
+ * Obtain the filename this parser will read from.
20
+ */
21
+ /*
22
+ VALUE
23
+ ruby_xml_html_parser_filename_get(VALUE self) {
24
+ ruby_xml_html_parser *rxp;
25
+ rx_file_data *data;
26
+
27
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
28
+ if (rxp->data == NULL)
29
+ return(Qnil);
30
+
31
+ if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE)
32
+ return(Qnil);
33
+
34
+ data = (rx_file_data *)rxp->data;
35
+ return(data->filename);
36
+ }
37
+ */
38
+
39
+ /*
40
+ * call-seq:
41
+ * parser.filename = "filename"
42
+ *
43
+ * Set the filename this parser will read from.
44
+ */
45
+ /*
46
+ VALUE
47
+ ruby_xml_html_parser_filename_set(VALUE self, VALUE filename) {
48
+ ruby_xml_html_parser *rxp;
49
+ ruby_xml_parser_context *rxpc;
50
+ rx_file_data *data;
51
+
52
+ Check_Type(filename, T_STRING);
53
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
54
+
55
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
56
+ if (rxp->data != NULL)
57
+ rb_fatal("crap, this should be null");
58
+
59
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
60
+ data = ALLOC(rx_file_data);
61
+ rxp->data = data;
62
+ } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE) {
63
+ return(Qnil);
64
+ }
65
+
66
+ rxp->ctxt = ruby_xml_parser_context_new3();
67
+ data = (rx_file_data *)rxp->data;
68
+ data->filename = filename;
69
+
70
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
71
+ rxpc->ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename));
72
+ if (rxpc->ctxt == NULL)
73
+ rb_sys_fail(StringValuePtr(filename));
74
+
75
+ return(data->filename);
76
+ }
77
+ */
78
+
79
+ void
80
+ ruby_xml_html_parser_free(ruby_xml_html_parser *rxp) {
81
+ void *data;
82
+
83
+ switch(rxp->data_type) {
84
+ case RUBY_LIBXML_SRC_TYPE_NULL:
85
+ break;
86
+ case RUBY_LIBXML_SRC_TYPE_FILE:
87
+ data = (void *)(rx_file_data *)rxp->data;
88
+ ruby_xfree((rx_file_data *)data);
89
+ break;
90
+ case RUBY_LIBXML_SRC_TYPE_STRING:
91
+ data = (void *)(rx_string_data *)rxp->data;
92
+ ruby_xfree((rx_string_data *)data);
93
+ break;
94
+ case RUBY_LIBXML_SRC_TYPE_IO:
95
+ data = (void *)(rx_io_data *)rxp->data;
96
+ ruby_xfree((rx_io_data *)data);
97
+ break;
98
+ default:
99
+ rb_fatal("Unknown data type, %d", rxp->data_type);
100
+ }
101
+
102
+ ruby_xfree(rxp);
103
+ }
104
+
105
+
106
+ /*
107
+ * call-seq:
108
+ * parser.io => IO
109
+ *
110
+ * Obtain the IO instance this parser works with.
111
+ */
112
+ /*
113
+ VALUE
114
+ ruby_xml_html_parser_io_get(VALUE self, VALUE io) {
115
+ ruby_xml_html_parser *rxp;
116
+ rx_io_data *data;
117
+
118
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
119
+
120
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL ||
121
+ rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO ||
122
+ rxp->data == NULL)
123
+ return(Qnil);
124
+
125
+ data = (rx_io_data *)rxp->data;
126
+
127
+ return(data->io);
128
+ }
129
+ */
130
+
131
+ /*
132
+ * call-seq:
133
+ * parser.io = IO
134
+ *
135
+ * Set the IO instance this parser works with.
136
+ */
137
+ /*
138
+ VALUE
139
+ ruby_xml_html_parser_io_set(VALUE self, VALUE io) {
140
+ ruby_xml_html_parser *rxp;
141
+ ruby_xml_parser_context *rxpc;
142
+ rx_io_data *data;
143
+ OpenFile *fptr;
144
+ FILE *f;
145
+
146
+ if (!rb_obj_is_kind_of(io, rb_cIO))
147
+ rb_raise(rb_eTypeError, "need an IO object");
148
+
149
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
150
+
151
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
152
+ if (rxp->data != NULL)
153
+ rb_fatal("crap, this should be null");
154
+
155
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
156
+ data = ALLOC(rx_io_data);
157
+ rxp->data = data;
158
+ } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO) {
159
+ return(Qnil);
160
+ }
161
+
162
+ rxp->ctxt = ruby_xml_parser_context_new3();
163
+ data = (rx_io_data *)rxp->data;
164
+ data->io = io;
165
+
166
+ GetOpenFile(io, fptr);
167
+ rb_io_check_readable(fptr);
168
+ f = GetWriteFile(fptr);
169
+
170
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
171
+ rxpc->ctxt = htmlCreateIOParserCtxt(NULL, NULL,
172
+ (xmlInputReadCallback) ctxtRead,
173
+ NULL, f, XML_CHAR_ENCODING_NONE);
174
+ if (NIL_P(rxpc->ctxt))
175
+ rb_sys_fail(0);
176
+
177
+ return(data->io);
178
+ }
179
+ */
180
+
181
+ void
182
+ ruby_xml_html_parser_mark(ruby_xml_html_parser *rxp) {
183
+ if (rxp == NULL) return;
184
+ if (!NIL_P(rxp->ctxt)) rb_gc_mark(rxp->ctxt);
185
+
186
+ ruby_xml_state_marker();
187
+
188
+ switch(rxp->data_type) {
189
+ case RUBY_LIBXML_SRC_TYPE_NULL:
190
+ break;
191
+ case RUBY_LIBXML_SRC_TYPE_FILE:
192
+ if (!NIL_P(((rx_file_data *)rxp->data)->filename))
193
+ rb_gc_mark(((rx_file_data *)rxp->data)->filename);
194
+ break;
195
+ case RUBY_LIBXML_SRC_TYPE_STRING:
196
+ if (!NIL_P(((rx_string_data *)rxp->data)->str))
197
+ rb_gc_mark(((rx_string_data *)rxp->data)->str);
198
+ break;
199
+ case RUBY_LIBXML_SRC_TYPE_IO:
200
+ if (!NIL_P(((rx_io_data *)rxp->data)->io))
201
+ rb_gc_mark(((rx_io_data *)rxp->data)->io);
202
+ break;
203
+ default:
204
+ rb_fatal("unknown datatype: %d", rxp->data_type);
205
+ }
206
+ }
207
+
208
+
209
+ /*
210
+ * call-seq:
211
+ * XML::HTMLParser.new => parser
212
+ *
213
+ * Create a new parser instance with no pre-determined source.
214
+ */
215
+ VALUE
216
+ ruby_xml_html_parser_new(VALUE class) {
217
+ ruby_xml_html_parser *rxp;
218
+
219
+ rxp = ALLOC(ruby_xml_html_parser);
220
+ rxp->ctxt = Qnil;
221
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
222
+ rxp->data = NULL;
223
+ rxp->parsed = 0;
224
+
225
+ return(Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
226
+ ruby_xml_html_parser_free, rxp));
227
+ }
228
+
229
+
230
+ /*
231
+ * call-seq:
232
+ * XML::HTMLParser.file => parser
233
+ *
234
+ * Create a new parser instance that will read the specified file.
235
+ */
236
+ /*
237
+ VALUE
238
+ ruby_xml_html_parser_new_file(VALUE class, VALUE filename) {
239
+ VALUE obj;
240
+ ruby_xml_html_parser *rxp;
241
+ rx_file_data *data;
242
+
243
+ obj = ruby_xml_html_parser_new(class);
244
+ Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
245
+
246
+ data = ALLOC(rx_file_data);
247
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
248
+ rxp->data = data;
249
+
250
+ ruby_xml_html_parser_filename_set(obj, filename);
251
+
252
+ return(obj);
253
+ }
254
+ */
255
+
256
+ /*
257
+ * call-seq:
258
+ * XML::HTMLParser.io => parser
259
+ *
260
+ * Create a new parser instance that will read from the
261
+ * specified IO object.
262
+ */
263
+ /*
264
+ VALUE
265
+ ruby_xml_html_parser_new_io(VALUE class, VALUE io) {
266
+ VALUE obj;
267
+ ruby_xml_html_parser *rxp;
268
+ rx_io_data *data;
269
+
270
+ obj = ruby_xml_html_parser_new(class);
271
+ Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
272
+
273
+ data = ALLOC(rx_io_data);
274
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
275
+ rxp->data = data;
276
+
277
+ ruby_xml_html_parser_io_set(obj, io);
278
+
279
+ return(obj);
280
+ }
281
+ */
282
+
283
+ /*
284
+ * call-seq:
285
+ * XML::HTMLParser.string => parser
286
+ *
287
+ * Create a new parser instance that will parse the given
288
+ * string.
289
+ */
290
+ VALUE
291
+ ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
292
+ VALUE obj;
293
+ ruby_xml_html_parser *rxp;
294
+ rx_string_data *data;
295
+
296
+ obj = ruby_xml_html_parser_new(class);
297
+ Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
298
+
299
+ data = ALLOC(rx_string_data);
300
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
301
+ rxp->data = data;
302
+
303
+ ruby_xml_html_parser_str_set(obj, str);
304
+
305
+ return(obj);
306
+ }
307
+
308
+
309
+ /*
310
+ * call-seq:
311
+ * parser.parse => document
312
+ *
313
+ * Parse the input XML and create an XML::Document with
314
+ * it's content. If an error occurs, XML::Parser::ParseError
315
+ * is thrown.
316
+ */
317
+ VALUE
318
+ ruby_xml_html_parser_parse(VALUE self) {
319
+ ruby_xml_html_parser *rxp;
320
+ ruby_xml_parser_context *rxpc;
321
+ htmlDocPtr xdp;
322
+ VALUE doc;
323
+
324
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
325
+
326
+ switch (rxp->data_type) {
327
+ case RUBY_LIBXML_SRC_TYPE_NULL:
328
+ return(Qnil);
329
+ case RUBY_LIBXML_SRC_TYPE_STRING:
330
+ //case RUBY_LIBXML_SRC_TYPE_FILE:
331
+ //case RUBY_LIBXML_SRC_TYPE_IO:
332
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
333
+
334
+ /* don't check return values here, the HTML parser returns errors
335
+ * but still allows the resulting tree to be used.
336
+ */
337
+ htmlParseDocument(rxpc->ctxt);
338
+ xdp = rxpc->ctxt->myDoc;
339
+ rxp->parsed = 1;
340
+
341
+ doc = ruby_xml_document_wrap(xdp);
342
+ break;
343
+ default:
344
+ rb_fatal("Unknown data type, %d", rxp->data_type);
345
+ }
346
+
347
+ return(doc);
348
+ }
349
+
350
+
351
+ /*
352
+ * call-seq:
353
+ * parser.context => context
354
+ *
355
+ * Obtain the XML::Parser::Context associated with this
356
+ * parser.
357
+ */
358
+ VALUE
359
+ ruby_xml_html_parser_context_get(VALUE self) {
360
+ ruby_xml_html_parser *rxp;
361
+
362
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
363
+ if (rxp->ctxt == Qnil)
364
+ return(Qnil);
365
+ else
366
+ return(rxp->ctxt);
367
+ }
368
+
369
+
370
+ /*
371
+ * call-seq:
372
+ * parser.string => "string"
373
+ *
374
+ * Obtain the string this parser works with.
375
+ */
376
+ VALUE
377
+ ruby_xml_html_parser_str_get(VALUE self) {
378
+ ruby_xml_html_parser *rxp;
379
+ rx_string_data *data;
380
+
381
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
382
+ if (rxp->data == NULL || rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING)
383
+ return(Qnil);
384
+
385
+ data = (rx_string_data *)rxp->data;
386
+ return(data->str);
387
+ }
388
+
389
+
390
+ /*
391
+ * call-seq:
392
+ * parser.string = "string"
393
+ *
394
+ * Set the string this parser works with.
395
+ */
396
+ VALUE
397
+ ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
398
+ ruby_xml_html_parser *rxp;
399
+ ruby_xml_parser_context *rxpc;
400
+ rx_string_data *data;
401
+
402
+ Check_Type(str, T_STRING);
403
+ Data_Get_Struct(self, ruby_xml_html_parser, rxp);
404
+
405
+ if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
406
+ rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
407
+ data = ALLOC(rx_string_data);
408
+ rxp->data = data;
409
+ } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
410
+ return(Qnil);
411
+ }
412
+
413
+ rxp->ctxt = ruby_xml_parser_context_new3();
414
+ data = (rx_string_data *)rxp->data;
415
+ data->str = str;
416
+
417
+ Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
418
+ rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));
419
+
420
+ return(data->str);
421
+ }
422
+
423
+
424
+ // Rdoc needs to know
425
+ #ifdef RDOC_NEVER_DEFINED
426
+ mXML = rb_define_module("XML");
427
+ #endif
428
+
429
+ void
430
+ ruby_init_html_parser(void) {
431
+ cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
432
+
433
+ /*
434
+ rb_define_singleton_method(cXMLHTMLParser, "file", ruby_xml_html_parser_new_file, 1);
435
+ rb_define_singleton_method(cXMLHTMLParser, "io", ruby_xml_html_parser_new_io, 1);
436
+ */
437
+ rb_define_singleton_method(cXMLHTMLParser, "new", ruby_xml_html_parser_new, 0);
438
+ rb_define_singleton_method(cXMLHTMLParser, "string", ruby_xml_html_parser_new_string, 1);
439
+ /*
440
+ rb_define_method(cXMLHTMLParser, "filename", ruby_xml_html_parser_filename_get, 0);
441
+ rb_define_method(cXMLHTMLParser, "filename=", ruby_xml_html_parser_filename_set, 1);
442
+ rb_define_method(cXMLHTMLParser, "io", ruby_xml_html_parser_io_get, 0);
443
+ rb_define_method(cXMLHTMLParser, "io=", ruby_xml_html_parser_io_set, 1);
444
+ */
445
+ rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
446
+ rb_define_method(cXMLHTMLParser, "context", ruby_xml_html_parser_context_get, 0);
447
+ rb_define_method(cXMLHTMLParser, "string", ruby_xml_html_parser_str_get, 0);
448
+ rb_define_method(cXMLHTMLParser, "string=", ruby_xml_html_parser_str_set, 1);
449
+ }