libxml-ruby 0.8.3 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (168) hide show
  1. data/CHANGES +18 -0
  2. data/RAKEFILE +15 -39
  3. data/README +48 -47
  4. data/ext/libxml/libxml.c +847 -22
  5. data/ext/libxml/ruby_libxml.h +71 -95
  6. data/ext/libxml/ruby_xml_attr.c +500 -500
  7. data/ext/libxml/ruby_xml_attributes.c +1 -1
  8. data/ext/libxml/ruby_xml_document.c +1144 -1135
  9. data/ext/libxml/ruby_xml_document.h +4 -11
  10. data/ext/libxml/ruby_xml_dtd.c +27 -0
  11. data/ext/libxml/ruby_xml_encoding.c +164 -0
  12. data/ext/libxml/ruby_xml_encoding.h +13 -0
  13. data/ext/libxml/ruby_xml_error.c +941 -0
  14. data/ext/libxml/ruby_xml_error.h +13 -0
  15. data/ext/libxml/ruby_xml_html_parser.c +71 -387
  16. data/ext/libxml/ruby_xml_html_parser.h +1 -17
  17. data/ext/libxml/ruby_xml_input.c +179 -0
  18. data/ext/libxml/ruby_xml_input.h +18 -0
  19. data/ext/libxml/ruby_xml_input_cbg.c +17 -3
  20. data/ext/libxml/ruby_xml_node.c +1566 -1582
  21. data/ext/libxml/ruby_xml_node.h +1 -4
  22. data/ext/libxml/ruby_xml_ns.c +14 -3
  23. data/ext/libxml/ruby_xml_parser.c +164 -1398
  24. data/ext/libxml/ruby_xml_parser.h +5 -17
  25. data/ext/libxml/ruby_xml_parser_context.c +131 -169
  26. data/ext/libxml/ruby_xml_parser_context.h +2 -9
  27. data/ext/libxml/ruby_xml_reader.c +910 -945
  28. data/ext/libxml/ruby_xml_relaxng.c +32 -3
  29. data/ext/libxml/ruby_xml_sax_parser.c +106 -364
  30. data/ext/libxml/ruby_xml_sax_parser.h +1 -37
  31. data/ext/libxml/ruby_xml_schema.c +174 -145
  32. data/ext/libxml/ruby_xml_xinclude.c +9 -5
  33. data/ext/libxml/ruby_xml_xpath.c +25 -6
  34. data/ext/libxml/ruby_xml_xpath.h +1 -2
  35. data/ext/libxml/ruby_xml_xpath_context.c +17 -19
  36. data/ext/libxml/ruby_xml_xpath_object.c +60 -56
  37. data/ext/libxml/ruby_xml_xpointer.c +11 -5
  38. data/ext/libxml/sax_parser_callbacks.inc +42 -37
  39. data/ext/libxml/version.h +3 -3
  40. data/ext/mingw/Rakefile +20 -27
  41. data/ext/mingw/build.rake +41 -0
  42. data/ext/vc/libxml_ruby.vcproj +23 -15
  43. data/lib/libxml.rb +8 -2
  44. data/lib/libxml/document.rb +16 -4
  45. data/lib/libxml/error.rb +84 -0
  46. data/lib/libxml/hpricot.rb +76 -0
  47. data/lib/libxml/html_parser.rb +61 -0
  48. data/lib/libxml/node.rb +36 -25
  49. data/lib/libxml/parser.rb +312 -33
  50. data/lib/libxml/parser_context.rb +17 -0
  51. data/lib/libxml/properties.rb +15 -2
  52. data/lib/libxml/reader.rb +15 -0
  53. data/lib/libxml/sax_callbacks.rb +179 -0
  54. data/lib/libxml/sax_parser.rb +42 -0
  55. data/lib/libxml/tree.rb +1 -2
  56. data/lib/libxml/xpath_object.rb +12 -0
  57. data/test/model/atom.xml +4 -0
  58. data/test/tc_attributes.rb +43 -19
  59. data/test/tc_document.rb +1 -1
  60. data/test/tc_document_write.rb +15 -8
  61. data/test/tc_dtd.rb +36 -20
  62. data/test/tc_encoding.rb +13 -0
  63. data/test/tc_error.rb +136 -0
  64. data/test/tc_node.rb +2 -3
  65. data/test/tc_node_copy.rb +1 -1
  66. data/test/tc_node_edit.rb +6 -0
  67. data/test/tc_ns.rb +18 -0
  68. data/test/tc_parser.rb +113 -228
  69. data/test/tc_parser_context.rb +1 -2
  70. data/test/tc_reader.rb +24 -14
  71. data/test/tc_relaxng.rb +18 -6
  72. data/test/tc_sax_parser.rb +48 -13
  73. data/test/tc_schema.rb +20 -8
  74. data/test/tc_well_formed.rb +2 -1
  75. data/test/tc_xml.rb +212 -0
  76. data/test/tc_xpath.rb +60 -46
  77. data/test/tc_xpointer.rb +7 -11
  78. data/test/test_suite.rb +4 -3
  79. metadata +26 -109
  80. data/doc/rdoc/classes/LibXML.html +0 -241
  81. data/doc/rdoc/classes/LibXML/XML.html +0 -185
  82. data/doc/rdoc/classes/LibXML/XML/Attr.html +0 -1010
  83. data/doc/rdoc/classes/LibXML/XML/Attributes.html +0 -526
  84. data/doc/rdoc/classes/LibXML/XML/Document.html +0 -1489
  85. data/doc/rdoc/classes/LibXML/XML/Dtd.html +0 -213
  86. data/doc/rdoc/classes/LibXML/XML/Error.html +0 -117
  87. data/doc/rdoc/classes/LibXML/XML/HTMLParser.html +0 -348
  88. data/doc/rdoc/classes/LibXML/XML/InputCallbacks.html +0 -160
  89. data/doc/rdoc/classes/LibXML/XML/NS.html +0 -381
  90. data/doc/rdoc/classes/LibXML/XML/Node.html +0 -3396
  91. data/doc/rdoc/classes/LibXML/XML/Node/FailedModify.html +0 -123
  92. data/doc/rdoc/classes/LibXML/XML/Node/Set.html +0 -440
  93. data/doc/rdoc/classes/LibXML/XML/Node/SetNamespace.html +0 -123
  94. data/doc/rdoc/classes/LibXML/XML/Node/UnknownType.html +0 -123
  95. data/doc/rdoc/classes/LibXML/XML/Parser.html +0 -2239
  96. data/doc/rdoc/classes/LibXML/XML/Parser/Context.html +0 -1255
  97. data/doc/rdoc/classes/LibXML/XML/Parser/ParseError.html +0 -123
  98. data/doc/rdoc/classes/LibXML/XML/Reader.html +0 -2264
  99. data/doc/rdoc/classes/LibXML/XML/RelaxNG.html +0 -237
  100. data/doc/rdoc/classes/LibXML/XML/SaxParser.html +0 -415
  101. data/doc/rdoc/classes/LibXML/XML/Schema.html +0 -308
  102. data/doc/rdoc/classes/LibXML/XML/State.html +0 -124
  103. data/doc/rdoc/classes/LibXML/XML/Tree.html +0 -111
  104. data/doc/rdoc/classes/LibXML/XML/XInclude.html +0 -123
  105. data/doc/rdoc/classes/LibXML/XML/XInclude/Error.html +0 -117
  106. data/doc/rdoc/classes/LibXML/XML/XMLParserOptions.html +0 -198
  107. data/doc/rdoc/classes/LibXML/XML/XPath.html +0 -184
  108. data/doc/rdoc/classes/LibXML/XML/XPath/Context.html +0 -404
  109. data/doc/rdoc/classes/LibXML/XML/XPath/InvalidPath.html +0 -172
  110. data/doc/rdoc/classes/LibXML/XML/XPath/Object.html +0 -627
  111. data/doc/rdoc/classes/LibXML/XML/XPointer.html +0 -170
  112. data/doc/rdoc/classes/LibXML/XML/XPointer/Context.html +0 -123
  113. data/doc/rdoc/classes/LibXML/XML/XPointer/Context/InvalidPath.html +0 -117
  114. data/doc/rdoc/classes/LibXML/XML/XPointer/InvalidExpression.html +0 -124
  115. data/doc/rdoc/classes/singleton.html +0 -114
  116. data/doc/rdoc/created.rid +0 -1
  117. data/doc/rdoc/files/CHANGES.html +0 -442
  118. data/doc/rdoc/files/LICENSE.html +0 -133
  119. data/doc/rdoc/files/README.html +0 -388
  120. data/doc/rdoc/files/VERSION.html +0 -107
  121. data/doc/rdoc/files/ext/libxml/cbg_c.html +0 -101
  122. data/doc/rdoc/files/ext/libxml/libxml_c.html +0 -101
  123. data/doc/rdoc/files/ext/libxml/ruby_xml_attr_c.html +0 -101
  124. data/doc/rdoc/files/ext/libxml/ruby_xml_attributes_c.html +0 -101
  125. data/doc/rdoc/files/ext/libxml/ruby_xml_document_c.html +0 -101
  126. data/doc/rdoc/files/ext/libxml/ruby_xml_dtd_c.html +0 -101
  127. data/doc/rdoc/files/ext/libxml/ruby_xml_html_parser_c.html +0 -101
  128. data/doc/rdoc/files/ext/libxml/ruby_xml_input_cbg_c.html +0 -101
  129. data/doc/rdoc/files/ext/libxml/ruby_xml_node_c.html +0 -101
  130. data/doc/rdoc/files/ext/libxml/ruby_xml_node_set_c.html +0 -101
  131. data/doc/rdoc/files/ext/libxml/ruby_xml_ns_c.html +0 -101
  132. data/doc/rdoc/files/ext/libxml/ruby_xml_parser_c.html +0 -101
  133. data/doc/rdoc/files/ext/libxml/ruby_xml_parser_context_c.html +0 -101
  134. data/doc/rdoc/files/ext/libxml/ruby_xml_reader_c.html +0 -101
  135. data/doc/rdoc/files/ext/libxml/ruby_xml_relaxng_c.html +0 -101
  136. data/doc/rdoc/files/ext/libxml/ruby_xml_sax_parser_c.html +0 -101
  137. data/doc/rdoc/files/ext/libxml/ruby_xml_schema_c.html +0 -101
  138. data/doc/rdoc/files/ext/libxml/ruby_xml_state_c.html +0 -101
  139. data/doc/rdoc/files/ext/libxml/ruby_xml_xinclude_c.html +0 -101
  140. data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_c.html +0 -101
  141. data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_context_c.html +0 -101
  142. data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_object_c.html +0 -101
  143. data/doc/rdoc/files/ext/libxml/ruby_xml_xpointer_c.html +0 -101
  144. data/doc/rdoc/files/ext/libxml/ruby_xml_xpointer_context_c.html +0 -101
  145. data/doc/rdoc/files/lib/libxml/attr_rb.html +0 -108
  146. data/doc/rdoc/files/lib/libxml/attributes_rb.html +0 -108
  147. data/doc/rdoc/files/lib/libxml/document_rb.html +0 -108
  148. data/doc/rdoc/files/lib/libxml/node_rb.html +0 -108
  149. data/doc/rdoc/files/lib/libxml/node_set_rb.html +0 -108
  150. data/doc/rdoc/files/lib/libxml/parser_options_rb.html +0 -107
  151. data/doc/rdoc/files/lib/libxml/parser_rb.html +0 -101
  152. data/doc/rdoc/files/lib/libxml/properties_rb.html +0 -108
  153. data/doc/rdoc/files/lib/libxml/tree_rb.html +0 -107
  154. data/doc/rdoc/files/lib/libxml_rb.html +0 -124
  155. data/doc/rdoc/files/lib/xml/libxml_rb.html +0 -124
  156. data/doc/rdoc/files/lib/xml_rb.html +0 -134
  157. data/doc/rdoc/fr_class_index.html +0 -62
  158. data/doc/rdoc/fr_file_index.html +0 -66
  159. data/doc/rdoc/fr_method_index.html +0 -392
  160. data/doc/rdoc/index.html +0 -24
  161. data/doc/rdoc/rdoc-style.css +0 -208
  162. data/ext/libxml/ruby_xml_node_set.c +0 -172
  163. data/ext/libxml/ruby_xml_node_set.h +0 -20
  164. data/ext/libxml/ruby_xml_xpointer_context.c +0 -22
  165. data/ext/libxml/ruby_xml_xpointer_context.h +0 -18
  166. data/lib/libxml/node_set.rb +0 -27
  167. data/test/tc_node_set.rb +0 -24
  168. data/test/tc_node_set2.rb +0 -37
@@ -0,0 +1,13 @@
1
+ /* $Id: ruby_xml_ns.h 324 2008-07-08 23:00:02Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #ifndef __RUBY_XML_ERROR__
6
+ #define __RUBY_XML_ERROR__
7
+
8
+ extern VALUE eXMLError;
9
+
10
+ void ruby_init_xml_error();
11
+ void ruby_xml_raise(xmlErrorPtr xerror);
12
+
13
+ #endif
@@ -1,312 +1,72 @@
1
- /* $Id: ruby_xml_html_parser.c 461 2008-07-15 21:35:56Z cfis $ */
1
+ /* $Id: ruby_xml_html_parser.c 580 2008-11-18 09:24:43Z cfis $ */
2
2
 
3
3
  /* Please see the LICENSE file for copyright and distribution information */
4
4
 
5
5
  #include "ruby_libxml.h"
6
6
 
7
7
  VALUE cXMLHTMLParser;
8
-
9
- //static int
10
- //ctxtRead(FILE *f, char * buf, int len) {
11
- // return(fread(buf, 1, len, f));
12
- //}
8
+ ID INPUT_ATTR;
9
+ ID CONTEXT_ATTR;
13
10
 
14
11
 
15
12
  /*
16
- * call-seq:
17
- * parser.filename -> "filename"
18
- *
19
- * Obtain the filename this parser will read from.
20
- */
21
- /*
22
- VALUE
23
- ruby_xml_html_parser_filename_get(VALUE self) {
24
- ruby_xml_html_parser *rxp;
25
- rx_file_data *data;
13
+ * Document-class: LibXML::XML::HTMLParser
14
+ *
15
+ * The HTML parser implements an HTML 4.0 non-verifying parser with an API
16
+ * compatible with the XML::Parser. In contrast with the XML::Parser,
17
+ * it can parse "real world" HTML, even if it is severely broken from a
18
+ * specification point of view. */
26
19
 
27
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
28
- if (rxp->data == NULL)
29
- return(Qnil);
30
-
31
- if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE)
32
- return(Qnil);
33
-
34
- data = (rx_file_data *)rxp->data;
35
- return(data->filename);
36
- }
37
- */
38
20
 
39
21
  /*
40
22
  * call-seq:
41
- * parser.filename = "filename"
23
+ * XML::HTMLParser.initialize -> parser
42
24
  *
43
- * Set the filename this parser will read from.
25
+ * Initializes a new parser instance with no pre-determined source.
44
26
  */
45
- /*
46
27
  VALUE
47
- ruby_xml_html_parser_filename_set(VALUE self, VALUE filename) {
48
- ruby_xml_html_parser *rxp;
49
- ruby_xml_parser_context *rxpc;
50
- rx_file_data *data;
51
-
52
- Check_Type(filename, T_STRING);
53
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
54
-
55
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
56
- if (rxp->data != NULL)
57
- rb_fatal("crap, this should be null");
58
-
59
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
60
- data = ALLOC(rx_file_data);
61
- rxp->data = data;
62
- } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE) {
63
- return(Qnil);
64
- }
65
-
66
- rxp->ctxt = ruby_xml_parser_context_new();
67
- data = (rx_file_data *)rxp->data;
68
- data->filename = filename;
69
-
70
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
71
- rxpc->ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename));
72
- if (rxpc->ctxt == NULL)
73
- rb_sys_fail(StringValuePtr(filename));
74
-
75
- return(data->filename);
28
+ ruby_xml_html_parser_initialize(VALUE self) {
29
+ VALUE input = rb_class_new_instance(0, NULL, cXMLInput);
30
+ rb_iv_set(self, "@input", input);
31
+ rb_iv_set(self, "@context", Qnil);
32
+ return self;
76
33
  }
77
- */
78
34
 
79
- void
80
- ruby_xml_html_parser_free(ruby_xml_html_parser *rxp) {
81
- void *data;
35
+ htmlParserCtxtPtr
36
+ ruby_xml_html_parser_file_ctxt(VALUE input) {
37
+ VALUE file = rb_ivar_get(input, FILE_ATTR);
38
+ VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
39
+ VALUE encodingStr = ruby_xml_encoding_to_s(Qnil, encoding);
82
40
 
83
- switch(rxp->data_type) {
84
- case RUBY_LIBXML_SRC_TYPE_NULL:
85
- break;
86
- case RUBY_LIBXML_SRC_TYPE_FILE:
87
- data = (void *)(rx_file_data *)rxp->data;
88
- ruby_xfree((rx_file_data *)data);
89
- break;
90
- case RUBY_LIBXML_SRC_TYPE_STRING:
91
- data = (void *)(rx_string_data *)rxp->data;
92
- ruby_xfree((rx_string_data *)data);
93
- break;
94
- case RUBY_LIBXML_SRC_TYPE_IO:
95
- data = (void *)(rx_io_data *)rxp->data;
96
- ruby_xfree((rx_io_data *)data);
97
- break;
98
- default:
99
- rb_fatal("Unknown data type, %d", rxp->data_type);
100
- }
101
-
102
- ruby_xfree(rxp);
41
+ return htmlCreateFileParserCtxt(StringValuePtr(file), StringValuePtr(encodingStr));
103
42
  }
104
43
 
105
-
106
- /*
107
- * call-seq:
108
- * parser.io -> IO
109
- *
110
- * Obtain the IO instance this parser works with.
111
- */
112
- /*
113
- VALUE
114
- ruby_xml_html_parser_io_get(VALUE self, VALUE io) {
115
- ruby_xml_html_parser *rxp;
116
- rx_io_data *data;
117
-
118
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
119
-
120
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL ||
121
- rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO ||
122
- rxp->data == NULL)
123
- return(Qnil);
124
-
125
- data = (rx_io_data *)rxp->data;
126
-
127
- return(data->io);
44
+ htmlParserCtxtPtr
45
+ ruby_xml_html_parser_str_ctxt(VALUE input) {
46
+ VALUE data = rb_ivar_get(input, STRING_ATTR);
47
+ return htmlCreateMemoryParserCtxt(StringValuePtr(data), RSTRING_LEN(data));
128
48
  }
129
- */
130
49
 
131
50
  /*
132
- * call-seq:
133
- * parser.io = IO
134
- *
135
- * Set the IO instance this parser works with.
136
- */
137
- /*
138
- VALUE
139
- ruby_xml_html_parser_io_set(VALUE self, VALUE io) {
140
- ruby_xml_html_parser *rxp;
141
- ruby_xml_parser_context *rxpc;
142
- rx_io_data *data;
51
+ htmlParserCtxtPtr
52
+ ruby_xml_html_parser_io_ctxt(VALUE input) {
53
+ VALUE io = rb_ivar_get(input, IO_ATTR);
54
+ VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
55
+ xmlCharEncoding xmlEncoding = NUM2INT(encoding);
56
+
143
57
  OpenFile *fptr;
144
58
  FILE *f;
145
59
 
146
- if (!rb_obj_is_kind_of(io, rb_cIO))
147
- rb_raise(rb_eTypeError, "need an IO object");
148
-
149
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
150
-
151
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
152
- if (rxp->data != NULL)
153
- rb_fatal("crap, this should be null");
154
-
155
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
156
- data = ALLOC(rx_io_data);
157
- rxp->data = data;
158
- } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO) {
159
- return(Qnil);
160
- }
161
-
162
- rxp->ctxt = ruby_xml_parser_context_new();
163
- data = (rx_io_data *)rxp->data;
164
- data->io = io;
165
-
166
60
  GetOpenFile(io, fptr);
167
61
  rb_io_check_readable(fptr);
168
62
  f = GetWriteFile(fptr);
169
63
 
170
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
171
- rxpc->ctxt = htmlCreateIOParserCtxt(NULL, NULL,
64
+ return htmlCreateIOParserCtxt(NULL, NULL,
172
65
  (xmlInputReadCallback) ctxtRead,
173
- NULL, f, XML_CHAR_ENCODING_NONE);
174
- if (NIL_P(rxpc->ctxt))
175
- rb_sys_fail(0);
176
-
177
- return(data->io);
66
+ NULL, f, xmlEncoding);
178
67
  }
179
68
  */
180
69
 
181
- void
182
- ruby_xml_html_parser_mark(ruby_xml_html_parser *rxp) {
183
- if (rxp == NULL) return;
184
- if (!NIL_P(rxp->ctxt))
185
- rb_gc_mark(rxp->ctxt);
186
-
187
- rb_gc_mark(LIBXML_STATE);
188
-
189
- switch(rxp->data_type) {
190
- case RUBY_LIBXML_SRC_TYPE_NULL:
191
- break;
192
- case RUBY_LIBXML_SRC_TYPE_FILE:
193
- if (!NIL_P(((rx_file_data *)rxp->data)->filename))
194
- rb_gc_mark(((rx_file_data *)rxp->data)->filename);
195
- break;
196
- case RUBY_LIBXML_SRC_TYPE_STRING:
197
- if (!NIL_P(((rx_string_data *)rxp->data)->str))
198
- rb_gc_mark(((rx_string_data *)rxp->data)->str);
199
- break;
200
- case RUBY_LIBXML_SRC_TYPE_IO:
201
- if (!NIL_P(((rx_io_data *)rxp->data)->io))
202
- rb_gc_mark(((rx_io_data *)rxp->data)->io);
203
- break;
204
- default:
205
- rb_fatal("unknown datatype: %d", rxp->data_type);
206
- }
207
- }
208
-
209
-
210
- /*
211
- * call-seq:
212
- * XML::HTMLParser.new -> parser
213
- *
214
- * Create a new parser instance with no pre-determined source.
215
- */
216
- VALUE
217
- ruby_xml_html_parser_new(VALUE class) {
218
- ruby_xml_html_parser *rxp;
219
-
220
- rxp = ALLOC(ruby_xml_html_parser);
221
- rxp->ctxt = Qnil;
222
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
223
- rxp->data = NULL;
224
- rxp->parsed = 0;
225
-
226
- return(Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
227
- ruby_xml_html_parser_free, rxp));
228
- }
229
-
230
-
231
- /*
232
- * call-seq:
233
- * XML::HTMLParser.file -> parser
234
- *
235
- * Create a new parser instance that will read the specified file.
236
- */
237
- /*
238
- VALUE
239
- ruby_xml_html_parser_new_file(VALUE class, VALUE filename) {
240
- VALUE obj;
241
- ruby_xml_html_parser *rxp;
242
- rx_file_data *data;
243
-
244
- obj = ruby_xml_html_parser_new(class);
245
- Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
246
-
247
- data = ALLOC(rx_file_data);
248
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
249
- rxp->data = data;
250
-
251
- ruby_xml_html_parser_filename_set(obj, filename);
252
-
253
- return(obj);
254
- }
255
- */
256
-
257
- /*
258
- * call-seq:
259
- * XML::HTMLParser.io -> parser
260
- *
261
- * Create a new parser instance that will read from the
262
- * specified IO object.
263
- */
264
- /*
265
- VALUE
266
- ruby_xml_html_parser_new_io(VALUE class, VALUE io) {
267
- VALUE obj;
268
- ruby_xml_html_parser *rxp;
269
- rx_io_data *data;
270
-
271
- obj = ruby_xml_html_parser_new(class);
272
- Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
273
-
274
- data = ALLOC(rx_io_data);
275
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
276
- rxp->data = data;
277
-
278
- ruby_xml_html_parser_io_set(obj, io);
279
-
280
- return(obj);
281
- }
282
- */
283
-
284
- /*
285
- * call-seq:
286
- * XML::HTMLParser.string -> parser
287
- *
288
- * Create a new parser instance that will parse the given
289
- * string.
290
- */
291
- VALUE
292
- ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
293
- VALUE obj;
294
- ruby_xml_html_parser *rxp;
295
- rx_string_data *data;
296
-
297
- obj = ruby_xml_html_parser_new(class);
298
- Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
299
-
300
- data = ALLOC(rx_string_data);
301
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
302
- rxp->data = data;
303
-
304
- ruby_xml_html_parser_str_set(obj, str);
305
-
306
- return(obj);
307
- }
308
-
309
-
310
70
  /*
311
71
  * call-seq:
312
72
  * parser.parse -> document
@@ -317,111 +77,39 @@ ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
317
77
  */
318
78
  VALUE
319
79
  ruby_xml_html_parser_parse(VALUE self) {
320
- ruby_xml_html_parser *rxp;
321
- ruby_xml_parser_context *rxpc;
322
- htmlDocPtr xdp;
323
- VALUE doc;
324
-
325
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
326
-
327
- switch (rxp->data_type) {
328
- case RUBY_LIBXML_SRC_TYPE_NULL:
329
- return(Qnil);
330
- case RUBY_LIBXML_SRC_TYPE_STRING:
331
- //case RUBY_LIBXML_SRC_TYPE_FILE:
332
- //case RUBY_LIBXML_SRC_TYPE_IO:
333
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
334
-
335
- /* don't check return values here, the HTML parser returns errors
336
- * but still allows the resulting tree to be used.
337
- */
338
- htmlParseDocument(rxpc->ctxt);
339
- xdp = rxpc->ctxt->myDoc;
340
- rxp->parsed = 1;
341
-
342
- doc = ruby_xml_document_wrap(xdp);
343
- break;
344
- default:
345
- rb_fatal("Unknown data type, %d", rxp->data_type);
346
- }
347
-
348
- return(doc);
349
- }
350
-
351
-
352
- /*
353
- * call-seq:
354
- * parser.context -> context
355
- *
356
- * Obtain the XML::Parser::Context associated with this
357
- * parser.
358
- */
359
- VALUE
360
- ruby_xml_html_parser_context_get(VALUE self) {
361
- ruby_xml_html_parser *rxp;
362
-
363
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
364
- if (rxp->ctxt == Qnil)
365
- return(Qnil);
80
+ xmlParserCtxtPtr ctxt;
81
+ VALUE context;
82
+ VALUE input = rb_ivar_get(self, INPUT_ATTR);
83
+
84
+ context = rb_ivar_get(self, CONTEXT_ATTR);
85
+ if (context != Qnil)
86
+ rb_raise(rb_eRuntimeError, "You cannot parse a data source twice");
87
+
88
+ if (rb_ivar_get(input, FILE_ATTR) != Qnil)
89
+ ctxt = ruby_xml_html_parser_file_ctxt(input);
90
+ else if (rb_ivar_get(input, STRING_ATTR) != Qnil)
91
+ ctxt = ruby_xml_html_parser_str_ctxt(input);
92
+ /*else if (rb_ivar_get(input, DOCUMENT_ATTR) != Qnil)
93
+ ctxt = ruby_xml_html_parser_parse_document(input);
94
+ else if (rb_ivar_get(input, IO_ATTR) != Qnil)
95
+ ctxt = ruby_xml_html_parser_io_ctxt(input);*/
366
96
  else
367
- return(rxp->ctxt);
368
- }
369
-
370
-
371
- /*
372
- * call-seq:
373
- * parser.string -> "string"
374
- *
375
- * Obtain the string this parser works with.
376
- */
377
- VALUE
378
- ruby_xml_html_parser_str_get(VALUE self) {
379
- ruby_xml_html_parser *rxp;
380
- rx_string_data *data;
381
-
382
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
383
- if (rxp->data == NULL || rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING)
384
- return(Qnil);
385
-
386
- data = (rx_string_data *)rxp->data;
387
- return(data->str);
388
- }
389
-
390
-
391
- /*
392
- * call-seq:
393
- * parser.string = "string"
394
- *
395
- * Set the string this parser works with.
396
- */
397
- VALUE
398
- ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
399
- ruby_xml_html_parser *rxp;
400
- ruby_xml_parser_context *rxpc;
401
- rx_string_data *data;
402
-
403
- Check_Type(str, T_STRING);
404
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
405
-
406
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
407
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
408
- data = ALLOC(rx_string_data);
409
- rxp->data = data;
410
- } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
411
- return(Qnil);
97
+ rb_raise(rb_eArgError, "You must specify a parser data source");
98
+
99
+ if (!ctxt)
100
+ ruby_xml_raise(&xmlLastError);
101
+
102
+ context = ruby_xml_parser_context_wrap(ctxt);
103
+ rb_ivar_set(self, CONTEXT_ATTR, context);
104
+
105
+ if (htmlParseDocument(ctxt) == -1 || !ctxt->wellFormed) {
106
+ xmlFreeDoc(ctxt->myDoc);
107
+ ruby_xml_raise(&ctxt->lastError);
412
108
  }
413
109
 
414
- rxp->ctxt = ruby_xml_parser_context_new();
415
- data = (rx_string_data *)rxp->data;
416
- data->str = str;
417
-
418
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
419
- rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));
420
-
421
- return(data->str);
110
+ return ruby_xml_document_wrap(ctxt->myDoc);
422
111
  }
423
112
 
424
-
425
113
  // Rdoc needs to know
426
114
  #ifdef RDOC_NEVER_DEFINED
427
115
  mLibXML = rb_define_module("LibXML");
@@ -430,20 +118,16 @@ ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
430
118
 
431
119
  void
432
120
  ruby_init_html_parser(void) {
121
+ INPUT_ATTR = rb_intern("@input");
122
+ CONTEXT_ATTR = rb_intern("@context");
123
+
433
124
  cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
125
+
126
+ /* Attributes */
127
+ rb_define_attr(cXMLHTMLParser, "input", 1, 0);
128
+ rb_define_attr(cXMLHTMLParser, "context", 1, 0);
434
129
 
435
- rb_define_singleton_method(cXMLHTMLParser, "new", ruby_xml_html_parser_new, 0);
436
- rb_define_singleton_method(cXMLHTMLParser, "string", ruby_xml_html_parser_new_string, 1);
437
- /*
438
- rb_define_singleton_method(cXMLHTMLParser, "file", ruby_xml_html_parser_new_file, 1);
439
- rb_define_singleton_method(cXMLHTMLParser, "io", ruby_xml_html_parser_new_io, 1);
440
- rb_define_method(cXMLHTMLParser, "filename", ruby_xml_html_parser_filename_get, 0);
441
- rb_define_method(cXMLHTMLParser, "filename=", ruby_xml_html_parser_filename_set, 1);
442
- rb_define_method(cXMLHTMLParser, "io", ruby_xml_html_parser_io_get, 0);
443
- rb_define_method(cXMLHTMLParser, "io=", ruby_xml_html_parser_io_set, 1);
444
- */
130
+ /* Instance methods */
131
+ rb_define_method(cXMLHTMLParser, "initialize", ruby_xml_html_parser_initialize, 0);
445
132
  rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
446
- rb_define_method(cXMLHTMLParser, "context", ruby_xml_html_parser_context_get, 0);
447
- rb_define_method(cXMLHTMLParser, "string", ruby_xml_html_parser_str_get, 0);
448
- rb_define_method(cXMLHTMLParser, "string=", ruby_xml_html_parser_str_set, 1);
449
133
  }