libxml-ruby 0.8.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. data/CHANGES +18 -0
  2. data/RAKEFILE +15 -39
  3. data/README +48 -47
  4. data/ext/libxml/libxml.c +847 -22
  5. data/ext/libxml/ruby_libxml.h +71 -95
  6. data/ext/libxml/ruby_xml_attr.c +500 -500
  7. data/ext/libxml/ruby_xml_attributes.c +1 -1
  8. data/ext/libxml/ruby_xml_document.c +1144 -1135
  9. data/ext/libxml/ruby_xml_document.h +4 -11
  10. data/ext/libxml/ruby_xml_dtd.c +27 -0
  11. data/ext/libxml/ruby_xml_encoding.c +164 -0
  12. data/ext/libxml/ruby_xml_encoding.h +13 -0
  13. data/ext/libxml/ruby_xml_error.c +941 -0
  14. data/ext/libxml/ruby_xml_error.h +13 -0
  15. data/ext/libxml/ruby_xml_html_parser.c +71 -387
  16. data/ext/libxml/ruby_xml_html_parser.h +1 -17
  17. data/ext/libxml/ruby_xml_input.c +179 -0
  18. data/ext/libxml/ruby_xml_input.h +18 -0
  19. data/ext/libxml/ruby_xml_input_cbg.c +17 -3
  20. data/ext/libxml/ruby_xml_node.c +1566 -1582
  21. data/ext/libxml/ruby_xml_node.h +1 -4
  22. data/ext/libxml/ruby_xml_ns.c +14 -3
  23. data/ext/libxml/ruby_xml_parser.c +164 -1398
  24. data/ext/libxml/ruby_xml_parser.h +5 -17
  25. data/ext/libxml/ruby_xml_parser_context.c +131 -169
  26. data/ext/libxml/ruby_xml_parser_context.h +2 -9
  27. data/ext/libxml/ruby_xml_reader.c +910 -945
  28. data/ext/libxml/ruby_xml_relaxng.c +32 -3
  29. data/ext/libxml/ruby_xml_sax_parser.c +106 -364
  30. data/ext/libxml/ruby_xml_sax_parser.h +1 -37
  31. data/ext/libxml/ruby_xml_schema.c +174 -145
  32. data/ext/libxml/ruby_xml_xinclude.c +9 -5
  33. data/ext/libxml/ruby_xml_xpath.c +25 -6
  34. data/ext/libxml/ruby_xml_xpath.h +1 -2
  35. data/ext/libxml/ruby_xml_xpath_context.c +17 -19
  36. data/ext/libxml/ruby_xml_xpath_object.c +60 -56
  37. data/ext/libxml/ruby_xml_xpointer.c +11 -5
  38. data/ext/libxml/sax_parser_callbacks.inc +42 -37
  39. data/ext/libxml/version.h +3 -3
  40. data/ext/mingw/Rakefile +20 -27
  41. data/ext/mingw/build.rake +41 -0
  42. data/ext/vc/libxml_ruby.vcproj +23 -15
  43. data/lib/libxml.rb +8 -2
  44. data/lib/libxml/document.rb +16 -4
  45. data/lib/libxml/error.rb +84 -0
  46. data/lib/libxml/hpricot.rb +76 -0
  47. data/lib/libxml/html_parser.rb +61 -0
  48. data/lib/libxml/node.rb +36 -25
  49. data/lib/libxml/parser.rb +312 -33
  50. data/lib/libxml/parser_context.rb +17 -0
  51. data/lib/libxml/properties.rb +15 -2
  52. data/lib/libxml/reader.rb +15 -0
  53. data/lib/libxml/sax_callbacks.rb +179 -0
  54. data/lib/libxml/sax_parser.rb +42 -0
  55. data/lib/libxml/tree.rb +1 -2
  56. data/lib/libxml/xpath_object.rb +12 -0
  57. data/test/model/atom.xml +4 -0
  58. data/test/tc_attributes.rb +43 -19
  59. data/test/tc_document.rb +1 -1
  60. data/test/tc_document_write.rb +15 -8
  61. data/test/tc_dtd.rb +36 -20
  62. data/test/tc_encoding.rb +13 -0
  63. data/test/tc_error.rb +136 -0
  64. data/test/tc_node.rb +2 -3
  65. data/test/tc_node_copy.rb +1 -1
  66. data/test/tc_node_edit.rb +6 -0
  67. data/test/tc_ns.rb +18 -0
  68. data/test/tc_parser.rb +113 -228
  69. data/test/tc_parser_context.rb +1 -2
  70. data/test/tc_reader.rb +24 -14
  71. data/test/tc_relaxng.rb +18 -6
  72. data/test/tc_sax_parser.rb +48 -13
  73. data/test/tc_schema.rb +20 -8
  74. data/test/tc_well_formed.rb +2 -1
  75. data/test/tc_xml.rb +212 -0
  76. data/test/tc_xpath.rb +60 -46
  77. data/test/tc_xpointer.rb +7 -11
  78. data/test/test_suite.rb +4 -3
  79. metadata +26 -109
  80. data/doc/rdoc/classes/LibXML.html +0 -241
  81. data/doc/rdoc/classes/LibXML/XML.html +0 -185
  82. data/doc/rdoc/classes/LibXML/XML/Attr.html +0 -1010
  83. data/doc/rdoc/classes/LibXML/XML/Attributes.html +0 -526
  84. data/doc/rdoc/classes/LibXML/XML/Document.html +0 -1489
  85. data/doc/rdoc/classes/LibXML/XML/Dtd.html +0 -213
  86. data/doc/rdoc/classes/LibXML/XML/Error.html +0 -117
  87. data/doc/rdoc/classes/LibXML/XML/HTMLParser.html +0 -348
  88. data/doc/rdoc/classes/LibXML/XML/InputCallbacks.html +0 -160
  89. data/doc/rdoc/classes/LibXML/XML/NS.html +0 -381
  90. data/doc/rdoc/classes/LibXML/XML/Node.html +0 -3396
  91. data/doc/rdoc/classes/LibXML/XML/Node/FailedModify.html +0 -123
  92. data/doc/rdoc/classes/LibXML/XML/Node/Set.html +0 -440
  93. data/doc/rdoc/classes/LibXML/XML/Node/SetNamespace.html +0 -123
  94. data/doc/rdoc/classes/LibXML/XML/Node/UnknownType.html +0 -123
  95. data/doc/rdoc/classes/LibXML/XML/Parser.html +0 -2239
  96. data/doc/rdoc/classes/LibXML/XML/Parser/Context.html +0 -1255
  97. data/doc/rdoc/classes/LibXML/XML/Parser/ParseError.html +0 -123
  98. data/doc/rdoc/classes/LibXML/XML/Reader.html +0 -2264
  99. data/doc/rdoc/classes/LibXML/XML/RelaxNG.html +0 -237
  100. data/doc/rdoc/classes/LibXML/XML/SaxParser.html +0 -415
  101. data/doc/rdoc/classes/LibXML/XML/Schema.html +0 -308
  102. data/doc/rdoc/classes/LibXML/XML/State.html +0 -124
  103. data/doc/rdoc/classes/LibXML/XML/Tree.html +0 -111
  104. data/doc/rdoc/classes/LibXML/XML/XInclude.html +0 -123
  105. data/doc/rdoc/classes/LibXML/XML/XInclude/Error.html +0 -117
  106. data/doc/rdoc/classes/LibXML/XML/XMLParserOptions.html +0 -198
  107. data/doc/rdoc/classes/LibXML/XML/XPath.html +0 -184
  108. data/doc/rdoc/classes/LibXML/XML/XPath/Context.html +0 -404
  109. data/doc/rdoc/classes/LibXML/XML/XPath/InvalidPath.html +0 -172
  110. data/doc/rdoc/classes/LibXML/XML/XPath/Object.html +0 -627
  111. data/doc/rdoc/classes/LibXML/XML/XPointer.html +0 -170
  112. data/doc/rdoc/classes/LibXML/XML/XPointer/Context.html +0 -123
  113. data/doc/rdoc/classes/LibXML/XML/XPointer/Context/InvalidPath.html +0 -117
  114. data/doc/rdoc/classes/LibXML/XML/XPointer/InvalidExpression.html +0 -124
  115. data/doc/rdoc/classes/singleton.html +0 -114
  116. data/doc/rdoc/created.rid +0 -1
  117. data/doc/rdoc/files/CHANGES.html +0 -442
  118. data/doc/rdoc/files/LICENSE.html +0 -133
  119. data/doc/rdoc/files/README.html +0 -388
  120. data/doc/rdoc/files/VERSION.html +0 -107
  121. data/doc/rdoc/files/ext/libxml/cbg_c.html +0 -101
  122. data/doc/rdoc/files/ext/libxml/libxml_c.html +0 -101
  123. data/doc/rdoc/files/ext/libxml/ruby_xml_attr_c.html +0 -101
  124. data/doc/rdoc/files/ext/libxml/ruby_xml_attributes_c.html +0 -101
  125. data/doc/rdoc/files/ext/libxml/ruby_xml_document_c.html +0 -101
  126. data/doc/rdoc/files/ext/libxml/ruby_xml_dtd_c.html +0 -101
  127. data/doc/rdoc/files/ext/libxml/ruby_xml_html_parser_c.html +0 -101
  128. data/doc/rdoc/files/ext/libxml/ruby_xml_input_cbg_c.html +0 -101
  129. data/doc/rdoc/files/ext/libxml/ruby_xml_node_c.html +0 -101
  130. data/doc/rdoc/files/ext/libxml/ruby_xml_node_set_c.html +0 -101
  131. data/doc/rdoc/files/ext/libxml/ruby_xml_ns_c.html +0 -101
  132. data/doc/rdoc/files/ext/libxml/ruby_xml_parser_c.html +0 -101
  133. data/doc/rdoc/files/ext/libxml/ruby_xml_parser_context_c.html +0 -101
  134. data/doc/rdoc/files/ext/libxml/ruby_xml_reader_c.html +0 -101
  135. data/doc/rdoc/files/ext/libxml/ruby_xml_relaxng_c.html +0 -101
  136. data/doc/rdoc/files/ext/libxml/ruby_xml_sax_parser_c.html +0 -101
  137. data/doc/rdoc/files/ext/libxml/ruby_xml_schema_c.html +0 -101
  138. data/doc/rdoc/files/ext/libxml/ruby_xml_state_c.html +0 -101
  139. data/doc/rdoc/files/ext/libxml/ruby_xml_xinclude_c.html +0 -101
  140. data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_c.html +0 -101
  141. data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_context_c.html +0 -101
  142. data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_object_c.html +0 -101
  143. data/doc/rdoc/files/ext/libxml/ruby_xml_xpointer_c.html +0 -101
  144. data/doc/rdoc/files/ext/libxml/ruby_xml_xpointer_context_c.html +0 -101
  145. data/doc/rdoc/files/lib/libxml/attr_rb.html +0 -108
  146. data/doc/rdoc/files/lib/libxml/attributes_rb.html +0 -108
  147. data/doc/rdoc/files/lib/libxml/document_rb.html +0 -108
  148. data/doc/rdoc/files/lib/libxml/node_rb.html +0 -108
  149. data/doc/rdoc/files/lib/libxml/node_set_rb.html +0 -108
  150. data/doc/rdoc/files/lib/libxml/parser_options_rb.html +0 -107
  151. data/doc/rdoc/files/lib/libxml/parser_rb.html +0 -101
  152. data/doc/rdoc/files/lib/libxml/properties_rb.html +0 -108
  153. data/doc/rdoc/files/lib/libxml/tree_rb.html +0 -107
  154. data/doc/rdoc/files/lib/libxml_rb.html +0 -124
  155. data/doc/rdoc/files/lib/xml/libxml_rb.html +0 -124
  156. data/doc/rdoc/files/lib/xml_rb.html +0 -134
  157. data/doc/rdoc/fr_class_index.html +0 -62
  158. data/doc/rdoc/fr_file_index.html +0 -66
  159. data/doc/rdoc/fr_method_index.html +0 -392
  160. data/doc/rdoc/index.html +0 -24
  161. data/doc/rdoc/rdoc-style.css +0 -208
  162. data/ext/libxml/ruby_xml_node_set.c +0 -172
  163. data/ext/libxml/ruby_xml_node_set.h +0 -20
  164. data/ext/libxml/ruby_xml_xpointer_context.c +0 -22
  165. data/ext/libxml/ruby_xml_xpointer_context.h +0 -18
  166. data/lib/libxml/node_set.rb +0 -27
  167. data/test/tc_node_set.rb +0 -24
  168. data/test/tc_node_set2.rb +0 -37
@@ -0,0 +1,13 @@
1
+ /* $Id: ruby_xml_ns.h 324 2008-07-08 23:00:02Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #ifndef __RUBY_XML_ERROR__
6
+ #define __RUBY_XML_ERROR__
7
+
8
+ extern VALUE eXMLError;
9
+
10
+ void ruby_init_xml_error();
11
+ void ruby_xml_raise(xmlErrorPtr xerror);
12
+
13
+ #endif
@@ -1,312 +1,72 @@
1
- /* $Id: ruby_xml_html_parser.c 461 2008-07-15 21:35:56Z cfis $ */
1
+ /* $Id: ruby_xml_html_parser.c 580 2008-11-18 09:24:43Z cfis $ */
2
2
 
3
3
  /* Please see the LICENSE file for copyright and distribution information */
4
4
 
5
5
  #include "ruby_libxml.h"
6
6
 
7
7
  VALUE cXMLHTMLParser;
8
-
9
- //static int
10
- //ctxtRead(FILE *f, char * buf, int len) {
11
- // return(fread(buf, 1, len, f));
12
- //}
8
+ ID INPUT_ATTR;
9
+ ID CONTEXT_ATTR;
13
10
 
14
11
 
15
12
  /*
16
- * call-seq:
17
- * parser.filename -> "filename"
18
- *
19
- * Obtain the filename this parser will read from.
20
- */
21
- /*
22
- VALUE
23
- ruby_xml_html_parser_filename_get(VALUE self) {
24
- ruby_xml_html_parser *rxp;
25
- rx_file_data *data;
13
+ * Document-class: LibXML::XML::HTMLParser
14
+ *
15
+ * The HTML parser implements an HTML 4.0 non-verifying parser with an API
16
+ * compatible with the XML::Parser. In contrast with the XML::Parser,
17
+ * it can parse "real world" HTML, even if it is severely broken from a
18
+ * specification point of view. */
26
19
 
27
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
28
- if (rxp->data == NULL)
29
- return(Qnil);
30
-
31
- if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE)
32
- return(Qnil);
33
-
34
- data = (rx_file_data *)rxp->data;
35
- return(data->filename);
36
- }
37
- */
38
20
 
39
21
  /*
40
22
  * call-seq:
41
- * parser.filename = "filename"
23
+ * XML::HTMLParser.initialize -> parser
42
24
  *
43
- * Set the filename this parser will read from.
25
+ * Initializes a new parser instance with no pre-determined source.
44
26
  */
45
- /*
46
27
  VALUE
47
- ruby_xml_html_parser_filename_set(VALUE self, VALUE filename) {
48
- ruby_xml_html_parser *rxp;
49
- ruby_xml_parser_context *rxpc;
50
- rx_file_data *data;
51
-
52
- Check_Type(filename, T_STRING);
53
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
54
-
55
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
56
- if (rxp->data != NULL)
57
- rb_fatal("crap, this should be null");
58
-
59
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
60
- data = ALLOC(rx_file_data);
61
- rxp->data = data;
62
- } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE) {
63
- return(Qnil);
64
- }
65
-
66
- rxp->ctxt = ruby_xml_parser_context_new();
67
- data = (rx_file_data *)rxp->data;
68
- data->filename = filename;
69
-
70
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
71
- rxpc->ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename));
72
- if (rxpc->ctxt == NULL)
73
- rb_sys_fail(StringValuePtr(filename));
74
-
75
- return(data->filename);
28
+ ruby_xml_html_parser_initialize(VALUE self) {
29
+ VALUE input = rb_class_new_instance(0, NULL, cXMLInput);
30
+ rb_iv_set(self, "@input", input);
31
+ rb_iv_set(self, "@context", Qnil);
32
+ return self;
76
33
  }
77
- */
78
34
 
79
- void
80
- ruby_xml_html_parser_free(ruby_xml_html_parser *rxp) {
81
- void *data;
35
+ htmlParserCtxtPtr
36
+ ruby_xml_html_parser_file_ctxt(VALUE input) {
37
+ VALUE file = rb_ivar_get(input, FILE_ATTR);
38
+ VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
39
+ VALUE encodingStr = ruby_xml_encoding_to_s(Qnil, encoding);
82
40
 
83
- switch(rxp->data_type) {
84
- case RUBY_LIBXML_SRC_TYPE_NULL:
85
- break;
86
- case RUBY_LIBXML_SRC_TYPE_FILE:
87
- data = (void *)(rx_file_data *)rxp->data;
88
- ruby_xfree((rx_file_data *)data);
89
- break;
90
- case RUBY_LIBXML_SRC_TYPE_STRING:
91
- data = (void *)(rx_string_data *)rxp->data;
92
- ruby_xfree((rx_string_data *)data);
93
- break;
94
- case RUBY_LIBXML_SRC_TYPE_IO:
95
- data = (void *)(rx_io_data *)rxp->data;
96
- ruby_xfree((rx_io_data *)data);
97
- break;
98
- default:
99
- rb_fatal("Unknown data type, %d", rxp->data_type);
100
- }
101
-
102
- ruby_xfree(rxp);
41
+ return htmlCreateFileParserCtxt(StringValuePtr(file), StringValuePtr(encodingStr));
103
42
  }
104
43
 
105
-
106
- /*
107
- * call-seq:
108
- * parser.io -> IO
109
- *
110
- * Obtain the IO instance this parser works with.
111
- */
112
- /*
113
- VALUE
114
- ruby_xml_html_parser_io_get(VALUE self, VALUE io) {
115
- ruby_xml_html_parser *rxp;
116
- rx_io_data *data;
117
-
118
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
119
-
120
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL ||
121
- rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO ||
122
- rxp->data == NULL)
123
- return(Qnil);
124
-
125
- data = (rx_io_data *)rxp->data;
126
-
127
- return(data->io);
44
+ htmlParserCtxtPtr
45
+ ruby_xml_html_parser_str_ctxt(VALUE input) {
46
+ VALUE data = rb_ivar_get(input, STRING_ATTR);
47
+ return htmlCreateMemoryParserCtxt(StringValuePtr(data), RSTRING_LEN(data));
128
48
  }
129
- */
130
49
 
131
50
  /*
132
- * call-seq:
133
- * parser.io = IO
134
- *
135
- * Set the IO instance this parser works with.
136
- */
137
- /*
138
- VALUE
139
- ruby_xml_html_parser_io_set(VALUE self, VALUE io) {
140
- ruby_xml_html_parser *rxp;
141
- ruby_xml_parser_context *rxpc;
142
- rx_io_data *data;
51
+ htmlParserCtxtPtr
52
+ ruby_xml_html_parser_io_ctxt(VALUE input) {
53
+ VALUE io = rb_ivar_get(input, IO_ATTR);
54
+ VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
55
+ xmlCharEncoding xmlEncoding = NUM2INT(encoding);
56
+
143
57
  OpenFile *fptr;
144
58
  FILE *f;
145
59
 
146
- if (!rb_obj_is_kind_of(io, rb_cIO))
147
- rb_raise(rb_eTypeError, "need an IO object");
148
-
149
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
150
-
151
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
152
- if (rxp->data != NULL)
153
- rb_fatal("crap, this should be null");
154
-
155
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
156
- data = ALLOC(rx_io_data);
157
- rxp->data = data;
158
- } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO) {
159
- return(Qnil);
160
- }
161
-
162
- rxp->ctxt = ruby_xml_parser_context_new();
163
- data = (rx_io_data *)rxp->data;
164
- data->io = io;
165
-
166
60
  GetOpenFile(io, fptr);
167
61
  rb_io_check_readable(fptr);
168
62
  f = GetWriteFile(fptr);
169
63
 
170
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
171
- rxpc->ctxt = htmlCreateIOParserCtxt(NULL, NULL,
64
+ return htmlCreateIOParserCtxt(NULL, NULL,
172
65
  (xmlInputReadCallback) ctxtRead,
173
- NULL, f, XML_CHAR_ENCODING_NONE);
174
- if (NIL_P(rxpc->ctxt))
175
- rb_sys_fail(0);
176
-
177
- return(data->io);
66
+ NULL, f, xmlEncoding);
178
67
  }
179
68
  */
180
69
 
181
- void
182
- ruby_xml_html_parser_mark(ruby_xml_html_parser *rxp) {
183
- if (rxp == NULL) return;
184
- if (!NIL_P(rxp->ctxt))
185
- rb_gc_mark(rxp->ctxt);
186
-
187
- rb_gc_mark(LIBXML_STATE);
188
-
189
- switch(rxp->data_type) {
190
- case RUBY_LIBXML_SRC_TYPE_NULL:
191
- break;
192
- case RUBY_LIBXML_SRC_TYPE_FILE:
193
- if (!NIL_P(((rx_file_data *)rxp->data)->filename))
194
- rb_gc_mark(((rx_file_data *)rxp->data)->filename);
195
- break;
196
- case RUBY_LIBXML_SRC_TYPE_STRING:
197
- if (!NIL_P(((rx_string_data *)rxp->data)->str))
198
- rb_gc_mark(((rx_string_data *)rxp->data)->str);
199
- break;
200
- case RUBY_LIBXML_SRC_TYPE_IO:
201
- if (!NIL_P(((rx_io_data *)rxp->data)->io))
202
- rb_gc_mark(((rx_io_data *)rxp->data)->io);
203
- break;
204
- default:
205
- rb_fatal("unknown datatype: %d", rxp->data_type);
206
- }
207
- }
208
-
209
-
210
- /*
211
- * call-seq:
212
- * XML::HTMLParser.new -> parser
213
- *
214
- * Create a new parser instance with no pre-determined source.
215
- */
216
- VALUE
217
- ruby_xml_html_parser_new(VALUE class) {
218
- ruby_xml_html_parser *rxp;
219
-
220
- rxp = ALLOC(ruby_xml_html_parser);
221
- rxp->ctxt = Qnil;
222
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
223
- rxp->data = NULL;
224
- rxp->parsed = 0;
225
-
226
- return(Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
227
- ruby_xml_html_parser_free, rxp));
228
- }
229
-
230
-
231
- /*
232
- * call-seq:
233
- * XML::HTMLParser.file -> parser
234
- *
235
- * Create a new parser instance that will read the specified file.
236
- */
237
- /*
238
- VALUE
239
- ruby_xml_html_parser_new_file(VALUE class, VALUE filename) {
240
- VALUE obj;
241
- ruby_xml_html_parser *rxp;
242
- rx_file_data *data;
243
-
244
- obj = ruby_xml_html_parser_new(class);
245
- Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
246
-
247
- data = ALLOC(rx_file_data);
248
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
249
- rxp->data = data;
250
-
251
- ruby_xml_html_parser_filename_set(obj, filename);
252
-
253
- return(obj);
254
- }
255
- */
256
-
257
- /*
258
- * call-seq:
259
- * XML::HTMLParser.io -> parser
260
- *
261
- * Create a new parser instance that will read from the
262
- * specified IO object.
263
- */
264
- /*
265
- VALUE
266
- ruby_xml_html_parser_new_io(VALUE class, VALUE io) {
267
- VALUE obj;
268
- ruby_xml_html_parser *rxp;
269
- rx_io_data *data;
270
-
271
- obj = ruby_xml_html_parser_new(class);
272
- Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
273
-
274
- data = ALLOC(rx_io_data);
275
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
276
- rxp->data = data;
277
-
278
- ruby_xml_html_parser_io_set(obj, io);
279
-
280
- return(obj);
281
- }
282
- */
283
-
284
- /*
285
- * call-seq:
286
- * XML::HTMLParser.string -> parser
287
- *
288
- * Create a new parser instance that will parse the given
289
- * string.
290
- */
291
- VALUE
292
- ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
293
- VALUE obj;
294
- ruby_xml_html_parser *rxp;
295
- rx_string_data *data;
296
-
297
- obj = ruby_xml_html_parser_new(class);
298
- Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
299
-
300
- data = ALLOC(rx_string_data);
301
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
302
- rxp->data = data;
303
-
304
- ruby_xml_html_parser_str_set(obj, str);
305
-
306
- return(obj);
307
- }
308
-
309
-
310
70
  /*
311
71
  * call-seq:
312
72
  * parser.parse -> document
@@ -317,111 +77,39 @@ ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
317
77
  */
318
78
  VALUE
319
79
  ruby_xml_html_parser_parse(VALUE self) {
320
- ruby_xml_html_parser *rxp;
321
- ruby_xml_parser_context *rxpc;
322
- htmlDocPtr xdp;
323
- VALUE doc;
324
-
325
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
326
-
327
- switch (rxp->data_type) {
328
- case RUBY_LIBXML_SRC_TYPE_NULL:
329
- return(Qnil);
330
- case RUBY_LIBXML_SRC_TYPE_STRING:
331
- //case RUBY_LIBXML_SRC_TYPE_FILE:
332
- //case RUBY_LIBXML_SRC_TYPE_IO:
333
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
334
-
335
- /* don't check return values here, the HTML parser returns errors
336
- * but still allows the resulting tree to be used.
337
- */
338
- htmlParseDocument(rxpc->ctxt);
339
- xdp = rxpc->ctxt->myDoc;
340
- rxp->parsed = 1;
341
-
342
- doc = ruby_xml_document_wrap(xdp);
343
- break;
344
- default:
345
- rb_fatal("Unknown data type, %d", rxp->data_type);
346
- }
347
-
348
- return(doc);
349
- }
350
-
351
-
352
- /*
353
- * call-seq:
354
- * parser.context -> context
355
- *
356
- * Obtain the XML::Parser::Context associated with this
357
- * parser.
358
- */
359
- VALUE
360
- ruby_xml_html_parser_context_get(VALUE self) {
361
- ruby_xml_html_parser *rxp;
362
-
363
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
364
- if (rxp->ctxt == Qnil)
365
- return(Qnil);
80
+ xmlParserCtxtPtr ctxt;
81
+ VALUE context;
82
+ VALUE input = rb_ivar_get(self, INPUT_ATTR);
83
+
84
+ context = rb_ivar_get(self, CONTEXT_ATTR);
85
+ if (context != Qnil)
86
+ rb_raise(rb_eRuntimeError, "You cannot parse a data source twice");
87
+
88
+ if (rb_ivar_get(input, FILE_ATTR) != Qnil)
89
+ ctxt = ruby_xml_html_parser_file_ctxt(input);
90
+ else if (rb_ivar_get(input, STRING_ATTR) != Qnil)
91
+ ctxt = ruby_xml_html_parser_str_ctxt(input);
92
+ /*else if (rb_ivar_get(input, DOCUMENT_ATTR) != Qnil)
93
+ ctxt = ruby_xml_html_parser_parse_document(input);
94
+ else if (rb_ivar_get(input, IO_ATTR) != Qnil)
95
+ ctxt = ruby_xml_html_parser_io_ctxt(input);*/
366
96
  else
367
- return(rxp->ctxt);
368
- }
369
-
370
-
371
- /*
372
- * call-seq:
373
- * parser.string -> "string"
374
- *
375
- * Obtain the string this parser works with.
376
- */
377
- VALUE
378
- ruby_xml_html_parser_str_get(VALUE self) {
379
- ruby_xml_html_parser *rxp;
380
- rx_string_data *data;
381
-
382
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
383
- if (rxp->data == NULL || rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING)
384
- return(Qnil);
385
-
386
- data = (rx_string_data *)rxp->data;
387
- return(data->str);
388
- }
389
-
390
-
391
- /*
392
- * call-seq:
393
- * parser.string = "string"
394
- *
395
- * Set the string this parser works with.
396
- */
397
- VALUE
398
- ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
399
- ruby_xml_html_parser *rxp;
400
- ruby_xml_parser_context *rxpc;
401
- rx_string_data *data;
402
-
403
- Check_Type(str, T_STRING);
404
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
405
-
406
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
407
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
408
- data = ALLOC(rx_string_data);
409
- rxp->data = data;
410
- } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
411
- return(Qnil);
97
+ rb_raise(rb_eArgError, "You must specify a parser data source");
98
+
99
+ if (!ctxt)
100
+ ruby_xml_raise(&xmlLastError);
101
+
102
+ context = ruby_xml_parser_context_wrap(ctxt);
103
+ rb_ivar_set(self, CONTEXT_ATTR, context);
104
+
105
+ if (htmlParseDocument(ctxt) == -1 || !ctxt->wellFormed) {
106
+ xmlFreeDoc(ctxt->myDoc);
107
+ ruby_xml_raise(&ctxt->lastError);
412
108
  }
413
109
 
414
- rxp->ctxt = ruby_xml_parser_context_new();
415
- data = (rx_string_data *)rxp->data;
416
- data->str = str;
417
-
418
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
419
- rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));
420
-
421
- return(data->str);
110
+ return ruby_xml_document_wrap(ctxt->myDoc);
422
111
  }
423
112
 
424
-
425
113
  // Rdoc needs to know
426
114
  #ifdef RDOC_NEVER_DEFINED
427
115
  mLibXML = rb_define_module("LibXML");
@@ -430,20 +118,16 @@ ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
430
118
 
431
119
  void
432
120
  ruby_init_html_parser(void) {
121
+ INPUT_ATTR = rb_intern("@input");
122
+ CONTEXT_ATTR = rb_intern("@context");
123
+
433
124
  cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
125
+
126
+ /* Attributes */
127
+ rb_define_attr(cXMLHTMLParser, "input", 1, 0);
128
+ rb_define_attr(cXMLHTMLParser, "context", 1, 0);
434
129
 
435
- rb_define_singleton_method(cXMLHTMLParser, "new", ruby_xml_html_parser_new, 0);
436
- rb_define_singleton_method(cXMLHTMLParser, "string", ruby_xml_html_parser_new_string, 1);
437
- /*
438
- rb_define_singleton_method(cXMLHTMLParser, "file", ruby_xml_html_parser_new_file, 1);
439
- rb_define_singleton_method(cXMLHTMLParser, "io", ruby_xml_html_parser_new_io, 1);
440
- rb_define_method(cXMLHTMLParser, "filename", ruby_xml_html_parser_filename_get, 0);
441
- rb_define_method(cXMLHTMLParser, "filename=", ruby_xml_html_parser_filename_set, 1);
442
- rb_define_method(cXMLHTMLParser, "io", ruby_xml_html_parser_io_get, 0);
443
- rb_define_method(cXMLHTMLParser, "io=", ruby_xml_html_parser_io_set, 1);
444
- */
130
+ /* Instance methods */
131
+ rb_define_method(cXMLHTMLParser, "initialize", ruby_xml_html_parser_initialize, 0);
445
132
  rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
446
- rb_define_method(cXMLHTMLParser, "context", ruby_xml_html_parser_context_get, 0);
447
- rb_define_method(cXMLHTMLParser, "string", ruby_xml_html_parser_str_get, 0);
448
- rb_define_method(cXMLHTMLParser, "string=", ruby_xml_html_parser_str_set, 1);
449
133
  }