libxml-ruby 0.8.3-x86-mswin32-60 → 0.9.0-x86-mswin32-60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175)
  1. data/CHANGES +18 -0
  2. data/RAKEFILE +15 -39
  3. data/README +48 -47
  4. data/ext/libxml/libxml.c +847 -22
  5. data/ext/libxml/ruby_libxml.h +71 -95
  6. data/ext/libxml/ruby_xml_attr.c +500 -500
  7. data/ext/libxml/ruby_xml_attributes.c +1 -1
  8. data/ext/libxml/ruby_xml_document.c +1144 -1135
  9. data/ext/libxml/ruby_xml_document.h +4 -11
  10. data/ext/libxml/ruby_xml_dtd.c +27 -0
  11. data/ext/libxml/ruby_xml_encoding.c +164 -0
  12. data/ext/libxml/ruby_xml_encoding.h +13 -0
  13. data/ext/libxml/ruby_xml_error.c +941 -0
  14. data/ext/libxml/ruby_xml_error.h +13 -0
  15. data/ext/libxml/ruby_xml_html_parser.c +71 -387
  16. data/ext/libxml/ruby_xml_html_parser.h +1 -17
  17. data/ext/libxml/ruby_xml_input.c +179 -0
  18. data/ext/libxml/ruby_xml_input.h +18 -0
  19. data/ext/libxml/ruby_xml_input_cbg.c +17 -3
  20. data/ext/libxml/ruby_xml_node.c +1566 -1582
  21. data/ext/libxml/ruby_xml_node.h +1 -4
  22. data/ext/libxml/ruby_xml_ns.c +14 -3
  23. data/ext/libxml/ruby_xml_parser.c +164 -1398
  24. data/ext/libxml/ruby_xml_parser.h +5 -17
  25. data/ext/libxml/ruby_xml_parser_context.c +131 -169
  26. data/ext/libxml/ruby_xml_parser_context.h +2 -9
  27. data/ext/libxml/ruby_xml_reader.c +910 -945
  28. data/ext/libxml/ruby_xml_relaxng.c +32 -3
  29. data/ext/libxml/ruby_xml_sax_parser.c +106 -364
  30. data/ext/libxml/ruby_xml_sax_parser.h +1 -37
  31. data/ext/libxml/ruby_xml_schema.c +174 -145
  32. data/ext/libxml/ruby_xml_xinclude.c +9 -5
  33. data/ext/libxml/ruby_xml_xpath.c +25 -6
  34. data/ext/libxml/ruby_xml_xpath.h +1 -2
  35. data/ext/libxml/ruby_xml_xpath_context.c +17 -19
  36. data/ext/libxml/ruby_xml_xpath_object.c +60 -56
  37. data/ext/libxml/ruby_xml_xpointer.c +11 -5
  38. data/ext/libxml/sax_parser_callbacks.inc +42 -37
  39. data/ext/libxml/version.h +3 -3
  40. data/ext/mingw/Rakefile +20 -27
  41. data/ext/mingw/build.rake +41 -0
  42. data/{lib → ext/mingw}/libiconv-2.dll +0 -0
  43. data/ext/mingw/libxml2-2.dll +0 -0
  44. data/ext/mingw/libxml_ruby.dll.a +0 -0
  45. data/ext/mingw/libxml_ruby.so +0 -0
  46. data/ext/vc/libxml_ruby.vcproj +23 -15
  47. data/lib/libxml.rb +8 -2
  48. data/lib/libxml/document.rb +16 -4
  49. data/lib/libxml/error.rb +84 -0
  50. data/lib/libxml/hpricot.rb +76 -0
  51. data/lib/libxml/html_parser.rb +61 -0
  52. data/lib/libxml/node.rb +36 -25
  53. data/lib/libxml/parser.rb +312 -33
  54. data/lib/libxml/parser_context.rb +17 -0
  55. data/lib/libxml/properties.rb +15 -2
  56. data/lib/libxml/reader.rb +15 -0
  57. data/lib/libxml/sax_callbacks.rb +179 -0
  58. data/lib/libxml/sax_parser.rb +42 -0
  59. data/lib/libxml/tree.rb +1 -2
  60. data/lib/libxml/xpath_object.rb +12 -0
  61. data/test/model/atom.xml +4 -0
  62. data/test/tc_attributes.rb +43 -19
  63. data/test/tc_document.rb +1 -1
  64. data/test/tc_document_write.rb +15 -8
  65. data/test/tc_dtd.rb +36 -20
  66. data/test/tc_encoding.rb +13 -0
  67. data/test/tc_error.rb +136 -0
  68. data/test/tc_node.rb +2 -3
  69. data/test/tc_node_copy.rb +1 -1
  70. data/test/tc_node_edit.rb +6 -0
  71. data/test/tc_ns.rb +18 -0
  72. data/test/tc_parser.rb +113 -228
  73. data/test/tc_parser_context.rb +1 -2
  74. data/test/tc_reader.rb +24 -14
  75. data/test/tc_relaxng.rb +18 -6
  76. data/test/tc_sax_parser.rb +48 -13
  77. data/test/tc_schema.rb +20 -8
  78. data/test/tc_well_formed.rb +2 -1
  79. data/test/tc_xml.rb +212 -0
  80. data/test/tc_xpath.rb +60 -46
  81. data/test/tc_xpointer.rb +7 -11
  82. data/test/test_suite.rb +4 -3
  83. metadata +32 -115
  84. data/doc/rdoc/classes/LibXML.html +0 -241
  85. data/doc/rdoc/classes/LibXML/XML.html +0 -185
  86. data/doc/rdoc/classes/LibXML/XML/Attr.html +0 -1010
  87. data/doc/rdoc/classes/LibXML/XML/Attributes.html +0 -526
  88. data/doc/rdoc/classes/LibXML/XML/Document.html +0 -1489
  89. data/doc/rdoc/classes/LibXML/XML/Dtd.html +0 -213
  90. data/doc/rdoc/classes/LibXML/XML/Error.html +0 -117
  91. data/doc/rdoc/classes/LibXML/XML/HTMLParser.html +0 -348
  92. data/doc/rdoc/classes/LibXML/XML/InputCallbacks.html +0 -160
  93. data/doc/rdoc/classes/LibXML/XML/NS.html +0 -381
  94. data/doc/rdoc/classes/LibXML/XML/Node.html +0 -3396
  95. data/doc/rdoc/classes/LibXML/XML/Node/FailedModify.html +0 -123
  96. data/doc/rdoc/classes/LibXML/XML/Node/Set.html +0 -440
  97. data/doc/rdoc/classes/LibXML/XML/Node/SetNamespace.html +0 -123
  98. data/doc/rdoc/classes/LibXML/XML/Node/UnknownType.html +0 -123
  99. data/doc/rdoc/classes/LibXML/XML/Parser.html +0 -2239
  100. data/doc/rdoc/classes/LibXML/XML/Parser/Context.html +0 -1255
  101. data/doc/rdoc/classes/LibXML/XML/Parser/ParseError.html +0 -123
  102. data/doc/rdoc/classes/LibXML/XML/Reader.html +0 -2264
  103. data/doc/rdoc/classes/LibXML/XML/RelaxNG.html +0 -237
  104. data/doc/rdoc/classes/LibXML/XML/SaxParser.html +0 -415
  105. data/doc/rdoc/classes/LibXML/XML/Schema.html +0 -308
  106. data/doc/rdoc/classes/LibXML/XML/State.html +0 -124
  107. data/doc/rdoc/classes/LibXML/XML/Tree.html +0 -111
  108. data/doc/rdoc/classes/LibXML/XML/XInclude.html +0 -123
  109. data/doc/rdoc/classes/LibXML/XML/XInclude/Error.html +0 -117
  110. data/doc/rdoc/classes/LibXML/XML/XMLParserOptions.html +0 -198
  111. data/doc/rdoc/classes/LibXML/XML/XPath.html +0 -184
  112. data/doc/rdoc/classes/LibXML/XML/XPath/Context.html +0 -404
  113. data/doc/rdoc/classes/LibXML/XML/XPath/InvalidPath.html +0 -172
  114. data/doc/rdoc/classes/LibXML/XML/XPath/Object.html +0 -627
  115. data/doc/rdoc/classes/LibXML/XML/XPointer.html +0 -170
  116. data/doc/rdoc/classes/LibXML/XML/XPointer/Context.html +0 -123
  117. data/doc/rdoc/classes/LibXML/XML/XPointer/Context/InvalidPath.html +0 -117
  118. data/doc/rdoc/classes/LibXML/XML/XPointer/InvalidExpression.html +0 -124
  119. data/doc/rdoc/classes/singleton.html +0 -114
  120. data/doc/rdoc/created.rid +0 -1
  121. data/doc/rdoc/files/CHANGES.html +0 -442
  122. data/doc/rdoc/files/LICENSE.html +0 -133
  123. data/doc/rdoc/files/README.html +0 -388
  124. data/doc/rdoc/files/VERSION.html +0 -107
  125. data/doc/rdoc/files/ext/libxml/cbg_c.html +0 -101
  126. data/doc/rdoc/files/ext/libxml/libxml_c.html +0 -101
  127. data/doc/rdoc/files/ext/libxml/ruby_xml_attr_c.html +0 -101
  128. data/doc/rdoc/files/ext/libxml/ruby_xml_attributes_c.html +0 -101
  129. data/doc/rdoc/files/ext/libxml/ruby_xml_document_c.html +0 -101
  130. data/doc/rdoc/files/ext/libxml/ruby_xml_dtd_c.html +0 -101
  131. data/doc/rdoc/files/ext/libxml/ruby_xml_html_parser_c.html +0 -101
  132. data/doc/rdoc/files/ext/libxml/ruby_xml_input_cbg_c.html +0 -101
  133. data/doc/rdoc/files/ext/libxml/ruby_xml_node_c.html +0 -101
  134. data/doc/rdoc/files/ext/libxml/ruby_xml_node_set_c.html +0 -101
  135. data/doc/rdoc/files/ext/libxml/ruby_xml_ns_c.html +0 -101
  136. data/doc/rdoc/files/ext/libxml/ruby_xml_parser_c.html +0 -101
  137. data/doc/rdoc/files/ext/libxml/ruby_xml_parser_context_c.html +0 -101
  138. data/doc/rdoc/files/ext/libxml/ruby_xml_reader_c.html +0 -101
  139. data/doc/rdoc/files/ext/libxml/ruby_xml_relaxng_c.html +0 -101
  140. data/doc/rdoc/files/ext/libxml/ruby_xml_sax_parser_c.html +0 -101
  141. data/doc/rdoc/files/ext/libxml/ruby_xml_schema_c.html +0 -101
  142. data/doc/rdoc/files/ext/libxml/ruby_xml_state_c.html +0 -101
  143. data/doc/rdoc/files/ext/libxml/ruby_xml_xinclude_c.html +0 -101
  144. data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_c.html +0 -101
  145. data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_context_c.html +0 -101
  146. data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_object_c.html +0 -101
  147. data/doc/rdoc/files/ext/libxml/ruby_xml_xpointer_c.html +0 -101
  148. data/doc/rdoc/files/ext/libxml/ruby_xml_xpointer_context_c.html +0 -101
  149. data/doc/rdoc/files/lib/libxml/attr_rb.html +0 -108
  150. data/doc/rdoc/files/lib/libxml/attributes_rb.html +0 -108
  151. data/doc/rdoc/files/lib/libxml/document_rb.html +0 -108
  152. data/doc/rdoc/files/lib/libxml/node_rb.html +0 -108
  153. data/doc/rdoc/files/lib/libxml/node_set_rb.html +0 -108
  154. data/doc/rdoc/files/lib/libxml/parser_options_rb.html +0 -107
  155. data/doc/rdoc/files/lib/libxml/parser_rb.html +0 -101
  156. data/doc/rdoc/files/lib/libxml/properties_rb.html +0 -108
  157. data/doc/rdoc/files/lib/libxml/tree_rb.html +0 -107
  158. data/doc/rdoc/files/lib/libxml_rb.html +0 -124
  159. data/doc/rdoc/files/lib/xml/libxml_rb.html +0 -124
  160. data/doc/rdoc/files/lib/xml_rb.html +0 -134
  161. data/doc/rdoc/fr_class_index.html +0 -62
  162. data/doc/rdoc/fr_file_index.html +0 -66
  163. data/doc/rdoc/fr_method_index.html +0 -392
  164. data/doc/rdoc/index.html +0 -24
  165. data/doc/rdoc/rdoc-style.css +0 -208
  166. data/ext/libxml/ruby_xml_node_set.c +0 -172
  167. data/ext/libxml/ruby_xml_node_set.h +0 -20
  168. data/ext/libxml/ruby_xml_xpointer_context.c +0 -22
  169. data/ext/libxml/ruby_xml_xpointer_context.h +0 -18
  170. data/lib/libxml/node_set.rb +0 -27
  171. data/lib/libxml2-2.dll +0 -0
  172. data/lib/libxml_ruby.dll.a +0 -0
  173. data/lib/libxml_ruby.so +0 -0
  174. data/test/tc_node_set.rb +0 -24
  175. data/test/tc_node_set2.rb +0 -37
@@ -0,0 +1,13 @@
1
+ /* $Id: ruby_xml_ns.h 324 2008-07-08 23:00:02Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #ifndef __RUBY_XML_ERROR__
6
+ #define __RUBY_XML_ERROR__
7
+
8
+ extern VALUE eXMLError;
9
+
10
+ void ruby_init_xml_error();
11
+ void ruby_xml_raise(xmlErrorPtr xerror);
12
+
13
+ #endif
@@ -1,312 +1,72 @@
1
- /* $Id: ruby_xml_html_parser.c 461 2008-07-15 21:35:56Z cfis $ */
1
+ /* $Id: ruby_xml_html_parser.c 580 2008-11-18 09:24:43Z cfis $ */
2
2
 
3
3
  /* Please see the LICENSE file for copyright and distribution information */
4
4
 
5
5
  #include "ruby_libxml.h"
6
6
 
7
7
  VALUE cXMLHTMLParser;
8
-
9
- //static int
10
- //ctxtRead(FILE *f, char * buf, int len) {
11
- // return(fread(buf, 1, len, f));
12
- //}
8
+ ID INPUT_ATTR;
9
+ ID CONTEXT_ATTR;
13
10
 
14
11
 
15
12
  /*
16
- * call-seq:
17
- * parser.filename -> "filename"
18
- *
19
- * Obtain the filename this parser will read from.
20
- */
21
- /*
22
- VALUE
23
- ruby_xml_html_parser_filename_get(VALUE self) {
24
- ruby_xml_html_parser *rxp;
25
- rx_file_data *data;
13
+ * Document-class: LibXML::XML::HTMLParser
14
+ *
15
+ * The HTML parser implements an HTML 4.0 non-verifying parser with an API
16
+ * compatible with the XML::Parser. In contrast with the XML::Parser,
17
+ * it can parse "real world" HTML, even if it severely broken from a
18
+ * specification point of view. */
26
19
 
27
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
28
- if (rxp->data == NULL)
29
- return(Qnil);
30
-
31
- if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE)
32
- return(Qnil);
33
-
34
- data = (rx_file_data *)rxp->data;
35
- return(data->filename);
36
- }
37
- */
38
20
 
39
21
  /*
40
22
  * call-seq:
41
- * parser.filename = "filename"
23
+ * XML::HTMLParser.initialize -> parser
42
24
  *
43
- * Set the filename this parser will read from.
25
+ * Initializes a new parser instance with no pre-determined source.
44
26
  */
45
- /*
46
27
  VALUE
47
- ruby_xml_html_parser_filename_set(VALUE self, VALUE filename) {
48
- ruby_xml_html_parser *rxp;
49
- ruby_xml_parser_context *rxpc;
50
- rx_file_data *data;
51
-
52
- Check_Type(filename, T_STRING);
53
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
54
-
55
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
56
- if (rxp->data != NULL)
57
- rb_fatal("crap, this should be null");
58
-
59
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
60
- data = ALLOC(rx_file_data);
61
- rxp->data = data;
62
- } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE) {
63
- return(Qnil);
64
- }
65
-
66
- rxp->ctxt = ruby_xml_parser_context_new();
67
- data = (rx_file_data *)rxp->data;
68
- data->filename = filename;
69
-
70
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
71
- rxpc->ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename));
72
- if (rxpc->ctxt == NULL)
73
- rb_sys_fail(StringValuePtr(filename));
74
-
75
- return(data->filename);
28
+ ruby_xml_html_parser_initialize(VALUE self) {
29
+ VALUE input = rb_class_new_instance(0, NULL, cXMLInput);
30
+ rb_iv_set(self, "@input", input);
31
+ rb_iv_set(self, "@context", Qnil);
32
+ return self;
76
33
  }
77
- */
78
34
 
79
- void
80
- ruby_xml_html_parser_free(ruby_xml_html_parser *rxp) {
81
- void *data;
35
+ htmlParserCtxtPtr
36
+ ruby_xml_html_parser_file_ctxt(VALUE input) {
37
+ VALUE file = rb_ivar_get(input, FILE_ATTR);
38
+ VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
39
+ VALUE encodingStr = ruby_xml_encoding_to_s(Qnil, encoding);
82
40
 
83
- switch(rxp->data_type) {
84
- case RUBY_LIBXML_SRC_TYPE_NULL:
85
- break;
86
- case RUBY_LIBXML_SRC_TYPE_FILE:
87
- data = (void *)(rx_file_data *)rxp->data;
88
- ruby_xfree((rx_file_data *)data);
89
- break;
90
- case RUBY_LIBXML_SRC_TYPE_STRING:
91
- data = (void *)(rx_string_data *)rxp->data;
92
- ruby_xfree((rx_string_data *)data);
93
- break;
94
- case RUBY_LIBXML_SRC_TYPE_IO:
95
- data = (void *)(rx_io_data *)rxp->data;
96
- ruby_xfree((rx_io_data *)data);
97
- break;
98
- default:
99
- rb_fatal("Unknown data type, %d", rxp->data_type);
100
- }
101
-
102
- ruby_xfree(rxp);
41
+ return htmlCreateFileParserCtxt(StringValuePtr(file), StringValuePtr(encodingStr));
103
42
  }
104
43
 
105
-
106
- /*
107
- * call-seq:
108
- * parser.io -> IO
109
- *
110
- * Obtain the IO instance this parser works with.
111
- */
112
- /*
113
- VALUE
114
- ruby_xml_html_parser_io_get(VALUE self, VALUE io) {
115
- ruby_xml_html_parser *rxp;
116
- rx_io_data *data;
117
-
118
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
119
-
120
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL ||
121
- rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO ||
122
- rxp->data == NULL)
123
- return(Qnil);
124
-
125
- data = (rx_io_data *)rxp->data;
126
-
127
- return(data->io);
44
+ htmlParserCtxtPtr
45
+ ruby_xml_html_parser_str_ctxt(VALUE input) {
46
+ VALUE data = rb_ivar_get(input, STRING_ATTR);
47
+ return htmlCreateMemoryParserCtxt(StringValuePtr(data), RSTRING_LEN(data));
128
48
  }
129
- */
130
49
 
131
50
  /*
132
- * call-seq:
133
- * parser.io = IO
134
- *
135
- * Set the IO instance this parser works with.
136
- */
137
- /*
138
- VALUE
139
- ruby_xml_html_parser_io_set(VALUE self, VALUE io) {
140
- ruby_xml_html_parser *rxp;
141
- ruby_xml_parser_context *rxpc;
142
- rx_io_data *data;
51
+ htmlParserCtxtPtr
52
+ ruby_xml_html_parser_io_ctxt(VALUE input) {
53
+ VALUE io = rb_ivar_get(input, IO_ATTR);
54
+ VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
55
+ xmlCharEncoding xmlEncoding = NUM2INT(encoding);
56
+
143
57
  OpenFile *fptr;
144
58
  FILE *f;
145
59
 
146
- if (!rb_obj_is_kind_of(io, rb_cIO))
147
- rb_raise(rb_eTypeError, "need an IO object");
148
-
149
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
150
-
151
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
152
- if (rxp->data != NULL)
153
- rb_fatal("crap, this should be null");
154
-
155
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
156
- data = ALLOC(rx_io_data);
157
- rxp->data = data;
158
- } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO) {
159
- return(Qnil);
160
- }
161
-
162
- rxp->ctxt = ruby_xml_parser_context_new();
163
- data = (rx_io_data *)rxp->data;
164
- data->io = io;
165
-
166
60
  GetOpenFile(io, fptr);
167
61
  rb_io_check_readable(fptr);
168
62
  f = GetWriteFile(fptr);
169
63
 
170
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
171
- rxpc->ctxt = htmlCreateIOParserCtxt(NULL, NULL,
64
+ return htmlCreateIOParserCtxt(NULL, NULL,
172
65
  (xmlInputReadCallback) ctxtRead,
173
- NULL, f, XML_CHAR_ENCODING_NONE);
174
- if (NIL_P(rxpc->ctxt))
175
- rb_sys_fail(0);
176
-
177
- return(data->io);
66
+ NULL, f, xmlEncoding);
178
67
  }
179
68
  */
180
69
 
181
- void
182
- ruby_xml_html_parser_mark(ruby_xml_html_parser *rxp) {
183
- if (rxp == NULL) return;
184
- if (!NIL_P(rxp->ctxt))
185
- rb_gc_mark(rxp->ctxt);
186
-
187
- rb_gc_mark(LIBXML_STATE);
188
-
189
- switch(rxp->data_type) {
190
- case RUBY_LIBXML_SRC_TYPE_NULL:
191
- break;
192
- case RUBY_LIBXML_SRC_TYPE_FILE:
193
- if (!NIL_P(((rx_file_data *)rxp->data)->filename))
194
- rb_gc_mark(((rx_file_data *)rxp->data)->filename);
195
- break;
196
- case RUBY_LIBXML_SRC_TYPE_STRING:
197
- if (!NIL_P(((rx_string_data *)rxp->data)->str))
198
- rb_gc_mark(((rx_string_data *)rxp->data)->str);
199
- break;
200
- case RUBY_LIBXML_SRC_TYPE_IO:
201
- if (!NIL_P(((rx_io_data *)rxp->data)->io))
202
- rb_gc_mark(((rx_io_data *)rxp->data)->io);
203
- break;
204
- default:
205
- rb_fatal("unknown datatype: %d", rxp->data_type);
206
- }
207
- }
208
-
209
-
210
- /*
211
- * call-seq:
212
- * XML::HTMLParser.new -> parser
213
- *
214
- * Create a new parser instance with no pre-determined source.
215
- */
216
- VALUE
217
- ruby_xml_html_parser_new(VALUE class) {
218
- ruby_xml_html_parser *rxp;
219
-
220
- rxp = ALLOC(ruby_xml_html_parser);
221
- rxp->ctxt = Qnil;
222
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
223
- rxp->data = NULL;
224
- rxp->parsed = 0;
225
-
226
- return(Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
227
- ruby_xml_html_parser_free, rxp));
228
- }
229
-
230
-
231
- /*
232
- * call-seq:
233
- * XML::HTMLParser.file -> parser
234
- *
235
- * Create a new parser instance that will read the specified file.
236
- */
237
- /*
238
- VALUE
239
- ruby_xml_html_parser_new_file(VALUE class, VALUE filename) {
240
- VALUE obj;
241
- ruby_xml_html_parser *rxp;
242
- rx_file_data *data;
243
-
244
- obj = ruby_xml_html_parser_new(class);
245
- Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
246
-
247
- data = ALLOC(rx_file_data);
248
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
249
- rxp->data = data;
250
-
251
- ruby_xml_html_parser_filename_set(obj, filename);
252
-
253
- return(obj);
254
- }
255
- */
256
-
257
- /*
258
- * call-seq:
259
- * XML::HTMLParser.io -> parser
260
- *
261
- * Create a new parser instance that will read from the
262
- * specified IO object.
263
- */
264
- /*
265
- VALUE
266
- ruby_xml_html_parser_new_io(VALUE class, VALUE io) {
267
- VALUE obj;
268
- ruby_xml_html_parser *rxp;
269
- rx_io_data *data;
270
-
271
- obj = ruby_xml_html_parser_new(class);
272
- Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
273
-
274
- data = ALLOC(rx_io_data);
275
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
276
- rxp->data = data;
277
-
278
- ruby_xml_html_parser_io_set(obj, io);
279
-
280
- return(obj);
281
- }
282
- */
283
-
284
- /*
285
- * call-seq:
286
- * XML::HTMLParser.string -> parser
287
- *
288
- * Create a new parser instance that will parse the given
289
- * string.
290
- */
291
- VALUE
292
- ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
293
- VALUE obj;
294
- ruby_xml_html_parser *rxp;
295
- rx_string_data *data;
296
-
297
- obj = ruby_xml_html_parser_new(class);
298
- Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
299
-
300
- data = ALLOC(rx_string_data);
301
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
302
- rxp->data = data;
303
-
304
- ruby_xml_html_parser_str_set(obj, str);
305
-
306
- return(obj);
307
- }
308
-
309
-
310
70
  /*
311
71
  * call-seq:
312
72
  * parser.parse -> document
@@ -317,111 +77,39 @@ ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
317
77
  */
318
78
  VALUE
319
79
  ruby_xml_html_parser_parse(VALUE self) {
320
- ruby_xml_html_parser *rxp;
321
- ruby_xml_parser_context *rxpc;
322
- htmlDocPtr xdp;
323
- VALUE doc;
324
-
325
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
326
-
327
- switch (rxp->data_type) {
328
- case RUBY_LIBXML_SRC_TYPE_NULL:
329
- return(Qnil);
330
- case RUBY_LIBXML_SRC_TYPE_STRING:
331
- //case RUBY_LIBXML_SRC_TYPE_FILE:
332
- //case RUBY_LIBXML_SRC_TYPE_IO:
333
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
334
-
335
- /* don't check return values here, the HTML parser returns errors
336
- * but still allows the resulting tree to be used.
337
- */
338
- htmlParseDocument(rxpc->ctxt);
339
- xdp = rxpc->ctxt->myDoc;
340
- rxp->parsed = 1;
341
-
342
- doc = ruby_xml_document_wrap(xdp);
343
- break;
344
- default:
345
- rb_fatal("Unknown data type, %d", rxp->data_type);
346
- }
347
-
348
- return(doc);
349
- }
350
-
351
-
352
- /*
353
- * call-seq:
354
- * parser.context -> context
355
- *
356
- * Obtain the XML::Parser::Context associated with this
357
- * parser.
358
- */
359
- VALUE
360
- ruby_xml_html_parser_context_get(VALUE self) {
361
- ruby_xml_html_parser *rxp;
362
-
363
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
364
- if (rxp->ctxt == Qnil)
365
- return(Qnil);
80
+ xmlParserCtxtPtr ctxt;
81
+ VALUE context;
82
+ VALUE input = rb_ivar_get(self, INPUT_ATTR);
83
+
84
+ context = rb_ivar_get(self, CONTEXT_ATTR);
85
+ if (context != Qnil)
86
+ rb_raise(rb_eRuntimeError, "You cannot parse a data source twice");
87
+
88
+ if (rb_ivar_get(input, FILE_ATTR) != Qnil)
89
+ ctxt = ruby_xml_html_parser_file_ctxt(input);
90
+ else if (rb_ivar_get(input, STRING_ATTR) != Qnil)
91
+ ctxt = ruby_xml_html_parser_str_ctxt(input);
92
+ /*else if (rb_ivar_get(input, DOCUMENT_ATTR) != Qnil)
93
+ ctxt = ruby_xml_html_parser_parse_document(input);
94
+ else if (rb_ivar_get(input, IO_ATTR) != Qnil)
95
+ ctxt = ruby_xml_html_parser_io_ctxt(input);*/
366
96
  else
367
- return(rxp->ctxt);
368
- }
369
-
370
-
371
- /*
372
- * call-seq:
373
- * parser.string -> "string"
374
- *
375
- * Obtain the string this parser works with.
376
- */
377
- VALUE
378
- ruby_xml_html_parser_str_get(VALUE self) {
379
- ruby_xml_html_parser *rxp;
380
- rx_string_data *data;
381
-
382
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
383
- if (rxp->data == NULL || rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING)
384
- return(Qnil);
385
-
386
- data = (rx_string_data *)rxp->data;
387
- return(data->str);
388
- }
389
-
390
-
391
- /*
392
- * call-seq:
393
- * parser.string = "string"
394
- *
395
- * Set the string this parser works with.
396
- */
397
- VALUE
398
- ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
399
- ruby_xml_html_parser *rxp;
400
- ruby_xml_parser_context *rxpc;
401
- rx_string_data *data;
402
-
403
- Check_Type(str, T_STRING);
404
- Data_Get_Struct(self, ruby_xml_html_parser, rxp);
405
-
406
- if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
407
- rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
408
- data = ALLOC(rx_string_data);
409
- rxp->data = data;
410
- } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
411
- return(Qnil);
97
+ rb_raise(rb_eArgError, "You must specify a parser data source");
98
+
99
+ if (!ctxt)
100
+ ruby_xml_raise(&xmlLastError);
101
+
102
+ context = ruby_xml_parser_context_wrap(ctxt);
103
+ rb_ivar_set(self, CONTEXT_ATTR, context);
104
+
105
+ if (htmlParseDocument(ctxt) == -1 || !ctxt->wellFormed) {
106
+ xmlFreeDoc(ctxt->myDoc);
107
+ ruby_xml_raise(&ctxt->lastError);
412
108
  }
413
109
 
414
- rxp->ctxt = ruby_xml_parser_context_new();
415
- data = (rx_string_data *)rxp->data;
416
- data->str = str;
417
-
418
- Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
419
- rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));
420
-
421
- return(data->str);
110
+ return ruby_xml_document_wrap(ctxt->myDoc);
422
111
  }
423
112
 
424
-
425
113
  // Rdoc needs to know
426
114
  #ifdef RDOC_NEVER_DEFINED
427
115
  mLibXML = rb_define_module("LibXML");
@@ -430,20 +118,16 @@ ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
430
118
 
431
119
  void
432
120
  ruby_init_html_parser(void) {
121
+ INPUT_ATTR = rb_intern("@input");
122
+ CONTEXT_ATTR = rb_intern("@context");
123
+
433
124
  cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
125
+
126
+ /* Atributes */
127
+ rb_define_attr(cXMLHTMLParser, "input", 1, 0);
128
+ rb_define_attr(cXMLHTMLParser, "context", 1, 0);
434
129
 
435
- rb_define_singleton_method(cXMLHTMLParser, "new", ruby_xml_html_parser_new, 0);
436
- rb_define_singleton_method(cXMLHTMLParser, "string", ruby_xml_html_parser_new_string, 1);
437
- /*
438
- rb_define_singleton_method(cXMLHTMLParser, "file", ruby_xml_html_parser_new_file, 1);
439
- rb_define_singleton_method(cXMLHTMLParser, "io", ruby_xml_html_parser_new_io, 1);
440
- rb_define_method(cXMLHTMLParser, "filename", ruby_xml_html_parser_filename_get, 0);
441
- rb_define_method(cXMLHTMLParser, "filename=", ruby_xml_html_parser_filename_set, 1);
442
- rb_define_method(cXMLHTMLParser, "io", ruby_xml_html_parser_io_get, 0);
443
- rb_define_method(cXMLHTMLParser, "io=", ruby_xml_html_parser_io_set, 1);
444
- */
130
+ /* Instance methods */
131
+ rb_define_method(cXMLHTMLParser, "initialize", ruby_xml_html_parser_initialize, 0);
445
132
  rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
446
- rb_define_method(cXMLHTMLParser, "context", ruby_xml_html_parser_context_get, 0);
447
- rb_define_method(cXMLHTMLParser, "string", ruby_xml_html_parser_str_get, 0);
448
- rb_define_method(cXMLHTMLParser, "string=", ruby_xml_html_parser_str_set, 1);
449
133
  }