libxml-ruby 0.8.3 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES +18 -0
- data/RAKEFILE +15 -39
- data/README +48 -47
- data/ext/libxml/libxml.c +847 -22
- data/ext/libxml/ruby_libxml.h +71 -95
- data/ext/libxml/ruby_xml_attr.c +500 -500
- data/ext/libxml/ruby_xml_attributes.c +1 -1
- data/ext/libxml/ruby_xml_document.c +1144 -1135
- data/ext/libxml/ruby_xml_document.h +4 -11
- data/ext/libxml/ruby_xml_dtd.c +27 -0
- data/ext/libxml/ruby_xml_encoding.c +164 -0
- data/ext/libxml/ruby_xml_encoding.h +13 -0
- data/ext/libxml/ruby_xml_error.c +941 -0
- data/ext/libxml/ruby_xml_error.h +13 -0
- data/ext/libxml/ruby_xml_html_parser.c +71 -387
- data/ext/libxml/ruby_xml_html_parser.h +1 -17
- data/ext/libxml/ruby_xml_input.c +179 -0
- data/ext/libxml/ruby_xml_input.h +18 -0
- data/ext/libxml/ruby_xml_input_cbg.c +17 -3
- data/ext/libxml/ruby_xml_node.c +1566 -1582
- data/ext/libxml/ruby_xml_node.h +1 -4
- data/ext/libxml/ruby_xml_ns.c +14 -3
- data/ext/libxml/ruby_xml_parser.c +164 -1398
- data/ext/libxml/ruby_xml_parser.h +5 -17
- data/ext/libxml/ruby_xml_parser_context.c +131 -169
- data/ext/libxml/ruby_xml_parser_context.h +2 -9
- data/ext/libxml/ruby_xml_reader.c +910 -945
- data/ext/libxml/ruby_xml_relaxng.c +32 -3
- data/ext/libxml/ruby_xml_sax_parser.c +106 -364
- data/ext/libxml/ruby_xml_sax_parser.h +1 -37
- data/ext/libxml/ruby_xml_schema.c +174 -145
- data/ext/libxml/ruby_xml_xinclude.c +9 -5
- data/ext/libxml/ruby_xml_xpath.c +25 -6
- data/ext/libxml/ruby_xml_xpath.h +1 -2
- data/ext/libxml/ruby_xml_xpath_context.c +17 -19
- data/ext/libxml/ruby_xml_xpath_object.c +60 -56
- data/ext/libxml/ruby_xml_xpointer.c +11 -5
- data/ext/libxml/sax_parser_callbacks.inc +42 -37
- data/ext/libxml/version.h +3 -3
- data/ext/mingw/Rakefile +20 -27
- data/ext/mingw/build.rake +41 -0
- data/ext/vc/libxml_ruby.vcproj +23 -15
- data/lib/libxml.rb +8 -2
- data/lib/libxml/document.rb +16 -4
- data/lib/libxml/error.rb +84 -0
- data/lib/libxml/hpricot.rb +76 -0
- data/lib/libxml/html_parser.rb +61 -0
- data/lib/libxml/node.rb +36 -25
- data/lib/libxml/parser.rb +312 -33
- data/lib/libxml/parser_context.rb +17 -0
- data/lib/libxml/properties.rb +15 -2
- data/lib/libxml/reader.rb +15 -0
- data/lib/libxml/sax_callbacks.rb +179 -0
- data/lib/libxml/sax_parser.rb +42 -0
- data/lib/libxml/tree.rb +1 -2
- data/lib/libxml/xpath_object.rb +12 -0
- data/test/model/atom.xml +4 -0
- data/test/tc_attributes.rb +43 -19
- data/test/tc_document.rb +1 -1
- data/test/tc_document_write.rb +15 -8
- data/test/tc_dtd.rb +36 -20
- data/test/tc_encoding.rb +13 -0
- data/test/tc_error.rb +136 -0
- data/test/tc_node.rb +2 -3
- data/test/tc_node_copy.rb +1 -1
- data/test/tc_node_edit.rb +6 -0
- data/test/tc_ns.rb +18 -0
- data/test/tc_parser.rb +113 -228
- data/test/tc_parser_context.rb +1 -2
- data/test/tc_reader.rb +24 -14
- data/test/tc_relaxng.rb +18 -6
- data/test/tc_sax_parser.rb +48 -13
- data/test/tc_schema.rb +20 -8
- data/test/tc_well_formed.rb +2 -1
- data/test/tc_xml.rb +212 -0
- data/test/tc_xpath.rb +60 -46
- data/test/tc_xpointer.rb +7 -11
- data/test/test_suite.rb +4 -3
- metadata +26 -109
- data/doc/rdoc/classes/LibXML.html +0 -241
- data/doc/rdoc/classes/LibXML/XML.html +0 -185
- data/doc/rdoc/classes/LibXML/XML/Attr.html +0 -1010
- data/doc/rdoc/classes/LibXML/XML/Attributes.html +0 -526
- data/doc/rdoc/classes/LibXML/XML/Document.html +0 -1489
- data/doc/rdoc/classes/LibXML/XML/Dtd.html +0 -213
- data/doc/rdoc/classes/LibXML/XML/Error.html +0 -117
- data/doc/rdoc/classes/LibXML/XML/HTMLParser.html +0 -348
- data/doc/rdoc/classes/LibXML/XML/InputCallbacks.html +0 -160
- data/doc/rdoc/classes/LibXML/XML/NS.html +0 -381
- data/doc/rdoc/classes/LibXML/XML/Node.html +0 -3396
- data/doc/rdoc/classes/LibXML/XML/Node/FailedModify.html +0 -123
- data/doc/rdoc/classes/LibXML/XML/Node/Set.html +0 -440
- data/doc/rdoc/classes/LibXML/XML/Node/SetNamespace.html +0 -123
- data/doc/rdoc/classes/LibXML/XML/Node/UnknownType.html +0 -123
- data/doc/rdoc/classes/LibXML/XML/Parser.html +0 -2239
- data/doc/rdoc/classes/LibXML/XML/Parser/Context.html +0 -1255
- data/doc/rdoc/classes/LibXML/XML/Parser/ParseError.html +0 -123
- data/doc/rdoc/classes/LibXML/XML/Reader.html +0 -2264
- data/doc/rdoc/classes/LibXML/XML/RelaxNG.html +0 -237
- data/doc/rdoc/classes/LibXML/XML/SaxParser.html +0 -415
- data/doc/rdoc/classes/LibXML/XML/Schema.html +0 -308
- data/doc/rdoc/classes/LibXML/XML/State.html +0 -124
- data/doc/rdoc/classes/LibXML/XML/Tree.html +0 -111
- data/doc/rdoc/classes/LibXML/XML/XInclude.html +0 -123
- data/doc/rdoc/classes/LibXML/XML/XInclude/Error.html +0 -117
- data/doc/rdoc/classes/LibXML/XML/XMLParserOptions.html +0 -198
- data/doc/rdoc/classes/LibXML/XML/XPath.html +0 -184
- data/doc/rdoc/classes/LibXML/XML/XPath/Context.html +0 -404
- data/doc/rdoc/classes/LibXML/XML/XPath/InvalidPath.html +0 -172
- data/doc/rdoc/classes/LibXML/XML/XPath/Object.html +0 -627
- data/doc/rdoc/classes/LibXML/XML/XPointer.html +0 -170
- data/doc/rdoc/classes/LibXML/XML/XPointer/Context.html +0 -123
- data/doc/rdoc/classes/LibXML/XML/XPointer/Context/InvalidPath.html +0 -117
- data/doc/rdoc/classes/LibXML/XML/XPointer/InvalidExpression.html +0 -124
- data/doc/rdoc/classes/singleton.html +0 -114
- data/doc/rdoc/created.rid +0 -1
- data/doc/rdoc/files/CHANGES.html +0 -442
- data/doc/rdoc/files/LICENSE.html +0 -133
- data/doc/rdoc/files/README.html +0 -388
- data/doc/rdoc/files/VERSION.html +0 -107
- data/doc/rdoc/files/ext/libxml/cbg_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/libxml_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_attr_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_attributes_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_document_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_dtd_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_html_parser_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_input_cbg_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_node_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_node_set_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_ns_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_parser_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_parser_context_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_reader_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_relaxng_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_sax_parser_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_schema_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_state_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_xinclude_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_context_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_object_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_xpointer_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_xpointer_context_c.html +0 -101
- data/doc/rdoc/files/lib/libxml/attr_rb.html +0 -108
- data/doc/rdoc/files/lib/libxml/attributes_rb.html +0 -108
- data/doc/rdoc/files/lib/libxml/document_rb.html +0 -108
- data/doc/rdoc/files/lib/libxml/node_rb.html +0 -108
- data/doc/rdoc/files/lib/libxml/node_set_rb.html +0 -108
- data/doc/rdoc/files/lib/libxml/parser_options_rb.html +0 -107
- data/doc/rdoc/files/lib/libxml/parser_rb.html +0 -101
- data/doc/rdoc/files/lib/libxml/properties_rb.html +0 -108
- data/doc/rdoc/files/lib/libxml/tree_rb.html +0 -107
- data/doc/rdoc/files/lib/libxml_rb.html +0 -124
- data/doc/rdoc/files/lib/xml/libxml_rb.html +0 -124
- data/doc/rdoc/files/lib/xml_rb.html +0 -134
- data/doc/rdoc/fr_class_index.html +0 -62
- data/doc/rdoc/fr_file_index.html +0 -66
- data/doc/rdoc/fr_method_index.html +0 -392
- data/doc/rdoc/index.html +0 -24
- data/doc/rdoc/rdoc-style.css +0 -208
- data/ext/libxml/ruby_xml_node_set.c +0 -172
- data/ext/libxml/ruby_xml_node_set.h +0 -20
- data/ext/libxml/ruby_xml_xpointer_context.c +0 -22
- data/ext/libxml/ruby_xml_xpointer_context.h +0 -18
- data/lib/libxml/node_set.rb +0 -27
- data/test/tc_node_set.rb +0 -24
- data/test/tc_node_set2.rb +0 -37
@@ -0,0 +1,13 @@
|
|
1
|
+
/* $Id: ruby_xml_ns.h 324 2008-07-08 23:00:02Z cfis $ */
|
2
|
+
|
3
|
+
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
+
|
5
|
+
#ifndef __RUBY_XML_ERROR__
|
6
|
+
#define __RUBY_XML_ERROR__
|
7
|
+
|
8
|
+
extern VALUE eXMLError;
|
9
|
+
|
10
|
+
void ruby_init_xml_error();
|
11
|
+
void ruby_xml_raise(xmlErrorPtr xerror);
|
12
|
+
|
13
|
+
#endif
|
@@ -1,312 +1,72 @@
|
|
1
|
-
/* $Id: ruby_xml_html_parser.c
|
1
|
+
/* $Id: ruby_xml_html_parser.c 580 2008-11-18 09:24:43Z cfis $ */
|
2
2
|
|
3
3
|
/* Please see the LICENSE file for copyright and distribution information */
|
4
4
|
|
5
5
|
#include "ruby_libxml.h"
|
6
6
|
|
7
7
|
VALUE cXMLHTMLParser;
|
8
|
-
|
9
|
-
|
10
|
-
//ctxtRead(FILE *f, char * buf, int len) {
|
11
|
-
// return(fread(buf, 1, len, f));
|
12
|
-
//}
|
8
|
+
ID INPUT_ATTR;
|
9
|
+
ID CONTEXT_ATTR;
|
13
10
|
|
14
11
|
|
15
12
|
/*
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
VALUE
|
23
|
-
ruby_xml_html_parser_filename_get(VALUE self) {
|
24
|
-
ruby_xml_html_parser *rxp;
|
25
|
-
rx_file_data *data;
|
13
|
+
* Document-class: LibXML::XML::HTMLParser
|
14
|
+
*
|
15
|
+
* The HTML parser implements an HTML 4.0 non-verifying parser with an API
|
16
|
+
* compatible with the XML::Parser. In contrast with the XML::Parser,
|
17
|
+
* it can parse "real world" HTML, even if it is severely broken from a
|
18
|
+
* specification point of view. */
|
26
19
|
|
27
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
28
|
-
if (rxp->data == NULL)
|
29
|
-
return(Qnil);
|
30
|
-
|
31
|
-
if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE)
|
32
|
-
return(Qnil);
|
33
|
-
|
34
|
-
data = (rx_file_data *)rxp->data;
|
35
|
-
return(data->filename);
|
36
|
-
}
|
37
|
-
*/
|
38
20
|
|
39
21
|
/*
|
40
22
|
* call-seq:
|
41
|
-
*
|
23
|
+
* XML::HTMLParser.initialize -> parser
|
42
24
|
*
|
43
|
-
*
|
25
|
+
* Initializes a new parser instance with no pre-determined source.
|
44
26
|
*/
|
45
|
-
/*
|
46
27
|
VALUE
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
Check_Type(filename, T_STRING);
|
53
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
54
|
-
|
55
|
-
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
|
56
|
-
if (rxp->data != NULL)
|
57
|
-
rb_fatal("crap, this should be null");
|
58
|
-
|
59
|
-
rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
|
60
|
-
data = ALLOC(rx_file_data);
|
61
|
-
rxp->data = data;
|
62
|
-
} else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE) {
|
63
|
-
return(Qnil);
|
64
|
-
}
|
65
|
-
|
66
|
-
rxp->ctxt = ruby_xml_parser_context_new();
|
67
|
-
data = (rx_file_data *)rxp->data;
|
68
|
-
data->filename = filename;
|
69
|
-
|
70
|
-
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
71
|
-
rxpc->ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename));
|
72
|
-
if (rxpc->ctxt == NULL)
|
73
|
-
rb_sys_fail(StringValuePtr(filename));
|
74
|
-
|
75
|
-
return(data->filename);
|
28
|
+
ruby_xml_html_parser_initialize(VALUE self) {
|
29
|
+
VALUE input = rb_class_new_instance(0, NULL, cXMLInput);
|
30
|
+
rb_iv_set(self, "@input", input);
|
31
|
+
rb_iv_set(self, "@context", Qnil);
|
32
|
+
return self;
|
76
33
|
}
|
77
|
-
*/
|
78
34
|
|
79
|
-
|
80
|
-
|
81
|
-
|
35
|
+
htmlParserCtxtPtr
|
36
|
+
ruby_xml_html_parser_file_ctxt(VALUE input) {
|
37
|
+
VALUE file = rb_ivar_get(input, FILE_ATTR);
|
38
|
+
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
39
|
+
VALUE encodingStr = ruby_xml_encoding_to_s(Qnil, encoding);
|
82
40
|
|
83
|
-
|
84
|
-
case RUBY_LIBXML_SRC_TYPE_NULL:
|
85
|
-
break;
|
86
|
-
case RUBY_LIBXML_SRC_TYPE_FILE:
|
87
|
-
data = (void *)(rx_file_data *)rxp->data;
|
88
|
-
ruby_xfree((rx_file_data *)data);
|
89
|
-
break;
|
90
|
-
case RUBY_LIBXML_SRC_TYPE_STRING:
|
91
|
-
data = (void *)(rx_string_data *)rxp->data;
|
92
|
-
ruby_xfree((rx_string_data *)data);
|
93
|
-
break;
|
94
|
-
case RUBY_LIBXML_SRC_TYPE_IO:
|
95
|
-
data = (void *)(rx_io_data *)rxp->data;
|
96
|
-
ruby_xfree((rx_io_data *)data);
|
97
|
-
break;
|
98
|
-
default:
|
99
|
-
rb_fatal("Unknown data type, %d", rxp->data_type);
|
100
|
-
}
|
101
|
-
|
102
|
-
ruby_xfree(rxp);
|
41
|
+
return htmlCreateFileParserCtxt(StringValuePtr(file), StringValuePtr(encodingStr));
|
103
42
|
}
|
104
43
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
*
|
110
|
-
* Obtain the IO instance this parser works with.
|
111
|
-
*/
|
112
|
-
/*
|
113
|
-
VALUE
|
114
|
-
ruby_xml_html_parser_io_get(VALUE self, VALUE io) {
|
115
|
-
ruby_xml_html_parser *rxp;
|
116
|
-
rx_io_data *data;
|
117
|
-
|
118
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
119
|
-
|
120
|
-
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL ||
|
121
|
-
rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO ||
|
122
|
-
rxp->data == NULL)
|
123
|
-
return(Qnil);
|
124
|
-
|
125
|
-
data = (rx_io_data *)rxp->data;
|
126
|
-
|
127
|
-
return(data->io);
|
44
|
+
htmlParserCtxtPtr
|
45
|
+
ruby_xml_html_parser_str_ctxt(VALUE input) {
|
46
|
+
VALUE data = rb_ivar_get(input, STRING_ATTR);
|
47
|
+
return htmlCreateMemoryParserCtxt(StringValuePtr(data), RSTRING_LEN(data));
|
128
48
|
}
|
129
|
-
*/
|
130
49
|
|
131
50
|
/*
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
VALUE
|
139
|
-
ruby_xml_html_parser_io_set(VALUE self, VALUE io) {
|
140
|
-
ruby_xml_html_parser *rxp;
|
141
|
-
ruby_xml_parser_context *rxpc;
|
142
|
-
rx_io_data *data;
|
51
|
+
htmlParserCtxtPtr
|
52
|
+
ruby_xml_html_parser_io_ctxt(VALUE input) {
|
53
|
+
VALUE io = rb_ivar_get(input, IO_ATTR);
|
54
|
+
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
55
|
+
xmlCharEncoding xmlEncoding = NUM2INT(encoding);
|
56
|
+
|
143
57
|
OpenFile *fptr;
|
144
58
|
FILE *f;
|
145
59
|
|
146
|
-
if (!rb_obj_is_kind_of(io, rb_cIO))
|
147
|
-
rb_raise(rb_eTypeError, "need an IO object");
|
148
|
-
|
149
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
150
|
-
|
151
|
-
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
|
152
|
-
if (rxp->data != NULL)
|
153
|
-
rb_fatal("crap, this should be null");
|
154
|
-
|
155
|
-
rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
|
156
|
-
data = ALLOC(rx_io_data);
|
157
|
-
rxp->data = data;
|
158
|
-
} else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO) {
|
159
|
-
return(Qnil);
|
160
|
-
}
|
161
|
-
|
162
|
-
rxp->ctxt = ruby_xml_parser_context_new();
|
163
|
-
data = (rx_io_data *)rxp->data;
|
164
|
-
data->io = io;
|
165
|
-
|
166
60
|
GetOpenFile(io, fptr);
|
167
61
|
rb_io_check_readable(fptr);
|
168
62
|
f = GetWriteFile(fptr);
|
169
63
|
|
170
|
-
|
171
|
-
rxpc->ctxt = htmlCreateIOParserCtxt(NULL, NULL,
|
64
|
+
return htmlCreateIOParserCtxt(NULL, NULL,
|
172
65
|
(xmlInputReadCallback) ctxtRead,
|
173
|
-
NULL, f,
|
174
|
-
if (NIL_P(rxpc->ctxt))
|
175
|
-
rb_sys_fail(0);
|
176
|
-
|
177
|
-
return(data->io);
|
66
|
+
NULL, f, xmlEncoding);
|
178
67
|
}
|
179
68
|
*/
|
180
69
|
|
181
|
-
void
|
182
|
-
ruby_xml_html_parser_mark(ruby_xml_html_parser *rxp) {
|
183
|
-
if (rxp == NULL) return;
|
184
|
-
if (!NIL_P(rxp->ctxt))
|
185
|
-
rb_gc_mark(rxp->ctxt);
|
186
|
-
|
187
|
-
rb_gc_mark(LIBXML_STATE);
|
188
|
-
|
189
|
-
switch(rxp->data_type) {
|
190
|
-
case RUBY_LIBXML_SRC_TYPE_NULL:
|
191
|
-
break;
|
192
|
-
case RUBY_LIBXML_SRC_TYPE_FILE:
|
193
|
-
if (!NIL_P(((rx_file_data *)rxp->data)->filename))
|
194
|
-
rb_gc_mark(((rx_file_data *)rxp->data)->filename);
|
195
|
-
break;
|
196
|
-
case RUBY_LIBXML_SRC_TYPE_STRING:
|
197
|
-
if (!NIL_P(((rx_string_data *)rxp->data)->str))
|
198
|
-
rb_gc_mark(((rx_string_data *)rxp->data)->str);
|
199
|
-
break;
|
200
|
-
case RUBY_LIBXML_SRC_TYPE_IO:
|
201
|
-
if (!NIL_P(((rx_io_data *)rxp->data)->io))
|
202
|
-
rb_gc_mark(((rx_io_data *)rxp->data)->io);
|
203
|
-
break;
|
204
|
-
default:
|
205
|
-
rb_fatal("unknown datatype: %d", rxp->data_type);
|
206
|
-
}
|
207
|
-
}
|
208
|
-
|
209
|
-
|
210
|
-
/*
|
211
|
-
* call-seq:
|
212
|
-
* XML::HTMLParser.new -> parser
|
213
|
-
*
|
214
|
-
* Create a new parser instance with no pre-determined source.
|
215
|
-
*/
|
216
|
-
VALUE
|
217
|
-
ruby_xml_html_parser_new(VALUE class) {
|
218
|
-
ruby_xml_html_parser *rxp;
|
219
|
-
|
220
|
-
rxp = ALLOC(ruby_xml_html_parser);
|
221
|
-
rxp->ctxt = Qnil;
|
222
|
-
rxp->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
|
223
|
-
rxp->data = NULL;
|
224
|
-
rxp->parsed = 0;
|
225
|
-
|
226
|
-
return(Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
|
227
|
-
ruby_xml_html_parser_free, rxp));
|
228
|
-
}
|
229
|
-
|
230
|
-
|
231
|
-
/*
|
232
|
-
* call-seq:
|
233
|
-
* XML::HTMLParser.file -> parser
|
234
|
-
*
|
235
|
-
* Create a new parser instance that will read the specified file.
|
236
|
-
*/
|
237
|
-
/*
|
238
|
-
VALUE
|
239
|
-
ruby_xml_html_parser_new_file(VALUE class, VALUE filename) {
|
240
|
-
VALUE obj;
|
241
|
-
ruby_xml_html_parser *rxp;
|
242
|
-
rx_file_data *data;
|
243
|
-
|
244
|
-
obj = ruby_xml_html_parser_new(class);
|
245
|
-
Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
|
246
|
-
|
247
|
-
data = ALLOC(rx_file_data);
|
248
|
-
rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
|
249
|
-
rxp->data = data;
|
250
|
-
|
251
|
-
ruby_xml_html_parser_filename_set(obj, filename);
|
252
|
-
|
253
|
-
return(obj);
|
254
|
-
}
|
255
|
-
*/
|
256
|
-
|
257
|
-
/*
|
258
|
-
* call-seq:
|
259
|
-
* XML::HTMLParser.io -> parser
|
260
|
-
*
|
261
|
-
* Create a new parser instance that will read from the
|
262
|
-
* specified IO object.
|
263
|
-
*/
|
264
|
-
/*
|
265
|
-
VALUE
|
266
|
-
ruby_xml_html_parser_new_io(VALUE class, VALUE io) {
|
267
|
-
VALUE obj;
|
268
|
-
ruby_xml_html_parser *rxp;
|
269
|
-
rx_io_data *data;
|
270
|
-
|
271
|
-
obj = ruby_xml_html_parser_new(class);
|
272
|
-
Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
|
273
|
-
|
274
|
-
data = ALLOC(rx_io_data);
|
275
|
-
rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
|
276
|
-
rxp->data = data;
|
277
|
-
|
278
|
-
ruby_xml_html_parser_io_set(obj, io);
|
279
|
-
|
280
|
-
return(obj);
|
281
|
-
}
|
282
|
-
*/
|
283
|
-
|
284
|
-
/*
|
285
|
-
* call-seq:
|
286
|
-
* XML::HTMLParser.string -> parser
|
287
|
-
*
|
288
|
-
* Create a new parser instance that will parse the given
|
289
|
-
* string.
|
290
|
-
*/
|
291
|
-
VALUE
|
292
|
-
ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
|
293
|
-
VALUE obj;
|
294
|
-
ruby_xml_html_parser *rxp;
|
295
|
-
rx_string_data *data;
|
296
|
-
|
297
|
-
obj = ruby_xml_html_parser_new(class);
|
298
|
-
Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
|
299
|
-
|
300
|
-
data = ALLOC(rx_string_data);
|
301
|
-
rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
|
302
|
-
rxp->data = data;
|
303
|
-
|
304
|
-
ruby_xml_html_parser_str_set(obj, str);
|
305
|
-
|
306
|
-
return(obj);
|
307
|
-
}
|
308
|
-
|
309
|
-
|
310
70
|
/*
|
311
71
|
* call-seq:
|
312
72
|
* parser.parse -> document
|
@@ -317,111 +77,39 @@ ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
|
|
317
77
|
*/
|
318
78
|
VALUE
|
319
79
|
ruby_xml_html_parser_parse(VALUE self) {
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
* but still allows the resulting tree to be used.
|
337
|
-
*/
|
338
|
-
htmlParseDocument(rxpc->ctxt);
|
339
|
-
xdp = rxpc->ctxt->myDoc;
|
340
|
-
rxp->parsed = 1;
|
341
|
-
|
342
|
-
doc = ruby_xml_document_wrap(xdp);
|
343
|
-
break;
|
344
|
-
default:
|
345
|
-
rb_fatal("Unknown data type, %d", rxp->data_type);
|
346
|
-
}
|
347
|
-
|
348
|
-
return(doc);
|
349
|
-
}
|
350
|
-
|
351
|
-
|
352
|
-
/*
|
353
|
-
* call-seq:
|
354
|
-
* parser.context -> context
|
355
|
-
*
|
356
|
-
* Obtain the XML::Parser::Context associated with this
|
357
|
-
* parser.
|
358
|
-
*/
|
359
|
-
VALUE
|
360
|
-
ruby_xml_html_parser_context_get(VALUE self) {
|
361
|
-
ruby_xml_html_parser *rxp;
|
362
|
-
|
363
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
364
|
-
if (rxp->ctxt == Qnil)
|
365
|
-
return(Qnil);
|
80
|
+
xmlParserCtxtPtr ctxt;
|
81
|
+
VALUE context;
|
82
|
+
VALUE input = rb_ivar_get(self, INPUT_ATTR);
|
83
|
+
|
84
|
+
context = rb_ivar_get(self, CONTEXT_ATTR);
|
85
|
+
if (context != Qnil)
|
86
|
+
rb_raise(rb_eRuntimeError, "You cannot parse a data source twice");
|
87
|
+
|
88
|
+
if (rb_ivar_get(input, FILE_ATTR) != Qnil)
|
89
|
+
ctxt = ruby_xml_html_parser_file_ctxt(input);
|
90
|
+
else if (rb_ivar_get(input, STRING_ATTR) != Qnil)
|
91
|
+
ctxt = ruby_xml_html_parser_str_ctxt(input);
|
92
|
+
/*else if (rb_ivar_get(input, DOCUMENT_ATTR) != Qnil)
|
93
|
+
ctxt = ruby_xml_html_parser_parse_document(input);
|
94
|
+
else if (rb_ivar_get(input, IO_ATTR) != Qnil)
|
95
|
+
ctxt = ruby_xml_html_parser_io_ctxt(input);*/
|
366
96
|
else
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
ruby_xml_html_parser_str_get(VALUE self) {
|
379
|
-
ruby_xml_html_parser *rxp;
|
380
|
-
rx_string_data *data;
|
381
|
-
|
382
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
383
|
-
if (rxp->data == NULL || rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING)
|
384
|
-
return(Qnil);
|
385
|
-
|
386
|
-
data = (rx_string_data *)rxp->data;
|
387
|
-
return(data->str);
|
388
|
-
}
|
389
|
-
|
390
|
-
|
391
|
-
/*
|
392
|
-
* call-seq:
|
393
|
-
* parser.string = "string"
|
394
|
-
*
|
395
|
-
* Set the string this parser works with.
|
396
|
-
*/
|
397
|
-
VALUE
|
398
|
-
ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
|
399
|
-
ruby_xml_html_parser *rxp;
|
400
|
-
ruby_xml_parser_context *rxpc;
|
401
|
-
rx_string_data *data;
|
402
|
-
|
403
|
-
Check_Type(str, T_STRING);
|
404
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
405
|
-
|
406
|
-
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
|
407
|
-
rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
|
408
|
-
data = ALLOC(rx_string_data);
|
409
|
-
rxp->data = data;
|
410
|
-
} else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
|
411
|
-
return(Qnil);
|
97
|
+
rb_raise(rb_eArgError, "You must specify a parser data source");
|
98
|
+
|
99
|
+
if (!ctxt)
|
100
|
+
ruby_xml_raise(&xmlLastError);
|
101
|
+
|
102
|
+
context = ruby_xml_parser_context_wrap(ctxt);
|
103
|
+
rb_ivar_set(self, CONTEXT_ATTR, context);
|
104
|
+
|
105
|
+
if (htmlParseDocument(ctxt) == -1 || !ctxt->wellFormed) {
|
106
|
+
xmlFreeDoc(ctxt->myDoc);
|
107
|
+
ruby_xml_raise(&ctxt->lastError);
|
412
108
|
}
|
413
109
|
|
414
|
-
|
415
|
-
data = (rx_string_data *)rxp->data;
|
416
|
-
data->str = str;
|
417
|
-
|
418
|
-
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
419
|
-
rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));
|
420
|
-
|
421
|
-
return(data->str);
|
110
|
+
return ruby_xml_document_wrap(ctxt->myDoc);
|
422
111
|
}
|
423
112
|
|
424
|
-
|
425
113
|
// Rdoc needs to know
|
426
114
|
#ifdef RDOC_NEVER_DEFINED
|
427
115
|
mLibXML = rb_define_module("LibXML");
|
@@ -430,20 +118,16 @@ ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
|
|
430
118
|
|
431
119
|
void
|
432
120
|
ruby_init_html_parser(void) {
|
121
|
+
INPUT_ATTR = rb_intern("@input");
|
122
|
+
CONTEXT_ATTR = rb_intern("@context");
|
123
|
+
|
433
124
|
cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
|
125
|
+
|
126
|
+
/* Attributes */
|
127
|
+
rb_define_attr(cXMLHTMLParser, "input", 1, 0);
|
128
|
+
rb_define_attr(cXMLHTMLParser, "context", 1, 0);
|
434
129
|
|
435
|
-
|
436
|
-
|
437
|
-
/*
|
438
|
-
rb_define_singleton_method(cXMLHTMLParser, "file", ruby_xml_html_parser_new_file, 1);
|
439
|
-
rb_define_singleton_method(cXMLHTMLParser, "io", ruby_xml_html_parser_new_io, 1);
|
440
|
-
rb_define_method(cXMLHTMLParser, "filename", ruby_xml_html_parser_filename_get, 0);
|
441
|
-
rb_define_method(cXMLHTMLParser, "filename=", ruby_xml_html_parser_filename_set, 1);
|
442
|
-
rb_define_method(cXMLHTMLParser, "io", ruby_xml_html_parser_io_get, 0);
|
443
|
-
rb_define_method(cXMLHTMLParser, "io=", ruby_xml_html_parser_io_set, 1);
|
444
|
-
*/
|
130
|
+
/* Instance methods */
|
131
|
+
rb_define_method(cXMLHTMLParser, "initialize", ruby_xml_html_parser_initialize, 0);
|
445
132
|
rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
|
446
|
-
rb_define_method(cXMLHTMLParser, "context", ruby_xml_html_parser_context_get, 0);
|
447
|
-
rb_define_method(cXMLHTMLParser, "string", ruby_xml_html_parser_str_get, 0);
|
448
|
-
rb_define_method(cXMLHTMLParser, "string=", ruby_xml_html_parser_str_set, 1);
|
449
133
|
}
|