libxml-ruby 0.8.3-x86-mswin32-60 → 0.9.0-x86-mswin32-60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES +18 -0
- data/RAKEFILE +15 -39
- data/README +48 -47
- data/ext/libxml/libxml.c +847 -22
- data/ext/libxml/ruby_libxml.h +71 -95
- data/ext/libxml/ruby_xml_attr.c +500 -500
- data/ext/libxml/ruby_xml_attributes.c +1 -1
- data/ext/libxml/ruby_xml_document.c +1144 -1135
- data/ext/libxml/ruby_xml_document.h +4 -11
- data/ext/libxml/ruby_xml_dtd.c +27 -0
- data/ext/libxml/ruby_xml_encoding.c +164 -0
- data/ext/libxml/ruby_xml_encoding.h +13 -0
- data/ext/libxml/ruby_xml_error.c +941 -0
- data/ext/libxml/ruby_xml_error.h +13 -0
- data/ext/libxml/ruby_xml_html_parser.c +71 -387
- data/ext/libxml/ruby_xml_html_parser.h +1 -17
- data/ext/libxml/ruby_xml_input.c +179 -0
- data/ext/libxml/ruby_xml_input.h +18 -0
- data/ext/libxml/ruby_xml_input_cbg.c +17 -3
- data/ext/libxml/ruby_xml_node.c +1566 -1582
- data/ext/libxml/ruby_xml_node.h +1 -4
- data/ext/libxml/ruby_xml_ns.c +14 -3
- data/ext/libxml/ruby_xml_parser.c +164 -1398
- data/ext/libxml/ruby_xml_parser.h +5 -17
- data/ext/libxml/ruby_xml_parser_context.c +131 -169
- data/ext/libxml/ruby_xml_parser_context.h +2 -9
- data/ext/libxml/ruby_xml_reader.c +910 -945
- data/ext/libxml/ruby_xml_relaxng.c +32 -3
- data/ext/libxml/ruby_xml_sax_parser.c +106 -364
- data/ext/libxml/ruby_xml_sax_parser.h +1 -37
- data/ext/libxml/ruby_xml_schema.c +174 -145
- data/ext/libxml/ruby_xml_xinclude.c +9 -5
- data/ext/libxml/ruby_xml_xpath.c +25 -6
- data/ext/libxml/ruby_xml_xpath.h +1 -2
- data/ext/libxml/ruby_xml_xpath_context.c +17 -19
- data/ext/libxml/ruby_xml_xpath_object.c +60 -56
- data/ext/libxml/ruby_xml_xpointer.c +11 -5
- data/ext/libxml/sax_parser_callbacks.inc +42 -37
- data/ext/libxml/version.h +3 -3
- data/ext/mingw/Rakefile +20 -27
- data/ext/mingw/build.rake +41 -0
- data/{lib → ext/mingw}/libiconv-2.dll +0 -0
- data/ext/mingw/libxml2-2.dll +0 -0
- data/ext/mingw/libxml_ruby.dll.a +0 -0
- data/ext/mingw/libxml_ruby.so +0 -0
- data/ext/vc/libxml_ruby.vcproj +23 -15
- data/lib/libxml.rb +8 -2
- data/lib/libxml/document.rb +16 -4
- data/lib/libxml/error.rb +84 -0
- data/lib/libxml/hpricot.rb +76 -0
- data/lib/libxml/html_parser.rb +61 -0
- data/lib/libxml/node.rb +36 -25
- data/lib/libxml/parser.rb +312 -33
- data/lib/libxml/parser_context.rb +17 -0
- data/lib/libxml/properties.rb +15 -2
- data/lib/libxml/reader.rb +15 -0
- data/lib/libxml/sax_callbacks.rb +179 -0
- data/lib/libxml/sax_parser.rb +42 -0
- data/lib/libxml/tree.rb +1 -2
- data/lib/libxml/xpath_object.rb +12 -0
- data/test/model/atom.xml +4 -0
- data/test/tc_attributes.rb +43 -19
- data/test/tc_document.rb +1 -1
- data/test/tc_document_write.rb +15 -8
- data/test/tc_dtd.rb +36 -20
- data/test/tc_encoding.rb +13 -0
- data/test/tc_error.rb +136 -0
- data/test/tc_node.rb +2 -3
- data/test/tc_node_copy.rb +1 -1
- data/test/tc_node_edit.rb +6 -0
- data/test/tc_ns.rb +18 -0
- data/test/tc_parser.rb +113 -228
- data/test/tc_parser_context.rb +1 -2
- data/test/tc_reader.rb +24 -14
- data/test/tc_relaxng.rb +18 -6
- data/test/tc_sax_parser.rb +48 -13
- data/test/tc_schema.rb +20 -8
- data/test/tc_well_formed.rb +2 -1
- data/test/tc_xml.rb +212 -0
- data/test/tc_xpath.rb +60 -46
- data/test/tc_xpointer.rb +7 -11
- data/test/test_suite.rb +4 -3
- metadata +32 -115
- data/doc/rdoc/classes/LibXML.html +0 -241
- data/doc/rdoc/classes/LibXML/XML.html +0 -185
- data/doc/rdoc/classes/LibXML/XML/Attr.html +0 -1010
- data/doc/rdoc/classes/LibXML/XML/Attributes.html +0 -526
- data/doc/rdoc/classes/LibXML/XML/Document.html +0 -1489
- data/doc/rdoc/classes/LibXML/XML/Dtd.html +0 -213
- data/doc/rdoc/classes/LibXML/XML/Error.html +0 -117
- data/doc/rdoc/classes/LibXML/XML/HTMLParser.html +0 -348
- data/doc/rdoc/classes/LibXML/XML/InputCallbacks.html +0 -160
- data/doc/rdoc/classes/LibXML/XML/NS.html +0 -381
- data/doc/rdoc/classes/LibXML/XML/Node.html +0 -3396
- data/doc/rdoc/classes/LibXML/XML/Node/FailedModify.html +0 -123
- data/doc/rdoc/classes/LibXML/XML/Node/Set.html +0 -440
- data/doc/rdoc/classes/LibXML/XML/Node/SetNamespace.html +0 -123
- data/doc/rdoc/classes/LibXML/XML/Node/UnknownType.html +0 -123
- data/doc/rdoc/classes/LibXML/XML/Parser.html +0 -2239
- data/doc/rdoc/classes/LibXML/XML/Parser/Context.html +0 -1255
- data/doc/rdoc/classes/LibXML/XML/Parser/ParseError.html +0 -123
- data/doc/rdoc/classes/LibXML/XML/Reader.html +0 -2264
- data/doc/rdoc/classes/LibXML/XML/RelaxNG.html +0 -237
- data/doc/rdoc/classes/LibXML/XML/SaxParser.html +0 -415
- data/doc/rdoc/classes/LibXML/XML/Schema.html +0 -308
- data/doc/rdoc/classes/LibXML/XML/State.html +0 -124
- data/doc/rdoc/classes/LibXML/XML/Tree.html +0 -111
- data/doc/rdoc/classes/LibXML/XML/XInclude.html +0 -123
- data/doc/rdoc/classes/LibXML/XML/XInclude/Error.html +0 -117
- data/doc/rdoc/classes/LibXML/XML/XMLParserOptions.html +0 -198
- data/doc/rdoc/classes/LibXML/XML/XPath.html +0 -184
- data/doc/rdoc/classes/LibXML/XML/XPath/Context.html +0 -404
- data/doc/rdoc/classes/LibXML/XML/XPath/InvalidPath.html +0 -172
- data/doc/rdoc/classes/LibXML/XML/XPath/Object.html +0 -627
- data/doc/rdoc/classes/LibXML/XML/XPointer.html +0 -170
- data/doc/rdoc/classes/LibXML/XML/XPointer/Context.html +0 -123
- data/doc/rdoc/classes/LibXML/XML/XPointer/Context/InvalidPath.html +0 -117
- data/doc/rdoc/classes/LibXML/XML/XPointer/InvalidExpression.html +0 -124
- data/doc/rdoc/classes/singleton.html +0 -114
- data/doc/rdoc/created.rid +0 -1
- data/doc/rdoc/files/CHANGES.html +0 -442
- data/doc/rdoc/files/LICENSE.html +0 -133
- data/doc/rdoc/files/README.html +0 -388
- data/doc/rdoc/files/VERSION.html +0 -107
- data/doc/rdoc/files/ext/libxml/cbg_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/libxml_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_attr_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_attributes_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_document_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_dtd_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_html_parser_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_input_cbg_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_node_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_node_set_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_ns_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_parser_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_parser_context_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_reader_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_relaxng_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_sax_parser_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_schema_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_state_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_xinclude_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_context_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_xpath_object_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_xpointer_c.html +0 -101
- data/doc/rdoc/files/ext/libxml/ruby_xml_xpointer_context_c.html +0 -101
- data/doc/rdoc/files/lib/libxml/attr_rb.html +0 -108
- data/doc/rdoc/files/lib/libxml/attributes_rb.html +0 -108
- data/doc/rdoc/files/lib/libxml/document_rb.html +0 -108
- data/doc/rdoc/files/lib/libxml/node_rb.html +0 -108
- data/doc/rdoc/files/lib/libxml/node_set_rb.html +0 -108
- data/doc/rdoc/files/lib/libxml/parser_options_rb.html +0 -107
- data/doc/rdoc/files/lib/libxml/parser_rb.html +0 -101
- data/doc/rdoc/files/lib/libxml/properties_rb.html +0 -108
- data/doc/rdoc/files/lib/libxml/tree_rb.html +0 -107
- data/doc/rdoc/files/lib/libxml_rb.html +0 -124
- data/doc/rdoc/files/lib/xml/libxml_rb.html +0 -124
- data/doc/rdoc/files/lib/xml_rb.html +0 -134
- data/doc/rdoc/fr_class_index.html +0 -62
- data/doc/rdoc/fr_file_index.html +0 -66
- data/doc/rdoc/fr_method_index.html +0 -392
- data/doc/rdoc/index.html +0 -24
- data/doc/rdoc/rdoc-style.css +0 -208
- data/ext/libxml/ruby_xml_node_set.c +0 -172
- data/ext/libxml/ruby_xml_node_set.h +0 -20
- data/ext/libxml/ruby_xml_xpointer_context.c +0 -22
- data/ext/libxml/ruby_xml_xpointer_context.h +0 -18
- data/lib/libxml/node_set.rb +0 -27
- data/lib/libxml2-2.dll +0 -0
- data/lib/libxml_ruby.dll.a +0 -0
- data/lib/libxml_ruby.so +0 -0
- data/test/tc_node_set.rb +0 -24
- data/test/tc_node_set2.rb +0 -37
@@ -0,0 +1,13 @@
|
|
1
|
+
/* $Id: ruby_xml_ns.h 324 2008-07-08 23:00:02Z cfis $ */
|
2
|
+
|
3
|
+
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
+
|
5
|
+
#ifndef __RUBY_XML_ERROR__
|
6
|
+
#define __RUBY_XML_ERROR__
|
7
|
+
|
8
|
+
extern VALUE eXMLError;
|
9
|
+
|
10
|
+
void ruby_init_xml_error();
|
11
|
+
void ruby_xml_raise(xmlErrorPtr xerror);
|
12
|
+
|
13
|
+
#endif
|
@@ -1,312 +1,72 @@
|
|
1
|
-
/* $Id: ruby_xml_html_parser.c
|
1
|
+
/* $Id: ruby_xml_html_parser.c 580 2008-11-18 09:24:43Z cfis $ */
|
2
2
|
|
3
3
|
/* Please see the LICENSE file for copyright and distribution information */
|
4
4
|
|
5
5
|
#include "ruby_libxml.h"
|
6
6
|
|
7
7
|
VALUE cXMLHTMLParser;
|
8
|
-
|
9
|
-
|
10
|
-
//ctxtRead(FILE *f, char * buf, int len) {
|
11
|
-
// return(fread(buf, 1, len, f));
|
12
|
-
//}
|
8
|
+
ID INPUT_ATTR;
|
9
|
+
ID CONTEXT_ATTR;
|
13
10
|
|
14
11
|
|
15
12
|
/*
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
VALUE
|
23
|
-
ruby_xml_html_parser_filename_get(VALUE self) {
|
24
|
-
ruby_xml_html_parser *rxp;
|
25
|
-
rx_file_data *data;
|
13
|
+
* Document-class: LibXML::XML::HTMLParser
|
14
|
+
*
|
15
|
+
* The HTML parser implements an HTML 4.0 non-verifying parser with an API
|
16
|
+
* compatible with the XML::Parser. In contrast with the XML::Parser,
|
17
|
+
* it can parse "real world" HTML, even if it is severely broken from a
|
18
|
+
* specification point of view. */
|
26
19
|
|
27
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
28
|
-
if (rxp->data == NULL)
|
29
|
-
return(Qnil);
|
30
|
-
|
31
|
-
if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE)
|
32
|
-
return(Qnil);
|
33
|
-
|
34
|
-
data = (rx_file_data *)rxp->data;
|
35
|
-
return(data->filename);
|
36
|
-
}
|
37
|
-
*/
|
38
20
|
|
39
21
|
/*
|
40
22
|
* call-seq:
|
41
|
-
*
|
23
|
+
* XML::HTMLParser.initialize -> parser
|
42
24
|
*
|
43
|
-
*
|
25
|
+
* Initializes a new parser instance with no pre-determined source.
|
44
26
|
*/
|
45
|
-
/*
|
46
27
|
VALUE
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
Check_Type(filename, T_STRING);
|
53
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
54
|
-
|
55
|
-
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
|
56
|
-
if (rxp->data != NULL)
|
57
|
-
rb_fatal("crap, this should be null");
|
58
|
-
|
59
|
-
rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
|
60
|
-
data = ALLOC(rx_file_data);
|
61
|
-
rxp->data = data;
|
62
|
-
} else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_FILE) {
|
63
|
-
return(Qnil);
|
64
|
-
}
|
65
|
-
|
66
|
-
rxp->ctxt = ruby_xml_parser_context_new();
|
67
|
-
data = (rx_file_data *)rxp->data;
|
68
|
-
data->filename = filename;
|
69
|
-
|
70
|
-
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
71
|
-
rxpc->ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename));
|
72
|
-
if (rxpc->ctxt == NULL)
|
73
|
-
rb_sys_fail(StringValuePtr(filename));
|
74
|
-
|
75
|
-
return(data->filename);
|
28
|
+
ruby_xml_html_parser_initialize(VALUE self) {
|
29
|
+
VALUE input = rb_class_new_instance(0, NULL, cXMLInput);
|
30
|
+
rb_iv_set(self, "@input", input);
|
31
|
+
rb_iv_set(self, "@context", Qnil);
|
32
|
+
return self;
|
76
33
|
}
|
77
|
-
*/
|
78
34
|
|
79
|
-
|
80
|
-
|
81
|
-
|
35
|
+
htmlParserCtxtPtr
|
36
|
+
ruby_xml_html_parser_file_ctxt(VALUE input) {
|
37
|
+
VALUE file = rb_ivar_get(input, FILE_ATTR);
|
38
|
+
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
39
|
+
VALUE encodingStr = ruby_xml_encoding_to_s(Qnil, encoding);
|
82
40
|
|
83
|
-
|
84
|
-
case RUBY_LIBXML_SRC_TYPE_NULL:
|
85
|
-
break;
|
86
|
-
case RUBY_LIBXML_SRC_TYPE_FILE:
|
87
|
-
data = (void *)(rx_file_data *)rxp->data;
|
88
|
-
ruby_xfree((rx_file_data *)data);
|
89
|
-
break;
|
90
|
-
case RUBY_LIBXML_SRC_TYPE_STRING:
|
91
|
-
data = (void *)(rx_string_data *)rxp->data;
|
92
|
-
ruby_xfree((rx_string_data *)data);
|
93
|
-
break;
|
94
|
-
case RUBY_LIBXML_SRC_TYPE_IO:
|
95
|
-
data = (void *)(rx_io_data *)rxp->data;
|
96
|
-
ruby_xfree((rx_io_data *)data);
|
97
|
-
break;
|
98
|
-
default:
|
99
|
-
rb_fatal("Unknown data type, %d", rxp->data_type);
|
100
|
-
}
|
101
|
-
|
102
|
-
ruby_xfree(rxp);
|
41
|
+
return htmlCreateFileParserCtxt(StringValuePtr(file), StringValuePtr(encodingStr));
|
103
42
|
}
|
104
43
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
*
|
110
|
-
* Obtain the IO instance this parser works with.
|
111
|
-
*/
|
112
|
-
/*
|
113
|
-
VALUE
|
114
|
-
ruby_xml_html_parser_io_get(VALUE self, VALUE io) {
|
115
|
-
ruby_xml_html_parser *rxp;
|
116
|
-
rx_io_data *data;
|
117
|
-
|
118
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
119
|
-
|
120
|
-
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL ||
|
121
|
-
rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO ||
|
122
|
-
rxp->data == NULL)
|
123
|
-
return(Qnil);
|
124
|
-
|
125
|
-
data = (rx_io_data *)rxp->data;
|
126
|
-
|
127
|
-
return(data->io);
|
44
|
+
htmlParserCtxtPtr
|
45
|
+
ruby_xml_html_parser_str_ctxt(VALUE input) {
|
46
|
+
VALUE data = rb_ivar_get(input, STRING_ATTR);
|
47
|
+
return htmlCreateMemoryParserCtxt(StringValuePtr(data), RSTRING_LEN(data));
|
128
48
|
}
|
129
|
-
*/
|
130
49
|
|
131
50
|
/*
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
VALUE
|
139
|
-
ruby_xml_html_parser_io_set(VALUE self, VALUE io) {
|
140
|
-
ruby_xml_html_parser *rxp;
|
141
|
-
ruby_xml_parser_context *rxpc;
|
142
|
-
rx_io_data *data;
|
51
|
+
htmlParserCtxtPtr
|
52
|
+
ruby_xml_html_parser_io_ctxt(VALUE input) {
|
53
|
+
VALUE io = rb_ivar_get(input, IO_ATTR);
|
54
|
+
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
55
|
+
xmlCharEncoding xmlEncoding = NUM2INT(encoding);
|
56
|
+
|
143
57
|
OpenFile *fptr;
|
144
58
|
FILE *f;
|
145
59
|
|
146
|
-
if (!rb_obj_is_kind_of(io, rb_cIO))
|
147
|
-
rb_raise(rb_eTypeError, "need an IO object");
|
148
|
-
|
149
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
150
|
-
|
151
|
-
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
|
152
|
-
if (rxp->data != NULL)
|
153
|
-
rb_fatal("crap, this should be null");
|
154
|
-
|
155
|
-
rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
|
156
|
-
data = ALLOC(rx_io_data);
|
157
|
-
rxp->data = data;
|
158
|
-
} else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_IO) {
|
159
|
-
return(Qnil);
|
160
|
-
}
|
161
|
-
|
162
|
-
rxp->ctxt = ruby_xml_parser_context_new();
|
163
|
-
data = (rx_io_data *)rxp->data;
|
164
|
-
data->io = io;
|
165
|
-
|
166
60
|
GetOpenFile(io, fptr);
|
167
61
|
rb_io_check_readable(fptr);
|
168
62
|
f = GetWriteFile(fptr);
|
169
63
|
|
170
|
-
|
171
|
-
rxpc->ctxt = htmlCreateIOParserCtxt(NULL, NULL,
|
64
|
+
return htmlCreateIOParserCtxt(NULL, NULL,
|
172
65
|
(xmlInputReadCallback) ctxtRead,
|
173
|
-
NULL, f,
|
174
|
-
if (NIL_P(rxpc->ctxt))
|
175
|
-
rb_sys_fail(0);
|
176
|
-
|
177
|
-
return(data->io);
|
66
|
+
NULL, f, xmlEncoding);
|
178
67
|
}
|
179
68
|
*/
|
180
69
|
|
181
|
-
void
|
182
|
-
ruby_xml_html_parser_mark(ruby_xml_html_parser *rxp) {
|
183
|
-
if (rxp == NULL) return;
|
184
|
-
if (!NIL_P(rxp->ctxt))
|
185
|
-
rb_gc_mark(rxp->ctxt);
|
186
|
-
|
187
|
-
rb_gc_mark(LIBXML_STATE);
|
188
|
-
|
189
|
-
switch(rxp->data_type) {
|
190
|
-
case RUBY_LIBXML_SRC_TYPE_NULL:
|
191
|
-
break;
|
192
|
-
case RUBY_LIBXML_SRC_TYPE_FILE:
|
193
|
-
if (!NIL_P(((rx_file_data *)rxp->data)->filename))
|
194
|
-
rb_gc_mark(((rx_file_data *)rxp->data)->filename);
|
195
|
-
break;
|
196
|
-
case RUBY_LIBXML_SRC_TYPE_STRING:
|
197
|
-
if (!NIL_P(((rx_string_data *)rxp->data)->str))
|
198
|
-
rb_gc_mark(((rx_string_data *)rxp->data)->str);
|
199
|
-
break;
|
200
|
-
case RUBY_LIBXML_SRC_TYPE_IO:
|
201
|
-
if (!NIL_P(((rx_io_data *)rxp->data)->io))
|
202
|
-
rb_gc_mark(((rx_io_data *)rxp->data)->io);
|
203
|
-
break;
|
204
|
-
default:
|
205
|
-
rb_fatal("unknown datatype: %d", rxp->data_type);
|
206
|
-
}
|
207
|
-
}
|
208
|
-
|
209
|
-
|
210
|
-
/*
|
211
|
-
* call-seq:
|
212
|
-
* XML::HTMLParser.new -> parser
|
213
|
-
*
|
214
|
-
* Create a new parser instance with no pre-determined source.
|
215
|
-
*/
|
216
|
-
VALUE
|
217
|
-
ruby_xml_html_parser_new(VALUE class) {
|
218
|
-
ruby_xml_html_parser *rxp;
|
219
|
-
|
220
|
-
rxp = ALLOC(ruby_xml_html_parser);
|
221
|
-
rxp->ctxt = Qnil;
|
222
|
-
rxp->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
|
223
|
-
rxp->data = NULL;
|
224
|
-
rxp->parsed = 0;
|
225
|
-
|
226
|
-
return(Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
|
227
|
-
ruby_xml_html_parser_free, rxp));
|
228
|
-
}
|
229
|
-
|
230
|
-
|
231
|
-
/*
|
232
|
-
* call-seq:
|
233
|
-
* XML::HTMLParser.file -> parser
|
234
|
-
*
|
235
|
-
* Create a new parser instance that will read the specified file.
|
236
|
-
*/
|
237
|
-
/*
|
238
|
-
VALUE
|
239
|
-
ruby_xml_html_parser_new_file(VALUE class, VALUE filename) {
|
240
|
-
VALUE obj;
|
241
|
-
ruby_xml_html_parser *rxp;
|
242
|
-
rx_file_data *data;
|
243
|
-
|
244
|
-
obj = ruby_xml_html_parser_new(class);
|
245
|
-
Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
|
246
|
-
|
247
|
-
data = ALLOC(rx_file_data);
|
248
|
-
rxp->data_type = RUBY_LIBXML_SRC_TYPE_FILE;
|
249
|
-
rxp->data = data;
|
250
|
-
|
251
|
-
ruby_xml_html_parser_filename_set(obj, filename);
|
252
|
-
|
253
|
-
return(obj);
|
254
|
-
}
|
255
|
-
*/
|
256
|
-
|
257
|
-
/*
|
258
|
-
* call-seq:
|
259
|
-
* XML::HTMLParser.io -> parser
|
260
|
-
*
|
261
|
-
* Create a new parser instance that will read from the
|
262
|
-
* specified IO object.
|
263
|
-
*/
|
264
|
-
/*
|
265
|
-
VALUE
|
266
|
-
ruby_xml_html_parser_new_io(VALUE class, VALUE io) {
|
267
|
-
VALUE obj;
|
268
|
-
ruby_xml_html_parser *rxp;
|
269
|
-
rx_io_data *data;
|
270
|
-
|
271
|
-
obj = ruby_xml_html_parser_new(class);
|
272
|
-
Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
|
273
|
-
|
274
|
-
data = ALLOC(rx_io_data);
|
275
|
-
rxp->data_type = RUBY_LIBXML_SRC_TYPE_IO;
|
276
|
-
rxp->data = data;
|
277
|
-
|
278
|
-
ruby_xml_html_parser_io_set(obj, io);
|
279
|
-
|
280
|
-
return(obj);
|
281
|
-
}
|
282
|
-
*/
|
283
|
-
|
284
|
-
/*
|
285
|
-
* call-seq:
|
286
|
-
* XML::HTMLParser.string -> parser
|
287
|
-
*
|
288
|
-
* Create a new parser instance that will parse the given
|
289
|
-
* string.
|
290
|
-
*/
|
291
|
-
VALUE
|
292
|
-
ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
|
293
|
-
VALUE obj;
|
294
|
-
ruby_xml_html_parser *rxp;
|
295
|
-
rx_string_data *data;
|
296
|
-
|
297
|
-
obj = ruby_xml_html_parser_new(class);
|
298
|
-
Data_Get_Struct(obj, ruby_xml_html_parser, rxp);
|
299
|
-
|
300
|
-
data = ALLOC(rx_string_data);
|
301
|
-
rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
|
302
|
-
rxp->data = data;
|
303
|
-
|
304
|
-
ruby_xml_html_parser_str_set(obj, str);
|
305
|
-
|
306
|
-
return(obj);
|
307
|
-
}
|
308
|
-
|
309
|
-
|
310
70
|
/*
|
311
71
|
* call-seq:
|
312
72
|
* parser.parse -> document
|
@@ -317,111 +77,39 @@ ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
|
|
317
77
|
*/
|
318
78
|
VALUE
|
319
79
|
ruby_xml_html_parser_parse(VALUE self) {
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
* but still allows the resulting tree to be used.
|
337
|
-
*/
|
338
|
-
htmlParseDocument(rxpc->ctxt);
|
339
|
-
xdp = rxpc->ctxt->myDoc;
|
340
|
-
rxp->parsed = 1;
|
341
|
-
|
342
|
-
doc = ruby_xml_document_wrap(xdp);
|
343
|
-
break;
|
344
|
-
default:
|
345
|
-
rb_fatal("Unknown data type, %d", rxp->data_type);
|
346
|
-
}
|
347
|
-
|
348
|
-
return(doc);
|
349
|
-
}
|
350
|
-
|
351
|
-
|
352
|
-
/*
|
353
|
-
* call-seq:
|
354
|
-
* parser.context -> context
|
355
|
-
*
|
356
|
-
* Obtain the XML::Parser::Context associated with this
|
357
|
-
* parser.
|
358
|
-
*/
|
359
|
-
VALUE
|
360
|
-
ruby_xml_html_parser_context_get(VALUE self) {
|
361
|
-
ruby_xml_html_parser *rxp;
|
362
|
-
|
363
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
364
|
-
if (rxp->ctxt == Qnil)
|
365
|
-
return(Qnil);
|
80
|
+
xmlParserCtxtPtr ctxt;
|
81
|
+
VALUE context;
|
82
|
+
VALUE input = rb_ivar_get(self, INPUT_ATTR);
|
83
|
+
|
84
|
+
context = rb_ivar_get(self, CONTEXT_ATTR);
|
85
|
+
if (context != Qnil)
|
86
|
+
rb_raise(rb_eRuntimeError, "You cannot parse a data source twice");
|
87
|
+
|
88
|
+
if (rb_ivar_get(input, FILE_ATTR) != Qnil)
|
89
|
+
ctxt = ruby_xml_html_parser_file_ctxt(input);
|
90
|
+
else if (rb_ivar_get(input, STRING_ATTR) != Qnil)
|
91
|
+
ctxt = ruby_xml_html_parser_str_ctxt(input);
|
92
|
+
/*else if (rb_ivar_get(input, DOCUMENT_ATTR) != Qnil)
|
93
|
+
ctxt = ruby_xml_html_parser_parse_document(input);
|
94
|
+
else if (rb_ivar_get(input, IO_ATTR) != Qnil)
|
95
|
+
ctxt = ruby_xml_html_parser_io_ctxt(input);*/
|
366
96
|
else
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
ruby_xml_html_parser_str_get(VALUE self) {
|
379
|
-
ruby_xml_html_parser *rxp;
|
380
|
-
rx_string_data *data;
|
381
|
-
|
382
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
383
|
-
if (rxp->data == NULL || rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING)
|
384
|
-
return(Qnil);
|
385
|
-
|
386
|
-
data = (rx_string_data *)rxp->data;
|
387
|
-
return(data->str);
|
388
|
-
}
|
389
|
-
|
390
|
-
|
391
|
-
/*
|
392
|
-
* call-seq:
|
393
|
-
* parser.string = "string"
|
394
|
-
*
|
395
|
-
* Set the string this parser works with.
|
396
|
-
*/
|
397
|
-
VALUE
|
398
|
-
ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
|
399
|
-
ruby_xml_html_parser *rxp;
|
400
|
-
ruby_xml_parser_context *rxpc;
|
401
|
-
rx_string_data *data;
|
402
|
-
|
403
|
-
Check_Type(str, T_STRING);
|
404
|
-
Data_Get_Struct(self, ruby_xml_html_parser, rxp);
|
405
|
-
|
406
|
-
if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
|
407
|
-
rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
|
408
|
-
data = ALLOC(rx_string_data);
|
409
|
-
rxp->data = data;
|
410
|
-
} else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
|
411
|
-
return(Qnil);
|
97
|
+
rb_raise(rb_eArgError, "You must specify a parser data source");
|
98
|
+
|
99
|
+
if (!ctxt)
|
100
|
+
ruby_xml_raise(&xmlLastError);
|
101
|
+
|
102
|
+
context = ruby_xml_parser_context_wrap(ctxt);
|
103
|
+
rb_ivar_set(self, CONTEXT_ATTR, context);
|
104
|
+
|
105
|
+
if (htmlParseDocument(ctxt) == -1 || !ctxt->wellFormed) {
|
106
|
+
xmlFreeDoc(ctxt->myDoc);
|
107
|
+
ruby_xml_raise(&ctxt->lastError);
|
412
108
|
}
|
413
109
|
|
414
|
-
|
415
|
-
data = (rx_string_data *)rxp->data;
|
416
|
-
data->str = str;
|
417
|
-
|
418
|
-
Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
|
419
|
-
rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));
|
420
|
-
|
421
|
-
return(data->str);
|
110
|
+
return ruby_xml_document_wrap(ctxt->myDoc);
|
422
111
|
}
|
423
112
|
|
424
|
-
|
425
113
|
// Rdoc needs to know
|
426
114
|
#ifdef RDOC_NEVER_DEFINED
|
427
115
|
mLibXML = rb_define_module("LibXML");
|
@@ -430,20 +118,16 @@ ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
|
|
430
118
|
|
431
119
|
void
|
432
120
|
ruby_init_html_parser(void) {
|
121
|
+
INPUT_ATTR = rb_intern("@input");
|
122
|
+
CONTEXT_ATTR = rb_intern("@context");
|
123
|
+
|
433
124
|
cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
|
125
|
+
|
126
|
+
/* Attributes */
|
127
|
+
rb_define_attr(cXMLHTMLParser, "input", 1, 0);
|
128
|
+
rb_define_attr(cXMLHTMLParser, "context", 1, 0);
|
434
129
|
|
435
|
-
|
436
|
-
|
437
|
-
/*
|
438
|
-
rb_define_singleton_method(cXMLHTMLParser, "file", ruby_xml_html_parser_new_file, 1);
|
439
|
-
rb_define_singleton_method(cXMLHTMLParser, "io", ruby_xml_html_parser_new_io, 1);
|
440
|
-
rb_define_method(cXMLHTMLParser, "filename", ruby_xml_html_parser_filename_get, 0);
|
441
|
-
rb_define_method(cXMLHTMLParser, "filename=", ruby_xml_html_parser_filename_set, 1);
|
442
|
-
rb_define_method(cXMLHTMLParser, "io", ruby_xml_html_parser_io_get, 0);
|
443
|
-
rb_define_method(cXMLHTMLParser, "io=", ruby_xml_html_parser_io_set, 1);
|
444
|
-
*/
|
130
|
+
/* Instance methods */
|
131
|
+
rb_define_method(cXMLHTMLParser, "initialize", ruby_xml_html_parser_initialize, 0);
|
445
132
|
rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
|
446
|
-
rb_define_method(cXMLHTMLParser, "context", ruby_xml_html_parser_context_get, 0);
|
447
|
-
rb_define_method(cXMLHTMLParser, "string", ruby_xml_html_parser_str_get, 0);
|
448
|
-
rb_define_method(cXMLHTMLParser, "string=", ruby_xml_html_parser_str_set, 1);
|
449
133
|
}
|