libxml-ruby 4.1.2 → 5.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/HISTORY +14 -0
- data/ext/libxml/ruby_libxml.h +43 -44
- data/ext/libxml/ruby_xml.c +0 -343
- data/ext/libxml/ruby_xml.h +9 -10
- data/ext/libxml/ruby_xml_attributes.h +2 -0
- data/ext/libxml/ruby_xml_document.c +6 -6
- data/ext/libxml/ruby_xml_document.h +11 -11
- data/ext/libxml/ruby_xml_dtd.c +3 -3
- data/ext/libxml/ruby_xml_encoding.h +20 -18
- data/ext/libxml/ruby_xml_error.c +9 -6
- data/ext/libxml/ruby_xml_error.h +2 -2
- data/ext/libxml/ruby_xml_html_parser_context.c +35 -21
- data/ext/libxml/ruby_xml_namespace.c +0 -3
- data/ext/libxml/ruby_xml_node.c +1394 -1398
- data/ext/libxml/ruby_xml_parser.h +1 -1
- data/ext/libxml/ruby_xml_parser_context.c +47 -39
- data/ext/libxml/ruby_xml_parser_options.c +9 -1
- data/ext/libxml/ruby_xml_parser_options.h +1 -1
- data/ext/libxml/ruby_xml_reader.c +15 -16
- data/ext/libxml/ruby_xml_sax2_handler.c +1 -1
- data/ext/libxml/ruby_xml_sax_parser.c +1 -9
- data/ext/libxml/ruby_xml_schema.c +4 -4
- data/ext/libxml/ruby_xml_version.h +5 -5
- data/ext/libxml/ruby_xml_writer.c +8 -8
- data/ext/libxml/ruby_xml_xpath.c +1 -1
- data/ext/libxml/ruby_xml_xpath_context.c +2 -2
- data/ext/libxml/ruby_xml_xpath_expression.c +1 -1
- data/lib/libxml/document.rb +13 -13
- data/lib/libxml/html_parser.rb +23 -23
- data/lib/libxml/parser.rb +26 -24
- data/test/test.rb +5 -0
- data/test/test_document_write.rb +1 -4
- data/test/test_dtd.rb +1 -4
- data/test/test_encoding.rb +1 -4
- data/test/test_helper.rb +9 -2
- data/test/test_html_parser.rb +162 -162
- data/test/test_namespace.rb +1 -3
- data/test/test_node.rb +1 -3
- data/test/test_node_write.rb +1 -4
- data/test/test_parser.rb +26 -17
- data/test/test_reader.rb +4 -4
- data/test/test_sax_parser.rb +1 -1
- data/test/test_xml.rb +0 -99
- metadata +4 -3
data/ext/libxml/ruby_xml_error.c
CHANGED
@@ -50,7 +50,7 @@ static VALUE rxml_error_get_handler(VALUE self)
|
|
50
50
|
return block;
|
51
51
|
}
|
52
52
|
|
53
|
-
VALUE rxml_error_wrap(xmlErrorPtr xerror)
|
53
|
+
VALUE rxml_error_wrap(const xmlError *xerror)
|
54
54
|
{
|
55
55
|
VALUE result = Qnil;
|
56
56
|
|
@@ -97,7 +97,7 @@ VALUE rxml_error_wrap(xmlErrorPtr xerror)
|
|
97
97
|
}
|
98
98
|
|
99
99
|
/* Hook that receives xml error message */
|
100
|
-
static void structuredErrorFunc(void *userData, xmlErrorPtr xerror)
|
100
|
+
static void structuredErrorFunc(void *userData, const xmlError *xerror)
|
101
101
|
{
|
102
102
|
VALUE error = rxml_error_wrap(xerror);
|
103
103
|
|
@@ -158,11 +158,14 @@ static VALUE rxml_error_reset_handler(VALUE self)
|
|
158
158
|
return self;
|
159
159
|
}
|
160
160
|
|
161
|
-
|
161
|
+
void rxml_raise(const xmlError *xerror)
|
162
162
|
{
|
163
|
-
|
164
|
-
|
165
|
-
|
163
|
+
if (xerror)
|
164
|
+
{
|
165
|
+
/* Wrap error up as Ruby object and send it off to ruby */
|
166
|
+
VALUE error = rxml_error_wrap(xerror);
|
167
|
+
rb_exc_raise(error);
|
168
|
+
}
|
166
169
|
}
|
167
170
|
|
168
171
|
void rxml_init_error(void)
|
data/ext/libxml/ruby_xml_error.h
CHANGED
data/ext/libxml/ruby_xml_html_parser_context.c
CHANGED
@@ -146,19 +146,23 @@ static VALUE rxml_html_parser_context_wrap(htmlParserCtxtPtr ctxt)
|
|
146
146
|
*
|
147
147
|
* Parameters:
|
148
148
|
*
|
149
|
-
* file - A filename or uri
|
149
|
+
* file - A filename or uri
|
150
|
+
* options - A or'ed together list of LibXML::XML::HTMLParser::Options values
|
150
151
|
*/
|
151
|
-
static VALUE rxml_html_parser_context_file(VALUE klass, VALUE file)
|
152
|
+
static VALUE rxml_html_parser_context_file(int argc, VALUE* argv, VALUE klass)
|
152
153
|
{
|
154
|
+
VALUE file, options;
|
155
|
+
rb_scan_args(argc, argv, "11", &file, &options);
|
156
|
+
|
153
157
|
htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(StringValuePtr(file), NULL);
|
154
158
|
if (!ctxt)
|
155
|
-
rxml_raise(&xmlLastError);
|
159
|
+
rxml_raise(xmlGetLastError());
|
156
160
|
|
157
161
|
/* This is annoying, but xmlInitParserCtxt (called indirectly above) and
|
158
162
|
xmlCtxtUseOptionsInternal (called below) initialize slightly different
|
159
163
|
context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
|
160
164
|
sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
|
161
|
-
htmlCtxtUseOptions(ctxt,
|
165
|
+
htmlCtxtUseOptions(ctxt, options == Qnil ? 0 : NUM2INT(options));
|
162
166
|
|
163
167
|
return rxml_html_parser_context_wrap(ctxt);
|
164
168
|
}
|
@@ -170,10 +174,14 @@ static VALUE rxml_html_parser_context_file(VALUE klass, VALUE file)
|
|
170
174
|
*
|
171
175
|
* Parameters:
|
172
176
|
*
|
173
|
-
* io - A ruby IO object
|
177
|
+
* io - A ruby IO object
|
178
|
+
* options - A or'ed together list of LibXML::XML::HTMLParser::Options values
|
174
179
|
*/
|
175
|
-
static VALUE rxml_html_parser_context_io(VALUE klass, VALUE io)
|
180
|
+
static VALUE rxml_html_parser_context_io(int argc, VALUE* argv, VALUE klass)
|
176
181
|
{
|
182
|
+
VALUE io, options;
|
183
|
+
rb_scan_args(argc, argv, "11", &io, &options);
|
184
|
+
|
177
185
|
VALUE result;
|
178
186
|
htmlParserCtxtPtr ctxt;
|
179
187
|
xmlParserInputBufferPtr input;
|
@@ -189,14 +197,14 @@ static VALUE rxml_html_parser_context_io(VALUE klass, VALUE io)
|
|
189
197
|
if (!ctxt)
|
190
198
|
{
|
191
199
|
xmlFreeParserInputBuffer(input);
|
192
|
-
rxml_raise(&xmlLastError);
|
200
|
+
rxml_raise(xmlGetLastError());
|
193
201
|
}
|
194
202
|
|
195
203
|
/* This is annoying, but xmlInitParserCtxt (called indirectly above) and
|
196
204
|
xmlCtxtUseOptionsInternal (called below) initialize slightly different
|
197
205
|
context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
|
198
206
|
sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
|
199
|
-
htmlCtxtUseOptions(ctxt,
|
207
|
+
htmlCtxtUseOptions(ctxt, options == Qnil ? 0 : NUM2INT(options));
|
200
208
|
|
201
209
|
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
|
202
210
|
|
@@ -204,7 +212,7 @@ static VALUE rxml_html_parser_context_io(VALUE klass, VALUE io)
|
|
204
212
|
{
|
205
213
|
xmlFreeParserInputBuffer(input);
|
206
214
|
xmlFreeParserCtxt(ctxt);
|
207
|
-
rxml_raise(&xmlLastError);
|
215
|
+
rxml_raise(xmlGetLastError());
|
208
216
|
}
|
209
217
|
inputPush(ctxt, stream);
|
210
218
|
result = rxml_html_parser_context_wrap(ctxt);
|
@@ -222,29 +230,35 @@ static VALUE rxml_html_parser_context_io(VALUE klass, VALUE io)
|
|
222
230
|
*
|
223
231
|
* Parameters:
|
224
232
|
*
|
225
|
-
* string - A string that contains the data to parse
|
233
|
+
* string - A string that contains the data to parse
|
234
|
+
* options - A or'ed together list of LibXML::XML::HTMLParser::Options values
|
226
235
|
*/
|
227
|
-
static VALUE rxml_html_parser_context_string(VALUE klass, VALUE string)
|
236
|
+
static VALUE rxml_html_parser_context_string(int argc, VALUE* argv, VALUE klass)
|
228
237
|
{
|
229
|
-
|
238
|
+
VALUE string, options;
|
239
|
+
rb_scan_args(argc, argv, "11", &string, &options);
|
240
|
+
|
230
241
|
Check_Type(string, T_STRING);
|
231
242
|
|
232
243
|
if (RSTRING_LEN(string) == 0)
|
233
244
|
rb_raise(rb_eArgError, "Must specify a string with one or more characters");
|
234
245
|
|
235
|
-
ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(string),
|
246
|
+
htmlParserCtxtPtr ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(string),
|
236
247
|
(int)RSTRING_LEN(string));
|
237
248
|
if (!ctxt)
|
238
|
-
rxml_raise(&xmlLastError);
|
249
|
+
rxml_raise(xmlGetLastError());
|
239
250
|
|
240
251
|
/* This is annoying, but xmlInitParserCtxt (called indirectly above) and
|
241
252
|
xmlCtxtUseOptionsInternal (called below) initialize slightly different
|
242
253
|
context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
|
243
254
|
sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
|
244
|
-
htmlCtxtUseOptions(ctxt,
|
255
|
+
htmlCtxtUseOptions(ctxt, options == Qnil ? 0 : NUM2INT(options));
|
245
256
|
|
246
|
-
|
247
|
-
|
257
|
+
// Setup sax handler
|
258
|
+
// TODO - there must be a better way? The sax handler is initialized for XML, but we want
|
259
|
+
// to use HTML
|
260
|
+
memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
|
261
|
+
xmlSAX2InitHtmlDefaultSAXHandler(ctxt->sax);
|
248
262
|
|
249
263
|
return rxml_html_parser_context_wrap(ctxt);
|
250
264
|
}
|
@@ -288,7 +302,7 @@ static VALUE rxml_html_parser_context_disable_cdata_set(VALUE self, VALUE value)
|
|
288
302
|
if (value)
|
289
303
|
ctxt->sax->cdataBlock = NULL;
|
290
304
|
else
|
291
|
-
ctxt->sax->cdataBlock =
|
305
|
+
ctxt->sax->cdataBlock = xmlSAX2CDataBlock;
|
292
306
|
|
293
307
|
return value;
|
294
308
|
}
|
@@ -328,9 +342,9 @@ void rxml_init_html_parser_context(void)
|
|
328
342
|
IO_ATTR = ID2SYM(rb_intern("@io"));
|
329
343
|
cXMLHtmlParserContext = rb_define_class_under(cXMLHtmlParser, "Context", cXMLParserContext);
|
330
344
|
|
331
|
-
rb_define_singleton_method(cXMLHtmlParserContext, "file", rxml_html_parser_context_file, 1);
|
332
|
-
rb_define_singleton_method(cXMLHtmlParserContext, "io", rxml_html_parser_context_io, 1);
|
333
|
-
rb_define_singleton_method(cXMLHtmlParserContext, "string", rxml_html_parser_context_string, 1);
|
345
|
+
rb_define_singleton_method(cXMLHtmlParserContext, "file", rxml_html_parser_context_file, -1);
|
346
|
+
rb_define_singleton_method(cXMLHtmlParserContext, "io", rxml_html_parser_context_io, -1);
|
347
|
+
rb_define_singleton_method(cXMLHtmlParserContext, "string", rxml_html_parser_context_string, -1);
|
334
348
|
rb_define_method(cXMLHtmlParserContext, "close", rxml_html_parser_context_close, 0);
|
335
349
|
rb_define_method(cXMLHtmlParserContext, "disable_cdata=", rxml_html_parser_context_disable_cdata_set, 1);
|
336
350
|
rb_define_method(cXMLHtmlParserContext, "options=", rxml_html_parser_context_options_set, 1);
|
data/ext/libxml/ruby_xml_namespace.c
CHANGED
@@ -58,9 +58,6 @@ static VALUE rxml_namespace_initialize(VALUE self, VALUE node, VALUE prefix,
|
|
58
58
|
xmlPrefix = NIL_P(prefix) ? NULL : (xmlChar *)StringValuePtr(prefix);
|
59
59
|
xns = xmlNewNs(xnode, (xmlChar*) StringValuePtr(href), xmlPrefix);
|
60
60
|
|
61
|
-
if (!xns)
|
62
|
-
rxml_raise(&xmlLastError);
|
63
|
-
|
64
61
|
DATA_PTR(self) = xns;
|
65
62
|
return self;
|
66
63
|
}
|