libxml-ruby 0.9.5-x86-mswin32-60 → 0.9.6-x86-mswin32-60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES +28 -0
- data/README +8 -12
- data/ext/libxml/cbg.c +86 -86
- data/ext/libxml/libxml.c +875 -899
- data/ext/libxml/ruby_libxml.h +91 -65
- data/ext/libxml/ruby_xml_attr.c +485 -485
- data/ext/libxml/ruby_xml_attr.h +3 -3
- data/ext/libxml/ruby_xml_attributes.h +2 -2
- data/ext/libxml/ruby_xml_document.c +124 -307
- data/ext/libxml/ruby_xml_document.h +3 -3
- data/ext/libxml/ruby_xml_dtd.c +119 -119
- data/ext/libxml/ruby_xml_dtd.h +2 -2
- data/ext/libxml/ruby_xml_error.c +1 -1
- data/ext/libxml/ruby_xml_error.h +2 -2
- data/ext/libxml/ruby_xml_html_parser.c +119 -119
- data/ext/libxml/ruby_xml_html_parser.h +3 -3
- data/ext/libxml/ruby_xml_input.c +13 -11
- data/ext/libxml/ruby_xml_input.h +3 -3
- data/ext/libxml/ruby_xml_input_cbg.c +197 -197
- data/ext/libxml/ruby_xml_namespace.c +158 -0
- data/ext/libxml/ruby_xml_namespace.h +12 -0
- data/ext/libxml/ruby_xml_namespaces.c +303 -0
- data/ext/libxml/{ruby_xml_ns.h → ruby_xml_namespaces.h} +4 -5
- data/ext/libxml/ruby_xml_node.c +88 -293
- data/ext/libxml/ruby_xml_node.h +4 -4
- data/ext/libxml/ruby_xml_parser.c +152 -152
- data/ext/libxml/ruby_xml_parser.h +3 -3
- data/ext/libxml/ruby_xml_parser_context.c +630 -657
- data/ext/libxml/ruby_xml_parser_context.h +3 -3
- data/ext/libxml/ruby_xml_reader.c +899 -904
- data/ext/libxml/ruby_xml_reader.h +2 -2
- data/ext/libxml/ruby_xml_relaxng.h +2 -2
- data/ext/libxml/ruby_xml_sax_parser.c +175 -175
- data/ext/libxml/ruby_xml_sax_parser.h +3 -3
- data/ext/libxml/ruby_xml_schema.c +165 -165
- data/ext/libxml/ruby_xml_schema.h +2 -2
- data/ext/libxml/ruby_xml_state.h +2 -2
- data/ext/libxml/ruby_xml_xinclude.c +24 -24
- data/ext/libxml/ruby_xml_xinclude.h +3 -3
- data/ext/libxml/ruby_xml_xpath.c +108 -108
- data/ext/libxml/ruby_xml_xpath.h +3 -3
- data/ext/libxml/ruby_xml_xpath_context.c +84 -35
- data/ext/libxml/ruby_xml_xpath_context.h +3 -3
- data/ext/libxml/ruby_xml_xpath_expression.c +5 -7
- data/ext/libxml/ruby_xml_xpath_expression.h +2 -2
- data/ext/libxml/ruby_xml_xpath_object.c +7 -7
- data/ext/libxml/ruby_xml_xpath_object.h +2 -2
- data/ext/libxml/ruby_xml_xpointer.c +107 -107
- data/ext/libxml/ruby_xml_xpointer.h +3 -3
- data/ext/libxml/version.h +2 -2
- data/ext/mingw/libxml_ruby.dll.a +0 -0
- data/ext/mingw/libxml_ruby.so +0 -0
- data/ext/vc/libxml_ruby.vcproj +13 -5
- data/lib/libxml.rb +4 -1
- data/lib/libxml/document.rb +40 -6
- data/lib/libxml/hpricot.rb +76 -76
- data/lib/libxml/namespace.rb +60 -0
- data/lib/libxml/namespaces.rb +36 -0
- data/lib/libxml/node.rb +90 -26
- data/lib/libxml/ns.rb +20 -0
- data/test/model/bands.xml +5 -0
- data/test/tc_attributes.rb +1 -1
- data/test/tc_document.rb +24 -41
- data/test/tc_document_write.rb +87 -115
- data/test/tc_namespace.rb +59 -0
- data/test/tc_namespaces.rb +174 -0
- data/test/tc_node.rb +41 -33
- data/test/tc_node_copy.rb +1 -1
- data/test/tc_node_edit.rb +6 -0
- data/test/tc_node_write.rb +76 -0
- data/test/tc_xinclude.rb +2 -9
- data/test/tc_xpath.rb +38 -11
- data/test/test_suite.rb +3 -1
- metadata +16 -9
- data/ext/libxml/ruby_xml_ns.c +0 -150
- data/test/ets_copy_bug.rb +0 -21
- data/test/ets_copy_bug3.rb +0 -38
- data/test/model/default_validation_bug.rb +0 -0
- data/test/tc_ns.rb +0 -18
@@ -1,9 +1,9 @@
|
|
1
|
-
/* $Id: ruby_xml_document.h
|
1
|
+
/* $Id: ruby_xml_document.h 666 2008-12-07 00:16:50Z cfis $ */
|
2
2
|
|
3
3
|
/* Please see the LICENSE file for copyright and distribution information */
|
4
4
|
|
5
|
-
#ifndef
|
6
|
-
#define
|
5
|
+
#ifndef __RXML_DOCUMENT__
|
6
|
+
#define __RXML_DOCUMENT__
|
7
7
|
|
8
8
|
extern VALUE cXMLDocument;
|
9
9
|
|
data/ext/libxml/ruby_xml_dtd.c
CHANGED
@@ -1,119 +1,119 @@
|
|
1
|
-
#include "ruby_libxml.h"
|
2
|
-
#include "ruby_xml_dtd.h"
|
3
|
-
|
4
|
-
/*
|
5
|
-
* Document-class: LibXML::XML::Dtd
|
6
|
-
*
|
7
|
-
* The XML::Dtd class is used to prepare DTD's for validation of xml
|
8
|
-
* documents.
|
9
|
-
*
|
10
|
-
* DTDs can be created from a string or a pair of public and system identifiers.
|
11
|
-
* Once a Dtd object is instantiated, an XML document can be validated by the
|
12
|
-
* XML::Document#validate method providing the XML::Dtd object as parameeter.
|
13
|
-
* The method will raise an exception if the document is
|
14
|
-
* not valid.
|
15
|
-
*
|
16
|
-
* Basic usage:
|
17
|
-
*
|
18
|
-
* # parse DTD
|
19
|
-
* dtd = XML::Dtd.new(<<EOF)
|
20
|
-
* <!ELEMENT root (item*) >
|
21
|
-
* <!ELEMENT item (#PCDATA) >
|
22
|
-
* EOF
|
23
|
-
*
|
24
|
-
* # parse xml document to be validated
|
25
|
-
* instance = XML::Document.file('instance.xml')
|
26
|
-
*
|
27
|
-
* # validate
|
28
|
-
* instance.validate(dtd)
|
29
|
-
*/
|
30
|
-
|
31
|
-
VALUE cXMLDtd;
|
32
|
-
|
33
|
-
void rxml_dtd_free(xmlDtdPtr xdtd)
|
34
|
-
{
|
35
|
-
xmlFreeDtd(xdtd);
|
36
|
-
}
|
37
|
-
|
38
|
-
static VALUE rxml_dtd_alloc(VALUE klass)
|
39
|
-
{
|
40
|
-
return Data_Wrap_Struct(klass, NULL, rxml_dtd_free, NULL);
|
41
|
-
}
|
42
|
-
|
43
|
-
/*
|
44
|
-
* call-seq:
|
45
|
-
* XML::Dtd.new("public system") -> dtd
|
46
|
-
* XML::Dtd.new("public", "system") -> dtd
|
47
|
-
*
|
48
|
-
* Create a new Dtd from the specified public and system
|
49
|
-
* identifiers.
|
50
|
-
*/
|
51
|
-
static VALUE rxml_dtd_initialize(int argc, VALUE *argv, VALUE self)
|
52
|
-
{
|
53
|
-
VALUE external, system, dtd_string;
|
54
|
-
xmlParserInputBufferPtr buffer;
|
55
|
-
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
|
56
|
-
xmlChar *new_string;
|
57
|
-
xmlDtdPtr xdtd;
|
58
|
-
|
59
|
-
// 1 argument -- string --> parsujeme jako dtd
|
60
|
-
// 2 argumenty -- public, system --> bude se hledat
|
61
|
-
switch (argc)
|
62
|
-
{
|
63
|
-
case 2:
|
64
|
-
rb_scan_args(argc, argv, "20", &external, &system);
|
65
|
-
|
66
|
-
Check_Type(external, T_STRING);
|
67
|
-
Check_Type(system, T_STRING);
|
68
|
-
|
69
|
-
xdtd = xmlParseDTD((xmlChar*) StringValuePtr(external),
|
70
|
-
(xmlChar*) StringValuePtr(system));
|
71
|
-
|
72
|
-
if (xdtd == NULL)
|
73
|
-
rxml_raise(&xmlLastError);
|
74
|
-
|
75
|
-
DATA_PTR( self) = xdtd;
|
76
|
-
|
77
|
-
xmlSetTreeDoc((xmlNodePtr) xdtd, NULL);
|
78
|
-
break;
|
79
|
-
|
80
|
-
case 1:
|
81
|
-
rb_scan_args(argc, argv, "10", &dtd_string);
|
82
|
-
Check_Type(dtd_string, T_STRING);
|
83
|
-
|
84
|
-
/* Note that buffer is freed by xmlParserInputBufferPush*/
|
85
|
-
buffer = xmlAllocParserInputBuffer(enc);
|
86
|
-
new_string = xmlStrdup((xmlChar*) StringValuePtr(dtd_string));
|
87
|
-
xmlParserInputBufferPush(buffer, xmlStrlen(new_string),
|
88
|
-
(const char*) new_string);
|
89
|
-
|
90
|
-
xdtd = xmlIOParseDTD(NULL, buffer, enc);
|
91
|
-
|
92
|
-
if (xdtd == NULL)
|
93
|
-
rxml_raise(&xmlLastError);
|
94
|
-
|
95
|
-
xmlFree(new_string);
|
96
|
-
|
97
|
-
DATA_PTR( self) = xdtd;
|
98
|
-
break;
|
99
|
-
|
100
|
-
default:
|
101
|
-
rb_raise(rb_eArgError, "wrong number of arguments (need 1 or 2)");
|
102
|
-
}
|
103
|
-
|
104
|
-
return self;
|
105
|
-
}
|
106
|
-
|
107
|
-
// Rdoc needs to know
|
108
|
-
#ifdef RDOC_NEVER_DEFINED
|
109
|
-
mLibXML = rb_define_module("LibXML");
|
110
|
-
mXML = rb_define_module_under(mLibXML, "XML");
|
111
|
-
#endif
|
112
|
-
|
113
|
-
void ruby_init_xml_dtd()
|
114
|
-
{
|
115
|
-
cXMLDtd = rb_define_class_under(mXML, "Dtd", rb_cObject);
|
116
|
-
rb_define_alloc_func(cXMLDtd, rxml_dtd_alloc);
|
117
|
-
rb_define_method(cXMLDtd, "initialize", rxml_dtd_initialize, -1);
|
118
|
-
}
|
119
|
-
|
1
|
+
#include "ruby_libxml.h"
|
2
|
+
#include "ruby_xml_dtd.h"
|
3
|
+
|
4
|
+
/*
|
5
|
+
* Document-class: LibXML::XML::Dtd
|
6
|
+
*
|
7
|
+
* The XML::Dtd class is used to prepare DTDs for validation of XML
|
8
|
+
* documents.
|
9
|
+
*
|
10
|
+
* DTDs can be created from a string or a pair of public and system identifiers.
|
11
|
+
* Once a Dtd object is instantiated, an XML document can be validated by the
|
12
|
+
* XML::Document#validate method providing the XML::Dtd object as a parameter.
|
13
|
+
* The method will raise an exception if the document is
|
14
|
+
* not valid.
|
15
|
+
*
|
16
|
+
* Basic usage:
|
17
|
+
*
|
18
|
+
* # parse DTD
|
19
|
+
* dtd = XML::Dtd.new(<<EOF)
|
20
|
+
* <!ELEMENT root (item*) >
|
21
|
+
* <!ELEMENT item (#PCDATA) >
|
22
|
+
* EOF
|
23
|
+
*
|
24
|
+
* # parse xml document to be validated
|
25
|
+
* instance = XML::Document.file('instance.xml')
|
26
|
+
*
|
27
|
+
* # validate
|
28
|
+
* instance.validate(dtd)
|
29
|
+
*/
|
30
|
+
|
31
|
+
VALUE cXMLDtd;
|
32
|
+
|
33
|
+
void rxml_dtd_free(xmlDtdPtr xdtd)
|
34
|
+
{
|
35
|
+
xmlFreeDtd(xdtd);
|
36
|
+
}
|
37
|
+
|
38
|
+
static VALUE rxml_dtd_alloc(VALUE klass)
|
39
|
+
{
|
40
|
+
return Data_Wrap_Struct(klass, NULL, rxml_dtd_free, NULL);
|
41
|
+
}
|
42
|
+
|
43
|
+
/*
|
44
|
+
* call-seq:
|
45
|
+
* XML::Dtd.new("DTD string") -> dtd
|
46
|
+
* XML::Dtd.new("public", "system") -> dtd
|
47
|
+
*
|
48
|
+
* Create a new Dtd either by parsing a DTD string or from the
|
49
|
+
* specified public and system identifiers.
|
50
|
+
*/
|
51
|
+
static VALUE rxml_dtd_initialize(int argc, VALUE *argv, VALUE self)
|
52
|
+
{
|
53
|
+
VALUE external, system, dtd_string;
|
54
|
+
xmlParserInputBufferPtr buffer;
|
55
|
+
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
|
56
|
+
xmlChar *new_string;
|
57
|
+
xmlDtdPtr xdtd;
|
58
|
+
|
59
|
+
// 1 argument -- string --> parsed as a DTD
|
60
|
+
// 2 arguments -- public, system --> the DTD is looked up by its identifiers
|
61
|
+
switch (argc)
|
62
|
+
{
|
63
|
+
case 2:
|
64
|
+
rb_scan_args(argc, argv, "20", &external, &system);
|
65
|
+
|
66
|
+
Check_Type(external, T_STRING);
|
67
|
+
Check_Type(system, T_STRING);
|
68
|
+
|
69
|
+
xdtd = xmlParseDTD((xmlChar*) StringValuePtr(external),
|
70
|
+
(xmlChar*) StringValuePtr(system));
|
71
|
+
|
72
|
+
if (xdtd == NULL)
|
73
|
+
rxml_raise(&xmlLastError);
|
74
|
+
|
75
|
+
DATA_PTR( self) = xdtd;
|
76
|
+
|
77
|
+
xmlSetTreeDoc((xmlNodePtr) xdtd, NULL);
|
78
|
+
break;
|
79
|
+
|
80
|
+
case 1:
|
81
|
+
rb_scan_args(argc, argv, "10", &dtd_string);
|
82
|
+
Check_Type(dtd_string, T_STRING);
|
83
|
+
|
84
|
+
/* Note that buffer is freed by xmlIOParseDTD */
|
85
|
+
buffer = xmlAllocParserInputBuffer(enc);
|
86
|
+
new_string = xmlStrdup((xmlChar*) StringValuePtr(dtd_string));
|
87
|
+
xmlParserInputBufferPush(buffer, xmlStrlen(new_string),
|
88
|
+
(const char*) new_string);
|
89
|
+
|
90
|
+
xdtd = xmlIOParseDTD(NULL, buffer, enc);
|
91
|
+
|
92
|
+
if (xdtd == NULL)
|
93
|
+
rxml_raise(&xmlLastError);
|
94
|
+
|
95
|
+
xmlFree(new_string);
|
96
|
+
|
97
|
+
DATA_PTR( self) = xdtd;
|
98
|
+
break;
|
99
|
+
|
100
|
+
default:
|
101
|
+
rb_raise(rb_eArgError, "wrong number of arguments (need 1 or 2)");
|
102
|
+
}
|
103
|
+
|
104
|
+
return self;
|
105
|
+
}
|
106
|
+
|
107
|
+
// Rdoc needs to know
|
108
|
+
#ifdef RDOC_NEVER_DEFINED
|
109
|
+
mLibXML = rb_define_module("LibXML");
|
110
|
+
mXML = rb_define_module_under(mLibXML, "XML");
|
111
|
+
#endif
|
112
|
+
|
113
|
+
void ruby_init_xml_dtd()
|
114
|
+
{
|
115
|
+
cXMLDtd = rb_define_class_under(mXML, "Dtd", rb_cObject);
|
116
|
+
rb_define_alloc_func(cXMLDtd, rxml_dtd_alloc);
|
117
|
+
rb_define_method(cXMLDtd, "initialize", rxml_dtd_initialize, -1);
|
118
|
+
}
|
119
|
+
|
data/ext/libxml/ruby_xml_dtd.h
CHANGED
data/ext/libxml/ruby_xml_error.c
CHANGED
data/ext/libxml/ruby_xml_error.h
CHANGED
@@ -1,119 +1,119 @@
|
|
1
|
-
/* $Id: ruby_xml_html_parser.c
|
2
|
-
|
3
|
-
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
-
|
5
|
-
#include "ruby_libxml.h"
|
6
|
-
|
7
|
-
VALUE cXMLHTMLParser;
|
8
|
-
static ID INPUT_ATTR;
|
9
|
-
|
10
|
-
/*
|
11
|
-
* Document-class: LibXML::XML::HTMLParser
|
12
|
-
*
|
13
|
-
* The HTML parser implements an HTML 4.0 non-verifying parser with an API
|
14
|
-
* compatible with the XML::Parser. In contrast with the XML::Parser,
|
15
|
-
* it can parse "real world" HTML, even if it severely broken from a
|
16
|
-
* specification point of view. */
|
17
|
-
|
18
|
-
/*
|
19
|
-
* call-seq:
|
20
|
-
* XML::HTMLParser.initialize -> parser
|
21
|
-
*
|
22
|
-
* Initializes a new parser instance with no pre-determined source.
|
23
|
-
*/
|
24
|
-
static VALUE rxml_html_parser_initialize(VALUE self)
|
25
|
-
{
|
26
|
-
VALUE input = rb_class_new_instance(0, NULL, cXMLInput);
|
27
|
-
rb_iv_set(self, "@input", input);
|
28
|
-
return self;
|
29
|
-
}
|
30
|
-
|
31
|
-
static htmlDocPtr rxml_html_parser_read_file(VALUE input)
|
32
|
-
{
|
33
|
-
VALUE file = rb_ivar_get(input, FILE_ATTR);
|
34
|
-
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
35
|
-
VALUE encoding_str = rxml_input_encoding_to_s(
|
36
|
-
char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(
|
37
|
-
encoding_str));
|
38
|
-
int options = 0;
|
39
|
-
|
40
|
-
return htmlReadFile(StringValuePtr(file), xencoding_str, options);
|
41
|
-
}
|
42
|
-
|
43
|
-
static htmlDocPtr rxml_html_parser_read_string(VALUE input)
|
44
|
-
{
|
45
|
-
VALUE string = rb_ivar_get(input, STRING_ATTR);
|
46
|
-
VALUE base_url = rb_ivar_get(input, BASE_URL_ATTR);
|
47
|
-
char *xbase_url = (base_url == Qnil ? NULL : StringValuePtr(base_url));
|
48
|
-
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
49
|
-
VALUE encoding_str = rxml_input_encoding_to_s(
|
50
|
-
char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(
|
51
|
-
encoding_str));
|
52
|
-
int options = 0;
|
53
|
-
|
54
|
-
return htmlReadMemory(StringValuePtr(string), RSTRING_LEN(string),
|
55
|
-
|
56
|
-
}
|
57
|
-
|
58
|
-
static htmlDocPtr rxml_html_parser_read_io(VALUE input)
|
59
|
-
{
|
60
|
-
VALUE io = rb_ivar_get(input, IO_ATTR);
|
61
|
-
VALUE base_url = rb_ivar_get(input, BASE_URL_ATTR);
|
62
|
-
char *xbase_url = (base_url == Qnil ? NULL : StringValuePtr(base_url));
|
63
|
-
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
64
|
-
VALUE encoding_str = rxml_input_encoding_to_s(
|
65
|
-
char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(
|
66
|
-
encoding_str));
|
67
|
-
int options = 0;
|
68
|
-
|
69
|
-
return htmlReadIO((xmlInputReadCallback) rxml_read_callback, NULL,
|
70
|
-
(void *) io, xbase_url, xencoding_str, options);
|
71
|
-
}
|
72
|
-
|
73
|
-
/*
|
74
|
-
* call-seq:
|
75
|
-
* parser.parse -> document
|
76
|
-
*
|
77
|
-
* Parse the input XML and create an XML::Document with
|
78
|
-
* it's content. If an error occurs, XML::Parser::ParseError
|
79
|
-
* is thrown.
|
80
|
-
*/
|
81
|
-
static VALUE rxml_html_parser_parse(VALUE self)
|
82
|
-
{
|
83
|
-
VALUE input = rb_ivar_get(self, INPUT_ATTR);
|
84
|
-
htmlDocPtr xdoc;
|
85
|
-
|
86
|
-
if (rb_ivar_get(input, FILE_ATTR) != Qnil)
|
87
|
-
xdoc = rxml_html_parser_read_file(input);
|
88
|
-
else if (rb_ivar_get(input, STRING_ATTR) != Qnil)
|
89
|
-
xdoc = rxml_html_parser_read_string(input);
|
90
|
-
else if (rb_ivar_get(input, IO_ATTR) != Qnil)
|
91
|
-
xdoc = rxml_html_parser_read_io(input);
|
92
|
-
else
|
93
|
-
rb_raise(rb_eArgError, "You must specify a parser data source");
|
94
|
-
|
95
|
-
if (!xdoc)
|
96
|
-
rxml_raise(&xmlLastError);
|
97
|
-
|
98
|
-
return rxml_document_wrap(xdoc);
|
99
|
-
}
|
100
|
-
|
101
|
-
// Rdoc needs to know
|
102
|
-
#ifdef RDOC_NEVER_DEFINED
|
103
|
-
mLibXML = rb_define_module("LibXML");
|
104
|
-
mXML = rb_define_module_under(mLibXML, "XML");
|
105
|
-
#endif
|
106
|
-
|
107
|
-
void ruby_init_html_parser(void)
|
108
|
-
{
|
109
|
-
INPUT_ATTR = rb_intern("@input");
|
110
|
-
|
111
|
-
cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
|
112
|
-
|
113
|
-
/* Atributes */
|
114
|
-
rb_define_attr(cXMLHTMLParser, "input", 1, 0);
|
115
|
-
|
116
|
-
/* Instance methods */
|
117
|
-
rb_define_method(cXMLHTMLParser, "initialize", rxml_html_parser_initialize, 0);
|
118
|
-
rb_define_method(cXMLHTMLParser, "parse", rxml_html_parser_parse, 0);
|
119
|
-
}
|
1
|
+
/* $Id: ruby_xml_html_parser.c 665 2008-12-06 07:52:49Z cfis $ */
|
2
|
+
|
3
|
+
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
+
|
5
|
+
#include "ruby_libxml.h"
|
6
|
+
|
7
|
+
VALUE cXMLHTMLParser;
|
8
|
+
static ID INPUT_ATTR;
|
9
|
+
|
10
|
+
/*
|
11
|
+
* Document-class: LibXML::XML::HTMLParser
|
12
|
+
*
|
13
|
+
* The HTML parser implements an HTML 4.0 non-verifying parser with an API
|
14
|
+
* compatible with the XML::Parser. In contrast with the XML::Parser,
|
15
|
+
* it can parse "real world" HTML, even if it is severely broken from a
|
16
|
+
* specification point of view. */
|
17
|
+
|
18
|
+
/*
|
19
|
+
* call-seq:
|
20
|
+
* XML::HTMLParser.new -> parser
|
21
|
+
*
|
22
|
+
* Initializes a new parser instance with no pre-determined source.
|
23
|
+
*/
|
24
|
+
static VALUE rxml_html_parser_initialize(VALUE self)
|
25
|
+
{
|
26
|
+
VALUE input = rb_class_new_instance(0, NULL, cXMLInput);
|
27
|
+
rb_iv_set(self, "@input", input);
|
28
|
+
return self;
|
29
|
+
}
|
30
|
+
|
31
|
+
static htmlDocPtr rxml_html_parser_read_file(VALUE input)
|
32
|
+
{
|
33
|
+
VALUE file = rb_ivar_get(input, FILE_ATTR);
|
34
|
+
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
35
|
+
VALUE encoding_str = rxml_input_encoding_to_s(cXMLInput, encoding);
|
36
|
+
char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(
|
37
|
+
encoding_str));
|
38
|
+
int options = 0;
|
39
|
+
|
40
|
+
return htmlReadFile(StringValuePtr(file), xencoding_str, options);
|
41
|
+
}
|
42
|
+
|
43
|
+
static htmlDocPtr rxml_html_parser_read_string(VALUE input)
|
44
|
+
{
|
45
|
+
VALUE string = rb_ivar_get(input, STRING_ATTR);
|
46
|
+
VALUE base_url = rb_ivar_get(input, BASE_URL_ATTR);
|
47
|
+
char *xbase_url = (base_url == Qnil ? NULL : StringValuePtr(base_url));
|
48
|
+
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
49
|
+
VALUE encoding_str = rxml_input_encoding_to_s(cXMLInput, encoding);
|
50
|
+
char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(
|
51
|
+
encoding_str));
|
52
|
+
int options = 0;
|
53
|
+
|
54
|
+
return htmlReadMemory(StringValuePtr(string), RSTRING_LEN(string),
|
55
|
+
xbase_url, xencoding_str, options);
|
56
|
+
}
|
57
|
+
|
58
|
+
static htmlDocPtr rxml_html_parser_read_io(VALUE input)
|
59
|
+
{
|
60
|
+
VALUE io = rb_ivar_get(input, IO_ATTR);
|
61
|
+
VALUE base_url = rb_ivar_get(input, BASE_URL_ATTR);
|
62
|
+
char *xbase_url = (base_url == Qnil ? NULL : StringValuePtr(base_url));
|
63
|
+
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
64
|
+
VALUE encoding_str = rxml_input_encoding_to_s(cXMLInput, encoding);
|
65
|
+
char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(
|
66
|
+
encoding_str));
|
67
|
+
int options = 0;
|
68
|
+
|
69
|
+
return htmlReadIO((xmlInputReadCallback) rxml_read_callback, NULL,
|
70
|
+
(void *) io, xbase_url, xencoding_str, options);
|
71
|
+
}
|
72
|
+
|
73
|
+
/*
|
74
|
+
* call-seq:
|
75
|
+
* parser.parse -> document
|
76
|
+
*
|
77
|
+
* Parse the input HTML and create an XML::Document with
|
78
|
+
* its content. If an error occurs, XML::Parser::ParseError
|
79
|
+
* is thrown.
|
80
|
+
*/
|
81
|
+
static VALUE rxml_html_parser_parse(VALUE self)
|
82
|
+
{
|
83
|
+
VALUE input = rb_ivar_get(self, INPUT_ATTR);
|
84
|
+
htmlDocPtr xdoc;
|
85
|
+
|
86
|
+
if (rb_ivar_get(input, FILE_ATTR) != Qnil)
|
87
|
+
xdoc = rxml_html_parser_read_file(input);
|
88
|
+
else if (rb_ivar_get(input, STRING_ATTR) != Qnil)
|
89
|
+
xdoc = rxml_html_parser_read_string(input);
|
90
|
+
else if (rb_ivar_get(input, IO_ATTR) != Qnil)
|
91
|
+
xdoc = rxml_html_parser_read_io(input);
|
92
|
+
else
|
93
|
+
rb_raise(rb_eArgError, "You must specify a parser data source");
|
94
|
+
|
95
|
+
if (!xdoc)
|
96
|
+
rxml_raise(&xmlLastError);
|
97
|
+
|
98
|
+
return rxml_document_wrap(xdoc);
|
99
|
+
}
|
100
|
+
|
101
|
+
// Rdoc needs to know
|
102
|
+
#ifdef RDOC_NEVER_DEFINED
|
103
|
+
mLibXML = rb_define_module("LibXML");
|
104
|
+
mXML = rb_define_module_under(mLibXML, "XML");
|
105
|
+
#endif
|
106
|
+
|
107
|
+
void ruby_init_html_parser(void)
|
108
|
+
{
|
109
|
+
INPUT_ATTR = rb_intern("@input");
|
110
|
+
|
111
|
+
cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
|
112
|
+
|
113
|
+
/* Attributes */
|
114
|
+
rb_define_attr(cXMLHTMLParser, "input", 1, 0);
|
115
|
+
|
116
|
+
/* Instance methods */
|
117
|
+
rb_define_method(cXMLHTMLParser, "initialize", rxml_html_parser_initialize, 0);
|
118
|
+
rb_define_method(cXMLHTMLParser, "parse", rxml_html_parser_parse, 0);
|
119
|
+
}
|