libxml-ruby 0.9.5 → 0.9.6
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES +28 -0
- data/README +8 -12
- data/ext/libxml/cbg.c +86 -86
- data/ext/libxml/libxml.c +875 -899
- data/ext/libxml/ruby_libxml.h +91 -65
- data/ext/libxml/ruby_xml_attr.c +485 -485
- data/ext/libxml/ruby_xml_attr.h +3 -3
- data/ext/libxml/ruby_xml_attributes.h +2 -2
- data/ext/libxml/ruby_xml_document.c +124 -307
- data/ext/libxml/ruby_xml_document.h +3 -3
- data/ext/libxml/ruby_xml_dtd.c +119 -119
- data/ext/libxml/ruby_xml_dtd.h +2 -2
- data/ext/libxml/ruby_xml_error.c +1 -1
- data/ext/libxml/ruby_xml_error.h +2 -2
- data/ext/libxml/ruby_xml_html_parser.c +119 -119
- data/ext/libxml/ruby_xml_html_parser.h +3 -3
- data/ext/libxml/ruby_xml_input.c +13 -11
- data/ext/libxml/ruby_xml_input.h +3 -3
- data/ext/libxml/ruby_xml_input_cbg.c +197 -197
- data/ext/libxml/ruby_xml_namespace.c +158 -0
- data/ext/libxml/ruby_xml_namespace.h +12 -0
- data/ext/libxml/ruby_xml_namespaces.c +303 -0
- data/ext/libxml/{ruby_xml_ns.h → ruby_xml_namespaces.h} +4 -5
- data/ext/libxml/ruby_xml_node.c +88 -293
- data/ext/libxml/ruby_xml_node.h +4 -4
- data/ext/libxml/ruby_xml_parser.c +152 -152
- data/ext/libxml/ruby_xml_parser.h +3 -3
- data/ext/libxml/ruby_xml_parser_context.c +630 -657
- data/ext/libxml/ruby_xml_parser_context.h +3 -3
- data/ext/libxml/ruby_xml_reader.c +899 -904
- data/ext/libxml/ruby_xml_reader.h +2 -2
- data/ext/libxml/ruby_xml_relaxng.h +2 -2
- data/ext/libxml/ruby_xml_sax_parser.c +175 -175
- data/ext/libxml/ruby_xml_sax_parser.h +3 -3
- data/ext/libxml/ruby_xml_schema.c +165 -165
- data/ext/libxml/ruby_xml_schema.h +2 -2
- data/ext/libxml/ruby_xml_state.h +2 -2
- data/ext/libxml/ruby_xml_xinclude.c +24 -24
- data/ext/libxml/ruby_xml_xinclude.h +3 -3
- data/ext/libxml/ruby_xml_xpath.c +108 -108
- data/ext/libxml/ruby_xml_xpath.h +3 -3
- data/ext/libxml/ruby_xml_xpath_context.c +84 -35
- data/ext/libxml/ruby_xml_xpath_context.h +3 -3
- data/ext/libxml/ruby_xml_xpath_expression.c +5 -7
- data/ext/libxml/ruby_xml_xpath_expression.h +2 -2
- data/ext/libxml/ruby_xml_xpath_object.c +7 -7
- data/ext/libxml/ruby_xml_xpath_object.h +2 -2
- data/ext/libxml/ruby_xml_xpointer.c +107 -107
- data/ext/libxml/ruby_xml_xpointer.h +3 -3
- data/ext/libxml/version.h +2 -2
- data/ext/vc/libxml_ruby.vcproj +13 -5
- data/lib/libxml.rb +4 -1
- data/lib/libxml/document.rb +40 -6
- data/lib/libxml/hpricot.rb +76 -76
- data/lib/libxml/namespace.rb +60 -0
- data/lib/libxml/namespaces.rb +36 -0
- data/lib/libxml/node.rb +90 -26
- data/lib/libxml/ns.rb +20 -0
- data/test/model/bands.xml +5 -0
- data/test/tc_attributes.rb +1 -1
- data/test/tc_document.rb +24 -41
- data/test/tc_document_write.rb +87 -115
- data/test/tc_namespace.rb +59 -0
- data/test/tc_namespaces.rb +174 -0
- data/test/tc_node.rb +41 -33
- data/test/tc_node_copy.rb +1 -1
- data/test/tc_node_edit.rb +6 -0
- data/test/tc_node_write.rb +76 -0
- data/test/tc_xinclude.rb +2 -9
- data/test/tc_xpath.rb +38 -11
- data/test/test_suite.rb +3 -1
- metadata +16 -9
- data/ext/libxml/ruby_xml_ns.c +0 -150
- data/test/ets_copy_bug.rb +0 -21
- data/test/ets_copy_bug3.rb +0 -38
- data/test/model/default_validation_bug.rb +0 -0
- data/test/tc_ns.rb +0 -18
@@ -1,9 +1,9 @@
|
|
1
|
-
/* $Id: ruby_xml_document.h
|
1
|
+
/* $Id: ruby_xml_document.h 666 2008-12-07 00:16:50Z cfis $ */
|
2
2
|
|
3
3
|
/* Please see the LICENSE file for copyright and distribution information */
|
4
4
|
|
5
|
-
#ifndef
|
6
|
-
#define
|
5
|
+
#ifndef __RXML_DOCUMENT__
|
6
|
+
#define __RXML_DOCUMENT__
|
7
7
|
|
8
8
|
extern VALUE cXMLDocument;
|
9
9
|
|
data/ext/libxml/ruby_xml_dtd.c
CHANGED
@@ -1,119 +1,119 @@
|
|
1
|
-
#include "ruby_libxml.h"
|
2
|
-
#include "ruby_xml_dtd.h"
|
3
|
-
|
4
|
-
/*
|
5
|
-
* Document-class: LibXML::XML::Dtd
|
6
|
-
*
|
7
|
-
* The XML::Dtd class is used to prepare DTD's for validation of xml
|
8
|
-
* documents.
|
9
|
-
*
|
10
|
-
* DTDs can be created from a string or a pair of public and system identifiers.
|
11
|
-
* Once a Dtd object is instantiated, an XML document can be validated by the
|
12
|
-
* XML::Document#validate method providing the XML::Dtd object as parameeter.
|
13
|
-
* The method will raise an exception if the document is
|
14
|
-
* not valid.
|
15
|
-
*
|
16
|
-
* Basic usage:
|
17
|
-
*
|
18
|
-
* # parse DTD
|
19
|
-
* dtd = XML::Dtd.new(<<EOF)
|
20
|
-
* <!ELEMENT root (item*) >
|
21
|
-
* <!ELEMENT item (#PCDATA) >
|
22
|
-
* EOF
|
23
|
-
*
|
24
|
-
* # parse xml document to be validated
|
25
|
-
* instance = XML::Document.file('instance.xml')
|
26
|
-
*
|
27
|
-
* # validate
|
28
|
-
* instance.validate(dtd)
|
29
|
-
*/
|
30
|
-
|
31
|
-
VALUE cXMLDtd;
|
32
|
-
|
33
|
-
void rxml_dtd_free(xmlDtdPtr xdtd)
|
34
|
-
{
|
35
|
-
xmlFreeDtd(xdtd);
|
36
|
-
}
|
37
|
-
|
38
|
-
static VALUE rxml_dtd_alloc(VALUE klass)
|
39
|
-
{
|
40
|
-
return Data_Wrap_Struct(klass, NULL, rxml_dtd_free, NULL);
|
41
|
-
}
|
42
|
-
|
43
|
-
/*
|
44
|
-
* call-seq:
|
45
|
-
* XML::Dtd.new("public system") -> dtd
|
46
|
-
* XML::Dtd.new("public", "system") -> dtd
|
47
|
-
*
|
48
|
-
* Create a new Dtd from the specified public and system
|
49
|
-
* identifiers.
|
50
|
-
*/
|
51
|
-
static VALUE rxml_dtd_initialize(int argc, VALUE *argv, VALUE self)
|
52
|
-
{
|
53
|
-
VALUE external, system, dtd_string;
|
54
|
-
xmlParserInputBufferPtr buffer;
|
55
|
-
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
|
56
|
-
xmlChar *new_string;
|
57
|
-
xmlDtdPtr xdtd;
|
58
|
-
|
59
|
-
// 1 argument -- string --> parsujeme jako dtd
|
60
|
-
// 2 argumenty -- public, system --> bude se hledat
|
61
|
-
switch (argc)
|
62
|
-
{
|
63
|
-
case 2:
|
64
|
-
rb_scan_args(argc, argv, "20", &external, &system);
|
65
|
-
|
66
|
-
Check_Type(external, T_STRING);
|
67
|
-
Check_Type(system, T_STRING);
|
68
|
-
|
69
|
-
xdtd = xmlParseDTD((xmlChar*) StringValuePtr(external),
|
70
|
-
(xmlChar*) StringValuePtr(system));
|
71
|
-
|
72
|
-
if (xdtd == NULL)
|
73
|
-
rxml_raise(&xmlLastError);
|
74
|
-
|
75
|
-
DATA_PTR( self) = xdtd;
|
76
|
-
|
77
|
-
xmlSetTreeDoc((xmlNodePtr) xdtd, NULL);
|
78
|
-
break;
|
79
|
-
|
80
|
-
case 1:
|
81
|
-
rb_scan_args(argc, argv, "10", &dtd_string);
|
82
|
-
Check_Type(dtd_string, T_STRING);
|
83
|
-
|
84
|
-
/* Note that buffer is freed by xmlParserInputBufferPush*/
|
85
|
-
buffer = xmlAllocParserInputBuffer(enc);
|
86
|
-
new_string = xmlStrdup((xmlChar*) StringValuePtr(dtd_string));
|
87
|
-
xmlParserInputBufferPush(buffer, xmlStrlen(new_string),
|
88
|
-
(const char*) new_string);
|
89
|
-
|
90
|
-
xdtd = xmlIOParseDTD(NULL, buffer, enc);
|
91
|
-
|
92
|
-
if (xdtd == NULL)
|
93
|
-
rxml_raise(&xmlLastError);
|
94
|
-
|
95
|
-
xmlFree(new_string);
|
96
|
-
|
97
|
-
DATA_PTR( self) = xdtd;
|
98
|
-
break;
|
99
|
-
|
100
|
-
default:
|
101
|
-
rb_raise(rb_eArgError, "wrong number of arguments (need 1 or 2)");
|
102
|
-
}
|
103
|
-
|
104
|
-
return self;
|
105
|
-
}
|
106
|
-
|
107
|
-
// Rdoc needs to know
|
108
|
-
#ifdef RDOC_NEVER_DEFINED
|
109
|
-
mLibXML = rb_define_module("LibXML");
|
110
|
-
mXML = rb_define_module_under(mLibXML, "XML");
|
111
|
-
#endif
|
112
|
-
|
113
|
-
void ruby_init_xml_dtd()
|
114
|
-
{
|
115
|
-
cXMLDtd = rb_define_class_under(mXML, "Dtd", rb_cObject);
|
116
|
-
rb_define_alloc_func(cXMLDtd, rxml_dtd_alloc);
|
117
|
-
rb_define_method(cXMLDtd, "initialize", rxml_dtd_initialize, -1);
|
118
|
-
}
|
119
|
-
|
1
|
+
#include "ruby_libxml.h"
|
2
|
+
#include "ruby_xml_dtd.h"
|
3
|
+
|
4
|
+
/*
|
5
|
+
* Document-class: LibXML::XML::Dtd
|
6
|
+
*
|
7
|
+
* The XML::Dtd class is used to prepare DTD's for validation of xml
|
8
|
+
* documents.
|
9
|
+
*
|
10
|
+
* DTDs can be created from a string or a pair of public and system identifiers.
|
11
|
+
* Once a Dtd object is instantiated, an XML document can be validated by the
|
12
|
+
* XML::Document#validate method providing the XML::Dtd object as a parameter.
|
13
|
+
* The method will raise an exception if the document is
|
14
|
+
* not valid.
|
15
|
+
*
|
16
|
+
* Basic usage:
|
17
|
+
*
|
18
|
+
* # parse DTD
|
19
|
+
* dtd = XML::Dtd.new(<<EOF)
|
20
|
+
* <!ELEMENT root (item*) >
|
21
|
+
* <!ELEMENT item (#PCDATA) >
|
22
|
+
* EOF
|
23
|
+
*
|
24
|
+
* # parse xml document to be validated
|
25
|
+
* instance = XML::Document.file('instance.xml')
|
26
|
+
*
|
27
|
+
* # validate
|
28
|
+
* instance.validate(dtd)
|
29
|
+
*/
|
30
|
+
|
31
|
+
VALUE cXMLDtd;
|
32
|
+
|
33
|
+
void rxml_dtd_free(xmlDtdPtr xdtd)
|
34
|
+
{
|
35
|
+
xmlFreeDtd(xdtd);
|
36
|
+
}
|
37
|
+
|
38
|
+
static VALUE rxml_dtd_alloc(VALUE klass)
|
39
|
+
{
|
40
|
+
return Data_Wrap_Struct(klass, NULL, rxml_dtd_free, NULL);
|
41
|
+
}
|
42
|
+
|
43
|
+
/*
|
44
|
+
* call-seq:
|
45
|
+
* XML::Dtd.new("DTD string") -> dtd
|
46
|
+
* XML::Dtd.new("public", "system") -> dtd
|
47
|
+
*
|
48
|
+
* Create a new Dtd from the specified public and system
|
49
|
+
* identifiers, or, when given a single string, by parsing that string as the DTD.
|
50
|
+
*/
|
51
|
+
static VALUE rxml_dtd_initialize(int argc, VALUE *argv, VALUE self)
|
52
|
+
{
|
53
|
+
VALUE external, system, dtd_string;
|
54
|
+
xmlParserInputBufferPtr buffer;
|
55
|
+
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
|
56
|
+
xmlChar *new_string;
|
57
|
+
xmlDtdPtr xdtd;
|
58
|
+
|
59
|
+
// 1 argument -- string --> parse it as a DTD
|
60
|
+
// 2 arguments -- public, system --> the DTD will be looked up
|
61
|
+
switch (argc)
|
62
|
+
{
|
63
|
+
case 2:
|
64
|
+
rb_scan_args(argc, argv, "20", &external, &system);
|
65
|
+
|
66
|
+
Check_Type(external, T_STRING);
|
67
|
+
Check_Type(system, T_STRING);
|
68
|
+
|
69
|
+
xdtd = xmlParseDTD((xmlChar*) StringValuePtr(external),
|
70
|
+
(xmlChar*) StringValuePtr(system));
|
71
|
+
|
72
|
+
if (xdtd == NULL)
|
73
|
+
rxml_raise(&xmlLastError);
|
74
|
+
|
75
|
+
DATA_PTR( self) = xdtd;
|
76
|
+
|
77
|
+
xmlSetTreeDoc((xmlNodePtr) xdtd, NULL);
|
78
|
+
break;
|
79
|
+
|
80
|
+
case 1:
|
81
|
+
rb_scan_args(argc, argv, "10", &dtd_string);
|
82
|
+
Check_Type(dtd_string, T_STRING);
|
83
|
+
|
84
|
+
/* Note that buffer is freed by xmlIOParseDTD */
|
85
|
+
buffer = xmlAllocParserInputBuffer(enc);
|
86
|
+
new_string = xmlStrdup((xmlChar*) StringValuePtr(dtd_string));
|
87
|
+
xmlParserInputBufferPush(buffer, xmlStrlen(new_string),
|
88
|
+
(const char*) new_string);
|
89
|
+
|
90
|
+
xdtd = xmlIOParseDTD(NULL, buffer, enc);
|
91
|
+
|
92
|
+
if (xdtd == NULL)
|
93
|
+
rxml_raise(&xmlLastError);
|
94
|
+
|
95
|
+
xmlFree(new_string);
|
96
|
+
|
97
|
+
DATA_PTR( self) = xdtd;
|
98
|
+
break;
|
99
|
+
|
100
|
+
default:
|
101
|
+
rb_raise(rb_eArgError, "wrong number of arguments (need 1 or 2)");
|
102
|
+
}
|
103
|
+
|
104
|
+
return self;
|
105
|
+
}
|
106
|
+
|
107
|
+
// Rdoc needs to know
|
108
|
+
#ifdef RDOC_NEVER_DEFINED
|
109
|
+
mLibXML = rb_define_module("LibXML");
|
110
|
+
mXML = rb_define_module_under(mLibXML, "XML");
|
111
|
+
#endif
|
112
|
+
|
113
|
+
void ruby_init_xml_dtd()
|
114
|
+
{
|
115
|
+
cXMLDtd = rb_define_class_under(mXML, "Dtd", rb_cObject);
|
116
|
+
rb_define_alloc_func(cXMLDtd, rxml_dtd_alloc);
|
117
|
+
rb_define_method(cXMLDtd, "initialize", rxml_dtd_initialize, -1);
|
118
|
+
}
|
119
|
+
|
data/ext/libxml/ruby_xml_dtd.h
CHANGED
data/ext/libxml/ruby_xml_error.c
CHANGED
data/ext/libxml/ruby_xml_error.h
CHANGED
@@ -1,119 +1,119 @@
|
|
1
|
-
/* $Id: ruby_xml_html_parser.c
|
2
|
-
|
3
|
-
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
-
|
5
|
-
#include "ruby_libxml.h"
|
6
|
-
|
7
|
-
VALUE cXMLHTMLParser;
|
8
|
-
static ID INPUT_ATTR;
|
9
|
-
|
10
|
-
/*
|
11
|
-
* Document-class: LibXML::XML::HTMLParser
|
12
|
-
*
|
13
|
-
* The HTML parser implements an HTML 4.0 non-verifying parser with an API
|
14
|
-
* compatible with the XML::Parser. In contrast with the XML::Parser,
|
15
|
-
* it can parse "real world" HTML, even if it severely broken from a
|
16
|
-
* specification point of view. */
|
17
|
-
|
18
|
-
/*
|
19
|
-
* call-seq:
|
20
|
-
* XML::HTMLParser.initialize -> parser
|
21
|
-
*
|
22
|
-
* Initializes a new parser instance with no pre-determined source.
|
23
|
-
*/
|
24
|
-
static VALUE rxml_html_parser_initialize(VALUE self)
|
25
|
-
{
|
26
|
-
VALUE input = rb_class_new_instance(0, NULL, cXMLInput);
|
27
|
-
rb_iv_set(self, "@input", input);
|
28
|
-
return self;
|
29
|
-
}
|
30
|
-
|
31
|
-
static htmlDocPtr rxml_html_parser_read_file(VALUE input)
|
32
|
-
{
|
33
|
-
VALUE file = rb_ivar_get(input, FILE_ATTR);
|
34
|
-
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
35
|
-
VALUE encoding_str = rxml_input_encoding_to_s(
|
36
|
-
char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(
|
37
|
-
encoding_str));
|
38
|
-
int options = 0;
|
39
|
-
|
40
|
-
return htmlReadFile(StringValuePtr(file), xencoding_str, options);
|
41
|
-
}
|
42
|
-
|
43
|
-
static htmlDocPtr rxml_html_parser_read_string(VALUE input)
|
44
|
-
{
|
45
|
-
VALUE string = rb_ivar_get(input, STRING_ATTR);
|
46
|
-
VALUE base_url = rb_ivar_get(input, BASE_URL_ATTR);
|
47
|
-
char *xbase_url = (base_url == Qnil ? NULL : StringValuePtr(base_url));
|
48
|
-
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
49
|
-
VALUE encoding_str = rxml_input_encoding_to_s(
|
50
|
-
char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(
|
51
|
-
encoding_str));
|
52
|
-
int options = 0;
|
53
|
-
|
54
|
-
return htmlReadMemory(StringValuePtr(string), RSTRING_LEN(string),
|
55
|
-
|
56
|
-
}
|
57
|
-
|
58
|
-
static htmlDocPtr rxml_html_parser_read_io(VALUE input)
|
59
|
-
{
|
60
|
-
VALUE io = rb_ivar_get(input, IO_ATTR);
|
61
|
-
VALUE base_url = rb_ivar_get(input, BASE_URL_ATTR);
|
62
|
-
char *xbase_url = (base_url == Qnil ? NULL : StringValuePtr(base_url));
|
63
|
-
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
64
|
-
VALUE encoding_str = rxml_input_encoding_to_s(
|
65
|
-
char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(
|
66
|
-
encoding_str));
|
67
|
-
int options = 0;
|
68
|
-
|
69
|
-
return htmlReadIO((xmlInputReadCallback) rxml_read_callback, NULL,
|
70
|
-
(void *) io, xbase_url, xencoding_str, options);
|
71
|
-
}
|
72
|
-
|
73
|
-
/*
|
74
|
-
* call-seq:
|
75
|
-
* parser.parse -> document
|
76
|
-
*
|
77
|
-
* Parse the input XML and create an XML::Document with
|
78
|
-
* it's content. If an error occurs, XML::Parser::ParseError
|
79
|
-
* is thrown.
|
80
|
-
*/
|
81
|
-
static VALUE rxml_html_parser_parse(VALUE self)
|
82
|
-
{
|
83
|
-
VALUE input = rb_ivar_get(self, INPUT_ATTR);
|
84
|
-
htmlDocPtr xdoc;
|
85
|
-
|
86
|
-
if (rb_ivar_get(input, FILE_ATTR) != Qnil)
|
87
|
-
xdoc = rxml_html_parser_read_file(input);
|
88
|
-
else if (rb_ivar_get(input, STRING_ATTR) != Qnil)
|
89
|
-
xdoc = rxml_html_parser_read_string(input);
|
90
|
-
else if (rb_ivar_get(input, IO_ATTR) != Qnil)
|
91
|
-
xdoc = rxml_html_parser_read_io(input);
|
92
|
-
else
|
93
|
-
rb_raise(rb_eArgError, "You must specify a parser data source");
|
94
|
-
|
95
|
-
if (!xdoc)
|
96
|
-
rxml_raise(&xmlLastError);
|
97
|
-
|
98
|
-
return rxml_document_wrap(xdoc);
|
99
|
-
}
|
100
|
-
|
101
|
-
// Rdoc needs to know
|
102
|
-
#ifdef RDOC_NEVER_DEFINED
|
103
|
-
mLibXML = rb_define_module("LibXML");
|
104
|
-
mXML = rb_define_module_under(mLibXML, "XML");
|
105
|
-
#endif
|
106
|
-
|
107
|
-
void ruby_init_html_parser(void)
|
108
|
-
{
|
109
|
-
INPUT_ATTR = rb_intern("@input");
|
110
|
-
|
111
|
-
cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
|
112
|
-
|
113
|
-
/* Atributes */
|
114
|
-
rb_define_attr(cXMLHTMLParser, "input", 1, 0);
|
115
|
-
|
116
|
-
/* Instance methods */
|
117
|
-
rb_define_method(cXMLHTMLParser, "initialize", rxml_html_parser_initialize, 0);
|
118
|
-
rb_define_method(cXMLHTMLParser, "parse", rxml_html_parser_parse, 0);
|
119
|
-
}
|
1
|
+
/* $Id: ruby_xml_html_parser.c 665 2008-12-06 07:52:49Z cfis $ */
|
2
|
+
|
3
|
+
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
+
|
5
|
+
#include "ruby_libxml.h"
|
6
|
+
|
7
|
+
VALUE cXMLHTMLParser;
|
8
|
+
static ID INPUT_ATTR;
|
9
|
+
|
10
|
+
/*
|
11
|
+
* Document-class: LibXML::XML::HTMLParser
|
12
|
+
*
|
13
|
+
* The HTML parser implements an HTML 4.0 non-verifying parser with an API
|
14
|
+
* compatible with the XML::Parser. In contrast with the XML::Parser,
|
15
|
+
* it can parse "real world" HTML, even if it is severely broken from a
|
16
|
+
* specification point of view. */
|
17
|
+
|
18
|
+
/*
|
19
|
+
* call-seq:
|
20
|
+
* XML::HTMLParser.new -> parser
|
21
|
+
*
|
22
|
+
* Initializes a new parser instance with no pre-determined source.
|
23
|
+
*/
|
24
|
+
static VALUE rxml_html_parser_initialize(VALUE self)
|
25
|
+
{
|
26
|
+
VALUE input = rb_class_new_instance(0, NULL, cXMLInput);
|
27
|
+
rb_iv_set(self, "@input", input);
|
28
|
+
return self;
|
29
|
+
}
|
30
|
+
|
31
|
+
static htmlDocPtr rxml_html_parser_read_file(VALUE input)
|
32
|
+
{
|
33
|
+
VALUE file = rb_ivar_get(input, FILE_ATTR);
|
34
|
+
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
35
|
+
VALUE encoding_str = rxml_input_encoding_to_s(cXMLInput, encoding);
|
36
|
+
char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(
|
37
|
+
encoding_str));
|
38
|
+
int options = 0;
|
39
|
+
|
40
|
+
return htmlReadFile(StringValuePtr(file), xencoding_str, options);
|
41
|
+
}
|
42
|
+
|
43
|
+
static htmlDocPtr rxml_html_parser_read_string(VALUE input)
|
44
|
+
{
|
45
|
+
VALUE string = rb_ivar_get(input, STRING_ATTR);
|
46
|
+
VALUE base_url = rb_ivar_get(input, BASE_URL_ATTR);
|
47
|
+
char *xbase_url = (base_url == Qnil ? NULL : StringValuePtr(base_url));
|
48
|
+
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
49
|
+
VALUE encoding_str = rxml_input_encoding_to_s(cXMLInput, encoding);
|
50
|
+
char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(
|
51
|
+
encoding_str));
|
52
|
+
int options = 0;
|
53
|
+
|
54
|
+
return htmlReadMemory(StringValuePtr(string), RSTRING_LEN(string),
|
55
|
+
xbase_url, xencoding_str, options);
|
56
|
+
}
|
57
|
+
|
58
|
+
static htmlDocPtr rxml_html_parser_read_io(VALUE input)
|
59
|
+
{
|
60
|
+
VALUE io = rb_ivar_get(input, IO_ATTR);
|
61
|
+
VALUE base_url = rb_ivar_get(input, BASE_URL_ATTR);
|
62
|
+
char *xbase_url = (base_url == Qnil ? NULL : StringValuePtr(base_url));
|
63
|
+
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
64
|
+
VALUE encoding_str = rxml_input_encoding_to_s(cXMLInput, encoding);
|
65
|
+
char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(
|
66
|
+
encoding_str));
|
67
|
+
int options = 0;
|
68
|
+
|
69
|
+
return htmlReadIO((xmlInputReadCallback) rxml_read_callback, NULL,
|
70
|
+
(void *) io, xbase_url, xencoding_str, options);
|
71
|
+
}
|
72
|
+
|
73
|
+
/*
|
74
|
+
* call-seq:
|
75
|
+
* parser.parse -> document
|
76
|
+
*
|
77
|
+
* Parse the input HTML and create an XML::Document with
|
78
|
+
* its content. If an error occurs, XML::Parser::ParseError
|
79
|
+
* is thrown.
|
80
|
+
*/
|
81
|
+
static VALUE rxml_html_parser_parse(VALUE self)
|
82
|
+
{
|
83
|
+
VALUE input = rb_ivar_get(self, INPUT_ATTR);
|
84
|
+
htmlDocPtr xdoc;
|
85
|
+
|
86
|
+
if (rb_ivar_get(input, FILE_ATTR) != Qnil)
|
87
|
+
xdoc = rxml_html_parser_read_file(input);
|
88
|
+
else if (rb_ivar_get(input, STRING_ATTR) != Qnil)
|
89
|
+
xdoc = rxml_html_parser_read_string(input);
|
90
|
+
else if (rb_ivar_get(input, IO_ATTR) != Qnil)
|
91
|
+
xdoc = rxml_html_parser_read_io(input);
|
92
|
+
else
|
93
|
+
rb_raise(rb_eArgError, "You must specify a parser data source");
|
94
|
+
|
95
|
+
if (!xdoc)
|
96
|
+
rxml_raise(&xmlLastError);
|
97
|
+
|
98
|
+
return rxml_document_wrap(xdoc);
|
99
|
+
}
|
100
|
+
|
101
|
+
// Rdoc needs to know
|
102
|
+
#ifdef RDOC_NEVER_DEFINED
|
103
|
+
mLibXML = rb_define_module("LibXML");
|
104
|
+
mXML = rb_define_module_under(mLibXML, "XML");
|
105
|
+
#endif
|
106
|
+
|
107
|
+
void ruby_init_html_parser(void)
|
108
|
+
{
|
109
|
+
INPUT_ATTR = rb_intern("@input");
|
110
|
+
|
111
|
+
cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
|
112
|
+
|
113
|
+
/* Attributes */
|
114
|
+
rb_define_attr(cXMLHTMLParser, "input", 1, 0);
|
115
|
+
|
116
|
+
/* Instance methods */
|
117
|
+
rb_define_method(cXMLHTMLParser, "initialize", rxml_html_parser_initialize, 0);
|
118
|
+
rb_define_method(cXMLHTMLParser, "parse", rxml_html_parser_parse, 0);
|
119
|
+
}
|