libxml-ruby 0.9.7-x86-mswin32-60 → 0.9.8-x86-mswin32-60
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES +53 -0
- data/Rakefile +1 -0
- data/ext/libxml/build.log +4 -0
- data/ext/libxml/cbg.c +86 -86
- data/ext/libxml/libxml.c +878 -876
- data/ext/libxml/ruby_libxml.h +8 -4
- data/ext/libxml/ruby_xml_attr.c +36 -168
- data/ext/libxml/ruby_xml_attr.h +2 -4
- data/ext/libxml/ruby_xml_attr_decl.c +177 -0
- data/ext/libxml/ruby_xml_attr_decl.h +13 -0
- data/ext/libxml/ruby_xml_attributes.c +29 -20
- data/ext/libxml/ruby_xml_document.c +895 -898
- data/ext/libxml/ruby_xml_dtd.c +18 -1
- data/ext/libxml/ruby_xml_dtd.h +1 -0
- data/ext/libxml/ruby_xml_encoding.c +116 -0
- data/ext/libxml/ruby_xml_encoding.h +12 -0
- data/ext/libxml/ruby_xml_error.c +8 -2
- data/ext/libxml/ruby_xml_html_parser.c +53 -74
- data/ext/libxml/ruby_xml_html_parser.h +2 -3
- data/ext/libxml/ruby_xml_html_parser_context.c +145 -0
- data/ext/libxml/ruby_xml_html_parser_context.h +12 -0
- data/ext/libxml/ruby_xml_html_parser_options.c +48 -0
- data/ext/libxml/ruby_xml_html_parser_options.h +12 -0
- data/ext/libxml/ruby_xml_input_cbg.c +1 -1
- data/ext/libxml/ruby_xml_io.c +30 -0
- data/ext/libxml/ruby_xml_io.h +9 -0
- data/ext/libxml/ruby_xml_namespace.c +34 -16
- data/ext/libxml/ruby_xml_namespace.h +2 -2
- data/ext/libxml/ruby_xml_namespaces.c +6 -6
- data/ext/libxml/ruby_xml_node.c +1367 -1324
- data/ext/libxml/ruby_xml_node.h +2 -2
- data/ext/libxml/ruby_xml_parser.c +26 -78
- data/ext/libxml/ruby_xml_parser.h +1 -1
- data/ext/libxml/ruby_xml_parser_context.c +284 -13
- data/ext/libxml/ruby_xml_parser_context.h +1 -2
- data/ext/libxml/ruby_xml_parser_options.c +75 -0
- data/ext/libxml/ruby_xml_parser_options.h +14 -0
- data/ext/libxml/ruby_xml_reader.c +277 -183
- data/ext/libxml/ruby_xml_sax_parser.c +60 -57
- data/ext/libxml/ruby_xml_xpath_context.c +43 -8
- data/ext/libxml/ruby_xml_xpath_expression.c +6 -0
- data/ext/libxml/ruby_xml_xpath_object.c +107 -95
- data/ext/libxml/ruby_xml_xpath_object.h +9 -1
- data/ext/libxml/ruby_xml_xpointer.c +107 -107
- data/ext/libxml/version.h +2 -2
- data/ext/mingw/libxml_ruby.dll.a +0 -0
- data/ext/mingw/libxml_ruby.so +0 -0
- data/ext/vc/libxml_ruby.vcproj +43 -3
- data/lib/libxml.rb +2 -3
- data/lib/libxml/attr.rb +71 -2
- data/lib/libxml/attr_decl.rb +81 -0
- data/lib/libxml/document.rb +78 -14
- data/lib/libxml/html_parser.rb +75 -42
- data/lib/libxml/node.rb +11 -0
- data/lib/libxml/parser.rb +106 -62
- data/lib/libxml/reader.rb +12 -0
- data/lib/libxml/sax_parser.rb +42 -52
- data/lib/libxml/xpath_object.rb +15 -0
- data/test/model/atom.xml +12 -12
- data/test/model/bands.xml +4 -4
- data/test/model/books.xml +146 -147
- data/test/model/merge_bug_data.xml +1 -1
- data/test/model/rubynet.xml +1 -0
- data/test/model/shiporder.rng +1 -1
- data/test/model/shiporder.xml +22 -22
- data/test/model/shiporder.xsd +30 -30
- data/test/model/xinclude.xml +1 -1
- data/test/{tc_node_attr.rb → tc_attr.rb} +1 -1
- data/test/tc_attr_decl.rb +131 -0
- data/test/tc_deprecated_require.rb +1 -3
- data/test/tc_document.rb +13 -3
- data/test/tc_document_write.rb +5 -5
- data/test/tc_dtd.rb +13 -5
- data/test/tc_html_parser.rb +14 -26
- data/test/tc_node_cdata.rb +1 -3
- data/test/tc_node_comment.rb +2 -4
- data/test/tc_node_edit.rb +2 -3
- data/test/tc_node_text.rb +35 -1
- data/test/tc_node_write.rb +3 -3
- data/test/tc_node_xlink.rb +2 -4
- data/test/tc_parser.rb +163 -70
- data/test/tc_parser_context.rb +103 -42
- data/test/tc_reader.rb +173 -45
- data/test/tc_relaxng.rb +2 -2
- data/test/tc_sax_parser.rb +48 -52
- data/test/tc_schema.rb +2 -2
- data/test/tc_xpath.rb +37 -6
- data/test/tc_xpath_context.rb +7 -1
- data/test/tc_xpath_expression.rb +1 -3
- data/test/tc_xpointer.rb +1 -3
- data/test/test_suite.rb +2 -3
- metadata +20 -13
- data/ext/libxml/ruby_xml_input.c +0 -329
- data/ext/libxml/ruby_xml_input.h +0 -20
- data/lib/libxml/parser_context.rb +0 -17
- data/lib/libxml/parser_options.rb +0 -25
- data/test/model/simple.xml +0 -7
- data/test/tc_input.rb +0 -13
- data/test/tc_well_formed.rb +0 -11
data/ext/libxml/ruby_xml_dtd.c
CHANGED
@@ -40,6 +40,23 @@ static VALUE rxml_dtd_alloc(VALUE klass)
|
|
40
40
|
return Data_Wrap_Struct(klass, NULL, rxml_dtd_free, NULL);
|
41
41
|
}
|
42
42
|
|
43
|
+
VALUE rxml_dtd_wrap(xmlDtdPtr xdtd)
|
44
|
+
{
|
45
|
+
VALUE result;
|
46
|
+
|
47
|
+
// This node is already wrapped
|
48
|
+
if (xdtd->_private != NULL)
|
49
|
+
return (VALUE) xdtd->_private;
|
50
|
+
|
51
|
+
result = Data_Wrap_Struct(cXMLDtd, NULL, NULL, xdtd);
|
52
|
+
|
53
|
+
xdtd->_private = (void*) result;
|
54
|
+
|
55
|
+
return result;
|
56
|
+
}
|
57
|
+
|
58
|
+
|
59
|
+
|
43
60
|
/*
|
44
61
|
* call-seq:
|
45
62
|
* XML::Dtd.new("public system") -> dtd
|
@@ -72,7 +89,7 @@ static VALUE rxml_dtd_initialize(int argc, VALUE *argv, VALUE self)
|
|
72
89
|
if (xdtd == NULL)
|
73
90
|
rxml_raise(&xmlLastError);
|
74
91
|
|
75
|
-
DATA_PTR(
|
92
|
+
DATA_PTR(self) = xdtd;
|
76
93
|
|
77
94
|
xmlSetTreeDoc((xmlNodePtr) xdtd, NULL);
|
78
95
|
break;
|
data/ext/libxml/ruby_xml_dtd.h
CHANGED
@@ -0,0 +1,116 @@
|
|
1
|
+
/* $Id: rxml_input.c 528 2008-11-15 23:43:48Z cfis $ */
|
2
|
+
|
3
|
+
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
+
|
5
|
+
#include <stdarg.h>
|
6
|
+
#include "ruby_libxml.h"
|
7
|
+
|
8
|
+
/*
|
9
|
+
* Document-class: LibXML::XML::Encoding
|
10
|
+
*
|
11
|
+
* The encoding class defines the encodings that libxml
|
12
|
+
* supports. LibXML converts all data sources to UTF8
|
13
|
+
* internally before processing them.
|
14
|
+
*
|
15
|
+
* By default, LibXML determines a data source's encoding
|
16
|
+
* using the algorithm described on its
|
17
|
+
* website[http://xmlsoft.org/encoding.html].
|
18
|
+
*
|
19
|
+
* However, you may override a data source's encoding
|
20
|
+
* by using the encoding constants defined in this
|
21
|
+
* module.
|
22
|
+
*
|
23
|
+
* Example 1:
|
24
|
+
*
|
25
|
+
* io = File.open('some_file', 'rb')
|
26
|
+
* parser = XML::Parser.io(io, :encoding => XML::Encoding::ISO_8859_1)
|
27
|
+
* doc = parser.parse
|
28
|
+
*
|
29
|
+
* Example 2:
|
30
|
+
*
|
31
|
+
* parser = XML::HTMLParser.file("some_file", :encoding => XML::Encoding::ISO_8859_1)
|
32
|
+
* doc = parser.parse
|
33
|
+
*
|
34
|
+
* Example 3:
|
35
|
+
*
|
36
|
+
* document = XML::Document.new
|
37
|
+
* document.encoding = XML::Encoding::ISO_8859_1
|
38
|
+
* doc << XML::Node.new
|
39
|
+
*/
|
40
|
+
|
41
|
+
VALUE mXMLEncoding;
|
42
|
+
|
43
|
+
|
44
|
+
/*
|
45
|
+
* call-seq:
|
46
|
+
* XML::Encoding.from_s("UTF_8") -> XML::Encoding::UTF_8
|
47
|
+
*
|
48
|
+
* Converts an encoding string to an encoding constant
|
49
|
+
* defined on the XML::Encoding class.
|
50
|
+
*/
|
51
|
+
static VALUE rxml_encoding_from_s(VALUE klass, VALUE encoding)
|
52
|
+
{
|
53
|
+
xmlCharEncoding xencoding;
|
54
|
+
|
55
|
+
if (encoding == Qnil)
|
56
|
+
return Qnil;
|
57
|
+
|
58
|
+
xencoding = xmlParseCharEncoding(StringValuePtr(encoding));
|
59
|
+
return NUM2INT(xencoding);
|
60
|
+
}
|
61
|
+
|
62
|
+
/*
|
63
|
+
* call-seq:
|
64
|
+
* XML::Encoding.to_s(XML::Encoding::UTF_8) -> "UTF-8"
|
65
|
+
*
|
66
|
+
* Converts an encoding constant defined on the XML::Encoding
|
67
|
+
* class to its text representation.
|
68
|
+
*/
|
69
|
+
static VALUE rxml_encoding_to_s(VALUE klass, VALUE encoding)
|
70
|
+
{
|
71
|
+
const char* xecoding = xmlGetCharEncodingName(NUM2INT(encoding));
|
72
|
+
|
73
|
+
if (!xecoding)
|
74
|
+
return Qnil;
|
75
|
+
else
|
76
|
+
return rb_str_new2(xecoding);
|
77
|
+
}
|
78
|
+
|
79
|
+
// Rdoc needs to know
|
80
|
+
#ifdef RDOC_NEVER_DEFINED
|
81
|
+
mLibXML = rb_define_module("LibXML");
|
82
|
+
mXML = rb_define_module_under(mLibXML, "XML");
|
83
|
+
#endif
|
84
|
+
|
85
|
+
void ruby_init_xml_encoding(void)
|
86
|
+
{
|
87
|
+
mXMLEncoding = rb_define_module_under(mXML, "Encoding");
|
88
|
+
rb_define_module_function(mXMLEncoding, "from_s", rxml_encoding_from_s, 1);
|
89
|
+
rb_define_module_function(mXMLEncoding, "to_s", rxml_encoding_to_s, 1);
|
90
|
+
|
91
|
+
rb_define_const(mXMLEncoding, "UNDEFINED", INT2NUM(XPATH_UNDEFINED));
|
92
|
+
rb_define_const(mXMLEncoding, "ERROR", INT2NUM(XML_CHAR_ENCODING_ERROR)); /* No char encoding detected */
|
93
|
+
rb_define_const(mXMLEncoding, "NONE", INT2NUM(XML_CHAR_ENCODING_NONE)); /* No char encoding detected */
|
94
|
+
rb_define_const(mXMLEncoding, "UTF_8", INT2NUM(XML_CHAR_ENCODING_UTF8)); /* UTF-8 */
|
95
|
+
rb_define_const(mXMLEncoding, "UTF_16LE", INT2NUM(XML_CHAR_ENCODING_UTF16LE)); /* UTF-16 little endian */
|
96
|
+
rb_define_const(mXMLEncoding, "UTF_16BE", INT2NUM(XML_CHAR_ENCODING_UTF16BE)); /* UTF-16 big endian */
|
97
|
+
rb_define_const(mXMLEncoding, "UCS_4LE", INT2NUM(XML_CHAR_ENCODING_UCS4LE)); /* UCS-4 little endian */
|
98
|
+
rb_define_const(mXMLEncoding, "UCS_4BE", INT2NUM(XML_CHAR_ENCODING_UCS4BE)); /* UCS-4 big endian */
|
99
|
+
rb_define_const(mXMLEncoding, "EBCDIC", INT2NUM(XML_CHAR_ENCODING_EBCDIC)); /* EBCDIC uh! */
|
100
|
+
rb_define_const(mXMLEncoding, "UCS_4_2143", INT2NUM(XML_CHAR_ENCODING_UCS4_2143)); /* UCS-4 unusual ordering */
|
101
|
+
rb_define_const(mXMLEncoding, "UCS_4_3412", INT2NUM(XML_CHAR_ENCODING_UCS4_3412)); /* UCS-4 unusual ordering */
|
102
|
+
rb_define_const(mXMLEncoding, "UCS_2", INT2NUM(XML_CHAR_ENCODING_UCS2)); /* UCS-2 */
|
103
|
+
rb_define_const(mXMLEncoding, "ISO_8859_1", INT2NUM(XML_CHAR_ENCODING_8859_1)); /* ISO-8859-1 ISO Latin 1 */
|
104
|
+
rb_define_const(mXMLEncoding, "ISO_8859_2", INT2NUM(XML_CHAR_ENCODING_8859_2)); /* ISO-8859-2 ISO Latin 2 */
|
105
|
+
rb_define_const(mXMLEncoding, "ISO_8859_3", INT2NUM(XML_CHAR_ENCODING_8859_3)); /* ISO-8859-3 */
|
106
|
+
rb_define_const(mXMLEncoding, "ISO_8859_4", INT2NUM(XML_CHAR_ENCODING_8859_4)); /* ISO-8859-4 */
|
107
|
+
rb_define_const(mXMLEncoding, "ISO_8859_5", INT2NUM(XML_CHAR_ENCODING_8859_5)); /* ISO-8859-5 */
|
108
|
+
rb_define_const(mXMLEncoding, "ISO_8859_6", INT2NUM(XML_CHAR_ENCODING_8859_6)); /* ISO-8859-6 */
|
109
|
+
rb_define_const(mXMLEncoding, "ISO_8859_7", INT2NUM(XML_CHAR_ENCODING_8859_7)); /* ISO-8859-7 */
|
110
|
+
rb_define_const(mXMLEncoding, "ISO_8859_8", INT2NUM(XML_CHAR_ENCODING_8859_8)); /* ISO-8859-8 */
|
111
|
+
rb_define_const(mXMLEncoding, "ISO_8859_9", INT2NUM(XML_CHAR_ENCODING_8859_9)); /* ISO-8859-9 */
|
112
|
+
rb_define_const(mXMLEncoding, "ISO_2022_JP", INT2NUM(XML_CHAR_ENCODING_2022_JP)); /* ISO-2022-JP */
|
113
|
+
rb_define_const(mXMLEncoding, "SHIFT_JIS", INT2NUM(XML_CHAR_ENCODING_SHIFT_JIS)); /* Shift_JIS */
|
114
|
+
rb_define_const(mXMLEncoding, "EUC_JP", INT2NUM(XML_CHAR_ENCODING_EUC_JP)); /* EUC-JP */
|
115
|
+
rb_define_const(mXMLEncoding, "ASCII", INT2NUM(XML_CHAR_ENCODING_ASCII)); /* pure ASCII */
|
116
|
+
}
|
@@ -0,0 +1,12 @@
|
|
1
|
+
/* $Id: rxml_parser.h 39 2006-02-21 20:40:16Z roscopeco $ */
|
2
|
+
|
3
|
+
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
+
|
5
|
+
#ifndef __RXML_ENCODING__
|
6
|
+
#define __RXML_ENCODING__
|
7
|
+
|
8
|
+
extern VALUE mXMLEncoding;
|
9
|
+
|
10
|
+
void ruby_init_xml_encoding();
|
11
|
+
|
12
|
+
#endif
|
data/ext/libxml/ruby_xml_error.c
CHANGED
@@ -108,10 +108,16 @@ VALUE rxml_error_wrap(xmlErrorPtr xerror)
|
|
108
108
|
rb_iv_set(result, "@int1", INT2NUM(xerror->int1));
|
109
109
|
rb_iv_set(result, "@int2", INT2NUM(xerror->int2));
|
110
110
|
|
111
|
-
//rb_define_attr(eXMLError, "ctxt", 1, 0);
|
112
111
|
if (xerror->node)
|
113
112
|
{
|
114
|
-
|
113
|
+
/* Returning the original node is too dangerous because its
|
114
|
+
parent document is never returned to Ruby. So return a
|
115
|
+
copy of the node, which does not belong to any document,
|
116
|
+
and can free itself when Ruby calls its free method. Note
|
117
|
+
we just copy the node, and don't bother with the overhead
|
118
|
+
of a recursive query. */
|
119
|
+
xmlNodePtr xNode = xmlCopyNode((const xmlNodePtr)xerror->node, 2);
|
120
|
+
VALUE node = rxml_node_wrap(xNode);
|
115
121
|
rb_iv_set(result, "@node", node);
|
116
122
|
}
|
117
123
|
return result;
|
@@ -1,78 +1,60 @@
|
|
1
|
-
/* $Id: ruby_xml_html_parser.c
|
1
|
+
/* $Id: ruby_xml_html_parser.c 737 2009-01-23 01:23:08Z walltndr $ */
|
2
2
|
|
3
3
|
/* Please see the LICENSE file for copyright and distribution information */
|
4
4
|
|
5
5
|
#include "ruby_libxml.h"
|
6
6
|
|
7
|
-
|
8
|
-
static ID INPUT_ATTR;
|
9
|
-
|
10
|
-
/*
|
11
|
-
* Document-class: LibXML::XML::HTMLParser
|
7
|
+
/* Document-class: LibXML::XML::HTMLParser
|
12
8
|
*
|
13
9
|
* The HTML parser implements an HTML 4.0 non-verifying parser with an API
|
14
10
|
* compatible with the XML::Parser. In contrast with the XML::Parser,
|
15
11
|
* it can parse "real world" HTML, even if it is severely broken from a
|
16
|
-
* specification point of view.
|
12
|
+
* specification point of view.
|
13
|
+
*
|
14
|
+
* The HTML parser creates an in-memory document object
|
15
|
+
* that consists of any number of XML::Node instances. This is a simple
|
16
|
+
* and powerful model, but has the major limitation that the size of
|
17
|
+
* the document that can be processed is limited by the amount of
|
18
|
+
* memory available.
|
19
|
+
*
|
20
|
+
* Using the html parser is simple:
|
21
|
+
*
|
22
|
+
* parser = XML::HTMLParser.file('my_file')
|
23
|
+
* doc = parser.parse
|
24
|
+
*
|
25
|
+
* You can also parse documents (see XML::HTMLParser.document),
|
26
|
+
* strings (see XML::HTMLParser.string) and io objects (see
|
27
|
+
* XML::HTMLParser.io).
|
28
|
+
*/
|
17
29
|
|
18
|
-
|
19
|
-
|
30
|
+
VALUE cXMLHtmlParser;
|
31
|
+
static ID CONTEXT_ATTR;
|
32
|
+
|
33
|
+
|
34
|
+
/* call-seq:
|
20
35
|
* XML::HTMLParser.initialize -> parser
|
21
36
|
*
|
22
37
|
* Initializes a new parser instance with no pre-determined source.
|
23
38
|
*/
|
24
|
-
static VALUE rxml_html_parser_initialize(VALUE self)
|
39
|
+
static VALUE rxml_html_parser_initialize(int argc, VALUE *argv, VALUE self)
|
25
40
|
{
|
26
|
-
VALUE
|
27
|
-
rb_iv_set(self, "@input", input);
|
28
|
-
return self;
|
29
|
-
}
|
41
|
+
VALUE context = Qnil;
|
30
42
|
|
31
|
-
|
32
|
-
{
|
33
|
-
VALUE file = rb_ivar_get(input, FILE_ATTR);
|
34
|
-
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
35
|
-
VALUE encoding_str = rxml_input_encoding_to_s(cXMLInput, encoding);
|
36
|
-
char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(
|
37
|
-
encoding_str));
|
38
|
-
int options = 0;
|
39
|
-
|
40
|
-
return htmlReadFile(StringValuePtr(file), xencoding_str, options);
|
41
|
-
}
|
43
|
+
rb_scan_args(argc, argv, "01", &context);
|
42
44
|
|
43
|
-
|
44
|
-
{
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
49
|
-
VALUE encoding_str = rxml_input_encoding_to_s(cXMLInput, encoding);
|
50
|
-
char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(
|
51
|
-
encoding_str));
|
52
|
-
int options = 0;
|
53
|
-
|
54
|
-
return htmlReadMemory(StringValuePtr(string), RSTRING_LEN(string),
|
55
|
-
xbase_url, xencoding_str, options);
|
56
|
-
}
|
45
|
+
if (context == Qnil)
|
46
|
+
{
|
47
|
+
rb_warn("Passing no parameters to XML::HTMLParser.new is deprecated. Pass an instance of XML::Parser::Context instead.");
|
48
|
+
context = rb_class_new_instance(0, NULL, cXMLParserContext);
|
49
|
+
}
|
57
50
|
|
58
|
-
|
59
|
-
|
60
|
-
VALUE io = rb_ivar_get(input, IO_ATTR);
|
61
|
-
VALUE base_url = rb_ivar_get(input, BASE_URL_ATTR);
|
62
|
-
char *xbase_url = (base_url == Qnil ? NULL : StringValuePtr(base_url));
|
63
|
-
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
64
|
-
VALUE encoding_str = rxml_input_encoding_to_s(cXMLInput, encoding);
|
65
|
-
char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(
|
66
|
-
encoding_str));
|
67
|
-
int options = 0;
|
68
|
-
|
69
|
-
return htmlReadIO((xmlInputReadCallback) rxml_read_callback, NULL,
|
70
|
-
(void *) io, xbase_url, xencoding_str, options);
|
51
|
+
rb_ivar_set(self, CONTEXT_ATTR, context);
|
52
|
+
return self;
|
71
53
|
}
|
72
54
|
|
73
55
|
/*
|
74
56
|
* call-seq:
|
75
|
-
* parser.parse ->
|
57
|
+
* parser.parse -> XML::Document
|
76
58
|
*
|
77
59
|
* Parse the input XML and create an XML::Document with
|
78
60
|
* its content. If an error occurs, XML::Parser::ParseError
|
@@ -80,22 +62,19 @@ static htmlDocPtr rxml_html_parser_read_io(VALUE input)
|
|
80
62
|
*/
|
81
63
|
static VALUE rxml_html_parser_parse(VALUE self)
|
82
64
|
{
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
rxml_raise(&xmlLastError);
|
97
|
-
|
98
|
-
return rxml_document_wrap(xdoc);
|
65
|
+
xmlParserCtxtPtr ctxt;
|
66
|
+
VALUE context = rb_ivar_get(self, CONTEXT_ATTR);
|
67
|
+
|
68
|
+
Data_Get_Struct(context, xmlParserCtxt, ctxt);
|
69
|
+
|
70
|
+
if (htmlParseDocument(ctxt) == -1 && ! ctxt->recovery)
|
71
|
+
{
|
72
|
+
if (ctxt->myDoc)
|
73
|
+
xmlFreeDoc(ctxt->myDoc);
|
74
|
+
rxml_raise(&ctxt->lastError);
|
75
|
+
}
|
76
|
+
|
77
|
+
return rxml_document_wrap(ctxt->myDoc);
|
99
78
|
}
|
100
79
|
|
101
80
|
// Rdoc needs to know
|
@@ -106,14 +85,14 @@ mXML = rb_define_module_under(mLibXML, "XML");
|
|
106
85
|
|
107
86
|
void ruby_init_html_parser(void)
|
108
87
|
{
|
109
|
-
|
88
|
+
CONTEXT_ATTR = rb_intern("@context");
|
110
89
|
|
111
|
-
|
90
|
+
cXMLHtmlParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
|
112
91
|
|
113
92
|
/* Attributes */
|
114
|
-
rb_define_attr(
|
93
|
+
rb_define_attr(cXMLHtmlParser, "input", 1, 0);
|
115
94
|
|
116
95
|
/* Instance methods */
|
117
|
-
rb_define_method(
|
118
|
-
rb_define_method(
|
96
|
+
rb_define_method(cXMLHtmlParser, "initialize", rxml_html_parser_initialize, -1);
|
97
|
+
rb_define_method(cXMLHtmlParser, "parse", rxml_html_parser_parse, 0);
|
119
98
|
}
|
@@ -1,12 +1,11 @@
|
|
1
|
-
/* $Id: ruby_xml_html_parser.h
|
1
|
+
/* $Id: ruby_xml_html_parser.h 711 2009-01-20 07:17:15Z cfis $ */
|
2
2
|
|
3
3
|
/* Please see the LICENSE file for copyright and distribution information */
|
4
4
|
|
5
5
|
#ifndef __RXML_HTML_PARSER__
|
6
6
|
#define __RXML_HTML_PARSER__
|
7
7
|
|
8
|
-
extern
|
9
|
-
extern VALUE cXMLHTMLParser;
|
8
|
+
extern VALUE cXMLHtmlParser;
|
10
9
|
|
11
10
|
void ruby_init_html_parser(void);
|
12
11
|
|
@@ -0,0 +1,145 @@
|
|
1
|
+
/* $Id: ruby_xml_parser_context.c 710 2009-01-20 05:30:51Z cfis $ */
|
2
|
+
|
3
|
+
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
+
|
5
|
+
#include "ruby_libxml.h"
|
6
|
+
#include "ruby_xml_html_parser_context.h"
|
7
|
+
|
8
|
+
/*
|
9
|
+
* Document-class: LibXML::XML::HTMLParser::Context
|
10
|
+
*
|
11
|
+
* The XML::HTMLParser::Context class provides in-depth control over how
|
12
|
+
* a document is parsed.
|
13
|
+
*/
|
14
|
+
|
15
|
+
VALUE cXMLHtmlParserContext;
|
16
|
+
|
17
|
+
static void rxml_html_parser_context_free(xmlParserCtxtPtr ctxt)
|
18
|
+
{
|
19
|
+
xmlFreeParserCtxt(ctxt);
|
20
|
+
}
|
21
|
+
|
22
|
+
static VALUE rxml_html_parser_context_wrap(xmlParserCtxtPtr ctxt)
|
23
|
+
{
|
24
|
+
return Data_Wrap_Struct(cXMLHtmlParserContext, NULL, rxml_html_parser_context_free, ctxt);
|
25
|
+
}
|
26
|
+
|
27
|
+
/* call-seq:
|
28
|
+
* XML::HTMLParser::Context.file(file) -> XML::HTMLParser::Context
|
29
|
+
*
|
30
|
+
* Creates a new parser context based on the specified file or uri.
|
31
|
+
*
|
32
|
+
* Parameters:
|
33
|
+
*
|
34
|
+
* file - A filename or uri.
|
35
|
+
*/
|
36
|
+
static VALUE rxml_html_parser_context_file(VALUE klass, VALUE file)
|
37
|
+
{
|
38
|
+
xmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(StringValuePtr(file), NULL);
|
39
|
+
return rxml_html_parser_context_wrap(ctxt);
|
40
|
+
}
|
41
|
+
|
42
|
+
/* call-seq:
|
43
|
+
* XML::HTMLParser::Context.io(io) -> XML::HTMLParser::Context
|
44
|
+
*
|
45
|
+
* Creates a new parser context based on the specified io object.
|
46
|
+
*
|
47
|
+
* Parameters:
|
48
|
+
*
|
49
|
+
* io - A ruby IO object.
|
50
|
+
*/
|
51
|
+
static VALUE rxml_html_parser_context_io(VALUE klass, VALUE io)
|
52
|
+
{
|
53
|
+
htmlParserCtxtPtr ctxt;
|
54
|
+
xmlParserInputBufferPtr input;
|
55
|
+
xmlParserInputPtr stream;
|
56
|
+
|
57
|
+
input = xmlParserInputBufferCreateIO((xmlInputReadCallback) rxml_read_callback, NULL,
|
58
|
+
(void*)io, XML_CHAR_ENCODING_NONE);
|
59
|
+
|
60
|
+
ctxt = htmlNewParserCtxt();
|
61
|
+
if (!ctxt)
|
62
|
+
{
|
63
|
+
xmlFreeParserInputBuffer(input);
|
64
|
+
rxml_raise(&xmlLastError);
|
65
|
+
}
|
66
|
+
|
67
|
+
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
|
68
|
+
|
69
|
+
if (!stream)
|
70
|
+
{
|
71
|
+
xmlFreeParserInputBuffer(input);
|
72
|
+
xmlFreeParserCtxt(ctxt);
|
73
|
+
rxml_raise(&xmlLastError);
|
74
|
+
}
|
75
|
+
inputPush(ctxt, stream);
|
76
|
+
|
77
|
+
return rxml_html_parser_context_wrap(ctxt);
|
78
|
+
}
|
79
|
+
|
80
|
+
/* call-seq:
|
81
|
+
* XML::HTMLParser::Context.string(string) -> XML::HTMLParser::Context
|
82
|
+
*
|
83
|
+
* Creates a new parser context based on the specified string.
|
84
|
+
*
|
85
|
+
* Parameters:
|
86
|
+
*
|
87
|
+
* string - A string that contains the data to parse.
|
88
|
+
*/
|
89
|
+
static VALUE rxml_html_parser_context_string(VALUE klass, VALUE string)
|
90
|
+
{
|
91
|
+
xmlParserCtxtPtr ctxt;
|
92
|
+
Check_Type(string, T_STRING);
|
93
|
+
|
94
|
+
if (RSTRING_LEN(string) == 0)
|
95
|
+
rb_raise(rb_eArgError, "Must specify a string with one or more characters");
|
96
|
+
|
97
|
+
ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(string),
|
98
|
+
RSTRING_LEN(string));
|
99
|
+
if (!ctxt)
|
100
|
+
rxml_raise(&xmlLastError);
|
101
|
+
|
102
|
+
htmlDefaultSAXHandlerInit();
|
103
|
+
if (ctxt->sax != NULL)
|
104
|
+
memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
|
105
|
+
|
106
|
+
return rxml_html_parser_context_wrap(ctxt);
|
107
|
+
}
|
108
|
+
|
109
|
+
/*
|
110
|
+
* call-seq:
|
111
|
+
* context.options = XML::Parser::Options::NOENT |
|
112
|
+
XML::Parser::Options::NOCDATA
|
113
|
+
*
|
114
|
+
* Provides control over the execution of a parser. Valid values
|
115
|
+
* are the constants defined on XML::Parser::Options. Multiple
|
116
|
+
* options can be combined by using Bitwise OR (|).
|
117
|
+
*/
|
118
|
+
static VALUE rxml_html_parser_context_options_set(VALUE self, VALUE options)
|
119
|
+
{
|
120
|
+
int result;
|
121
|
+
xmlParserCtxtPtr ctxt;
|
122
|
+
Check_Type(options, T_FIXNUM);
|
123
|
+
|
124
|
+
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
125
|
+
result = htmlCtxtUseOptions(ctxt, NUM2INT(options));
|
126
|
+
|
127
|
+
return self;
|
128
|
+
}
|
129
|
+
|
130
|
+
// Rdoc needs to know
|
131
|
+
#ifdef RDOC_NEVER_DEFINED
|
132
|
+
mLibXML = rb_define_module("LibXML");
|
133
|
+
mXML = rb_define_module_under(mLibXML, "XML");
|
134
|
+
cXMLHtmlParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
|
135
|
+
#endif
|
136
|
+
|
137
|
+
void ruby_init_html_parser_context(void)
|
138
|
+
{
|
139
|
+
cXMLHtmlParserContext = rb_define_class_under(cXMLHtmlParser, "Context", cXMLParserContext);
|
140
|
+
|
141
|
+
rb_define_singleton_method(cXMLHtmlParserContext, "file", rxml_html_parser_context_file, 1);
|
142
|
+
rb_define_singleton_method(cXMLHtmlParserContext, "io", rxml_html_parser_context_io, 1);
|
143
|
+
rb_define_singleton_method(cXMLHtmlParserContext, "string", rxml_html_parser_context_string, 1);
|
144
|
+
rb_define_method(cXMLHtmlParserContext, "options=", rxml_html_parser_context_options_set, 1);
|
145
|
+
}
|