libxml-ruby 0.9.1 → 0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES +6 -0
- data/ext/libxml/libxml.c +2 -1
- data/ext/libxml/libxml.c.rej +16 -0
- data/ext/libxml/ruby_libxml.h +72 -71
- data/ext/libxml/ruby_xml_error.c +11 -0
- data/ext/libxml/ruby_xml_html_parser.c +133 -133
- data/ext/libxml/ruby_xml_parser.c +3 -3
- data/ext/libxml/ruby_xml_sax_parser.c +228 -228
- data/ext/libxml/ruby_xml_xpath_context.c +294 -281
- data/ext/libxml/ruby_xml_xpath_expression.c +69 -0
- data/ext/libxml/ruby_xml_xpath_expression.h +12 -0
- data/ext/libxml/version.h +2 -2
- data/ext/vc/libxml_ruby.vcproj +9 -1
- data/test/tc_document_write.rb +14 -0
- data/test/tc_xpath_expression.rb +35 -0
- data/test/test_suite.rb +1 -0
- metadata +7 -2
data/CHANGES
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
(See log/ChangeLog for more detailed changes derived directly from source control.)
|
2
2
|
|
3
|
+
== 0.9.2 / 2008-11-19 Charlie Savage
|
4
|
+
|
5
|
+
* Add support for compiled XPath expressions (donated by Pavel Valodzka)
|
6
|
+
|
7
|
+
* Fixes for compiling on OS X 10.5.4 and 10.5.5
|
8
|
+
|
3
9
|
== 0.9.1 / 2008-11-18 Charlie Savage
|
4
10
|
|
5
11
|
* Expose LibXML's encoding support via a new Encoding object.
|
data/ext/libxml/libxml.c
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
/* $Id: libxml.c
|
1
|
+
/* $Id: libxml.c 600 2008-11-19 07:39:29Z cfis $ */
|
2
2
|
|
3
3
|
/* Please see the LICENSE file for copyright and distribution information */
|
4
4
|
|
@@ -872,6 +872,7 @@ Init_libxml_ruby(void) {
|
|
872
872
|
ruby_init_xml_xinclude();
|
873
873
|
ruby_init_xml_xpath();
|
874
874
|
ruby_init_xml_xpath_context();
|
875
|
+
ruby_init_xml_xpath_expression();
|
875
876
|
ruby_init_xml_xpointer();
|
876
877
|
ruby_init_html_parser();
|
877
878
|
ruby_init_input_callbacks();
|
data/ext/libxml/libxml.c.rej
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
***************
|
2
|
+
*** 46,51 ****
|
3
|
+
ruby_init_xml_xinclude();
|
4
|
+
ruby_init_xml_xpath();
|
5
|
+
ruby_init_xml_xpath_context();
|
6
|
+
ruby_init_xml_xpointer();
|
7
|
+
ruby_init_xml_xpointer_context();
|
8
|
+
ruby_init_html_parser();
|
9
|
+
--- 46,52 ----
|
10
|
+
ruby_init_xml_xinclude();
|
11
|
+
ruby_init_xml_xpath();
|
12
|
+
ruby_init_xml_xpath_context();
|
13
|
+
+ ruby_init_xml_xpath_expression();
|
14
|
+
ruby_init_xml_xpointer();
|
15
|
+
ruby_init_xml_xpointer_context();
|
16
|
+
ruby_init_html_parser();
|
data/ext/libxml/ruby_libxml.h
CHANGED
@@ -1,71 +1,72 @@
|
|
1
|
-
/* Please see the LICENSE file for copyright and distribution information */
|
2
|
-
|
3
|
-
#ifndef __RUBY_LIBXML_H__
|
4
|
-
#define __RUBY_LIBXML_H__
|
5
|
-
|
6
|
-
#include "version.h"
|
7
|
-
|
8
|
-
#include <ruby.h>
|
9
|
-
#include <rubyio.h>
|
10
|
-
#include <util.h>
|
11
|
-
#include <libxml/parser.h>
|
12
|
-
#include <libxml/parserInternals.h>
|
13
|
-
#include <libxml/debugXML.h>
|
14
|
-
#include <libxml/xmlversion.h>
|
15
|
-
#include <libxml/xmlmemory.h>
|
16
|
-
#include <libxml/xpath.h>
|
17
|
-
#include <libxml/valid.h>
|
18
|
-
#include <libxml/catalog.h>
|
19
|
-
#include <libxml/HTMLparser.h>
|
20
|
-
#include <libxml/xmlreader.h>
|
21
|
-
|
22
|
-
// Maybe not yet defined in ruby
|
23
|
-
#ifndef RSTRING_LEN
|
24
|
-
#define RSTRING_LEN(x) RSTRING(x)->len
|
25
|
-
#endif
|
26
|
-
|
27
|
-
// not in Ruby 1.9
|
28
|
-
#ifndef GetWriteFile
|
29
|
-
#define GetWriteFile(fp) rb_io_stdio_file(fp)
|
30
|
-
#define OpenFile rb_io_t
|
31
|
-
#endif
|
32
|
-
|
33
|
-
#ifdef LIBXML_DEBUG_ENABLED
|
34
|
-
#include <libxml/xpathInternals.h>
|
35
|
-
#endif
|
36
|
-
#ifdef LIBXML_XINCLUDE_ENABLED
|
37
|
-
#include <libxml/xinclude.h>
|
38
|
-
#endif
|
39
|
-
#ifdef LIBXML_XPTR_ENABLED
|
40
|
-
#include <libxml/xpointer.h>
|
41
|
-
#endif
|
42
|
-
|
43
|
-
|
44
|
-
#include "ruby_xml_encoding.h"
|
45
|
-
#include "ruby_xml_error.h"
|
46
|
-
#include "ruby_xml_input.h"
|
47
|
-
#include "ruby_xml_state.h"
|
48
|
-
#include "ruby_xml_attributes.h"
|
49
|
-
#include "ruby_xml_attr.h"
|
50
|
-
#include "ruby_xml_document.h"
|
51
|
-
#include "ruby_xml_node.h"
|
52
|
-
#include "ruby_xml_ns.h"
|
53
|
-
#include "ruby_xml_parser.h"
|
54
|
-
#include "ruby_xml_parser_context.h"
|
55
|
-
#include "ruby_xml_sax_parser.h"
|
56
|
-
#include "ruby_xml_xinclude.h"
|
57
|
-
#include "ruby_xml_xpath.h"
|
58
|
-
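#include "ruby_xml_xpath_context.h"
|
59
|
-
#include "ruby_xml_xpath_object.h"
|
60
|
-
#include "ruby_xml_xpointer.h"
|
61
|
-
#include "ruby_xml_input_cbg.h"
|
62
|
-
#include "ruby_xml_dtd.h"
|
63
|
-
#include "ruby_xml_schema.h"
|
64
|
-
#include "ruby_xml_relaxng.h"
|
65
|
-
#include "ruby_xml_html_parser.h"
|
66
|
-
#include "ruby_xml_reader.h"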
|
67
|
-
|
68
|
-
extern VALUE mLibXML;
|
69
|
-
extern VALUE mXML;
|
70
|
-
|
71
|
-
#endif
|
1
|
+
/* Please see the LICENSE file for copyright and distribution information */
|
2
|
+
|
3
|
+
#ifndef __RUBY_LIBXML_H__
|
4
|
+
#define __RUBY_LIBXML_H__
|
5
|
+
|
6
|
+
#include "version.h"
|
7
|
+
|
8
|
+
#include <ruby.h>
|
9
|
+
#include <rubyio.h>
|
10
|
+
#include <util.h>
|
11
|
+
#include <libxml/parser.h>
|
12
|
+
#include <libxml/parserInternals.h>
|
13
|
+
#include <libxml/debugXML.h>
|
14
|
+
#include <libxml/xmlversion.h>
|
15
|
+
#include <libxml/xmlmemory.h>
|
16
|
+
#include <libxml/xpath.h>
|
17
|
+
#include <libxml/valid.h>
|
18
|
+
#include <libxml/catalog.h>
|
19
|
+
#include <libxml/HTMLparser.h>
|
20
|
+
#include <libxml/xmlreader.h>
|
21
|
+
|
22
|
+
// Maybe not yet defined in ruby
|
23
|
+
#ifndef RSTRING_LEN
|
24
|
+
#define RSTRING_LEN(x) RSTRING(x)->len
|
25
|
+
#endif
|
26
|
+
|
27
|
+
// not in Ruby 1.9
|
28
|
+
#ifndef GetWriteFile
|
29
|
+
#define GetWriteFile(fp) rb_io_stdio_file(fp)
|
30
|
+
#define OpenFile rb_io_t
|
31
|
+
#endif
|
32
|
+
|
33
|
+
#ifdef LIBXML_DEBUG_ENABLED
|
34
|
+
#include <libxml/xpathInternals.h>
|
35
|
+
#endif
|
36
|
+
#ifdef LIBXML_XINCLUDE_ENABLED
|
37
|
+
#include <libxml/xinclude.h>
|
38
|
+
#endif
|
39
|
+
#ifdef LIBXML_XPTR_ENABLED
|
40
|
+
#include <libxml/xpointer.h>
|
41
|
+
#endif
|
42
|
+
|
43
|
+
|
44
|
+
#include "ruby_xml_encoding.h"
|
45
|
+
#include "ruby_xml_error.h"
|
46
|
+
#include "ruby_xml_input.h"
|
47
|
+
#include "ruby_xml_state.h"
|
48
|
+
#include "ruby_xml_attributes.h"
|
49
|
+
#include "ruby_xml_attr.h"
|
50
|
+
#include "ruby_xml_document.h"
|
51
|
+
#include "ruby_xml_node.h"
|
52
|
+
#include "ruby_xml_ns.h"
|
53
|
+
#include "ruby_xml_parser.h"
|
54
|
+
#include "ruby_xml_parser_context.h"
|
55
|
+
#include "ruby_xml_sax_parser.h"
|
56
|
+
#include "ruby_xml_xinclude.h"
|
57
|
+
#include "ruby_xml_xpath.h"
|
58
|
+
#include "ruby_xml_xpath_expression.h"
|
59
|
+
#include "ruby_xml_xpath_context.h"
|
60
|
+
#include "ruby_xml_xpath_object.h"
|
61
|
+
#include "ruby_xml_xpointer.h"
|
62
|
+
#include "ruby_xml_input_cbg.h"
|
63
|
+
#include "ruby_xml_dtd.h"
|
64
|
+
#include "ruby_xml_schema.h"
|
65
|
+
#include "ruby_xml_relaxng.h"
|
66
|
+
#include "ruby_xml_html_parser.h"
|
67
|
+
#include "ruby_xml_reader.h"
|
68
|
+
|
69
|
+
extern VALUE mLibXML;
|
70
|
+
extern VALUE mXML;
|
71
|
+
|
72
|
+
#endif
|
data/ext/libxml/ruby_xml_error.c
CHANGED
@@ -202,7 +202,9 @@ ruby_init_xml_error() {
|
|
202
202
|
rb_define_const(eXMLError, "VALID", INT2NUM(XML_FROM_VALID));
|
203
203
|
rb_define_const(eXMLError, "CHECK", INT2NUM(XML_FROM_CHECK));
|
204
204
|
rb_define_const(eXMLError, "WRITER", INT2NUM(XML_FROM_WRITER));
|
205
|
+
#if LIBXML_VERSION >= 20621
|
205
206
|
rb_define_const(eXMLError, "MODULE", INT2NUM(XML_FROM_MODULE));
|
207
|
+
#endif
|
206
208
|
#if LIBXML_VERSION >= 20632
|
207
209
|
rb_define_const(eXMLError, "I18N", INT2NUM(XML_FROM_I18N));
|
208
210
|
rb_define_const(eXMLError, "SCHEMATRONV", INT2NUM(XML_FROM_SCHEMATRONV));
|
@@ -323,7 +325,12 @@ ruby_init_xml_error() {
|
|
323
325
|
rb_define_const(eXMLError, "NS_ERR_UNDEFINED_NAMESPACE", INT2NUM(XML_NS_ERR_UNDEFINED_NAMESPACE));
|
324
326
|
rb_define_const(eXMLError, "NS_ERR_QNAME", INT2NUM(XML_NS_ERR_QNAME));
|
325
327
|
rb_define_const(eXMLError, "NS_ERR_ATTRIBUTE_REDEFINED", INT2NUM(XML_NS_ERR_ATTRIBUTE_REDEFINED));
|
328
|
+
#if LIBXML_VERSION >= 20620
|
326
329
|
rb_define_const(eXMLError, "NS_ERR_EMPTY", INT2NUM(XML_NS_ERR_EMPTY));
|
330
|
+
#endif
|
331
|
+
#if LIBXML_VERSION >= 20700
|
332
|
+
rb_define_const(eXMLError, "NS_ERR_COLON", INT2NUM(XML_NS_ERR_COLON));
|
333
|
+
#endif
|
327
334
|
rb_define_const(eXMLError, "DTD_ATTRIBUTE_DEFAULT", INT2NUM(XML_DTD_ATTRIBUTE_DEFAULT));
|
328
335
|
rb_define_const(eXMLError, "DTD_ATTRIBUTE_REDEFINED", INT2NUM(XML_DTD_ATTRIBUTE_REDEFINED));
|
329
336
|
rb_define_const(eXMLError, "DTD_ATTRIBUTE_VALUE", INT2NUM(XML_DTD_ATTRIBUTE_VALUE));
|
@@ -781,8 +788,10 @@ ruby_init_xml_error() {
|
|
781
788
|
rb_define_const(eXMLError, "SCHEMAV_CVC_AU", INT2NUM(XML_SCHEMAV_CVC_AU));
|
782
789
|
rb_define_const(eXMLError, "SCHEMAV_CVC_TYPE_1", INT2NUM(XML_SCHEMAV_CVC_TYPE_1));
|
783
790
|
rb_define_const(eXMLError, "SCHEMAV_CVC_TYPE_2", INT2NUM(XML_SCHEMAV_CVC_TYPE_2));
|
791
|
+
#if LIBXML_VERSION >= 20618
|
784
792
|
rb_define_const(eXMLError, "SCHEMAV_CVC_IDC", INT2NUM(XML_SCHEMAV_CVC_IDC));
|
785
793
|
rb_define_const(eXMLError, "SCHEMAV_CVC_WILDCARD", INT2NUM(XML_SCHEMAV_CVC_WILDCARD));
|
794
|
+
#endif
|
786
795
|
#if LIBXML_VERSION >= 20631
|
787
796
|
rb_define_const(eXMLError, "SCHEMAV_MISC", INT2NUM(XML_SCHEMAV_MISC));
|
788
797
|
#endif
|
@@ -909,8 +918,10 @@ ruby_init_xml_error() {
|
|
909
918
|
rb_define_const(eXMLError, "SCHEMATRONV_ASSERT", INT2NUM(XML_SCHEMATRONV_ASSERT));
|
910
919
|
rb_define_const(eXMLError, "SCHEMATRONV_REPORT", INT2NUM(XML_SCHEMATRONV_REPORT));
|
911
920
|
#endif
|
921
|
+
#if LIBXML_VERSION >= 20618
|
912
922
|
rb_define_const(eXMLError, "MODULE_OPEN", INT2NUM(XML_MODULE_OPEN));
|
913
923
|
rb_define_const(eXMLError, "MODULE_CLOSE", INT2NUM(XML_MODULE_CLOSE));
|
924
|
+
#endif
|
914
925
|
rb_define_const(eXMLError, "CHECK_FOUND_ELEMENT", INT2NUM(XML_CHECK_FOUND_ELEMENT));
|
915
926
|
rb_define_const(eXMLError, "CHECK_FOUND_ATTRIBUTE", INT2NUM(XML_CHECK_FOUND_ATTRIBUTE));
|
916
927
|
rb_define_const(eXMLError, "CHECK_FOUND_TEXT", INT2NUM(XML_CHECK_FOUND_TEXT));
|
data/ext/libxml/ruby_xml_html_parser.c
CHANGED
@@ -1,133 +1,133 @@
|
|
1
|
-
/* $Id: ruby_xml_html_parser.c
|
2
|
-
|
3
|
-
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
-
|
5
|
-
#include "ruby_libxml.h"
|
6
|
-
|
7
|
-
VALUE cXMLHTMLParser;
|
8
|
-
ID INPUT_ATTR;
|
9
|
-
ID CONTEXT_ATTR;
|
10
|
-
|
11
|
-
|
12
|
-
/*
|
13
|
-
* Document-class: LibXML::XML::HTMLParser
|
14
|
-
*
|
15
|
-
* The HTML parser implements an HTML 4.0 non-verifying parser with an API
|
16
|
-
* compatible with the XML::Parser. In contrast with the XML::Parser,
|
17
|
-
* it can parse "real world" HTML, even if it severely broken from a
|
18
|
-
* specification point of view. */
|
19
|
-
|
20
|
-
|
21
|
-
/*
|
22
|
-
* call-seq:
|
23
|
-
* XML::HTMLParser.initialize -> parser
|
24
|
-
*
|
25
|
-
* Initializes a new parser instance with no pre-determined source.
|
26
|
-
*/
|
27
|
-
VALUE
|
28
|
-
ruby_xml_html_parser_initialize(VALUE self) {
|
29
|
-
VALUE input = rb_class_new_instance(0, NULL, cXMLInput);
|
30
|
-
rb_iv_set(self, "@input", input);
|
31
|
-
rb_iv_set(self, "@context", Qnil);
|
32
|
-
return self;
|
33
|
-
}
|
34
|
-
|
35
|
-
htmlParserCtxtPtr
|
36
|
-
ruby_xml_html_parser_file_ctxt(VALUE input) {
|
37
|
-
VALUE file = rb_ivar_get(input, FILE_ATTR);
|
38
|
-
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
39
|
-
VALUE encodingStr = ruby_xml_encoding_to_s(Qnil, encoding);
|
40
|
-
|
41
|
-
return htmlCreateFileParserCtxt(StringValuePtr(file), StringValuePtr(encodingStr));
|
42
|
-
}
|
43
|
-
|
44
|
-
htmlParserCtxtPtr
|
45
|
-
ruby_xml_html_parser_str_ctxt(VALUE input) {
|
46
|
-
VALUE data = rb_ivar_get(input, STRING_ATTR);
|
47
|
-
return htmlCreateMemoryParserCtxt(StringValuePtr(data), RSTRING_LEN(data));
|
48
|
-
}
|
49
|
-
|
50
|
-
/*
|
51
|
-
htmlParserCtxtPtr
|
52
|
-
ruby_xml_html_parser_io_ctxt(VALUE input) {
|
53
|
-
VALUE io = rb_ivar_get(input, IO_ATTR);
|
54
|
-
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
55
|
-
xmlCharEncoding xmlEncoding = NUM2INT(encoding);
|
56
|
-
|
57
|
-
OpenFile *fptr;
|
58
|
-
FILE *f;
|
59
|
-
|
60
|
-
GetOpenFile(io, fptr);
|
61
|
-
rb_io_check_readable(fptr);
|
62
|
-
f = GetWriteFile(fptr);
|
63
|
-
|
64
|
-
return htmlCreateIOParserCtxt(NULL, NULL,
|
65
|
-
(xmlInputReadCallback) ctxtRead,
|
66
|
-
NULL, f, xmlEncoding);
|
67
|
-
}
|
68
|
-
*/
|
69
|
-
|
70
|
-
/*
|
71
|
-
* call-seq:
|
72
|
-
* parser.parse -> document
|
73
|
-
*
|
74
|
-
* Parse the input XML and create an XML::Document with
|
75
|
-
* it's content. If an error occurs, XML::Parser::ParseError
|
76
|
-
* is thrown.
|
77
|
-
*/
|
78
|
-
VALUE
|
79
|
-
ruby_xml_html_parser_parse(VALUE self) {
|
80
|
-
xmlParserCtxtPtr ctxt;
|
81
|
-
VALUE context;
|
82
|
-
VALUE input = rb_ivar_get(self, INPUT_ATTR);
|
83
|
-
|
84
|
-
context = rb_ivar_get(self, CONTEXT_ATTR);
|
85
|
-
if (context != Qnil)
|
86
|
-
rb_raise(rb_eRuntimeError, "You cannot parse a data source twice");
|
87
|
-
|
88
|
-
if (rb_ivar_get(input, FILE_ATTR) != Qnil)
|
89
|
-
ctxt = ruby_xml_html_parser_file_ctxt(input);
|
90
|
-
else if (rb_ivar_get(input, STRING_ATTR) != Qnil)
|
91
|
-
ctxt = ruby_xml_html_parser_str_ctxt(input);
|
92
|
-
/*else if (rb_ivar_get(input, DOCUMENT_ATTR) != Qnil)
|
93
|
-
ctxt = ruby_xml_html_parser_parse_document(input);
|
94
|
-
else if (rb_ivar_get(input, IO_ATTR) != Qnil)
|
95
|
-
ctxt = ruby_xml_html_parser_io_ctxt(input);*/
|
96
|
-
else
|
97
|
-
rb_raise(rb_eArgError, "You must specify a parser data source");
|
98
|
-
|
99
|
-
if (!ctxt)
|
100
|
-
ruby_xml_raise(&xmlLastError);
|
101
|
-
|
102
|
-
context = ruby_xml_parser_context_wrap(ctxt);
|
103
|
-
rb_ivar_set(self, CONTEXT_ATTR, context);
|
104
|
-
|
105
|
-
if (htmlParseDocument(ctxt) == -1 || !ctxt->wellFormed) {
|
106
|
-
xmlFreeDoc(ctxt->myDoc);
|
107
|
-
ruby_xml_raise(&ctxt->lastError);
|
108
|
-
}
|
109
|
-
|
110
|
-
return ruby_xml_document_wrap(ctxt->myDoc);
|
111
|
-
}
|
112
|
-
|
113
|
-
// Rdoc needs to know
|
114
|
-
#ifdef RDOC_NEVER_DEFINED
|
115
|
-
mLibXML = rb_define_module("LibXML");
|
116
|
-
mXML = rb_define_module_under(mLibXML, "XML");
|
117
|
-
#endif
|
118
|
-
|
119
|
-
void
|
120
|
-
ruby_init_html_parser(void) {
|
121
|
-
INPUT_ATTR = rb_intern("@input");
|
122
|
-
CONTEXT_ATTR = rb_intern("@context");
|
123
|
-
|
124
|
-
cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
|
125
|
-
|
126
|
-
/* Atributes */
|
127
|
-
rb_define_attr(cXMLHTMLParser, "input", 1, 0);
|
128
|
-
rb_define_attr(cXMLHTMLParser, "context", 1, 0);
|
129
|
-
|
130
|
-
/* Instance methods */
|
131
|
-
rb_define_method(cXMLHTMLParser, "initialize", ruby_xml_html_parser_initialize, 0);
|
132
|
-
rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
|
133
|
-
}
|
1
|
+
/* $Id: ruby_xml_html_parser.c 604 2008-11-19 18:01:55Z cfis $ */
|
2
|
+
|
3
|
+
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
+
|
5
|
+
#include "ruby_libxml.h"
|
6
|
+
|
7
|
+
VALUE cXMLHTMLParser;
|
8
|
+
static ID INPUT_ATTR;
|
9
|
+
static ID CONTEXT_ATTR;
|
10
|
+
|
11
|
+
|
12
|
+
/*
|
13
|
+
* Document-class: LibXML::XML::HTMLParser
|
14
|
+
*
|
15
|
+
* The HTML parser implements an HTML 4.0 non-verifying parser with an API
|
16
|
+
* compatible with the XML::Parser. In contrast with the XML::Parser,
|
17
|
+
* it can parse "real world" HTML, even if it severely broken from a
|
18
|
+
* specification point of view. */
|
19
|
+
|
20
|
+
|
21
|
+
/*
|
22
|
+
* call-seq:
|
23
|
+
* XML::HTMLParser.initialize -> parser
|
24
|
+
*
|
25
|
+
* Initializes a new parser instance with no pre-determined source.
|
26
|
+
*/
|
27
|
+
VALUE
|
28
|
+
ruby_xml_html_parser_initialize(VALUE self) {
|
29
|
+
VALUE input = rb_class_new_instance(0, NULL, cXMLInput);
|
30
|
+
rb_iv_set(self, "@input", input);
|
31
|
+
rb_iv_set(self, "@context", Qnil);
|
32
|
+
return self;
|
33
|
+
}
|
34
|
+
|
35
|
+
htmlParserCtxtPtr
|
36
|
+
ruby_xml_html_parser_file_ctxt(VALUE input) {
|
37
|
+
VALUE file = rb_ivar_get(input, FILE_ATTR);
|
38
|
+
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
39
|
+
VALUE encodingStr = ruby_xml_encoding_to_s(Qnil, encoding);
|
40
|
+
|
41
|
+
return htmlCreateFileParserCtxt(StringValuePtr(file), StringValuePtr(encodingStr));
|
42
|
+
}
|
43
|
+
|
44
|
+
htmlParserCtxtPtr
|
45
|
+
ruby_xml_html_parser_str_ctxt(VALUE input) {
|
46
|
+
VALUE data = rb_ivar_get(input, STRING_ATTR);
|
47
|
+
return htmlCreateMemoryParserCtxt(StringValuePtr(data), RSTRING_LEN(data));
|
48
|
+
}
|
49
|
+
|
50
|
+
/*
|
51
|
+
htmlParserCtxtPtr
|
52
|
+
ruby_xml_html_parser_io_ctxt(VALUE input) {
|
53
|
+
VALUE io = rb_ivar_get(input, IO_ATTR);
|
54
|
+
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
55
|
+
xmlCharEncoding xmlEncoding = NUM2INT(encoding);
|
56
|
+
|
57
|
+
OpenFile *fptr;
|
58
|
+
FILE *f;
|
59
|
+
|
60
|
+
GetOpenFile(io, fptr);
|
61
|
+
rb_io_check_readable(fptr);
|
62
|
+
f = GetWriteFile(fptr);
|
63
|
+
|
64
|
+
return htmlCreateIOParserCtxt(NULL, NULL,
|
65
|
+
(xmlInputReadCallback) ctxtRead,
|
66
|
+
NULL, f, xmlEncoding);
|
67
|
+
}
|
68
|
+
*/
|
69
|
+
|
70
|
+
/*
|
71
|
+
* call-seq:
|
72
|
+
* parser.parse -> document
|
73
|
+
*
|
74
|
+
* Parse the input XML and create an XML::Document with
|
75
|
+
* it's content. If an error occurs, XML::Parser::ParseError
|
76
|
+
* is thrown.
|
77
|
+
*/
|
78
|
+
VALUE
|
79
|
+
ruby_xml_html_parser_parse(VALUE self) {
|
80
|
+
xmlParserCtxtPtr ctxt;
|
81
|
+
VALUE context;
|
82
|
+
VALUE input = rb_ivar_get(self, INPUT_ATTR);
|
83
|
+
|
84
|
+
context = rb_ivar_get(self, CONTEXT_ATTR);
|
85
|
+
if (context != Qnil)
|
86
|
+
rb_raise(rb_eRuntimeError, "You cannot parse a data source twice");
|
87
|
+
|
88
|
+
if (rb_ivar_get(input, FILE_ATTR) != Qnil)
|
89
|
+
ctxt = ruby_xml_html_parser_file_ctxt(input);
|
90
|
+
else if (rb_ivar_get(input, STRING_ATTR) != Qnil)
|
91
|
+
ctxt = ruby_xml_html_parser_str_ctxt(input);
|
92
|
+
/*else if (rb_ivar_get(input, DOCUMENT_ATTR) != Qnil)
|
93
|
+
ctxt = ruby_xml_html_parser_parse_document(input);
|
94
|
+
else if (rb_ivar_get(input, IO_ATTR) != Qnil)
|
95
|
+
ctxt = ruby_xml_html_parser_io_ctxt(input);*/
|
96
|
+
else
|
97
|
+
rb_raise(rb_eArgError, "You must specify a parser data source");
|
98
|
+
|
99
|
+
if (!ctxt)
|
100
|
+
ruby_xml_raise(&xmlLastError);
|
101
|
+
|
102
|
+
context = ruby_xml_parser_context_wrap(ctxt);
|
103
|
+
rb_ivar_set(self, CONTEXT_ATTR, context);
|
104
|
+
|
105
|
+
if (htmlParseDocument(ctxt) == -1 || !ctxt->wellFormed) {
|
106
|
+
xmlFreeDoc(ctxt->myDoc);
|
107
|
+
ruby_xml_raise(&ctxt->lastError);
|
108
|
+
}
|
109
|
+
|
110
|
+
return ruby_xml_document_wrap(ctxt->myDoc);
|
111
|
+
}
|
112
|
+
|
113
|
+
// Rdoc needs to know
|
114
|
+
#ifdef RDOC_NEVER_DEFINED
|
115
|
+
mLibXML = rb_define_module("LibXML");
|
116
|
+
mXML = rb_define_module_under(mLibXML, "XML");
|
117
|
+
#endif
|
118
|
+
|
119
|
+
void
|
120
|
+
ruby_init_html_parser(void) {
|
121
|
+
INPUT_ATTR = rb_intern("@input");
|
122
|
+
CONTEXT_ATTR = rb_intern("@context");
|
123
|
+
|
124
|
+
cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
|
125
|
+
|
126
|
+
/* Atributes */
|
127
|
+
rb_define_attr(cXMLHTMLParser, "input", 1, 0);
|
128
|
+
rb_define_attr(cXMLHTMLParser, "context", 1, 0);
|
129
|
+
|
130
|
+
/* Instance methods */
|
131
|
+
rb_define_method(cXMLHTMLParser, "initialize", ruby_xml_html_parser_initialize, 0);
|
132
|
+
rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
|
133
|
+
}
|