libxml-ruby 0.9.1-x86-mswin32-60 → 0.9.2-x86-mswin32-60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES +6 -0
- data/ext/libxml/libxml.c +2 -1
- data/ext/libxml/libxml.c.rej +16 -0
- data/ext/libxml/ruby_libxml.h +72 -71
- data/ext/libxml/ruby_xml_error.c +11 -0
- data/ext/libxml/ruby_xml_html_parser.c +133 -133
- data/ext/libxml/ruby_xml_parser.c +3 -3
- data/ext/libxml/ruby_xml_sax_parser.c +228 -228
- data/ext/libxml/ruby_xml_xpath_context.c +294 -281
- data/ext/libxml/ruby_xml_xpath_expression.c +69 -0
- data/ext/libxml/ruby_xml_xpath_expression.h +12 -0
- data/ext/libxml/version.h +2 -2
- data/ext/mingw/libxml_ruby.dll.a +0 -0
- data/ext/mingw/libxml_ruby.so +0 -0
- data/ext/vc/libxml_ruby.vcproj +9 -1
- data/test/tc_document_write.rb +14 -0
- data/test/tc_xpath_expression.rb +35 -0
- data/test/test_suite.rb +1 -0
- metadata +7 -2
data/CHANGES
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
(See log/ChangeLog for more detailed changes derived directly from source control.)
|
2
2
|
|
3
|
+
== 0.9.2 / 2008-11-19 Charlie Savage
|
4
|
+
|
5
|
+
* Add support for compiled XPath expressions (donated by Pavel Valodzka)
|
6
|
+
|
7
|
+
* Fixes for compiling on OS X 10.5.4 and 10.5.5
|
8
|
+
|
3
9
|
== 0.9.1 / 2008-11-18 Charlie Savage
|
4
10
|
|
5
11
|
* Expose LibXML's encoding support via a new Encoding object.
|
data/ext/libxml/libxml.c
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
/* $Id: libxml.c
|
1
|
+
/* $Id: libxml.c 600 2008-11-19 07:39:29Z cfis $ */
|
2
2
|
|
3
3
|
/* Please see the LICENSE file for copyright and distribution information */
|
4
4
|
|
@@ -872,6 +872,7 @@ Init_libxml_ruby(void) {
|
|
872
872
|
ruby_init_xml_xinclude();
|
873
873
|
ruby_init_xml_xpath();
|
874
874
|
ruby_init_xml_xpath_context();
|
875
|
+
ruby_init_xml_xpath_expression();
|
875
876
|
ruby_init_xml_xpointer();
|
876
877
|
ruby_init_html_parser();
|
877
878
|
ruby_init_input_callbacks();
|
data/ext/libxml/libxml.c.rej
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
***************
|
2
|
+
*** 46,51 ****
|
3
|
+
ruby_init_xml_xinclude();
|
4
|
+
ruby_init_xml_xpath();
|
5
|
+
ruby_init_xml_xpath_context();
|
6
|
+
ruby_init_xml_xpointer();
|
7
|
+
ruby_init_xml_xpointer_context();
|
8
|
+
ruby_init_html_parser();
|
9
|
+
--- 46,52 ----
|
10
|
+
ruby_init_xml_xinclude();
|
11
|
+
ruby_init_xml_xpath();
|
12
|
+
ruby_init_xml_xpath_context();
|
13
|
+
+ ruby_init_xml_xpath_expression();
|
14
|
+
ruby_init_xml_xpointer();
|
15
|
+
ruby_init_xml_xpointer_context();
|
16
|
+
ruby_init_html_parser();
|
data/ext/libxml/ruby_libxml.h
CHANGED
@@ -1,71 +1,72 @@
|
|
1
|
-
/* Please see the LICENSE file for copyright and distribution information */
|
2
|
-
|
3
|
-
#ifndef __RUBY_LIBXML_H__
|
4
|
-
#define __RUBY_LIBXML_H__
|
5
|
-
|
6
|
-
#include "version.h"
|
7
|
-
|
8
|
-
#include <ruby.h>
|
9
|
-
#include <rubyio.h>
|
10
|
-
#include <util.h>
|
11
|
-
#include <libxml/parser.h>
|
12
|
-
#include <libxml/parserInternals.h>
|
13
|
-
#include <libxml/debugXML.h>
|
14
|
-
#include <libxml/xmlversion.h>
|
15
|
-
#include <libxml/xmlmemory.h>
|
16
|
-
#include <libxml/xpath.h>
|
17
|
-
#include <libxml/valid.h>
|
18
|
-
#include <libxml/catalog.h>
|
19
|
-
#include <libxml/HTMLparser.h>
|
20
|
-
#include <libxml/xmlreader.h>
|
21
|
-
|
22
|
-
// Maybe not yet defined in ruby
|
23
|
-
#ifndef RSTRING_LEN
|
24
|
-
#define RSTRING_LEN(x) RSTRING(x)->len
|
25
|
-
#endif
|
26
|
-
|
27
|
-
// not in Ruby 1.9
|
28
|
-
#ifndef GetWriteFile
|
29
|
-
#define GetWriteFile(fp) rb_io_stdio_file(fp)
|
30
|
-
#define OpenFile rb_io_t
|
31
|
-
#endif
|
32
|
-
|
33
|
-
#ifdef LIBXML_DEBUG_ENABLED
|
34
|
-
#include <libxml/xpathInternals.h>
|
35
|
-
#endif
|
36
|
-
#ifdef LIBXML_XINCLUDE_ENABLED
|
37
|
-
#include <libxml/xinclude.h>
|
38
|
-
#endif
|
39
|
-
#ifdef LIBXML_XPTR_ENABLED
|
40
|
-
#include <libxml/xpointer.h>
|
41
|
-
#endif
|
42
|
-
|
43
|
-
|
44
|
-
#include "ruby_xml_encoding.h"
|
45
|
-
#include "ruby_xml_error.h"
|
46
|
-
#include "ruby_xml_input.h"
|
47
|
-
#include "ruby_xml_state.h"
|
48
|
-
#include "ruby_xml_attributes.h"
|
49
|
-
#include "ruby_xml_attr.h"
|
50
|
-
#include "ruby_xml_document.h"
|
51
|
-
#include "ruby_xml_node.h"
|
52
|
-
#include "ruby_xml_ns.h"
|
53
|
-
#include "ruby_xml_parser.h"
|
54
|
-
#include "ruby_xml_parser_context.h"
|
55
|
-
#include "ruby_xml_sax_parser.h"
|
56
|
-
#include "ruby_xml_xinclude.h"
|
57
|
-
#include "ruby_xml_xpath.h"
|
58
|
-
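#include "ruby_xml_xpath_context.h"
|
59
|
-
#include "ruby_xml_xpath_object.h"
|
60
|
-
#include "ruby_xml_xpointer.h"
|
61
|
-
#include "ruby_xml_input_cbg.h"
|
62
|
-
#include "ruby_xml_dtd.h"
|
63
|
-
#include "ruby_xml_schema.h"
|
64
|
-
#include "ruby_xml_relaxng.h"
|
65
|
-
#include "ruby_xml_html_parser.h"
|
66
|
-
#include "ruby_xml_reader.h"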
|
67
|
-
|
68
|
-
extern VALUE mLibXML;
|
69
|
-
extern VALUE mXML;
|
70
|
-
|
71
|
-
#endif
|
1
|
+
/* Please see the LICENSE file for copyright and distribution information */
|
2
|
+
|
3
|
+
#ifndef __RUBY_LIBXML_H__
|
4
|
+
#define __RUBY_LIBXML_H__
|
5
|
+
|
6
|
+
#include "version.h"
|
7
|
+
|
8
|
+
#include <ruby.h>
|
9
|
+
#include <rubyio.h>
|
10
|
+
#include <util.h>
|
11
|
+
#include <libxml/parser.h>
|
12
|
+
#include <libxml/parserInternals.h>
|
13
|
+
#include <libxml/debugXML.h>
|
14
|
+
#include <libxml/xmlversion.h>
|
15
|
+
#include <libxml/xmlmemory.h>
|
16
|
+
#include <libxml/xpath.h>
|
17
|
+
#include <libxml/valid.h>
|
18
|
+
#include <libxml/catalog.h>
|
19
|
+
#include <libxml/HTMLparser.h>
|
20
|
+
#include <libxml/xmlreader.h>
|
21
|
+
|
22
|
+
// Maybe not yet defined in ruby
|
23
|
+
#ifndef RSTRING_LEN
|
24
|
+
#define RSTRING_LEN(x) RSTRING(x)->len
|
25
|
+
#endif
|
26
|
+
|
27
|
+
// not in Ruby 1.9
|
28
|
+
#ifndef GetWriteFile
|
29
|
+
#define GetWriteFile(fp) rb_io_stdio_file(fp)
|
30
|
+
#define OpenFile rb_io_t
|
31
|
+
#endif
|
32
|
+
|
33
|
+
#ifdef LIBXML_DEBUG_ENABLED
|
34
|
+
#include <libxml/xpathInternals.h>
|
35
|
+
#endif
|
36
|
+
#ifdef LIBXML_XINCLUDE_ENABLED
|
37
|
+
#include <libxml/xinclude.h>
|
38
|
+
#endif
|
39
|
+
#ifdef LIBXML_XPTR_ENABLED
|
40
|
+
#include <libxml/xpointer.h>
|
41
|
+
#endif
|
42
|
+
|
43
|
+
|
44
|
+
#include "ruby_xml_encoding.h"
|
45
|
+
#include "ruby_xml_error.h"
|
46
|
+
#include "ruby_xml_input.h"
|
47
|
+
#include "ruby_xml_state.h"
|
48
|
+
#include "ruby_xml_attributes.h"
|
49
|
+
#include "ruby_xml_attr.h"
|
50
|
+
#include "ruby_xml_document.h"
|
51
|
+
#include "ruby_xml_node.h"
|
52
|
+
#include "ruby_xml_ns.h"
|
53
|
+
#include "ruby_xml_parser.h"
|
54
|
+
#include "ruby_xml_parser_context.h"
|
55
|
+
#include "ruby_xml_sax_parser.h"
|
56
|
+
#include "ruby_xml_xinclude.h"
|
57
|
+
#include "ruby_xml_xpath.h"
|
58
|
+
#include "ruby_xml_xpath_expression.h"
|
59
|
+
#include "ruby_xml_xpath_context.h"
|
60
|
+
#include "ruby_xml_xpath_object.h"
|
61
|
+
#include "ruby_xml_xpointer.h"
|
62
|
+
#include "ruby_xml_input_cbg.h"
|
63
|
+
#include "ruby_xml_dtd.h"
|
64
|
+
#include "ruby_xml_schema.h"
|
65
|
+
#include "ruby_xml_relaxng.h"
|
66
|
+
#include "ruby_xml_html_parser.h"
|
67
|
+
#include "ruby_xml_reader.h"
|
68
|
+
|
69
|
+
extern VALUE mLibXML;
|
70
|
+
extern VALUE mXML;
|
71
|
+
|
72
|
+
#endif
|
data/ext/libxml/ruby_xml_error.c
CHANGED
@@ -202,7 +202,9 @@ ruby_init_xml_error() {
|
|
202
202
|
rb_define_const(eXMLError, "VALID", INT2NUM(XML_FROM_VALID));
|
203
203
|
rb_define_const(eXMLError, "CHECK", INT2NUM(XML_FROM_CHECK));
|
204
204
|
rb_define_const(eXMLError, "WRITER", INT2NUM(XML_FROM_WRITER));
|
205
|
+
#if LIBXML_VERSION >= 20621
|
205
206
|
rb_define_const(eXMLError, "MODULE", INT2NUM(XML_FROM_MODULE));
|
207
|
+
#endif
|
206
208
|
#if LIBXML_VERSION >= 20632
|
207
209
|
rb_define_const(eXMLError, "I18N", INT2NUM(XML_FROM_I18N));
|
208
210
|
rb_define_const(eXMLError, "SCHEMATRONV", INT2NUM(XML_FROM_SCHEMATRONV));
|
@@ -323,7 +325,12 @@ ruby_init_xml_error() {
|
|
323
325
|
rb_define_const(eXMLError, "NS_ERR_UNDEFINED_NAMESPACE", INT2NUM(XML_NS_ERR_UNDEFINED_NAMESPACE));
|
324
326
|
rb_define_const(eXMLError, "NS_ERR_QNAME", INT2NUM(XML_NS_ERR_QNAME));
|
325
327
|
rb_define_const(eXMLError, "NS_ERR_ATTRIBUTE_REDEFINED", INT2NUM(XML_NS_ERR_ATTRIBUTE_REDEFINED));
|
328
|
+
#if LIBXML_VERSION >= 20620
|
326
329
|
rb_define_const(eXMLError, "NS_ERR_EMPTY", INT2NUM(XML_NS_ERR_EMPTY));
|
330
|
+
#endif
|
331
|
+
#if LIBXML_VERSION >= 20700
|
332
|
+
rb_define_const(eXMLError, "NS_ERR_COLON", INT2NUM(XML_NS_ERR_COLON));
|
333
|
+
#endif
|
327
334
|
rb_define_const(eXMLError, "DTD_ATTRIBUTE_DEFAULT", INT2NUM(XML_DTD_ATTRIBUTE_DEFAULT));
|
328
335
|
rb_define_const(eXMLError, "DTD_ATTRIBUTE_REDEFINED", INT2NUM(XML_DTD_ATTRIBUTE_REDEFINED));
|
329
336
|
rb_define_const(eXMLError, "DTD_ATTRIBUTE_VALUE", INT2NUM(XML_DTD_ATTRIBUTE_VALUE));
|
@@ -781,8 +788,10 @@ ruby_init_xml_error() {
|
|
781
788
|
rb_define_const(eXMLError, "SCHEMAV_CVC_AU", INT2NUM(XML_SCHEMAV_CVC_AU));
|
782
789
|
rb_define_const(eXMLError, "SCHEMAV_CVC_TYPE_1", INT2NUM(XML_SCHEMAV_CVC_TYPE_1));
|
783
790
|
rb_define_const(eXMLError, "SCHEMAV_CVC_TYPE_2", INT2NUM(XML_SCHEMAV_CVC_TYPE_2));
|
791
|
+
#if LIBXML_VERSION >= 20618
|
784
792
|
rb_define_const(eXMLError, "SCHEMAV_CVC_IDC", INT2NUM(XML_SCHEMAV_CVC_IDC));
|
785
793
|
rb_define_const(eXMLError, "SCHEMAV_CVC_WILDCARD", INT2NUM(XML_SCHEMAV_CVC_WILDCARD));
|
794
|
+
#endif
|
786
795
|
#if LIBXML_VERSION >= 20631
|
787
796
|
rb_define_const(eXMLError, "SCHEMAV_MISC", INT2NUM(XML_SCHEMAV_MISC));
|
788
797
|
#endif
|
@@ -909,8 +918,10 @@ ruby_init_xml_error() {
|
|
909
918
|
rb_define_const(eXMLError, "SCHEMATRONV_ASSERT", INT2NUM(XML_SCHEMATRONV_ASSERT));
|
910
919
|
rb_define_const(eXMLError, "SCHEMATRONV_REPORT", INT2NUM(XML_SCHEMATRONV_REPORT));
|
911
920
|
#endif
|
921
|
+
#if LIBXML_VERSION >= 20618
|
912
922
|
rb_define_const(eXMLError, "MODULE_OPEN", INT2NUM(XML_MODULE_OPEN));
|
913
923
|
rb_define_const(eXMLError, "MODULE_CLOSE", INT2NUM(XML_MODULE_CLOSE));
|
924
|
+
#endif
|
914
925
|
rb_define_const(eXMLError, "CHECK_FOUND_ELEMENT", INT2NUM(XML_CHECK_FOUND_ELEMENT));
|
915
926
|
rb_define_const(eXMLError, "CHECK_FOUND_ATTRIBUTE", INT2NUM(XML_CHECK_FOUND_ATTRIBUTE));
|
916
927
|
rb_define_const(eXMLError, "CHECK_FOUND_TEXT", INT2NUM(XML_CHECK_FOUND_TEXT));
|
data/ext/libxml/ruby_xml_html_parser.c
CHANGED
@@ -1,133 +1,133 @@
|
|
1
|
-
/* $Id: ruby_xml_html_parser.c
|
2
|
-
|
3
|
-
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
-
|
5
|
-
#include "ruby_libxml.h"
|
6
|
-
|
7
|
-
VALUE cXMLHTMLParser;
|
8
|
-
ID INPUT_ATTR;
|
9
|
-
ID CONTEXT_ATTR;
|
10
|
-
|
11
|
-
|
12
|
-
/*
|
13
|
-
* Document-class: LibXML::XML::HTMLParser
|
14
|
-
*
|
15
|
-
* The HTML parser implements an HTML 4.0 non-verifying parser with an API
|
16
|
-
* compatible with the XML::Parser. In contrast with the XML::Parser,
|
17
|
-
* it can parse "real world" HTML, even if it severely broken from a
|
18
|
-
* specification point of view. */
|
19
|
-
|
20
|
-
|
21
|
-
/*
|
22
|
-
* call-seq:
|
23
|
-
* XML::HTMLParser.initialize -> parser
|
24
|
-
*
|
25
|
-
* Initializes a new parser instance with no pre-determined source.
|
26
|
-
*/
|
27
|
-
VALUE
|
28
|
-
ruby_xml_html_parser_initialize(VALUE self) {
|
29
|
-
VALUE input = rb_class_new_instance(0, NULL, cXMLInput);
|
30
|
-
rb_iv_set(self, "@input", input);
|
31
|
-
rb_iv_set(self, "@context", Qnil);
|
32
|
-
return self;
|
33
|
-
}
|
34
|
-
|
35
|
-
htmlParserCtxtPtr
|
36
|
-
ruby_xml_html_parser_file_ctxt(VALUE input) {
|
37
|
-
VALUE file = rb_ivar_get(input, FILE_ATTR);
|
38
|
-
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
39
|
-
VALUE encodingStr = ruby_xml_encoding_to_s(Qnil, encoding);
|
40
|
-
|
41
|
-
return htmlCreateFileParserCtxt(StringValuePtr(file), StringValuePtr(encodingStr));
|
42
|
-
}
|
43
|
-
|
44
|
-
htmlParserCtxtPtr
|
45
|
-
ruby_xml_html_parser_str_ctxt(VALUE input) {
|
46
|
-
VALUE data = rb_ivar_get(input, STRING_ATTR);
|
47
|
-
return htmlCreateMemoryParserCtxt(StringValuePtr(data), RSTRING_LEN(data));
|
48
|
-
}
|
49
|
-
|
50
|
-
/*
|
51
|
-
htmlParserCtxtPtr
|
52
|
-
ruby_xml_html_parser_io_ctxt(VALUE input) {
|
53
|
-
VALUE io = rb_ivar_get(input, IO_ATTR);
|
54
|
-
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
55
|
-
xmlCharEncoding xmlEncoding = NUM2INT(encoding);
|
56
|
-
|
57
|
-
OpenFile *fptr;
|
58
|
-
FILE *f;
|
59
|
-
|
60
|
-
GetOpenFile(io, fptr);
|
61
|
-
rb_io_check_readable(fptr);
|
62
|
-
f = GetWriteFile(fptr);
|
63
|
-
|
64
|
-
return htmlCreateIOParserCtxt(NULL, NULL,
|
65
|
-
(xmlInputReadCallback) ctxtRead,
|
66
|
-
NULL, f, xmlEncoding);
|
67
|
-
}
|
68
|
-
*/
|
69
|
-
|
70
|
-
/*
|
71
|
-
* call-seq:
|
72
|
-
* parser.parse -> document
|
73
|
-
*
|
74
|
-
* Parse the input XML and create an XML::Document with
|
75
|
-
* it's content. If an error occurs, XML::Parser::ParseError
|
76
|
-
* is thrown.
|
77
|
-
*/
|
78
|
-
VALUE
|
79
|
-
ruby_xml_html_parser_parse(VALUE self) {
|
80
|
-
xmlParserCtxtPtr ctxt;
|
81
|
-
VALUE context;
|
82
|
-
VALUE input = rb_ivar_get(self, INPUT_ATTR);
|
83
|
-
|
84
|
-
context = rb_ivar_get(self, CONTEXT_ATTR);
|
85
|
-
if (context != Qnil)
|
86
|
-
rb_raise(rb_eRuntimeError, "You cannot parse a data source twice");
|
87
|
-
|
88
|
-
if (rb_ivar_get(input, FILE_ATTR) != Qnil)
|
89
|
-
ctxt = ruby_xml_html_parser_file_ctxt(input);
|
90
|
-
else if (rb_ivar_get(input, STRING_ATTR) != Qnil)
|
91
|
-
ctxt = ruby_xml_html_parser_str_ctxt(input);
|
92
|
-
/*else if (rb_ivar_get(input, DOCUMENT_ATTR) != Qnil)
|
93
|
-
ctxt = ruby_xml_html_parser_parse_document(input);
|
94
|
-
else if (rb_ivar_get(input, IO_ATTR) != Qnil)
|
95
|
-
ctxt = ruby_xml_html_parser_io_ctxt(input);*/
|
96
|
-
else
|
97
|
-
rb_raise(rb_eArgError, "You must specify a parser data source");
|
98
|
-
|
99
|
-
if (!ctxt)
|
100
|
-
ruby_xml_raise(&xmlLastError);
|
101
|
-
|
102
|
-
context = ruby_xml_parser_context_wrap(ctxt);
|
103
|
-
rb_ivar_set(self, CONTEXT_ATTR, context);
|
104
|
-
|
105
|
-
if (htmlParseDocument(ctxt) == -1 || !ctxt->wellFormed) {
|
106
|
-
xmlFreeDoc(ctxt->myDoc);
|
107
|
-
ruby_xml_raise(&ctxt->lastError);
|
108
|
-
}
|
109
|
-
|
110
|
-
return ruby_xml_document_wrap(ctxt->myDoc);
|
111
|
-
}
|
112
|
-
|
113
|
-
// Rdoc needs to know
|
114
|
-
#ifdef RDOC_NEVER_DEFINED
|
115
|
-
mLibXML = rb_define_module("LibXML");
|
116
|
-
mXML = rb_define_module_under(mLibXML, "XML");
|
117
|
-
#endif
|
118
|
-
|
119
|
-
void
|
120
|
-
ruby_init_html_parser(void) {
|
121
|
-
INPUT_ATTR = rb_intern("@input");
|
122
|
-
CONTEXT_ATTR = rb_intern("@context");
|
123
|
-
|
124
|
-
cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
|
125
|
-
|
126
|
-
/* Atributes */
|
127
|
-
rb_define_attr(cXMLHTMLParser, "input", 1, 0);
|
128
|
-
rb_define_attr(cXMLHTMLParser, "context", 1, 0);
|
129
|
-
|
130
|
-
/* Instance methods */
|
131
|
-
rb_define_method(cXMLHTMLParser, "initialize", ruby_xml_html_parser_initialize, 0);
|
132
|
-
rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
|
133
|
-
}
|
1
|
+
/* $Id: ruby_xml_html_parser.c 604 2008-11-19 18:01:55Z cfis $ */
|
2
|
+
|
3
|
+
/* Please see the LICENSE file for copyright and distribution information */
|
4
|
+
|
5
|
+
#include "ruby_libxml.h"
|
6
|
+
|
7
|
+
VALUE cXMLHTMLParser;
|
8
|
+
static ID INPUT_ATTR;
|
9
|
+
static ID CONTEXT_ATTR;
|
10
|
+
|
11
|
+
|
12
|
+
/*
|
13
|
+
* Document-class: LibXML::XML::HTMLParser
|
14
|
+
*
|
15
|
+
* The HTML parser implements an HTML 4.0 non-verifying parser with an API
|
16
|
+
* compatible with the XML::Parser. In contrast with the XML::Parser,
|
17
|
+
* it can parse "real world" HTML, even if it is severely broken from a
|
18
|
+
* specification point of view. */
|
19
|
+
|
20
|
+
|
21
|
+
/*
|
22
|
+
* call-seq:
|
23
|
+
* XML::HTMLParser.initialize -> parser
|
24
|
+
*
|
25
|
+
* Initializes a new parser instance with no pre-determined source.
|
26
|
+
*/
|
27
|
+
VALUE
|
28
|
+
ruby_xml_html_parser_initialize(VALUE self) {
|
29
|
+
VALUE input = rb_class_new_instance(0, NULL, cXMLInput);
|
30
|
+
rb_iv_set(self, "@input", input);
|
31
|
+
rb_iv_set(self, "@context", Qnil);
|
32
|
+
return self;
|
33
|
+
}
|
34
|
+
|
35
|
+
htmlParserCtxtPtr
|
36
|
+
ruby_xml_html_parser_file_ctxt(VALUE input) {
|
37
|
+
VALUE file = rb_ivar_get(input, FILE_ATTR);
|
38
|
+
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
39
|
+
VALUE encodingStr = ruby_xml_encoding_to_s(Qnil, encoding);
|
40
|
+
|
41
|
+
return htmlCreateFileParserCtxt(StringValuePtr(file), StringValuePtr(encodingStr));
|
42
|
+
}
|
43
|
+
|
44
|
+
htmlParserCtxtPtr
|
45
|
+
ruby_xml_html_parser_str_ctxt(VALUE input) {
|
46
|
+
VALUE data = rb_ivar_get(input, STRING_ATTR);
|
47
|
+
return htmlCreateMemoryParserCtxt(StringValuePtr(data), RSTRING_LEN(data));
|
48
|
+
}
|
49
|
+
|
50
|
+
/*
|
51
|
+
htmlParserCtxtPtr
|
52
|
+
ruby_xml_html_parser_io_ctxt(VALUE input) {
|
53
|
+
VALUE io = rb_ivar_get(input, IO_ATTR);
|
54
|
+
VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
|
55
|
+
xmlCharEncoding xmlEncoding = NUM2INT(encoding);
|
56
|
+
|
57
|
+
OpenFile *fptr;
|
58
|
+
FILE *f;
|
59
|
+
|
60
|
+
GetOpenFile(io, fptr);
|
61
|
+
rb_io_check_readable(fptr);
|
62
|
+
f = GetWriteFile(fptr);
|
63
|
+
|
64
|
+
return htmlCreateIOParserCtxt(NULL, NULL,
|
65
|
+
(xmlInputReadCallback) ctxtRead,
|
66
|
+
NULL, f, xmlEncoding);
|
67
|
+
}
|
68
|
+
*/
|
69
|
+
|
70
|
+
/*
|
71
|
+
* call-seq:
|
72
|
+
* parser.parse -> document
|
73
|
+
*
|
74
|
+
* Parse the input HTML and create an XML::Document with
|
75
|
+
* its content. If an error occurs, XML::Parser::ParseError
|
76
|
+
* is thrown.
|
77
|
+
*/
|
78
|
+
VALUE
|
79
|
+
ruby_xml_html_parser_parse(VALUE self) {
|
80
|
+
xmlParserCtxtPtr ctxt;
|
81
|
+
VALUE context;
|
82
|
+
VALUE input = rb_ivar_get(self, INPUT_ATTR);
|
83
|
+
|
84
|
+
context = rb_ivar_get(self, CONTEXT_ATTR);
|
85
|
+
if (context != Qnil)
|
86
|
+
rb_raise(rb_eRuntimeError, "You cannot parse a data source twice");
|
87
|
+
|
88
|
+
if (rb_ivar_get(input, FILE_ATTR) != Qnil)
|
89
|
+
ctxt = ruby_xml_html_parser_file_ctxt(input);
|
90
|
+
else if (rb_ivar_get(input, STRING_ATTR) != Qnil)
|
91
|
+
ctxt = ruby_xml_html_parser_str_ctxt(input);
|
92
|
+
/*else if (rb_ivar_get(input, DOCUMENT_ATTR) != Qnil)
|
93
|
+
ctxt = ruby_xml_html_parser_parse_document(input);
|
94
|
+
else if (rb_ivar_get(input, IO_ATTR) != Qnil)
|
95
|
+
ctxt = ruby_xml_html_parser_io_ctxt(input);*/
|
96
|
+
else
|
97
|
+
rb_raise(rb_eArgError, "You must specify a parser data source");
|
98
|
+
|
99
|
+
if (!ctxt)
|
100
|
+
ruby_xml_raise(&xmlLastError);
|
101
|
+
|
102
|
+
context = ruby_xml_parser_context_wrap(ctxt);
|
103
|
+
rb_ivar_set(self, CONTEXT_ATTR, context);
|
104
|
+
|
105
|
+
if (htmlParseDocument(ctxt) == -1 || !ctxt->wellFormed) {
|
106
|
+
xmlFreeDoc(ctxt->myDoc);
|
107
|
+
ruby_xml_raise(&ctxt->lastError);
|
108
|
+
}
|
109
|
+
|
110
|
+
return ruby_xml_document_wrap(ctxt->myDoc);
|
111
|
+
}
|
112
|
+
|
113
|
+
// Rdoc needs to know
|
114
|
+
#ifdef RDOC_NEVER_DEFINED
|
115
|
+
mLibXML = rb_define_module("LibXML");
|
116
|
+
mXML = rb_define_module_under(mLibXML, "XML");
|
117
|
+
#endif
|
118
|
+
|
119
|
+
void
|
120
|
+
ruby_init_html_parser(void) {
|
121
|
+
INPUT_ATTR = rb_intern("@input");
|
122
|
+
CONTEXT_ATTR = rb_intern("@context");
|
123
|
+
|
124
|
+
cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
|
125
|
+
|
126
|
+
/* Attributes */
|
127
|
+
rb_define_attr(cXMLHTMLParser, "input", 1, 0);
|
128
|
+
rb_define_attr(cXMLHTMLParser, "context", 1, 0);
|
129
|
+
|
130
|
+
/* Instance methods */
|
131
|
+
rb_define_method(cXMLHTMLParser, "initialize", ruby_xml_html_parser_initialize, 0);
|
132
|
+
rb_define_method(cXMLHTMLParser, "parse", ruby_xml_html_parser_parse, 0);
|
133
|
+
}
|