nokogiri 1.1.1 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/History.ja.txt +34 -0
- data/History.txt +36 -0
- data/Manifest.txt +21 -0
- data/README.ja.txt +1 -1
- data/README.txt +1 -1
- data/Rakefile +27 -89
- data/ext/nokogiri/extconf.rb +48 -63
- data/ext/nokogiri/html_document.c +90 -29
- data/ext/nokogiri/html_sax_parser.c +23 -2
- data/ext/nokogiri/native.c +18 -8
- data/ext/nokogiri/native.h +22 -0
- data/ext/nokogiri/xml_attr.c +83 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_cdata.c +1 -1
- data/ext/nokogiri/xml_document.c +84 -18
- data/ext/nokogiri/xml_document_fragment.c +38 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +2 -22
- data/ext/nokogiri/xml_entity_reference.c +41 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +10 -3
- data/ext/nokogiri/xml_io.h +1 -0
- data/ext/nokogiri/xml_node.c +116 -66
- data/ext/nokogiri/xml_node_set.c +5 -1
- data/ext/nokogiri/xml_processing_instruction.c +44 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +20 -4
- data/ext/nokogiri/xml_sax_parser.c +51 -15
- data/ext/nokogiri/xml_sax_push_parser.c +85 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +12 -8
- data/ext/nokogiri/xml_syntax_error.h +2 -1
- data/ext/nokogiri/xml_xpath_context.c +11 -2
- data/ext/nokogiri/xslt_stylesheet.c +1 -6
- data/lib/nokogiri.rb +10 -13
- data/lib/nokogiri/css.rb +1 -1
- data/lib/nokogiri/css/generated_parser.rb +287 -295
- data/lib/nokogiri/css/generated_tokenizer.rb +36 -51
- data/lib/nokogiri/css/node.rb +1 -3
- data/lib/nokogiri/css/parser.rb +21 -12
- data/lib/nokogiri/css/parser.y +55 -44
- data/lib/nokogiri/css/syntax_error.rb +2 -1
- data/lib/nokogiri/css/tokenizer.rex +23 -32
- data/lib/nokogiri/decorators/hpricot/node_set.rb +1 -1
- data/lib/nokogiri/html.rb +10 -4
- data/lib/nokogiri/html/document.rb +6 -2
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +2 -1
- data/lib/nokogiri/xml.rb +3 -1
- data/lib/nokogiri/xml/attr.rb +3 -4
- data/lib/nokogiri/xml/cdata.rb +1 -1
- data/lib/nokogiri/xml/document.rb +4 -7
- data/lib/nokogiri/xml/document_fragment.rb +9 -0
- data/lib/nokogiri/xml/dtd.rb +3 -0
- data/lib/nokogiri/xml/node.rb +144 -40
- data/lib/nokogiri/xml/node/save_options.rb +32 -0
- data/lib/nokogiri/xml/node_set.rb +11 -20
- data/lib/nokogiri/xml/processing_instruction.rb +6 -0
- data/lib/nokogiri/xml/reader.rb +5 -0
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +47 -0
- data/lib/nokogiri/xml/syntax_error.rb +3 -1
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
- data/tasks/test.rb +136 -0
- data/test/css/test_parser.rb +4 -0
- data/test/css/test_tokenizer.rb +30 -17
- data/test/css/test_xpath_visitor.rb +11 -0
- data/test/helper.rb +11 -0
- data/test/hpricot/test_builder.rb +2 -9
- data/test/hpricot/test_parser.rb +4 -4
- data/test/html/test_builder.rb +7 -7
- data/test/html/test_document.rb +90 -4
- data/test/html/test_node.rb +1 -0
- data/test/test_css_cache.rb +1 -3
- data/test/test_reader.rb +19 -1
- data/test/test_xslt_transforms.rb +1 -1
- data/test/xml/node/test_save_options.rb +20 -0
- data/test/xml/sax/test_parser.rb +17 -0
- data/test/xml/sax/test_push_parser.rb +67 -0
- data/test/xml/test_attr.rb +16 -0
- data/test/xml/test_cdata.rb +1 -1
- data/test/xml/test_document.rb +45 -0
- data/test/xml/test_document_fragment.rb +18 -0
- data/test/xml/test_dtd.rb +2 -4
- data/test/xml/test_entity_reference.rb +16 -0
- data/test/xml/test_node.rb +149 -80
- data/test/xml/test_processing_instruction.rb +24 -0
- metadata +28 -2
@@ -2,21 +2,71 @@
|
|
2
2
|
|
3
3
|
/*
|
4
4
|
* call-seq:
|
5
|
-
*
|
5
|
+
* new
|
6
6
|
*
|
7
|
-
*
|
7
|
+
* Create a new document
|
8
8
|
*/
|
9
|
-
static VALUE
|
9
|
+
static VALUE new(int argc, VALUE *argv, VALUE klass)
|
10
10
|
{
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
11
|
+
VALUE uri, external_id;
|
12
|
+
|
13
|
+
rb_scan_args(argc, argv, "02", &uri, &external_id);
|
14
|
+
|
15
|
+
htmlDocPtr doc = htmlNewDoc(
|
16
|
+
RTEST(uri) ? (const xmlChar *)StringValuePtr(uri) : NULL,
|
17
|
+
RTEST(external_id) ? (const xmlChar *)StringValuePtr(external_id) : NULL
|
18
|
+
);
|
19
|
+
return Nokogiri_wrap_xml_document(klass, doc);
|
20
|
+
}
|
21
|
+
|
22
|
+
/*
|
23
|
+
* call-seq:
|
24
|
+
* read_io(io, url, encoding, options)
|
25
|
+
*
|
26
|
+
* Read the HTML document from +io+ with given +url+, +encoding+,
|
27
|
+
* and +options+. See Nokogiri::HTML.parse
|
28
|
+
*/
|
29
|
+
static VALUE read_io( VALUE klass,
|
30
|
+
VALUE io,
|
31
|
+
VALUE url,
|
32
|
+
VALUE encoding,
|
33
|
+
VALUE options )
|
34
|
+
{
|
35
|
+
const char * c_url = (url == Qnil) ? NULL : StringValuePtr(url);
|
36
|
+
const char * c_enc = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
|
37
|
+
VALUE error_list = rb_ary_new();
|
15
38
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
39
|
+
xmlInitParser();
|
40
|
+
xmlResetLastError();
|
41
|
+
xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
|
42
|
+
|
43
|
+
htmlDocPtr doc = htmlReadIO(
|
44
|
+
io_read_callback,
|
45
|
+
io_close_callback,
|
46
|
+
(void *)io,
|
47
|
+
c_url,
|
48
|
+
c_enc,
|
49
|
+
NUM2INT(options)
|
50
|
+
);
|
51
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
52
|
+
|
53
|
+
if(doc == NULL) {
|
54
|
+
xmlFreeDoc(doc);
|
55
|
+
|
56
|
+
xmlErrorPtr error = xmlGetLastError();
|
57
|
+
if(error)
|
58
|
+
rb_funcall(rb_mKernel, rb_intern("raise"), 1,
|
59
|
+
Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
|
60
|
+
);
|
61
|
+
else
|
62
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
63
|
+
|
64
|
+
return Qnil;
|
65
|
+
}
|
66
|
+
|
67
|
+
VALUE document = Nokogiri_wrap_xml_document(klass, doc);
|
68
|
+
rb_funcall(document, rb_intern("errors="), 1, error_list);
|
69
|
+
return document;
|
20
70
|
}
|
21
71
|
|
22
72
|
/*
|
@@ -35,17 +85,33 @@ static VALUE read_memory( VALUE klass,
|
|
35
85
|
const char * c_buffer = StringValuePtr(string);
|
36
86
|
const char * c_url = (url == Qnil) ? NULL : StringValuePtr(url);
|
37
87
|
const char * c_enc = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
|
38
|
-
int len =
|
88
|
+
int len = RSTRING_LEN(string);
|
89
|
+
VALUE error_list = rb_ary_new();
|
90
|
+
|
91
|
+
xmlInitParser();
|
92
|
+
xmlResetLastError();
|
93
|
+
xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
|
39
94
|
|
40
95
|
htmlDocPtr doc = htmlReadMemory(c_buffer, len, c_url, c_enc, NUM2INT(options));
|
96
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
41
97
|
|
42
98
|
if(doc == NULL) {
|
43
99
|
xmlFreeDoc(doc);
|
44
|
-
|
100
|
+
|
101
|
+
xmlErrorPtr error = xmlGetLastError();
|
102
|
+
if(error)
|
103
|
+
rb_funcall(rb_mKernel, rb_intern("raise"), 1,
|
104
|
+
Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
|
105
|
+
);
|
106
|
+
else
|
107
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
108
|
+
|
45
109
|
return Qnil;
|
46
110
|
}
|
47
111
|
|
48
|
-
|
112
|
+
VALUE document = Nokogiri_wrap_xml_document(klass, doc);
|
113
|
+
rb_funcall(document, rb_intern("errors="), 1, error_list);
|
114
|
+
return document;
|
49
115
|
}
|
50
116
|
|
51
117
|
/*
|
@@ -64,23 +130,18 @@ static VALUE type(VALUE self)
|
|
64
130
|
VALUE cNokogiriHtmlDocument ;
|
65
131
|
void init_html_document()
|
66
132
|
{
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
VALUE
|
72
|
-
VALUE
|
73
|
-
|
74
|
-
|
75
|
-
VALUE xml_doc = rb_define_class_under(xml, "Document", node);
|
76
|
-
VALUE klass = rb_define_class_under(html, "Document", xml_doc);
|
77
|
-
*/
|
78
|
-
|
79
|
-
VALUE klass ;
|
80
|
-
klass = cNokogiriHtmlDocument = rb_const_get(mNokogiriHtml, rb_intern("Document"));
|
133
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
134
|
+
VALUE html = rb_define_module_under(nokogiri, "HTML");
|
135
|
+
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
136
|
+
VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
|
137
|
+
VALUE xml_doc = rb_define_class_under(xml, "Document", node);
|
138
|
+
VALUE klass = rb_define_class_under(html, "Document", xml_doc);
|
139
|
+
|
140
|
+
cNokogiriHtmlDocument = klass;
|
81
141
|
|
82
142
|
rb_define_singleton_method(klass, "read_memory", read_memory, 4);
|
143
|
+
rb_define_singleton_method(klass, "read_io", read_io, 4);
|
144
|
+
rb_define_singleton_method(klass, "new", new, -1);
|
83
145
|
|
84
146
|
rb_define_method(klass, "type", type, 0);
|
85
|
-
rb_define_method(klass, "serialize", serialize, 0);
|
86
147
|
}
|
@@ -1,5 +1,11 @@
|
|
1
1
|
#include <html_sax_parser.h>
|
2
2
|
|
3
|
+
/*
|
4
|
+
* call-seq:
|
5
|
+
* native_parse_file(data, encoding)
|
6
|
+
*
|
7
|
+
* Parse +data+ with +encoding+
|
8
|
+
*/
|
3
9
|
static VALUE native_parse_file(VALUE self, VALUE data, VALUE encoding)
|
4
10
|
{
|
5
11
|
xmlSAXHandlerPtr handler;
|
@@ -13,6 +19,12 @@ static VALUE native_parse_file(VALUE self, VALUE data, VALUE encoding)
|
|
13
19
|
return data;
|
14
20
|
}
|
15
21
|
|
22
|
+
/*
|
23
|
+
* call-seq:
|
24
|
+
* native_parse_memory(data, encoding)
|
25
|
+
*
|
26
|
+
* Parse +data+ with +encoding+
|
27
|
+
*/
|
16
28
|
static VALUE native_parse_memory(VALUE self, VALUE data, VALUE encoding)
|
17
29
|
{
|
18
30
|
xmlSAXHandlerPtr handler;
|
@@ -29,8 +41,17 @@ static VALUE native_parse_memory(VALUE self, VALUE data, VALUE encoding)
|
|
29
41
|
VALUE cNokogiriHtmlSaxParser ;
|
30
42
|
void init_html_sax_parser()
|
31
43
|
{
|
32
|
-
VALUE
|
33
|
-
|
44
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
45
|
+
VALUE html = rb_define_module_under(nokogiri, "HTML");
|
46
|
+
VALUE sax = rb_define_module_under(html, "SAX");
|
47
|
+
/*
|
48
|
+
* Nokogiri::HTML::SAX::Parser is used for parsing HTML with SAX
|
49
|
+
* callbacks.
|
50
|
+
*/
|
51
|
+
VALUE klass = rb_define_class_under(sax, "Parser", cNokogiriXmlSaxParser);
|
52
|
+
|
53
|
+
cNokogiriHtmlSaxParser = klass;
|
54
|
+
|
34
55
|
rb_define_private_method(klass, "native_parse_memory", native_parse_memory, 2);
|
35
56
|
rb_define_private_method(klass, "native_parse_file", native_parse_file, 2);
|
36
57
|
}
|
data/ext/nokogiri/native.c
CHANGED
@@ -9,30 +9,40 @@ VALUE mNokogiriHtmlSax ;
|
|
9
9
|
|
10
10
|
void Init_native()
|
11
11
|
{
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
12
|
+
xmlMemSetup(
|
13
|
+
(xmlFreeFunc)ruby_xfree,
|
14
|
+
(xmlMallocFunc)ruby_xmalloc,
|
15
|
+
(xmlReallocFunc)ruby_xrealloc,
|
16
|
+
strdup
|
17
|
+
);
|
18
|
+
|
19
|
+
mNokogiri = rb_define_module("Nokogiri");
|
20
|
+
mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
|
21
|
+
mNokogiriHtml = rb_define_module_under(mNokogiri, "HTML");
|
22
|
+
mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
|
23
|
+
mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
|
24
|
+
mNokogiriHtmlSax = rb_define_module_under(mNokogiriHtml, "SAX");
|
18
25
|
|
19
26
|
rb_const_set( mNokogiri,
|
20
27
|
rb_intern("LIBXML_VERSION"),
|
21
28
|
rb_str_new2(LIBXML_DOTTED_VERSION)
|
22
29
|
);
|
23
30
|
|
24
|
-
xmlSetStructuredErrorFunc(NULL, Nokogiri_error_handler);
|
25
|
-
|
26
31
|
init_xml_document();
|
27
32
|
init_html_document();
|
28
33
|
init_xml_node();
|
34
|
+
init_xml_document_fragment();
|
29
35
|
init_xml_text();
|
30
36
|
init_xml_cdata();
|
37
|
+
init_xml_processing_instruction();
|
38
|
+
init_xml_attr();
|
39
|
+
init_xml_entity_reference();
|
31
40
|
init_xml_comment();
|
32
41
|
init_xml_node_set();
|
33
42
|
init_xml_xpath_context();
|
34
43
|
init_xml_xpath();
|
35
44
|
init_xml_sax_parser();
|
45
|
+
init_xml_sax_push_parser();
|
36
46
|
init_xml_reader();
|
37
47
|
init_xml_dtd();
|
38
48
|
init_html_sax_parser();
|
data/ext/nokogiri/native.h
CHANGED
@@ -8,6 +8,7 @@
|
|
8
8
|
#include <libxml/xpath.h>
|
9
9
|
#include <libxml/xpathInternals.h>
|
10
10
|
#include <libxml/xmlreader.h>
|
11
|
+
#include <libxml/xmlsave.h>
|
11
12
|
#include <libxml/HTMLparser.h>
|
12
13
|
#include <libxml/HTMLtree.h>
|
13
14
|
|
@@ -17,12 +18,17 @@
|
|
17
18
|
#include <xml_node.h>
|
18
19
|
#include <xml_text.h>
|
19
20
|
#include <xml_cdata.h>
|
21
|
+
#include <xml_attr.h>
|
22
|
+
#include <xml_processing_instruction.h>
|
23
|
+
#include <xml_entity_reference.h>
|
24
|
+
#include <xml_document_fragment.h>
|
20
25
|
#include <xml_comment.h>
|
21
26
|
#include <xml_node_set.h>
|
22
27
|
#include <xml_xpath.h>
|
23
28
|
#include <xml_dtd.h>
|
24
29
|
#include <xml_xpath_context.h>
|
25
30
|
#include <xml_sax_parser.h>
|
31
|
+
#include <xml_sax_push_parser.h>
|
26
32
|
#include <xml_reader.h>
|
27
33
|
#include <html_sax_parser.h>
|
28
34
|
#include <xslt_stylesheet.h>
|
@@ -45,6 +51,22 @@ extern VALUE mNokogiriXslt ;
|
|
45
51
|
#define NOKOGIRI_DEBUG_START(p)
|
46
52
|
#define NOKOGIRI_DEBUG_END(p)
|
47
53
|
|
54
|
+
#ifndef RSTRING_PTR
|
55
|
+
#define RSTRING_PTR(s) (RSTRING(s)->ptr)
|
56
|
+
#endif
|
57
|
+
|
58
|
+
#ifndef RSTRING_LEN
|
59
|
+
#define RSTRING_LEN(s) (RSTRING(s)->len)
|
60
|
+
#endif
|
61
|
+
|
62
|
+
#ifndef RARRAY_PTR
|
63
|
+
#define RARRAY_PTR(a) RARRAY(a)->ptr
|
64
|
+
#endif
|
65
|
+
|
66
|
+
#ifndef RARRAY_LEN
|
67
|
+
#define RARRAY_LEN(a) RARRAY(a)->len
|
68
|
+
#endif
|
69
|
+
|
48
70
|
#endif
|
49
71
|
|
50
72
|
#endif
|
@@ -0,0 +1,83 @@
|
|
1
|
+
#include <xml_attr.h>
|
2
|
+
|
3
|
+
/*
|
4
|
+
* call-seq:
|
5
|
+
* value=(content)
|
6
|
+
*
|
7
|
+
* Set the value for this Attr to +content+
|
8
|
+
*/
|
9
|
+
static VALUE set_value(VALUE self, VALUE content)
|
10
|
+
{
|
11
|
+
xmlAttrPtr attr;
|
12
|
+
Data_Get_Struct(self, xmlAttr, attr);
|
13
|
+
|
14
|
+
if(attr->children) xmlFreeNodeList(attr->children);
|
15
|
+
|
16
|
+
attr->children = attr->last = NULL;
|
17
|
+
|
18
|
+
if(content) {
|
19
|
+
xmlChar *buffer;
|
20
|
+
xmlNode *tmp;
|
21
|
+
|
22
|
+
// Encode our content
|
23
|
+
buffer = xmlEncodeEntitiesReentrant(attr->doc, (unsigned char *)StringValuePtr(content));
|
24
|
+
|
25
|
+
attr->children = xmlStringGetNodeList(attr->doc, buffer);
|
26
|
+
attr->last = NULL;
|
27
|
+
tmp = attr->children;
|
28
|
+
|
29
|
+
// Loop through the children
|
30
|
+
for(tmp = attr->children; tmp; tmp = tmp->next) {
|
31
|
+
tmp->parent = (xmlNode *)attr;
|
32
|
+
tmp->doc = attr->doc;
|
33
|
+
if(tmp->next == NULL) attr->last = tmp;
|
34
|
+
}
|
35
|
+
|
36
|
+
// Free up memory
|
37
|
+
xmlFree(buffer);
|
38
|
+
}
|
39
|
+
|
40
|
+
return content;
|
41
|
+
}
|
42
|
+
|
43
|
+
/*
|
44
|
+
* call-seq:
|
45
|
+
* new(document, name)
|
46
|
+
*
|
47
|
+
* Create a new Attr element on the +document+ with +name+
|
48
|
+
*/
|
49
|
+
static VALUE new(VALUE klass, VALUE doc, VALUE name)
|
50
|
+
{
|
51
|
+
xmlDocPtr xml_doc;
|
52
|
+
Data_Get_Struct(doc, xmlDoc, xml_doc);
|
53
|
+
|
54
|
+
xmlAttrPtr node = xmlNewDocProp(
|
55
|
+
xml_doc,
|
56
|
+
(const xmlChar *)StringValuePtr(name),
|
57
|
+
NULL
|
58
|
+
);
|
59
|
+
|
60
|
+
VALUE rb_node = Nokogiri_wrap_xml_node((xmlNodePtr)node);
|
61
|
+
|
62
|
+
if(rb_block_given_p()) rb_yield(rb_node);
|
63
|
+
|
64
|
+
return rb_node;
|
65
|
+
}
|
66
|
+
|
67
|
+
VALUE cNokogiriXmlAttr;
|
68
|
+
void init_xml_attr()
|
69
|
+
{
|
70
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
71
|
+
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
72
|
+
VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
|
73
|
+
|
74
|
+
/*
|
75
|
+
* Attr represents an Attr node in an XML document.
|
76
|
+
*/
|
77
|
+
VALUE klass = rb_define_class_under(xml, "Attr", node);
|
78
|
+
|
79
|
+
cNokogiriXmlAttr = klass;
|
80
|
+
|
81
|
+
rb_define_singleton_method(klass, "new", new, 2);
|
82
|
+
rb_define_method(klass, "value=", set_value, 1);
|
83
|
+
}
|
data/ext/nokogiri/xml_cdata.c
CHANGED
@@ -14,7 +14,7 @@ static VALUE new(VALUE klass, VALUE doc, VALUE content)
|
|
14
14
|
xmlNodePtr node = xmlNewCDataBlock(
|
15
15
|
xml_doc,
|
16
16
|
(const xmlChar *)StringValuePtr(content),
|
17
|
-
|
17
|
+
RSTRING_LEN(content)
|
18
18
|
);
|
19
19
|
|
20
20
|
VALUE rb_node = Nokogiri_wrap_xml_node(node);
|
data/ext/nokogiri/xml_document.c
CHANGED
@@ -10,21 +10,19 @@ static void dealloc(xmlDocPtr doc)
|
|
10
10
|
|
11
11
|
/*
|
12
12
|
* call-seq:
|
13
|
-
*
|
13
|
+
* url
|
14
14
|
*
|
15
|
-
*
|
15
|
+
* Get the URL for this document.
|
16
16
|
*/
|
17
|
-
static VALUE
|
17
|
+
static VALUE url(VALUE self)
|
18
18
|
{
|
19
19
|
xmlDocPtr doc;
|
20
|
-
xmlChar *buf;
|
21
|
-
int size;
|
22
20
|
Data_Get_Struct(self, xmlDoc, doc);
|
23
21
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
return
|
22
|
+
if(doc->URL)
|
23
|
+
return rb_str_new2((const char *)doc->URL);
|
24
|
+
|
25
|
+
return Qnil;
|
28
26
|
}
|
29
27
|
|
30
28
|
/*
|
@@ -62,6 +60,21 @@ static VALUE root(VALUE self)
|
|
62
60
|
return Nokogiri_wrap_xml_node(root) ;
|
63
61
|
}
|
64
62
|
|
63
|
+
/*
|
64
|
+
* call-seq:
|
65
|
+
* encoding
|
66
|
+
*
|
67
|
+
* Get the encoding for this Document
|
68
|
+
*/
|
69
|
+
static VALUE encoding(VALUE self)
|
70
|
+
{
|
71
|
+
xmlDocPtr doc;
|
72
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
73
|
+
|
74
|
+
if(!doc->encoding) return Qnil;
|
75
|
+
return rb_str_new2((const char *)doc->encoding);
|
76
|
+
}
|
77
|
+
|
65
78
|
/*
|
66
79
|
* call-seq:
|
67
80
|
* read_io(io, url, encoding, options)
|
@@ -76,8 +89,11 @@ static VALUE read_io( VALUE klass,
|
|
76
89
|
{
|
77
90
|
const char * c_url = (url == Qnil) ? NULL : StringValuePtr(url);
|
78
91
|
const char * c_enc = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
|
92
|
+
VALUE error_list = rb_ary_new();
|
79
93
|
|
80
94
|
xmlInitParser();
|
95
|
+
xmlResetLastError();
|
96
|
+
xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
|
81
97
|
|
82
98
|
xmlDocPtr doc = xmlReadIO(
|
83
99
|
(xmlInputReadCallback)io_read_callback,
|
@@ -87,14 +103,25 @@ static VALUE read_io( VALUE klass,
|
|
87
103
|
c_enc,
|
88
104
|
NUM2INT(options)
|
89
105
|
);
|
106
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
90
107
|
|
91
108
|
if(doc == NULL) {
|
92
109
|
xmlFreeDoc(doc);
|
93
|
-
|
110
|
+
|
111
|
+
xmlErrorPtr error = xmlGetLastError();
|
112
|
+
if(error)
|
113
|
+
rb_funcall(rb_mKernel, rb_intern("raise"), 1,
|
114
|
+
Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
|
115
|
+
);
|
116
|
+
else
|
117
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
118
|
+
|
94
119
|
return Qnil;
|
95
120
|
}
|
96
121
|
|
97
|
-
|
122
|
+
VALUE document = Nokogiri_wrap_xml_document(klass, doc);
|
123
|
+
rb_funcall(document, rb_intern("errors="), 1, error_list);
|
124
|
+
return document;
|
98
125
|
}
|
99
126
|
|
100
127
|
/*
|
@@ -112,18 +139,55 @@ static VALUE read_memory( VALUE klass,
|
|
112
139
|
const char * c_buffer = StringValuePtr(string);
|
113
140
|
const char * c_url = (url == Qnil) ? NULL : StringValuePtr(url);
|
114
141
|
const char * c_enc = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
|
115
|
-
int len =
|
142
|
+
int len = RSTRING_LEN(string);
|
143
|
+
VALUE error_list = rb_ary_new();
|
116
144
|
|
117
145
|
xmlInitParser();
|
146
|
+
xmlResetLastError();
|
147
|
+
xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
|
118
148
|
xmlDocPtr doc = xmlReadMemory(c_buffer, len, c_url, c_enc, NUM2INT(options));
|
149
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
119
150
|
|
120
151
|
if(doc == NULL) {
|
121
152
|
xmlFreeDoc(doc);
|
122
|
-
|
153
|
+
|
154
|
+
xmlErrorPtr error = xmlGetLastError();
|
155
|
+
if(error)
|
156
|
+
rb_funcall(rb_mKernel, rb_intern("raise"), 1,
|
157
|
+
Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
|
158
|
+
);
|
159
|
+
else
|
160
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
161
|
+
|
123
162
|
return Qnil;
|
124
163
|
}
|
125
164
|
|
126
|
-
|
165
|
+
VALUE document = Nokogiri_wrap_xml_document(klass, doc);
|
166
|
+
rb_funcall(document, rb_intern("errors="), 1, error_list);
|
167
|
+
return document;
|
168
|
+
}
|
169
|
+
|
170
|
+
/*
|
171
|
+
* call-seq:
|
172
|
+
* dup
|
173
|
+
*
|
174
|
+
* Copy this Document. An optional depth may be passed in, but it defaults
|
175
|
+
* to a deep copy. 0 is a shallow copy, 1 is a deep copy.
|
176
|
+
*/
|
177
|
+
static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
|
178
|
+
{
|
179
|
+
VALUE level;
|
180
|
+
|
181
|
+
if(rb_scan_args(argc, argv, "01", &level) == 0)
|
182
|
+
level = INT2NUM(1);
|
183
|
+
|
184
|
+
xmlDocPtr doc, dup;
|
185
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
186
|
+
|
187
|
+
dup = xmlCopyDoc(doc, NUM2INT(level));
|
188
|
+
if(dup == NULL) return Qnil;
|
189
|
+
|
190
|
+
return Nokogiri_wrap_xml_document(cNokogiriXmlDocument, dup);
|
127
191
|
}
|
128
192
|
|
129
193
|
/*
|
@@ -169,9 +233,9 @@ static VALUE load_external_subsets_set(VALUE klass, VALUE value)
|
|
169
233
|
VALUE cNokogiriXmlDocument ;
|
170
234
|
void init_xml_document()
|
171
235
|
{
|
172
|
-
VALUE nokogiri
|
173
|
-
VALUE xml
|
174
|
-
VALUE node
|
236
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
237
|
+
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
238
|
+
VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
|
175
239
|
|
176
240
|
/*
|
177
241
|
* Nokogiri::XML::Document wraps an xml document.
|
@@ -188,7 +252,9 @@ void init_xml_document()
|
|
188
252
|
|
189
253
|
rb_define_method(klass, "root", root, 0);
|
190
254
|
rb_define_method(klass, "root=", set_root, 1);
|
191
|
-
rb_define_method(klass, "
|
255
|
+
rb_define_method(klass, "encoding", encoding, 0);
|
256
|
+
rb_define_method(klass, "dup", duplicate_node, -1);
|
257
|
+
rb_define_method(klass, "url", url, 0);
|
192
258
|
rb_undef_method(klass, "parent");
|
193
259
|
}
|
194
260
|
|