nokogiri 1.10.7 → 1.16.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +42 -0
- data/LICENSE-DEPENDENCIES.md +1632 -1022
- data/LICENSE.md +1 -1
- data/README.md +188 -96
- data/bin/nokogiri +63 -50
- data/dependencies.yml +34 -66
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +862 -421
- data/ext/nokogiri/gumbo.c +594 -0
- data/ext/nokogiri/html4_document.c +165 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +108 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +251 -105
- data/ext/nokogiri/nokogiri.h +222 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +17 -17
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +39 -31
- data/ext/nokogiri/xml_comment.c +20 -27
- data/ext/nokogiri/xml_document.c +408 -243
- data/ext/nokogiri/xml_document_fragment.c +13 -17
- data/ext/nokogiri/xml_dtd.c +64 -58
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +131 -61
- data/ext/nokogiri/xml_node.c +1343 -674
- data/ext/nokogiri/xml_node_set.c +246 -216
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +305 -213
- data/ext/nokogiri/xml_relax_ng.c +87 -78
- data/ext/nokogiri/xml_sax_parser.c +149 -124
- data/ext/nokogiri/xml_sax_parser_context.c +149 -103
- data/ext/nokogiri/xml_sax_push_parser.c +65 -37
- data/ext/nokogiri/xml_schema.c +138 -82
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +35 -26
- data/ext/nokogiri/xml_xpath_context.c +363 -178
- data/ext/nokogiri/xslt_stylesheet.c +335 -189
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +126 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +630 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
- data/gumbo-parser/src/parser.c +4891 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3464 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -8
- data/lib/nokogiri/css/parser.rb +397 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +54 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +5 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +205 -96
- data/lib/nokogiri/css.rb +56 -17
- data/lib/nokogiri/decorators/slop.rb +9 -7
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +214 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +47 -0
- data/lib/nokogiri/html5/document.rb +168 -0
- data/lib/nokogiri/html5/document_fragment.rb +90 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +326 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +75 -34
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +312 -127
- data/lib/nokogiri/xml/document_fragment.rb +93 -48
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1096 -419
- data/lib/nokogiri/xml/node_set.rb +137 -61
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +42 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +21 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +39 -36
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +120 -72
- data/lib/nokogiri/xml/syntax_error.rb +7 -5
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +39 -38
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +101 -22
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +29 -25
- data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.12.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +121 -291
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -1,13 +1,24 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
|
-
|
3
|
+
VALUE cNokogiriXmlReader;
|
4
|
+
|
5
|
+
static void
|
6
|
+
xml_reader_deallocate(void *data)
|
4
7
|
{
|
5
|
-
|
8
|
+
xmlTextReaderPtr reader = data;
|
6
9
|
xmlFreeTextReader(reader);
|
7
|
-
NOKOGIRI_DEBUG_END(reader);
|
8
10
|
}
|
9
11
|
|
10
|
-
static
|
12
|
+
static const rb_data_type_t xml_reader_type = {
|
13
|
+
.wrap_struct_name = "Nokogiri::XML::Reader",
|
14
|
+
.function = {
|
15
|
+
.dfree = xml_reader_deallocate,
|
16
|
+
},
|
17
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
18
|
+
};
|
19
|
+
|
20
|
+
static int
|
21
|
+
has_attributes(xmlTextReaderPtr reader)
|
11
22
|
{
|
12
23
|
/*
|
13
24
|
* this implementation of xmlTextReaderHasAttributes explicitly includes
|
@@ -16,47 +27,40 @@ static int has_attributes(xmlTextReaderPtr reader)
|
|
16
27
|
*/
|
17
28
|
xmlNodePtr node ;
|
18
29
|
node = xmlTextReaderCurrentNode(reader);
|
19
|
-
if (node == NULL)
|
20
|
-
return(0);
|
30
|
+
if (node == NULL) {
|
31
|
+
return (0);
|
32
|
+
}
|
21
33
|
|
22
34
|
if ((node->type == XML_ELEMENT_NODE) &&
|
23
|
-
((node->properties != NULL) || (node->nsDef != NULL)))
|
24
|
-
return(1);
|
25
|
-
|
35
|
+
((node->properties != NULL) || (node->nsDef != NULL))) {
|
36
|
+
return (1);
|
37
|
+
}
|
38
|
+
return (0);
|
26
39
|
}
|
27
40
|
|
28
|
-
|
41
|
+
// TODO: merge this function into the `namespaces` method implementation
|
42
|
+
static void
|
43
|
+
Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
|
29
44
|
{
|
30
45
|
xmlNsPtr ns;
|
31
|
-
|
32
|
-
char *key ;
|
33
|
-
size_t keylen ;
|
46
|
+
VALUE key;
|
34
47
|
|
35
|
-
if (node->type != XML_ELEMENT_NODE) return ;
|
48
|
+
if (node->type != XML_ELEMENT_NODE) { return ; }
|
36
49
|
|
37
50
|
ns = node->nsDef;
|
38
51
|
while (ns != NULL) {
|
39
52
|
|
40
|
-
|
41
|
-
if (keylen > XMLNS_BUFFER_LEN) {
|
42
|
-
key = (char*)malloc(keylen) ;
|
43
|
-
} else {
|
44
|
-
key = buffer ;
|
45
|
-
}
|
46
|
-
|
53
|
+
key = rb_enc_str_new_cstr(XMLNS_PREFIX, rb_utf8_encoding());
|
47
54
|
if (ns->prefix) {
|
48
|
-
|
49
|
-
|
50
|
-
sprintf(key, "%s", XMLNS_PREFIX);
|
55
|
+
rb_str_cat_cstr(key, ":");
|
56
|
+
rb_str_cat_cstr(key, (const char *)ns->prefix);
|
51
57
|
}
|
52
58
|
|
59
|
+
key = rb_str_conv_enc(key, rb_utf8_encoding(), rb_default_internal_encoding());
|
53
60
|
rb_hash_aset(attr_hash,
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
if (key != buffer) {
|
58
|
-
free(key);
|
59
|
-
}
|
61
|
+
key,
|
62
|
+
(ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil)
|
63
|
+
);
|
60
64
|
ns = ns->next ;
|
61
65
|
}
|
62
66
|
}
|
@@ -68,15 +72,16 @@ static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
|
|
68
72
|
*
|
69
73
|
* Was an attribute generated from the default value in the DTD or schema?
|
70
74
|
*/
|
71
|
-
static VALUE
|
75
|
+
static VALUE
|
76
|
+
default_eh(VALUE self)
|
72
77
|
{
|
73
78
|
xmlTextReaderPtr reader;
|
74
79
|
int eh;
|
75
80
|
|
76
|
-
|
81
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
77
82
|
eh = xmlTextReaderIsDefault(reader);
|
78
|
-
if(eh == 0) return Qfalse;
|
79
|
-
if(eh == 1) return Qtrue;
|
83
|
+
if (eh == 0) { return Qfalse; }
|
84
|
+
if (eh == 1) { return Qtrue; }
|
80
85
|
|
81
86
|
return Qnil;
|
82
87
|
}
|
@@ -87,15 +92,16 @@ static VALUE default_eh(VALUE self)
|
|
87
92
|
*
|
88
93
|
* Does this node have a text value?
|
89
94
|
*/
|
90
|
-
static VALUE
|
95
|
+
static VALUE
|
96
|
+
value_eh(VALUE self)
|
91
97
|
{
|
92
98
|
xmlTextReaderPtr reader;
|
93
99
|
int eh;
|
94
100
|
|
95
|
-
|
101
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
96
102
|
eh = xmlTextReaderHasValue(reader);
|
97
|
-
if(eh == 0) return Qfalse;
|
98
|
-
if(eh == 1) return Qtrue;
|
103
|
+
if (eh == 0) { return Qfalse; }
|
104
|
+
if (eh == 1) { return Qtrue; }
|
99
105
|
|
100
106
|
return Qnil;
|
101
107
|
}
|
@@ -106,15 +112,16 @@ static VALUE value_eh(VALUE self)
|
|
106
112
|
*
|
107
113
|
* Does this node have attributes?
|
108
114
|
*/
|
109
|
-
static VALUE
|
115
|
+
static VALUE
|
116
|
+
attributes_eh(VALUE self)
|
110
117
|
{
|
111
118
|
xmlTextReaderPtr reader;
|
112
119
|
int eh;
|
113
120
|
|
114
|
-
|
121
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
115
122
|
eh = has_attributes(reader);
|
116
|
-
if(eh == 0) return Qfalse;
|
117
|
-
if(eh == 1) return Qtrue;
|
123
|
+
if (eh == 0) { return Qfalse; }
|
124
|
+
if (eh == 1) { return Qtrue; }
|
118
125
|
|
119
126
|
return Qnil;
|
120
127
|
}
|
@@ -125,52 +132,94 @@ static VALUE attributes_eh(VALUE self)
|
|
125
132
|
*
|
126
133
|
* Get a hash of namespaces for this Node
|
127
134
|
*/
|
128
|
-
static VALUE
|
135
|
+
static VALUE
|
136
|
+
rb_xml_reader_namespaces(VALUE rb_reader)
|
129
137
|
{
|
130
|
-
|
131
|
-
|
132
|
-
|
138
|
+
VALUE rb_namespaces = rb_hash_new() ;
|
139
|
+
xmlTextReaderPtr c_reader;
|
140
|
+
xmlNodePtr c_node;
|
141
|
+
VALUE rb_errors;
|
133
142
|
|
134
|
-
|
143
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
144
|
+
|
145
|
+
if (! has_attributes(c_reader)) {
|
146
|
+
return rb_namespaces ;
|
147
|
+
}
|
135
148
|
|
136
|
-
|
149
|
+
rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
|
137
150
|
|
138
|
-
|
139
|
-
|
151
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
|
152
|
+
c_node = xmlTextReaderExpand(c_reader);
|
153
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
140
154
|
|
141
|
-
|
142
|
-
|
155
|
+
if (c_node == NULL) {
|
156
|
+
if (RARRAY_LEN(rb_errors) > 0) {
|
157
|
+
VALUE rb_error = rb_ary_entry(rb_errors, 0);
|
158
|
+
VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
|
159
|
+
rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
|
160
|
+
}
|
161
|
+
return Qnil;
|
162
|
+
}
|
143
163
|
|
144
|
-
Nokogiri_xml_node_namespaces(
|
164
|
+
Nokogiri_xml_node_namespaces(c_node, rb_namespaces);
|
145
165
|
|
146
|
-
return
|
166
|
+
return rb_namespaces ;
|
147
167
|
}
|
148
168
|
|
149
169
|
/*
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
170
|
+
:call-seq: attribute_hash() → Hash<String ⇒ String>
|
171
|
+
|
172
|
+
Get the attributes of the current node as a Hash of names and values.
|
173
|
+
|
174
|
+
See related: #attributes and #namespaces
|
154
175
|
*/
|
155
|
-
static VALUE
|
176
|
+
static VALUE
|
177
|
+
rb_xml_reader_attribute_hash(VALUE rb_reader)
|
156
178
|
{
|
157
|
-
|
158
|
-
|
159
|
-
|
179
|
+
VALUE rb_attributes = rb_hash_new();
|
180
|
+
xmlTextReaderPtr c_reader;
|
181
|
+
xmlNodePtr c_node;
|
182
|
+
xmlAttrPtr c_property;
|
183
|
+
VALUE rb_errors;
|
160
184
|
|
161
|
-
|
185
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
162
186
|
|
163
|
-
|
187
|
+
if (!has_attributes(c_reader)) {
|
188
|
+
return rb_attributes;
|
189
|
+
}
|
164
190
|
|
165
|
-
|
166
|
-
return attr ;
|
191
|
+
rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
|
167
192
|
|
168
|
-
|
169
|
-
|
193
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
|
194
|
+
c_node = xmlTextReaderExpand(c_reader);
|
195
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
170
196
|
|
171
|
-
|
197
|
+
if (c_node == NULL) {
|
198
|
+
if (RARRAY_LEN(rb_errors) > 0) {
|
199
|
+
VALUE rb_error = rb_ary_entry(rb_errors, 0);
|
200
|
+
VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
|
201
|
+
rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
|
202
|
+
}
|
203
|
+
return Qnil;
|
204
|
+
}
|
205
|
+
|
206
|
+
c_property = c_node->properties;
|
207
|
+
while (c_property != NULL) {
|
208
|
+
VALUE rb_name = NOKOGIRI_STR_NEW2(c_property->name);
|
209
|
+
VALUE rb_value = Qnil;
|
210
|
+
xmlChar *c_value = xmlNodeGetContent((xmlNode *)c_property);
|
211
|
+
|
212
|
+
if (c_value) {
|
213
|
+
rb_value = NOKOGIRI_STR_NEW2(c_value);
|
214
|
+
xmlFree(c_value);
|
215
|
+
}
|
216
|
+
|
217
|
+
rb_hash_aset(rb_attributes, rb_name, rb_value);
|
172
218
|
|
173
|
-
|
219
|
+
c_property = c_property->next;
|
220
|
+
}
|
221
|
+
|
222
|
+
return rb_attributes;
|
174
223
|
}
|
175
224
|
|
176
225
|
/*
|
@@ -179,22 +228,23 @@ static VALUE attribute_nodes(VALUE self)
|
|
179
228
|
*
|
180
229
|
* Get the value of attribute at +index+
|
181
230
|
*/
|
182
|
-
static VALUE
|
231
|
+
static VALUE
|
232
|
+
attribute_at(VALUE self, VALUE index)
|
183
233
|
{
|
184
234
|
xmlTextReaderPtr reader;
|
185
235
|
xmlChar *value;
|
186
236
|
VALUE rb_value;
|
187
237
|
|
188
|
-
|
238
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
189
239
|
|
190
|
-
if(NIL_P(index)) return Qnil;
|
240
|
+
if (NIL_P(index)) { return Qnil; }
|
191
241
|
index = rb_Integer(index);
|
192
242
|
|
193
243
|
value = xmlTextReaderGetAttributeNo(
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
if(value == NULL) return Qnil;
|
244
|
+
reader,
|
245
|
+
(int)NUM2INT(index)
|
246
|
+
);
|
247
|
+
if (value == NULL) { return Qnil; }
|
198
248
|
|
199
249
|
rb_value = NOKOGIRI_STR_NEW2(value);
|
200
250
|
xmlFree(value);
|
@@ -207,19 +257,20 @@ static VALUE attribute_at(VALUE self, VALUE index)
|
|
207
257
|
*
|
208
258
|
* Get the value of attribute named +name+
|
209
259
|
*/
|
210
|
-
static VALUE
|
260
|
+
static VALUE
|
261
|
+
reader_attribute(VALUE self, VALUE name)
|
211
262
|
{
|
212
263
|
xmlTextReaderPtr reader;
|
213
264
|
xmlChar *value ;
|
214
265
|
VALUE rb_value;
|
215
266
|
|
216
|
-
|
267
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
217
268
|
|
218
|
-
if(NIL_P(name)) return Qnil;
|
269
|
+
if (NIL_P(name)) { return Qnil; }
|
219
270
|
name = StringValue(name) ;
|
220
271
|
|
221
|
-
value = xmlTextReaderGetAttribute(reader, (xmlChar*)StringValueCStr(name));
|
222
|
-
if(value == NULL) return Qnil;
|
272
|
+
value = xmlTextReaderGetAttribute(reader, (xmlChar *)StringValueCStr(name));
|
273
|
+
if (value == NULL) { return Qnil; }
|
223
274
|
|
224
275
|
rb_value = NOKOGIRI_STR_NEW2(value);
|
225
276
|
xmlFree(value);
|
@@ -232,16 +283,17 @@ static VALUE reader_attribute(VALUE self, VALUE name)
|
|
232
283
|
*
|
233
284
|
* Get the number of attributes for the current node
|
234
285
|
*/
|
235
|
-
static VALUE
|
286
|
+
static VALUE
|
287
|
+
attribute_count(VALUE self)
|
236
288
|
{
|
237
289
|
xmlTextReaderPtr reader;
|
238
290
|
int count;
|
239
291
|
|
240
|
-
|
292
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
241
293
|
count = xmlTextReaderAttributeCount(reader);
|
242
|
-
if(count == -1) return Qnil;
|
294
|
+
if (count == -1) { return Qnil; }
|
243
295
|
|
244
|
-
return INT2NUM(
|
296
|
+
return INT2NUM(count);
|
245
297
|
}
|
246
298
|
|
247
299
|
/*
|
@@ -250,16 +302,17 @@ static VALUE attribute_count(VALUE self)
|
|
250
302
|
*
|
251
303
|
* Get the depth of the node
|
252
304
|
*/
|
253
|
-
static VALUE
|
305
|
+
static VALUE
|
306
|
+
depth(VALUE self)
|
254
307
|
{
|
255
308
|
xmlTextReaderPtr reader;
|
256
309
|
int depth;
|
257
310
|
|
258
|
-
|
311
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
259
312
|
depth = xmlTextReaderDepth(reader);
|
260
|
-
if(depth == -1) return Qnil;
|
313
|
+
if (depth == -1) { return Qnil; }
|
261
314
|
|
262
|
-
return INT2NUM(
|
315
|
+
return INT2NUM(depth);
|
263
316
|
}
|
264
317
|
|
265
318
|
/*
|
@@ -268,14 +321,15 @@ static VALUE depth(VALUE self)
|
|
268
321
|
*
|
269
322
|
* Get the XML version of the document being read
|
270
323
|
*/
|
271
|
-
static VALUE
|
324
|
+
static VALUE
|
325
|
+
xml_version(VALUE self)
|
272
326
|
{
|
273
327
|
xmlTextReaderPtr reader;
|
274
328
|
const char *version;
|
275
329
|
|
276
|
-
|
330
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
277
331
|
version = (const char *)xmlTextReaderConstXmlVersion(reader);
|
278
|
-
if(version == NULL) return Qnil;
|
332
|
+
if (version == NULL) { return Qnil; }
|
279
333
|
|
280
334
|
return NOKOGIRI_STR_NEW2(version);
|
281
335
|
}
|
@@ -286,14 +340,15 @@ static VALUE xml_version(VALUE self)
|
|
286
340
|
*
|
287
341
|
* Get the xml:lang scope within which the node resides.
|
288
342
|
*/
|
289
|
-
static VALUE
|
343
|
+
static VALUE
|
344
|
+
lang(VALUE self)
|
290
345
|
{
|
291
346
|
xmlTextReaderPtr reader;
|
292
347
|
const char *lang;
|
293
348
|
|
294
|
-
|
349
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
295
350
|
lang = (const char *)xmlTextReaderConstXmlLang(reader);
|
296
|
-
if(lang == NULL) return Qnil;
|
351
|
+
if (lang == NULL) { return Qnil; }
|
297
352
|
|
298
353
|
return NOKOGIRI_STR_NEW2(lang);
|
299
354
|
}
|
@@ -304,14 +359,15 @@ static VALUE lang(VALUE self)
|
|
304
359
|
*
|
305
360
|
* Get the text value of the node if present. Returns a utf-8 encoded string.
|
306
361
|
*/
|
307
|
-
static VALUE
|
362
|
+
static VALUE
|
363
|
+
value(VALUE self)
|
308
364
|
{
|
309
365
|
xmlTextReaderPtr reader;
|
310
366
|
const char *value;
|
311
367
|
|
312
|
-
|
368
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
313
369
|
value = (const char *)xmlTextReaderConstValue(reader);
|
314
|
-
if(value == NULL) return Qnil;
|
370
|
+
if (value == NULL) { return Qnil; }
|
315
371
|
|
316
372
|
return NOKOGIRI_STR_NEW2(value);
|
317
373
|
}
|
@@ -322,14 +378,15 @@ static VALUE value(VALUE self)
|
|
322
378
|
*
|
323
379
|
* Get the shorthand reference to the namespace associated with the node.
|
324
380
|
*/
|
325
|
-
static VALUE
|
381
|
+
static VALUE
|
382
|
+
prefix(VALUE self)
|
326
383
|
{
|
327
384
|
xmlTextReaderPtr reader;
|
328
385
|
const char *prefix;
|
329
386
|
|
330
|
-
|
387
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
331
388
|
prefix = (const char *)xmlTextReaderConstPrefix(reader);
|
332
|
-
if(prefix == NULL) return Qnil;
|
389
|
+
if (prefix == NULL) { return Qnil; }
|
333
390
|
|
334
391
|
return NOKOGIRI_STR_NEW2(prefix);
|
335
392
|
}
|
@@ -340,14 +397,15 @@ static VALUE prefix(VALUE self)
|
|
340
397
|
*
|
341
398
|
* Get the URI defining the namespace associated with the node
|
342
399
|
*/
|
343
|
-
static VALUE
|
400
|
+
static VALUE
|
401
|
+
namespace_uri(VALUE self)
|
344
402
|
{
|
345
403
|
xmlTextReaderPtr reader;
|
346
404
|
const char *uri;
|
347
405
|
|
348
|
-
|
406
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
349
407
|
uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
|
350
|
-
if(uri == NULL) return Qnil;
|
408
|
+
if (uri == NULL) { return Qnil; }
|
351
409
|
|
352
410
|
return NOKOGIRI_STR_NEW2(uri);
|
353
411
|
}
|
@@ -358,14 +416,15 @@ static VALUE namespace_uri(VALUE self)
|
|
358
416
|
*
|
359
417
|
* Get the local name of the node
|
360
418
|
*/
|
361
|
-
static VALUE
|
419
|
+
static VALUE
|
420
|
+
local_name(VALUE self)
|
362
421
|
{
|
363
422
|
xmlTextReaderPtr reader;
|
364
423
|
const char *name;
|
365
424
|
|
366
|
-
|
425
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
367
426
|
name = (const char *)xmlTextReaderConstLocalName(reader);
|
368
|
-
if(name == NULL) return Qnil;
|
427
|
+
if (name == NULL) { return Qnil; }
|
369
428
|
|
370
429
|
return NOKOGIRI_STR_NEW2(name);
|
371
430
|
}
|
@@ -376,14 +435,15 @@ static VALUE local_name(VALUE self)
|
|
376
435
|
*
|
377
436
|
* Get the name of the node. Returns a utf-8 encoded string.
|
378
437
|
*/
|
379
|
-
static VALUE
|
438
|
+
static VALUE
|
439
|
+
name(VALUE self)
|
380
440
|
{
|
381
441
|
xmlTextReaderPtr reader;
|
382
442
|
const char *name;
|
383
443
|
|
384
|
-
|
444
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
385
445
|
name = (const char *)xmlTextReaderConstName(reader);
|
386
|
-
if(name == NULL) return Qnil;
|
446
|
+
if (name == NULL) { return Qnil; }
|
387
447
|
|
388
448
|
return NOKOGIRI_STR_NEW2(name);
|
389
449
|
}
|
@@ -394,16 +454,24 @@ static VALUE name(VALUE self)
|
|
394
454
|
*
|
395
455
|
* Get the xml:base of the node
|
396
456
|
*/
|
397
|
-
static VALUE
|
457
|
+
static VALUE
|
458
|
+
rb_xml_reader_base_uri(VALUE rb_reader)
|
398
459
|
{
|
399
|
-
|
400
|
-
|
460
|
+
VALUE rb_base_uri;
|
461
|
+
xmlTextReaderPtr c_reader;
|
462
|
+
xmlChar *c_base_uri;
|
463
|
+
|
464
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
465
|
+
|
466
|
+
c_base_uri = xmlTextReaderBaseUri(c_reader);
|
467
|
+
if (c_base_uri == NULL) {
|
468
|
+
return Qnil;
|
469
|
+
}
|
401
470
|
|
402
|
-
|
403
|
-
|
404
|
-
if (base_uri == NULL) return Qnil;
|
471
|
+
rb_base_uri = NOKOGIRI_STR_NEW2(c_base_uri);
|
472
|
+
xmlFree(c_base_uri);
|
405
473
|
|
406
|
-
return
|
474
|
+
return rb_base_uri;
|
407
475
|
}
|
408
476
|
|
409
477
|
/*
|
@@ -412,11 +480,12 @@ static VALUE base_uri(VALUE self)
|
|
412
480
|
*
|
413
481
|
* Get the state of the reader
|
414
482
|
*/
|
415
|
-
static VALUE
|
483
|
+
static VALUE
|
484
|
+
state(VALUE self)
|
416
485
|
{
|
417
486
|
xmlTextReaderPtr reader;
|
418
|
-
|
419
|
-
return INT2NUM(
|
487
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
488
|
+
return INT2NUM(xmlTextReaderReadState(reader));
|
420
489
|
}
|
421
490
|
|
422
491
|
/*
|
@@ -425,11 +494,12 @@ static VALUE state(VALUE self)
|
|
425
494
|
*
|
426
495
|
* Get the type of readers current node
|
427
496
|
*/
|
428
|
-
static VALUE
|
497
|
+
static VALUE
|
498
|
+
node_type(VALUE self)
|
429
499
|
{
|
430
500
|
xmlTextReaderPtr reader;
|
431
|
-
|
432
|
-
return INT2NUM(
|
501
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
502
|
+
return INT2NUM(xmlTextReaderNodeType(reader));
|
433
503
|
}
|
434
504
|
|
435
505
|
/*
|
@@ -438,14 +508,15 @@ static VALUE node_type(VALUE self)
|
|
438
508
|
*
|
439
509
|
* Move the Reader forward through the XML document.
|
440
510
|
*/
|
441
|
-
static VALUE
|
511
|
+
static VALUE
|
512
|
+
read_more(VALUE self)
|
442
513
|
{
|
443
514
|
xmlTextReaderPtr reader;
|
444
|
-
|
515
|
+
xmlErrorConstPtr error;
|
445
516
|
VALUE error_list;
|
446
517
|
int ret;
|
447
518
|
|
448
|
-
|
519
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
449
520
|
|
450
521
|
error_list = rb_funcall(self, rb_intern("errors"), 0);
|
451
522
|
|
@@ -453,14 +524,15 @@ static VALUE read_more(VALUE self)
|
|
453
524
|
ret = xmlTextReaderRead(reader);
|
454
525
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
455
526
|
|
456
|
-
if(ret == 1) return self;
|
457
|
-
if(ret == 0) return Qnil;
|
527
|
+
if (ret == 1) { return self; }
|
528
|
+
if (ret == 0) { return Qnil; }
|
458
529
|
|
459
530
|
error = xmlGetLastError();
|
460
|
-
if(error)
|
531
|
+
if (error) {
|
461
532
|
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
462
|
-
else
|
533
|
+
} else {
|
463
534
|
rb_raise(rb_eRuntimeError, "Error pulling: %d", ret);
|
535
|
+
}
|
464
536
|
|
465
537
|
return Qnil;
|
466
538
|
}
|
@@ -472,19 +544,20 @@ static VALUE read_more(VALUE self)
|
|
472
544
|
* Read the contents of the current node, including child nodes and markup.
|
473
545
|
* Returns a utf-8 encoded string.
|
474
546
|
*/
|
475
|
-
static VALUE
|
547
|
+
static VALUE
|
548
|
+
inner_xml(VALUE self)
|
476
549
|
{
|
477
550
|
xmlTextReaderPtr reader;
|
478
|
-
xmlChar*
|
551
|
+
xmlChar *value;
|
479
552
|
VALUE str;
|
480
553
|
|
481
|
-
|
554
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
482
555
|
|
483
556
|
value = xmlTextReaderReadInnerXml(reader);
|
484
557
|
|
485
558
|
str = Qnil;
|
486
|
-
if(value) {
|
487
|
-
str = NOKOGIRI_STR_NEW2((char*)value);
|
559
|
+
if (value) {
|
560
|
+
str = NOKOGIRI_STR_NEW2((char *)value);
|
488
561
|
xmlFree(value);
|
489
562
|
}
|
490
563
|
|
@@ -498,18 +571,19 @@ static VALUE inner_xml(VALUE self)
|
|
498
571
|
* Read the current node and its contents, including child nodes and markup.
|
499
572
|
* Returns a utf-8 encoded string.
|
500
573
|
*/
|
501
|
-
static VALUE
|
574
|
+
static VALUE
|
575
|
+
outer_xml(VALUE self)
|
502
576
|
{
|
503
577
|
xmlTextReaderPtr reader;
|
504
578
|
xmlChar *value;
|
505
579
|
VALUE str = Qnil;
|
506
580
|
|
507
|
-
|
581
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
508
582
|
|
509
583
|
value = xmlTextReaderReadOuterXml(reader);
|
510
584
|
|
511
|
-
if(value) {
|
512
|
-
str = NOKOGIRI_STR_NEW2((char*)value);
|
585
|
+
if (value) {
|
586
|
+
str = NOKOGIRI_STR_NEW2((char *)value);
|
513
587
|
xmlFree(value);
|
514
588
|
}
|
515
589
|
return str;
|
@@ -521,36 +595,37 @@ static VALUE outer_xml(VALUE self)
|
|
521
595
|
*
|
522
596
|
* Create a new reader that parses +string+
|
523
597
|
*/
|
524
|
-
static VALUE
|
598
|
+
static VALUE
|
599
|
+
from_memory(int argc, VALUE *argv, VALUE klass)
|
525
600
|
{
|
526
601
|
VALUE rb_buffer, rb_url, encoding, rb_options;
|
527
602
|
xmlTextReaderPtr reader;
|
528
|
-
const char *
|
529
|
-
const char *
|
603
|
+
const char *c_url = NULL;
|
604
|
+
const char *c_encoding = NULL;
|
530
605
|
int c_options = 0;
|
531
606
|
VALUE rb_reader, args[3];
|
532
607
|
|
533
608
|
rb_scan_args(argc, argv, "13", &rb_buffer, &rb_url, &encoding, &rb_options);
|
534
609
|
|
535
|
-
if (!RTEST(rb_buffer)) rb_raise(rb_eArgError, "string cannot be nil");
|
536
|
-
if (RTEST(rb_url)) c_url = StringValueCStr(rb_url);
|
537
|
-
if (RTEST(encoding)) c_encoding = StringValueCStr(encoding);
|
538
|
-
if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);
|
610
|
+
if (!RTEST(rb_buffer)) { rb_raise(rb_eArgError, "string cannot be nil"); }
|
611
|
+
if (RTEST(rb_url)) { c_url = StringValueCStr(rb_url); }
|
612
|
+
if (RTEST(encoding)) { c_encoding = StringValueCStr(encoding); }
|
613
|
+
if (RTEST(rb_options)) { c_options = (int)NUM2INT(rb_options); }
|
539
614
|
|
540
615
|
reader = xmlReaderForMemory(
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
if(reader == NULL) {
|
616
|
+
StringValuePtr(rb_buffer),
|
617
|
+
(int)RSTRING_LEN(rb_buffer),
|
618
|
+
c_url,
|
619
|
+
c_encoding,
|
620
|
+
c_options
|
621
|
+
);
|
622
|
+
|
623
|
+
if (reader == NULL) {
|
549
624
|
xmlFreeTextReader(reader);
|
550
625
|
rb_raise(rb_eRuntimeError, "couldn't create a parser");
|
551
626
|
}
|
552
627
|
|
553
|
-
rb_reader =
|
628
|
+
rb_reader = TypedData_Wrap_Struct(klass, &xml_reader_type, reader);
|
554
629
|
args[0] = rb_buffer;
|
555
630
|
args[1] = rb_url;
|
556
631
|
args[2] = encoding;
|
@@ -565,37 +640,38 @@ static VALUE from_memory(int argc, VALUE *argv, VALUE klass)
|
|
565
640
|
*
|
566
641
|
* Create a new reader that parses +io+
|
567
642
|
*/
|
568
|
-
static VALUE
|
643
|
+
static VALUE
|
644
|
+
from_io(int argc, VALUE *argv, VALUE klass)
|
569
645
|
{
|
570
646
|
VALUE rb_io, rb_url, encoding, rb_options;
|
571
647
|
xmlTextReaderPtr reader;
|
572
|
-
const char *
|
573
|
-
const char *
|
648
|
+
const char *c_url = NULL;
|
649
|
+
const char *c_encoding = NULL;
|
574
650
|
int c_options = 0;
|
575
651
|
VALUE rb_reader, args[3];
|
576
652
|
|
577
653
|
rb_scan_args(argc, argv, "13", &rb_io, &rb_url, &encoding, &rb_options);
|
578
654
|
|
579
|
-
if (!RTEST(rb_io)) rb_raise(rb_eArgError, "io cannot be nil");
|
580
|
-
if (RTEST(rb_url)) c_url = StringValueCStr(rb_url);
|
581
|
-
if (RTEST(encoding)) c_encoding = StringValueCStr(encoding);
|
582
|
-
if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);
|
655
|
+
if (!RTEST(rb_io)) { rb_raise(rb_eArgError, "io cannot be nil"); }
|
656
|
+
if (RTEST(rb_url)) { c_url = StringValueCStr(rb_url); }
|
657
|
+
if (RTEST(encoding)) { c_encoding = StringValueCStr(encoding); }
|
658
|
+
if (RTEST(rb_options)) { c_options = (int)NUM2INT(rb_options); }
|
583
659
|
|
584
660
|
reader = xmlReaderForIO(
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
if(reader == NULL) {
|
661
|
+
(xmlInputReadCallback)noko_io_read,
|
662
|
+
(xmlInputCloseCallback)noko_io_close,
|
663
|
+
(void *)rb_io,
|
664
|
+
c_url,
|
665
|
+
c_encoding,
|
666
|
+
c_options
|
667
|
+
);
|
668
|
+
|
669
|
+
if (reader == NULL) {
|
594
670
|
xmlFreeTextReader(reader);
|
595
671
|
rb_raise(rb_eRuntimeError, "couldn't create a parser");
|
596
672
|
}
|
597
673
|
|
598
|
-
rb_reader =
|
674
|
+
rb_reader = TypedData_Wrap_Struct(klass, &xml_reader_type, reader);
|
599
675
|
args[0] = rb_io;
|
600
676
|
args[1] = rb_url;
|
601
677
|
args[2] = encoding;
|
@@ -610,59 +686,75 @@ static VALUE from_io(int argc, VALUE *argv, VALUE klass)
|
|
610
686
|
*
|
611
687
|
* Returns true if the current node is empty, otherwise false.
|
612
688
|
*/
|
613
|
-
static VALUE
|
689
|
+
static VALUE
|
690
|
+
empty_element_p(VALUE self)
|
614
691
|
{
|
615
692
|
xmlTextReaderPtr reader;
|
616
693
|
|
617
|
-
|
694
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
618
695
|
|
619
|
-
if(xmlTextReaderIsEmptyElement(reader))
|
696
|
+
if (xmlTextReaderIsEmptyElement(reader)) {
|
620
697
|
return Qtrue;
|
698
|
+
}
|
621
699
|
|
622
700
|
return Qfalse;
|
623
701
|
}
|
624
702
|
|
625
|
-
VALUE
|
626
|
-
|
627
|
-
void init_xml_reader()
|
703
|
+
static VALUE
|
704
|
+
rb_xml_reader_encoding(VALUE rb_reader)
|
628
705
|
{
|
629
|
-
|
630
|
-
|
706
|
+
xmlTextReaderPtr c_reader;
|
707
|
+
const char *parser_encoding;
|
708
|
+
VALUE constructor_encoding;
|
631
709
|
|
710
|
+
constructor_encoding = rb_iv_get(rb_reader, "@encoding");
|
711
|
+
if (RTEST(constructor_encoding)) {
|
712
|
+
return constructor_encoding;
|
713
|
+
}
|
714
|
+
|
715
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
716
|
+
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
717
|
+
if (parser_encoding == NULL) { return Qnil; }
|
718
|
+
return NOKOGIRI_STR_NEW2(parser_encoding);
|
719
|
+
}
|
720
|
+
|
721
|
+
void
|
722
|
+
noko_init_xml_reader(void)
|
723
|
+
{
|
632
724
|
/*
|
633
725
|
* The Reader parser allows you to effectively pull parse an XML document.
|
634
726
|
* Once instantiated, call Nokogiri::XML::Reader#each to iterate over each
|
635
727
|
* node. Note that you may only iterate over the document once!
|
636
728
|
*/
|
637
|
-
|
638
|
-
|
639
|
-
cNokogiriXmlReader
|
640
|
-
|
641
|
-
rb_define_singleton_method(
|
642
|
-
rb_define_singleton_method(
|
643
|
-
|
644
|
-
rb_define_method(
|
645
|
-
rb_define_method(
|
646
|
-
rb_define_method(
|
647
|
-
rb_define_method(
|
648
|
-
rb_define_method(
|
649
|
-
rb_define_method(
|
650
|
-
rb_define_method(
|
651
|
-
rb_define_method(
|
652
|
-
rb_define_method(
|
653
|
-
rb_define_method(
|
654
|
-
rb_define_method(
|
655
|
-
rb_define_method(
|
656
|
-
rb_define_method(
|
657
|
-
rb_define_method(
|
658
|
-
rb_define_method(
|
659
|
-
rb_define_method(
|
660
|
-
rb_define_method(
|
661
|
-
rb_define_method(
|
662
|
-
rb_define_method(
|
663
|
-
rb_define_method(
|
664
|
-
rb_define_method(
|
665
|
-
rb_define_method(
|
666
|
-
|
667
|
-
|
729
|
+
cNokogiriXmlReader = rb_define_class_under(mNokogiriXml, "Reader", rb_cObject);
|
730
|
+
|
731
|
+
rb_undef_alloc_func(cNokogiriXmlReader);
|
732
|
+
|
733
|
+
rb_define_singleton_method(cNokogiriXmlReader, "from_memory", from_memory, -1);
|
734
|
+
rb_define_singleton_method(cNokogiriXmlReader, "from_io", from_io, -1);
|
735
|
+
|
736
|
+
rb_define_method(cNokogiriXmlReader, "attribute", reader_attribute, 1);
|
737
|
+
rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1);
|
738
|
+
rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
|
739
|
+
rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0);
|
740
|
+
rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
|
741
|
+
rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
|
742
|
+
rb_define_method(cNokogiriXmlReader, "default?", default_eh, 0);
|
743
|
+
rb_define_method(cNokogiriXmlReader, "depth", depth, 0);
|
744
|
+
rb_define_method(cNokogiriXmlReader, "empty_element?", empty_element_p, 0);
|
745
|
+
rb_define_method(cNokogiriXmlReader, "encoding", rb_xml_reader_encoding, 0);
|
746
|
+
rb_define_method(cNokogiriXmlReader, "inner_xml", inner_xml, 0);
|
747
|
+
rb_define_method(cNokogiriXmlReader, "lang", lang, 0);
|
748
|
+
rb_define_method(cNokogiriXmlReader, "local_name", local_name, 0);
|
749
|
+
rb_define_method(cNokogiriXmlReader, "name", name, 0);
|
750
|
+
rb_define_method(cNokogiriXmlReader, "namespace_uri", namespace_uri, 0);
|
751
|
+
rb_define_method(cNokogiriXmlReader, "namespaces", rb_xml_reader_namespaces, 0);
|
752
|
+
rb_define_method(cNokogiriXmlReader, "node_type", node_type, 0);
|
753
|
+
rb_define_method(cNokogiriXmlReader, "outer_xml", outer_xml, 0);
|
754
|
+
rb_define_method(cNokogiriXmlReader, "prefix", prefix, 0);
|
755
|
+
rb_define_method(cNokogiriXmlReader, "read", read_more, 0);
|
756
|
+
rb_define_method(cNokogiriXmlReader, "state", state, 0);
|
757
|
+
rb_define_method(cNokogiriXmlReader, "value", value, 0);
|
758
|
+
rb_define_method(cNokogiriXmlReader, "value?", value_eh, 0);
|
759
|
+
rb_define_method(cNokogiriXmlReader, "xml_version", xml_version, 0);
|
668
760
|
}
|