nokogiri 1.10.9 → 1.18.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +1632 -1022
- data/LICENSE.md +1 -1
- data/README.md +190 -95
- data/bin/nokogiri +63 -50
- data/dependencies.yml +34 -66
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +909 -422
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +258 -105
- data/ext/nokogiri/nokogiri.h +207 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +18 -18
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +33 -33
- data/ext/nokogiri/xml_comment.c +19 -31
- data/ext/nokogiri/xml_document.c +499 -323
- data/ext/nokogiri/xml_document_fragment.c +17 -36
- data/ext/nokogiri/xml_dtd.c +65 -59
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +131 -61
- data/ext/nokogiri/xml_node.c +1429 -723
- data/ext/nokogiri/xml_node_set.c +257 -225
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +340 -231
- data/ext/nokogiri/xml_relax_ng.c +87 -99
- data/ext/nokogiri/xml_sax_parser.c +269 -176
- data/ext/nokogiri/xml_sax_parser_context.c +286 -152
- data/ext/nokogiri/xml_sax_push_parser.c +111 -64
- data/ext/nokogiri/xml_schema.c +132 -140
- data/ext/nokogiri/xml_syntax_error.c +52 -23
- data/ext/nokogiri/xml_text.c +37 -30
- data/ext/nokogiri/xml_xpath_context.c +373 -185
- data/ext/nokogiri/xslt_stylesheet.c +342 -191
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +658 -0
- data/gumbo-parser/src/error.h +152 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
- data/gumbo-parser/src/parser.c +4932 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3464 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +14 -8
- data/lib/nokogiri/css/parser.rb +399 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +16 -71
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +7 -5
- data/lib/nokogiri/css/tokenizer.rex +11 -9
- data/lib/nokogiri/css/xpath_visitor.rb +242 -96
- data/lib/nokogiri/css.rb +122 -17
- data/lib/nokogiri/decorators/slop.rb +11 -11
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +83 -35
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +359 -130
- data/lib/nokogiri/xml/document_fragment.rb +170 -54
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1168 -420
- data/lib/nokogiri/xml/node_set.rb +145 -67
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +47 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +68 -41
- data/lib/nokogiri/xml/relax_ng.rb +60 -17
- data/lib/nokogiri/xml/sax/document.rb +198 -111
- data/lib/nokogiri/xml/sax/parser.rb +144 -67
- data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
- data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
- data/lib/nokogiri/xml/sax.rb +54 -4
- data/lib/nokogiri/xml/schema.rb +116 -39
- data/lib/nokogiri/xml/searchable.rb +139 -95
- data/lib/nokogiri/xml/syntax_error.rb +29 -5
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +15 -4
- data/lib/nokogiri/xml.rb +45 -55
- data/lib/nokogiri/xslt/stylesheet.rb +32 -8
- data/lib/nokogiri/xslt.rb +103 -30
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +32 -29
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +123 -295
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser.rb +0 -62
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -1,13 +1,30 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
|
-
|
3
|
+
VALUE cNokogiriXmlReader;
|
4
|
+
|
5
|
+
static void
|
6
|
+
xml_reader_deallocate(void *data)
|
4
7
|
{
|
5
|
-
|
8
|
+
// free the document separately because we _may_ have triggered preservation by calling
|
9
|
+
// xmlTextReaderCurrentDoc during a read_more.
|
10
|
+
xmlTextReaderPtr reader = data;
|
11
|
+
xmlDocPtr doc = xmlTextReaderCurrentDoc(reader);
|
6
12
|
xmlFreeTextReader(reader);
|
7
|
-
|
13
|
+
if (doc) {
|
14
|
+
xmlFreeDoc(doc);
|
15
|
+
}
|
8
16
|
}
|
9
17
|
|
10
|
-
static
|
18
|
+
static const rb_data_type_t xml_text_reader_type = {
|
19
|
+
.wrap_struct_name = "xmlTextReader",
|
20
|
+
.function = {
|
21
|
+
.dfree = xml_reader_deallocate,
|
22
|
+
},
|
23
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
24
|
+
};
|
25
|
+
|
26
|
+
static int
|
27
|
+
has_attributes(xmlTextReaderPtr reader)
|
11
28
|
{
|
12
29
|
/*
|
13
30
|
* this implementation of xmlTextReaderHasAttributes explicitly includes
|
@@ -16,47 +33,40 @@ static int has_attributes(xmlTextReaderPtr reader)
|
|
16
33
|
*/
|
17
34
|
xmlNodePtr node ;
|
18
35
|
node = xmlTextReaderCurrentNode(reader);
|
19
|
-
if (node == NULL)
|
20
|
-
return(0);
|
36
|
+
if (node == NULL) {
|
37
|
+
return (0);
|
38
|
+
}
|
21
39
|
|
22
40
|
if ((node->type == XML_ELEMENT_NODE) &&
|
23
|
-
((node->properties != NULL) || (node->nsDef != NULL)))
|
24
|
-
return(1);
|
25
|
-
|
41
|
+
((node->properties != NULL) || (node->nsDef != NULL))) {
|
42
|
+
return (1);
|
43
|
+
}
|
44
|
+
return (0);
|
26
45
|
}
|
27
46
|
|
28
|
-
|
47
|
+
// TODO: merge this function into the `namespaces` method implementation
|
48
|
+
static void
|
49
|
+
Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
|
29
50
|
{
|
30
51
|
xmlNsPtr ns;
|
31
|
-
|
32
|
-
char *key ;
|
33
|
-
size_t keylen ;
|
52
|
+
VALUE key;
|
34
53
|
|
35
|
-
if (node->type != XML_ELEMENT_NODE) return ;
|
54
|
+
if (node->type != XML_ELEMENT_NODE) { return ; }
|
36
55
|
|
37
56
|
ns = node->nsDef;
|
38
57
|
while (ns != NULL) {
|
39
58
|
|
40
|
-
|
41
|
-
if (keylen > XMLNS_BUFFER_LEN) {
|
42
|
-
key = (char*)malloc(keylen) ;
|
43
|
-
} else {
|
44
|
-
key = buffer ;
|
45
|
-
}
|
46
|
-
|
59
|
+
key = rb_enc_str_new_cstr(XMLNS_PREFIX, rb_utf8_encoding());
|
47
60
|
if (ns->prefix) {
|
48
|
-
|
49
|
-
|
50
|
-
sprintf(key, "%s", XMLNS_PREFIX);
|
61
|
+
rb_str_cat_cstr(key, ":");
|
62
|
+
rb_str_cat_cstr(key, (const char *)ns->prefix);
|
51
63
|
}
|
52
64
|
|
65
|
+
key = rb_str_conv_enc(key, rb_utf8_encoding(), rb_default_internal_encoding());
|
53
66
|
rb_hash_aset(attr_hash,
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
if (key != buffer) {
|
58
|
-
free(key);
|
59
|
-
}
|
67
|
+
key,
|
68
|
+
(ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil)
|
69
|
+
);
|
60
70
|
ns = ns->next ;
|
61
71
|
}
|
62
72
|
}
|
@@ -68,15 +78,16 @@ static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
|
|
68
78
|
*
|
69
79
|
* Was an attribute generated from the default value in the DTD or schema?
|
70
80
|
*/
|
71
|
-
static VALUE
|
81
|
+
static VALUE
|
82
|
+
default_eh(VALUE self)
|
72
83
|
{
|
73
84
|
xmlTextReaderPtr reader;
|
74
85
|
int eh;
|
75
86
|
|
76
|
-
|
87
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
77
88
|
eh = xmlTextReaderIsDefault(reader);
|
78
|
-
if(eh == 0) return Qfalse;
|
79
|
-
if(eh == 1) return Qtrue;
|
89
|
+
if (eh == 0) { return Qfalse; }
|
90
|
+
if (eh == 1) { return Qtrue; }
|
80
91
|
|
81
92
|
return Qnil;
|
82
93
|
}
|
@@ -87,15 +98,16 @@ static VALUE default_eh(VALUE self)
|
|
87
98
|
*
|
88
99
|
* Does this node have a text value?
|
89
100
|
*/
|
90
|
-
static VALUE
|
101
|
+
static VALUE
|
102
|
+
value_eh(VALUE self)
|
91
103
|
{
|
92
104
|
xmlTextReaderPtr reader;
|
93
105
|
int eh;
|
94
106
|
|
95
|
-
|
107
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
96
108
|
eh = xmlTextReaderHasValue(reader);
|
97
|
-
if(eh == 0) return Qfalse;
|
98
|
-
if(eh == 1) return Qtrue;
|
109
|
+
if (eh == 0) { return Qfalse; }
|
110
|
+
if (eh == 1) { return Qtrue; }
|
99
111
|
|
100
112
|
return Qnil;
|
101
113
|
}
|
@@ -106,15 +118,16 @@ static VALUE value_eh(VALUE self)
|
|
106
118
|
*
|
107
119
|
* Does this node have attributes?
|
108
120
|
*/
|
109
|
-
static VALUE
|
121
|
+
static VALUE
|
122
|
+
attributes_eh(VALUE self)
|
110
123
|
{
|
111
124
|
xmlTextReaderPtr reader;
|
112
125
|
int eh;
|
113
126
|
|
114
|
-
|
127
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
115
128
|
eh = has_attributes(reader);
|
116
|
-
if(eh == 0) return Qfalse;
|
117
|
-
if(eh == 1) return Qtrue;
|
129
|
+
if (eh == 0) { return Qfalse; }
|
130
|
+
if (eh == 1) { return Qtrue; }
|
118
131
|
|
119
132
|
return Qnil;
|
120
133
|
}
|
@@ -125,52 +138,94 @@ static VALUE attributes_eh(VALUE self)
|
|
125
138
|
*
|
126
139
|
* Get a hash of namespaces for this Node
|
127
140
|
*/
|
128
|
-
static VALUE
|
141
|
+
static VALUE
|
142
|
+
rb_xml_reader_namespaces(VALUE rb_reader)
|
129
143
|
{
|
130
|
-
|
131
|
-
|
132
|
-
|
144
|
+
VALUE rb_namespaces = rb_hash_new() ;
|
145
|
+
xmlTextReaderPtr c_reader;
|
146
|
+
xmlNodePtr c_node;
|
147
|
+
VALUE rb_errors;
|
133
148
|
|
134
|
-
|
149
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
|
135
150
|
|
136
|
-
|
151
|
+
if (! has_attributes(c_reader)) {
|
152
|
+
return rb_namespaces ;
|
153
|
+
}
|
137
154
|
|
138
|
-
|
139
|
-
return attr ;
|
155
|
+
rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
|
140
156
|
|
141
|
-
|
142
|
-
|
157
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
|
158
|
+
c_node = xmlTextReaderExpand(c_reader);
|
159
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
143
160
|
|
144
|
-
|
161
|
+
if (c_node == NULL) {
|
162
|
+
if (RARRAY_LEN(rb_errors) > 0) {
|
163
|
+
VALUE rb_error = rb_ary_entry(rb_errors, 0);
|
164
|
+
VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
|
165
|
+
rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
|
166
|
+
}
|
167
|
+
return Qnil;
|
168
|
+
}
|
169
|
+
|
170
|
+
Nokogiri_xml_node_namespaces(c_node, rb_namespaces);
|
145
171
|
|
146
|
-
return
|
172
|
+
return rb_namespaces ;
|
147
173
|
}
|
148
174
|
|
149
175
|
/*
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
176
|
+
:call-seq: attribute_hash() → Hash<String ⇒ String>
|
177
|
+
|
178
|
+
Get the attributes of the current node as a Hash of names and values.
|
179
|
+
|
180
|
+
See related: #attributes and #namespaces
|
154
181
|
*/
|
155
|
-
static VALUE
|
182
|
+
static VALUE
|
183
|
+
rb_xml_reader_attribute_hash(VALUE rb_reader)
|
156
184
|
{
|
157
|
-
|
158
|
-
|
159
|
-
|
185
|
+
VALUE rb_attributes = rb_hash_new();
|
186
|
+
xmlTextReaderPtr c_reader;
|
187
|
+
xmlNodePtr c_node;
|
188
|
+
xmlAttrPtr c_property;
|
189
|
+
VALUE rb_errors;
|
160
190
|
|
161
|
-
|
191
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
|
162
192
|
|
163
|
-
|
193
|
+
if (!has_attributes(c_reader)) {
|
194
|
+
return rb_attributes;
|
195
|
+
}
|
164
196
|
|
165
|
-
|
166
|
-
return attr ;
|
197
|
+
rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
|
167
198
|
|
168
|
-
|
169
|
-
|
199
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
|
200
|
+
c_node = xmlTextReaderExpand(c_reader);
|
201
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
170
202
|
|
171
|
-
|
203
|
+
if (c_node == NULL) {
|
204
|
+
if (RARRAY_LEN(rb_errors) > 0) {
|
205
|
+
VALUE rb_error = rb_ary_entry(rb_errors, 0);
|
206
|
+
VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
|
207
|
+
rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
|
208
|
+
}
|
209
|
+
return Qnil;
|
210
|
+
}
|
211
|
+
|
212
|
+
c_property = c_node->properties;
|
213
|
+
while (c_property != NULL) {
|
214
|
+
VALUE rb_name = NOKOGIRI_STR_NEW2(c_property->name);
|
215
|
+
VALUE rb_value = Qnil;
|
216
|
+
xmlChar *c_value = xmlNodeGetContent((xmlNode *)c_property);
|
217
|
+
|
218
|
+
if (c_value) {
|
219
|
+
rb_value = NOKOGIRI_STR_NEW2(c_value);
|
220
|
+
xmlFree(c_value);
|
221
|
+
}
|
172
222
|
|
173
|
-
|
223
|
+
rb_hash_aset(rb_attributes, rb_name, rb_value);
|
224
|
+
|
225
|
+
c_property = c_property->next;
|
226
|
+
}
|
227
|
+
|
228
|
+
return rb_attributes;
|
174
229
|
}
|
175
230
|
|
176
231
|
/*
|
@@ -179,22 +234,23 @@ static VALUE attribute_nodes(VALUE self)
|
|
179
234
|
*
|
180
235
|
* Get the value of attribute at +index+
|
181
236
|
*/
|
182
|
-
static VALUE
|
237
|
+
static VALUE
|
238
|
+
attribute_at(VALUE self, VALUE index)
|
183
239
|
{
|
184
240
|
xmlTextReaderPtr reader;
|
185
241
|
xmlChar *value;
|
186
242
|
VALUE rb_value;
|
187
243
|
|
188
|
-
|
244
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
189
245
|
|
190
|
-
if(NIL_P(index)) return Qnil;
|
246
|
+
if (NIL_P(index)) { return Qnil; }
|
191
247
|
index = rb_Integer(index);
|
192
248
|
|
193
249
|
value = xmlTextReaderGetAttributeNo(
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
if(value == NULL) return Qnil;
|
250
|
+
reader,
|
251
|
+
(int)NUM2INT(index)
|
252
|
+
);
|
253
|
+
if (value == NULL) { return Qnil; }
|
198
254
|
|
199
255
|
rb_value = NOKOGIRI_STR_NEW2(value);
|
200
256
|
xmlFree(value);
|
@@ -207,19 +263,20 @@ static VALUE attribute_at(VALUE self, VALUE index)
|
|
207
263
|
*
|
208
264
|
* Get the value of attribute named +name+
|
209
265
|
*/
|
210
|
-
static VALUE
|
266
|
+
static VALUE
|
267
|
+
reader_attribute(VALUE self, VALUE name)
|
211
268
|
{
|
212
269
|
xmlTextReaderPtr reader;
|
213
270
|
xmlChar *value ;
|
214
271
|
VALUE rb_value;
|
215
272
|
|
216
|
-
|
273
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
217
274
|
|
218
|
-
if(NIL_P(name)) return Qnil;
|
275
|
+
if (NIL_P(name)) { return Qnil; }
|
219
276
|
name = StringValue(name) ;
|
220
277
|
|
221
|
-
value = xmlTextReaderGetAttribute(reader, (xmlChar*)StringValueCStr(name));
|
222
|
-
if(value == NULL) return Qnil;
|
278
|
+
value = xmlTextReaderGetAttribute(reader, (xmlChar *)StringValueCStr(name));
|
279
|
+
if (value == NULL) { return Qnil; }
|
223
280
|
|
224
281
|
rb_value = NOKOGIRI_STR_NEW2(value);
|
225
282
|
xmlFree(value);
|
@@ -232,16 +289,17 @@ static VALUE reader_attribute(VALUE self, VALUE name)
|
|
232
289
|
*
|
233
290
|
* Get the number of attributes for the current node
|
234
291
|
*/
|
235
|
-
static VALUE
|
292
|
+
static VALUE
|
293
|
+
attribute_count(VALUE self)
|
236
294
|
{
|
237
295
|
xmlTextReaderPtr reader;
|
238
296
|
int count;
|
239
297
|
|
240
|
-
|
298
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
241
299
|
count = xmlTextReaderAttributeCount(reader);
|
242
|
-
if(count == -1) return Qnil;
|
300
|
+
if (count == -1) { return Qnil; }
|
243
301
|
|
244
|
-
return INT2NUM(
|
302
|
+
return INT2NUM(count);
|
245
303
|
}
|
246
304
|
|
247
305
|
/*
|
@@ -250,16 +308,17 @@ static VALUE attribute_count(VALUE self)
|
|
250
308
|
*
|
251
309
|
* Get the depth of the node
|
252
310
|
*/
|
253
|
-
static VALUE
|
311
|
+
static VALUE
|
312
|
+
depth(VALUE self)
|
254
313
|
{
|
255
314
|
xmlTextReaderPtr reader;
|
256
315
|
int depth;
|
257
316
|
|
258
|
-
|
317
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
259
318
|
depth = xmlTextReaderDepth(reader);
|
260
|
-
if(depth == -1) return Qnil;
|
319
|
+
if (depth == -1) { return Qnil; }
|
261
320
|
|
262
|
-
return INT2NUM(
|
321
|
+
return INT2NUM(depth);
|
263
322
|
}
|
264
323
|
|
265
324
|
/*
|
@@ -268,14 +327,15 @@ static VALUE depth(VALUE self)
|
|
268
327
|
*
|
269
328
|
* Get the XML version of the document being read
|
270
329
|
*/
|
271
|
-
static VALUE
|
330
|
+
static VALUE
|
331
|
+
xml_version(VALUE self)
|
272
332
|
{
|
273
333
|
xmlTextReaderPtr reader;
|
274
334
|
const char *version;
|
275
335
|
|
276
|
-
|
336
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
277
337
|
version = (const char *)xmlTextReaderConstXmlVersion(reader);
|
278
|
-
if(version == NULL) return Qnil;
|
338
|
+
if (version == NULL) { return Qnil; }
|
279
339
|
|
280
340
|
return NOKOGIRI_STR_NEW2(version);
|
281
341
|
}
|
@@ -286,14 +346,15 @@ static VALUE xml_version(VALUE self)
|
|
286
346
|
*
|
287
347
|
* Get the xml:lang scope within which the node resides.
|
288
348
|
*/
|
289
|
-
static VALUE
|
349
|
+
static VALUE
|
350
|
+
lang(VALUE self)
|
290
351
|
{
|
291
352
|
xmlTextReaderPtr reader;
|
292
353
|
const char *lang;
|
293
354
|
|
294
|
-
|
355
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
295
356
|
lang = (const char *)xmlTextReaderConstXmlLang(reader);
|
296
|
-
if(lang == NULL) return Qnil;
|
357
|
+
if (lang == NULL) { return Qnil; }
|
297
358
|
|
298
359
|
return NOKOGIRI_STR_NEW2(lang);
|
299
360
|
}
|
@@ -304,14 +365,15 @@ static VALUE lang(VALUE self)
|
|
304
365
|
*
|
305
366
|
* Get the text value of the node if present. Returns a utf-8 encoded string.
|
306
367
|
*/
|
307
|
-
static VALUE
|
368
|
+
static VALUE
|
369
|
+
value(VALUE self)
|
308
370
|
{
|
309
371
|
xmlTextReaderPtr reader;
|
310
372
|
const char *value;
|
311
373
|
|
312
|
-
|
374
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
313
375
|
value = (const char *)xmlTextReaderConstValue(reader);
|
314
|
-
if(value == NULL) return Qnil;
|
376
|
+
if (value == NULL) { return Qnil; }
|
315
377
|
|
316
378
|
return NOKOGIRI_STR_NEW2(value);
|
317
379
|
}
|
@@ -322,14 +384,15 @@ static VALUE value(VALUE self)
|
|
322
384
|
*
|
323
385
|
* Get the shorthand reference to the namespace associated with the node.
|
324
386
|
*/
|
325
|
-
static VALUE
|
387
|
+
static VALUE
|
388
|
+
prefix(VALUE self)
|
326
389
|
{
|
327
390
|
xmlTextReaderPtr reader;
|
328
391
|
const char *prefix;
|
329
392
|
|
330
|
-
|
393
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
331
394
|
prefix = (const char *)xmlTextReaderConstPrefix(reader);
|
332
|
-
if(prefix == NULL) return Qnil;
|
395
|
+
if (prefix == NULL) { return Qnil; }
|
333
396
|
|
334
397
|
return NOKOGIRI_STR_NEW2(prefix);
|
335
398
|
}
|
@@ -340,14 +403,15 @@ static VALUE prefix(VALUE self)
|
|
340
403
|
*
|
341
404
|
* Get the URI defining the namespace associated with the node
|
342
405
|
*/
|
343
|
-
static VALUE
|
406
|
+
static VALUE
|
407
|
+
namespace_uri(VALUE self)
|
344
408
|
{
|
345
409
|
xmlTextReaderPtr reader;
|
346
410
|
const char *uri;
|
347
411
|
|
348
|
-
|
412
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
349
413
|
uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
|
350
|
-
if(uri == NULL) return Qnil;
|
414
|
+
if (uri == NULL) { return Qnil; }
|
351
415
|
|
352
416
|
return NOKOGIRI_STR_NEW2(uri);
|
353
417
|
}
|
@@ -358,14 +422,15 @@ static VALUE namespace_uri(VALUE self)
|
|
358
422
|
*
|
359
423
|
* Get the local name of the node
|
360
424
|
*/
|
361
|
-
static VALUE
|
425
|
+
static VALUE
|
426
|
+
local_name(VALUE self)
|
362
427
|
{
|
363
428
|
xmlTextReaderPtr reader;
|
364
429
|
const char *name;
|
365
430
|
|
366
|
-
|
431
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
367
432
|
name = (const char *)xmlTextReaderConstLocalName(reader);
|
368
|
-
if(name == NULL) return Qnil;
|
433
|
+
if (name == NULL) { return Qnil; }
|
369
434
|
|
370
435
|
return NOKOGIRI_STR_NEW2(name);
|
371
436
|
}
|
@@ -376,14 +441,15 @@ static VALUE local_name(VALUE self)
|
|
376
441
|
*
|
377
442
|
* Get the name of the node. Returns a utf-8 encoded string.
|
378
443
|
*/
|
379
|
-
static VALUE
|
444
|
+
static VALUE
|
445
|
+
name(VALUE self)
|
380
446
|
{
|
381
447
|
xmlTextReaderPtr reader;
|
382
448
|
const char *name;
|
383
449
|
|
384
|
-
|
450
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
385
451
|
name = (const char *)xmlTextReaderConstName(reader);
|
386
|
-
if(name == NULL) return Qnil;
|
452
|
+
if (name == NULL) { return Qnil; }
|
387
453
|
|
388
454
|
return NOKOGIRI_STR_NEW2(name);
|
389
455
|
}
|
@@ -394,16 +460,24 @@ static VALUE name(VALUE self)
|
|
394
460
|
*
|
395
461
|
* Get the xml:base of the node
|
396
462
|
*/
|
397
|
-
static VALUE
|
463
|
+
static VALUE
|
464
|
+
rb_xml_reader_base_uri(VALUE rb_reader)
|
398
465
|
{
|
399
|
-
|
400
|
-
|
466
|
+
VALUE rb_base_uri;
|
467
|
+
xmlTextReaderPtr c_reader;
|
468
|
+
xmlChar *c_base_uri;
|
469
|
+
|
470
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
|
471
|
+
|
472
|
+
c_base_uri = xmlTextReaderBaseUri(c_reader);
|
473
|
+
if (c_base_uri == NULL) {
|
474
|
+
return Qnil;
|
475
|
+
}
|
401
476
|
|
402
|
-
|
403
|
-
|
404
|
-
if (base_uri == NULL) return Qnil;
|
477
|
+
rb_base_uri = NOKOGIRI_STR_NEW2(c_base_uri);
|
478
|
+
xmlFree(c_base_uri);
|
405
479
|
|
406
|
-
return
|
480
|
+
return rb_base_uri;
|
407
481
|
}
|
408
482
|
|
409
483
|
/*
|
@@ -412,11 +486,12 @@ static VALUE base_uri(VALUE self)
|
|
412
486
|
*
|
413
487
|
* Get the state of the reader
|
414
488
|
*/
|
415
|
-
static VALUE
|
489
|
+
static VALUE
|
490
|
+
state(VALUE self)
|
416
491
|
{
|
417
492
|
xmlTextReaderPtr reader;
|
418
|
-
|
419
|
-
return INT2NUM(
|
493
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
494
|
+
return INT2NUM(xmlTextReaderReadState(reader));
|
420
495
|
}
|
421
496
|
|
422
497
|
/*
|
@@ -425,11 +500,12 @@ static VALUE state(VALUE self)
|
|
425
500
|
*
|
426
501
|
* Get the type of readers current node
|
427
502
|
*/
|
428
|
-
static VALUE
|
503
|
+
static VALUE
|
504
|
+
node_type(VALUE self)
|
429
505
|
{
|
430
506
|
xmlTextReaderPtr reader;
|
431
|
-
|
432
|
-
return INT2NUM(
|
507
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
508
|
+
return INT2NUM(xmlTextReaderNodeType(reader));
|
433
509
|
}
|
434
510
|
|
435
511
|
/*
|
@@ -438,31 +514,42 @@ static VALUE node_type(VALUE self)
|
|
438
514
|
*
|
439
515
|
* Move the Reader forward through the XML document.
|
440
516
|
*/
|
441
|
-
static VALUE
|
517
|
+
static VALUE
|
518
|
+
read_more(VALUE rb_reader)
|
442
519
|
{
|
443
|
-
xmlTextReaderPtr
|
444
|
-
|
445
|
-
VALUE error_list;
|
446
|
-
int ret;
|
520
|
+
xmlTextReaderPtr c_reader;
|
521
|
+
libxmlStructuredErrorHandlerState handler_state;
|
447
522
|
|
448
|
-
|
523
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
|
449
524
|
|
450
|
-
|
525
|
+
VALUE rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
|
526
|
+
noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
|
451
527
|
|
452
|
-
|
453
|
-
ret = xmlTextReaderRead(reader);
|
454
|
-
xmlSetStructuredErrorFunc(NULL, NULL);
|
528
|
+
int status = xmlTextReaderRead(c_reader);
|
455
529
|
|
456
|
-
|
457
|
-
if(ret == 0) return Qnil;
|
530
|
+
noko__structured_error_func_restore(&handler_state);
|
458
531
|
|
459
|
-
|
460
|
-
if(
|
461
|
-
|
462
|
-
|
463
|
-
|
532
|
+
xmlDocPtr c_document = xmlTextReaderCurrentDoc(c_reader);
|
533
|
+
if (c_document && c_document->encoding == NULL) {
|
534
|
+
VALUE constructor_encoding = rb_iv_get(rb_reader, "@encoding");
|
535
|
+
if (RTEST(constructor_encoding)) {
|
536
|
+
c_document->encoding = xmlStrdup(BAD_CAST StringValueCStr(constructor_encoding));
|
537
|
+
} else {
|
538
|
+
rb_iv_set(rb_reader, "@encoding", NOKOGIRI_STR_NEW2("UTF-8"));
|
539
|
+
c_document->encoding = xmlStrdup(BAD_CAST "UTF-8");
|
540
|
+
}
|
541
|
+
}
|
464
542
|
|
465
|
-
return
|
543
|
+
if (status == 1) { return rb_reader; }
|
544
|
+
if (status == 0) { return Qnil; }
|
545
|
+
|
546
|
+
/* if we're here, there was an error */
|
547
|
+
VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
|
548
|
+
if (RB_TEST(exception)) {
|
549
|
+
rb_exc_raise(exception);
|
550
|
+
} else {
|
551
|
+
rb_raise(rb_eRuntimeError, "Error pulling: %d", status);
|
552
|
+
}
|
466
553
|
}
|
467
554
|
|
468
555
|
/*
|
@@ -472,19 +559,20 @@ static VALUE read_more(VALUE self)
|
|
472
559
|
* Read the contents of the current node, including child nodes and markup.
|
473
560
|
* Returns a utf-8 encoded string.
|
474
561
|
*/
|
475
|
-
static VALUE
|
562
|
+
static VALUE
|
563
|
+
inner_xml(VALUE self)
|
476
564
|
{
|
477
565
|
xmlTextReaderPtr reader;
|
478
|
-
xmlChar*
|
566
|
+
xmlChar *value;
|
479
567
|
VALUE str;
|
480
568
|
|
481
|
-
|
569
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
482
570
|
|
483
571
|
value = xmlTextReaderReadInnerXml(reader);
|
484
572
|
|
485
573
|
str = Qnil;
|
486
|
-
if(value) {
|
487
|
-
str = NOKOGIRI_STR_NEW2((char*)value);
|
574
|
+
if (value) {
|
575
|
+
str = NOKOGIRI_STR_NEW2((char *)value);
|
488
576
|
xmlFree(value);
|
489
577
|
}
|
490
578
|
|
@@ -498,18 +586,19 @@ static VALUE inner_xml(VALUE self)
|
|
498
586
|
* Read the current node and its contents, including child nodes and markup.
|
499
587
|
* Returns a utf-8 encoded string.
|
500
588
|
*/
|
501
|
-
static VALUE
|
589
|
+
static VALUE
|
590
|
+
outer_xml(VALUE self)
|
502
591
|
{
|
503
592
|
xmlTextReaderPtr reader;
|
504
593
|
xmlChar *value;
|
505
594
|
VALUE str = Qnil;
|
506
595
|
|
507
|
-
|
596
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
508
597
|
|
509
598
|
value = xmlTextReaderReadOuterXml(reader);
|
510
599
|
|
511
|
-
if(value) {
|
512
|
-
str = NOKOGIRI_STR_NEW2((char*)value);
|
600
|
+
if (value) {
|
601
|
+
str = NOKOGIRI_STR_NEW2((char *)value);
|
513
602
|
xmlFree(value);
|
514
603
|
}
|
515
604
|
return str;
|
@@ -519,38 +608,41 @@ static VALUE outer_xml(VALUE self)
|
|
519
608
|
* call-seq:
|
520
609
|
* from_memory(string, url = nil, encoding = nil, options = 0)
|
521
610
|
*
|
522
|
-
* Create a new
|
611
|
+
* Create a new Reader to parse a String.
|
523
612
|
*/
|
524
|
-
static VALUE
|
613
|
+
static VALUE
|
614
|
+
from_memory(int argc, VALUE *argv, VALUE klass)
|
525
615
|
{
|
616
|
+
/* TODO: deprecate this method, since Reader.new can handle both memory and IO. It can then
|
617
|
+
* become private. */
|
526
618
|
VALUE rb_buffer, rb_url, encoding, rb_options;
|
527
619
|
xmlTextReaderPtr reader;
|
528
|
-
const char *
|
529
|
-
const char *
|
620
|
+
const char *c_url = NULL;
|
621
|
+
const char *c_encoding = NULL;
|
530
622
|
int c_options = 0;
|
531
623
|
VALUE rb_reader, args[3];
|
532
624
|
|
533
625
|
rb_scan_args(argc, argv, "13", &rb_buffer, &rb_url, &encoding, &rb_options);
|
534
626
|
|
535
|
-
if (!RTEST(rb_buffer)) rb_raise(rb_eArgError, "string cannot be nil");
|
536
|
-
if (RTEST(rb_url)) c_url = StringValueCStr(rb_url);
|
537
|
-
if (RTEST(encoding)) c_encoding = StringValueCStr(encoding);
|
538
|
-
if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);
|
627
|
+
if (!RTEST(rb_buffer)) { rb_raise(rb_eArgError, "string cannot be nil"); }
|
628
|
+
if (RTEST(rb_url)) { c_url = StringValueCStr(rb_url); }
|
629
|
+
if (RTEST(encoding)) { c_encoding = StringValueCStr(encoding); }
|
630
|
+
if (RTEST(rb_options)) { c_options = (int)NUM2INT(rb_options); }
|
539
631
|
|
540
632
|
reader = xmlReaderForMemory(
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
if(reader == NULL) {
|
633
|
+
StringValuePtr(rb_buffer),
|
634
|
+
(int)RSTRING_LEN(rb_buffer),
|
635
|
+
c_url,
|
636
|
+
c_encoding,
|
637
|
+
c_options
|
638
|
+
);
|
639
|
+
|
640
|
+
if (reader == NULL) {
|
549
641
|
xmlFreeTextReader(reader);
|
550
642
|
rb_raise(rb_eRuntimeError, "couldn't create a parser");
|
551
643
|
}
|
552
644
|
|
553
|
-
rb_reader =
|
645
|
+
rb_reader = TypedData_Wrap_Struct(klass, &xml_text_reader_type, reader);
|
554
646
|
args[0] = rb_buffer;
|
555
647
|
args[1] = rb_url;
|
556
648
|
args[2] = encoding;
|
@@ -563,39 +655,42 @@ static VALUE from_memory(int argc, VALUE *argv, VALUE klass)
|
|
563
655
|
* call-seq:
|
564
656
|
* from_io(io, url = nil, encoding = nil, options = 0)
|
565
657
|
*
|
566
|
-
* Create a new
|
658
|
+
* Create a new Reader to parse an IO stream.
|
567
659
|
*/
|
568
|
-
static VALUE
|
660
|
+
static VALUE
|
661
|
+
from_io(int argc, VALUE *argv, VALUE klass)
|
569
662
|
{
|
663
|
+
/* TODO: deprecate this method, since Reader.new can handle both memory and IO. It can then
|
664
|
+
* become private. */
|
570
665
|
VALUE rb_io, rb_url, encoding, rb_options;
|
571
666
|
xmlTextReaderPtr reader;
|
572
|
-
const char *
|
573
|
-
const char *
|
667
|
+
const char *c_url = NULL;
|
668
|
+
const char *c_encoding = NULL;
|
574
669
|
int c_options = 0;
|
575
670
|
VALUE rb_reader, args[3];
|
576
671
|
|
577
672
|
rb_scan_args(argc, argv, "13", &rb_io, &rb_url, &encoding, &rb_options);
|
578
673
|
|
579
|
-
if (!RTEST(rb_io)) rb_raise(rb_eArgError, "io cannot be nil");
|
580
|
-
if (RTEST(rb_url)) c_url = StringValueCStr(rb_url);
|
581
|
-
if (RTEST(encoding)) c_encoding = StringValueCStr(encoding);
|
582
|
-
if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);
|
674
|
+
if (!RTEST(rb_io)) { rb_raise(rb_eArgError, "io cannot be nil"); }
|
675
|
+
if (RTEST(rb_url)) { c_url = StringValueCStr(rb_url); }
|
676
|
+
if (RTEST(encoding)) { c_encoding = StringValueCStr(encoding); }
|
677
|
+
if (RTEST(rb_options)) { c_options = (int)NUM2INT(rb_options); }
|
583
678
|
|
584
679
|
reader = xmlReaderForIO(
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
if(reader == NULL) {
|
680
|
+
(xmlInputReadCallback)noko_io_read,
|
681
|
+
(xmlInputCloseCallback)noko_io_close,
|
682
|
+
(void *)rb_io,
|
683
|
+
c_url,
|
684
|
+
c_encoding,
|
685
|
+
c_options
|
686
|
+
);
|
687
|
+
|
688
|
+
if (reader == NULL) {
|
594
689
|
xmlFreeTextReader(reader);
|
595
690
|
rb_raise(rb_eRuntimeError, "couldn't create a parser");
|
596
691
|
}
|
597
692
|
|
598
|
-
rb_reader =
|
693
|
+
rb_reader = TypedData_Wrap_Struct(klass, &xml_text_reader_type, reader);
|
599
694
|
args[0] = rb_io;
|
600
695
|
args[1] = rb_url;
|
601
696
|
args[2] = encoding;
|
@@ -610,59 +705,73 @@ static VALUE from_io(int argc, VALUE *argv, VALUE klass)
|
|
610
705
|
*
|
611
706
|
* Returns true if the current node is empty, otherwise false.
|
612
707
|
*/
|
613
|
-
static VALUE
|
708
|
+
static VALUE
|
709
|
+
empty_element_p(VALUE self)
|
614
710
|
{
|
615
711
|
xmlTextReaderPtr reader;
|
616
712
|
|
617
|
-
|
713
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
618
714
|
|
619
|
-
if(xmlTextReaderIsEmptyElement(reader))
|
715
|
+
if (xmlTextReaderIsEmptyElement(reader)) {
|
620
716
|
return Qtrue;
|
717
|
+
}
|
621
718
|
|
622
719
|
return Qfalse;
|
623
720
|
}
|
624
721
|
|
625
|
-
VALUE
|
626
|
-
|
627
|
-
void init_xml_reader()
|
722
|
+
static VALUE
|
723
|
+
rb_xml_reader_encoding(VALUE rb_reader)
|
628
724
|
{
|
629
|
-
|
630
|
-
|
725
|
+
xmlTextReaderPtr c_reader;
|
726
|
+
const char *parser_encoding;
|
727
|
+
VALUE constructor_encoding;
|
728
|
+
|
729
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
|
730
|
+
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
731
|
+
if (parser_encoding) {
|
732
|
+
return NOKOGIRI_STR_NEW2(parser_encoding);
|
733
|
+
}
|
631
734
|
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
rb_define_method(
|
651
|
-
rb_define_method(
|
652
|
-
rb_define_method(
|
653
|
-
rb_define_method(
|
654
|
-
rb_define_method(
|
655
|
-
rb_define_method(
|
656
|
-
rb_define_method(
|
657
|
-
rb_define_method(
|
658
|
-
rb_define_method(
|
659
|
-
rb_define_method(
|
660
|
-
rb_define_method(
|
661
|
-
rb_define_method(
|
662
|
-
rb_define_method(
|
663
|
-
rb_define_method(
|
664
|
-
rb_define_method(
|
665
|
-
rb_define_method(
|
666
|
-
|
667
|
-
|
735
|
+
constructor_encoding = rb_iv_get(rb_reader, "@encoding");
|
736
|
+
if (RTEST(constructor_encoding)) {
|
737
|
+
return constructor_encoding;
|
738
|
+
}
|
739
|
+
|
740
|
+
return Qnil;
|
741
|
+
}
|
742
|
+
|
743
|
+
void
|
744
|
+
noko_init_xml_reader(void)
|
745
|
+
{
|
746
|
+
cNokogiriXmlReader = rb_define_class_under(mNokogiriXml, "Reader", rb_cObject);
|
747
|
+
|
748
|
+
rb_undef_alloc_func(cNokogiriXmlReader);
|
749
|
+
|
750
|
+
rb_define_singleton_method(cNokogiriXmlReader, "from_memory", from_memory, -1);
|
751
|
+
rb_define_singleton_method(cNokogiriXmlReader, "from_io", from_io, -1);
|
752
|
+
|
753
|
+
rb_define_method(cNokogiriXmlReader, "attribute", reader_attribute, 1);
|
754
|
+
rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1);
|
755
|
+
rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
|
756
|
+
rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0);
|
757
|
+
rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
|
758
|
+
rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
|
759
|
+
rb_define_method(cNokogiriXmlReader, "default?", default_eh, 0);
|
760
|
+
rb_define_method(cNokogiriXmlReader, "depth", depth, 0);
|
761
|
+
rb_define_method(cNokogiriXmlReader, "empty_element?", empty_element_p, 0);
|
762
|
+
rb_define_method(cNokogiriXmlReader, "encoding", rb_xml_reader_encoding, 0);
|
763
|
+
rb_define_method(cNokogiriXmlReader, "inner_xml", inner_xml, 0);
|
764
|
+
rb_define_method(cNokogiriXmlReader, "lang", lang, 0);
|
765
|
+
rb_define_method(cNokogiriXmlReader, "local_name", local_name, 0);
|
766
|
+
rb_define_method(cNokogiriXmlReader, "name", name, 0);
|
767
|
+
rb_define_method(cNokogiriXmlReader, "namespace_uri", namespace_uri, 0);
|
768
|
+
rb_define_method(cNokogiriXmlReader, "namespaces", rb_xml_reader_namespaces, 0);
|
769
|
+
rb_define_method(cNokogiriXmlReader, "node_type", node_type, 0);
|
770
|
+
rb_define_method(cNokogiriXmlReader, "outer_xml", outer_xml, 0);
|
771
|
+
rb_define_method(cNokogiriXmlReader, "prefix", prefix, 0);
|
772
|
+
rb_define_method(cNokogiriXmlReader, "read", read_more, 0);
|
773
|
+
rb_define_method(cNokogiriXmlReader, "state", state, 0);
|
774
|
+
rb_define_method(cNokogiriXmlReader, "value", value, 0);
|
775
|
+
rb_define_method(cNokogiriXmlReader, "value?", value_eh, 0);
|
776
|
+
rb_define_method(cNokogiriXmlReader, "xml_version", xml_version, 0);
|
668
777
|
}
|