nokogiri 1.0.0 → 1.6.8.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/.autotest +26 -0
- data/.cross_rubies +9 -0
- data/.editorconfig +17 -0
- data/.gemtest +0 -0
- data/.travis.yml +51 -0
- data/CHANGELOG.rdoc +1160 -0
- data/CONTRIBUTING.md +42 -0
- data/C_CODING_STYLE.rdoc +33 -0
- data/Gemfile +22 -0
- data/LICENSE.txt +31 -0
- data/Manifest.txt +284 -40
- data/README.md +166 -0
- data/ROADMAP.md +111 -0
- data/Rakefile +310 -199
- data/STANDARD_RESPONSES.md +47 -0
- data/Y_U_NO_GEMSPEC.md +155 -0
- data/appveyor.yml +22 -0
- data/bin/nokogiri +118 -0
- data/build_all +45 -0
- data/dependencies.yml +29 -0
- data/ext/nokogiri/depend +358 -0
- data/ext/nokogiri/extconf.rb +664 -34
- data/ext/nokogiri/html_document.c +120 -33
- data/ext/nokogiri/html_document.h +1 -1
- data/ext/nokogiri/html_element_description.c +279 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +116 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/html_sax_push_parser.c +87 -0
- data/ext/nokogiri/html_sax_push_parser.h +9 -0
- data/ext/nokogiri/nokogiri.c +145 -0
- data/ext/nokogiri/nokogiri.h +131 -0
- data/ext/nokogiri/xml_attr.c +94 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +23 -19
- data/ext/nokogiri/xml_cdata.h +1 -1
- data/ext/nokogiri/xml_comment.c +69 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +501 -54
- data/ext/nokogiri/xml_document.h +14 -1
- data/ext/nokogiri/xml_document_fragment.c +48 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +109 -24
- data/ext/nokogiri/xml_dtd.h +3 -1
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_encoding_handler.c +79 -0
- data/ext/nokogiri/xml_encoding_handler.h +8 -0
- data/ext/nokogiri/xml_entity_decl.c +110 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +52 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +60 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
- data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
- data/ext/nokogiri/xml_namespace.c +117 -0
- data/ext/nokogiri/xml_namespace.h +13 -0
- data/ext/nokogiri/xml_node.c +1285 -315
- data/ext/nokogiri/xml_node.h +4 -6
- data/ext/nokogiri/xml_node_set.c +415 -54
- data/ext/nokogiri/xml_node_set.h +6 -2
- data/ext/nokogiri/xml_processing_instruction.c +56 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +316 -77
- data/ext/nokogiri/xml_reader.h +1 -1
- data/ext/nokogiri/xml_relax_ng.c +161 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +215 -80
- data/ext/nokogiri/xml_sax_parser.h +30 -1
- data/ext/nokogiri/xml_sax_parser_context.c +262 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +115 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +205 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +45 -175
- data/ext/nokogiri/xml_syntax_error.h +4 -2
- data/ext/nokogiri/xml_text.c +37 -14
- data/ext/nokogiri/xml_text.h +1 -1
- data/ext/nokogiri/xml_xpath_context.c +230 -13
- data/ext/nokogiri/xml_xpath_context.h +2 -1
- data/ext/nokogiri/xslt_stylesheet.c +196 -34
- data/ext/nokogiri/xslt_stylesheet.h +6 -1
- data/lib/nokogiri/css/node.rb +18 -61
- data/lib/nokogiri/css/parser.rb +725 -17
- data/lib/nokogiri/css/parser.y +126 -63
- data/lib/nokogiri/css/parser_extras.rb +91 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +148 -5
- data/lib/nokogiri/css/tokenizer.rex +31 -39
- data/lib/nokogiri/css/xpath_visitor.rb +109 -51
- data/lib/nokogiri/css.rb +24 -3
- data/lib/nokogiri/decorators/slop.rb +42 -0
- data/lib/nokogiri/html/builder.rb +27 -1
- data/lib/nokogiri/html/document.rb +329 -3
- data/lib/nokogiri/html/document_fragment.rb +39 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/element_description_defaults.rb +671 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +35 -4
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/html/sax/push_parser.rb +36 -0
- data/lib/nokogiri/html.rb +18 -76
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +106 -1
- data/lib/nokogiri/xml/attr.rb +14 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +395 -31
- data/lib/nokogiri/xml/cdata.rb +4 -2
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +267 -12
- data/lib/nokogiri/xml/document_fragment.rb +149 -0
- data/lib/nokogiri/xml/dtd.rb +27 -1
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +19 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node/save_options.rb +61 -0
- data/lib/nokogiri/xml/node.rb +748 -109
- data/lib/nokogiri/xml/node_set.rb +200 -72
- data/lib/nokogiri/xml/parse_options.rb +120 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +102 -4
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax/document.rb +114 -2
- data/lib/nokogiri/xml/sax/parser.rb +97 -7
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/sax.rb +2 -7
- data/lib/nokogiri/xml/schema.rb +63 -0
- data/lib/nokogiri/xml/searchable.rb +221 -0
- data/lib/nokogiri/xml/syntax_error.rb +27 -1
- data/lib/nokogiri/xml/text.rb +4 -1
- data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
- data/lib/nokogiri/xml/xpath.rb +4 -0
- data/lib/nokogiri/xml/xpath_context.rb +3 -1
- data/lib/nokogiri/xml.rb +45 -38
- data/lib/nokogiri/xslt/stylesheet.rb +19 -0
- data/lib/nokogiri/xslt.rb +47 -2
- data/lib/nokogiri.rb +117 -24
- data/lib/xsd/xmlparser/nokogiri.rb +102 -0
- data/patches/sort-patches-by-date +25 -0
- data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
- data/suppressions/README.txt +1 -0
- data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
- data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
- data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
- data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
- data/tasks/test.rb +100 -0
- data/test/css/test_nthiness.rb +73 -6
- data/test/css/test_parser.rb +184 -39
- data/test/css/test_tokenizer.rb +72 -19
- data/test/css/test_xpath_visitor.rb +44 -2
- data/test/decorators/test_slop.rb +20 -0
- data/test/files/2ch.html +108 -0
- data/test/files/GH_1042.html +18 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/atom.xml +344 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/bogus.xml +0 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/encoding.html +82 -0
- data/test/files/encoding.xhtml +84 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/metacharset.html +10 -0
- data/test/files/namespace_pressure_test.xml +1684 -0
- data/test/files/noencoding.html +47 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/saml/saml20assertion_schema.xsd +283 -0
- data/test/files/saml/saml20protocol_schema.xsd +302 -0
- data/test/files/saml/xenc_schema.xsd +146 -0
- data/test/files/saml/xmldsig_schema.xsd +318 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/shift_jis_no_charset.html +9 -0
- data/test/files/slow-xpath.xml +25509 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/test_document_url/bar.xml +2 -0
- data/test/files/test_document_url/document.dtd +4 -0
- data/test/files/test_document_url/document.xml +6 -0
- data/test/files/tlm.html +2 -1
- data/test/files/to_be_xincluded.xml +2 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/files/xinclude.xml +4 -0
- data/test/helper.rb +124 -13
- data/test/html/sax/test_parser.rb +118 -4
- data/test/html/sax/test_parser_context.rb +46 -0
- data/test/html/sax/test_push_parser.rb +87 -0
- data/test/html/test_builder.rb +94 -8
- data/test/html/test_document.rb +626 -11
- data/test/html/test_document_encoding.rb +145 -0
- data/test/html/test_document_fragment.rb +301 -0
- data/test/html/test_element_description.rb +105 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +212 -0
- data/test/html/test_node_encoding.rb +85 -0
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
- data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
- data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
- data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
- data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +80 -0
- data/test/namespaces/test_namespaces_preservation.rb +31 -0
- data/test/test_convert_xpath.rb +2 -47
- data/test/test_css_cache.rb +45 -0
- data/test/test_encoding_handler.rb +48 -0
- data/test/test_memory_leak.rb +156 -0
- data/test/test_nokogiri.rb +103 -1
- data/test/test_soap4r_sax.rb +52 -0
- data/test/test_xslt_transforms.rb +293 -8
- data/test/xml/node/test_save_options.rb +28 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +309 -8
- data/test/xml/sax/test_parser_context.rb +115 -0
- data/test/xml/sax/test_push_parser.rb +157 -0
- data/test/xml/test_attr.rb +67 -0
- data/test/xml/test_attribute_decl.rb +86 -0
- data/test/xml/test_builder.rb +327 -2
- data/test/xml/test_c14n.rb +180 -0
- data/test/xml/test_cdata.rb +32 -2
- data/test/xml/test_comment.rb +40 -0
- data/test/xml/test_document.rb +846 -35
- data/test/xml/test_document_encoding.rb +31 -0
- data/test/xml/test_document_fragment.rb +271 -0
- data/test/xml/test_dtd.rb +153 -9
- data/test/xml/test_dtd_encoding.rb +31 -0
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +122 -0
- data/test/xml/test_entity_reference.rb +251 -0
- data/test/xml/test_namespace.rb +96 -0
- data/test/xml/test_node.rb +1126 -105
- data/test/xml/test_node_attributes.rb +115 -0
- data/test/xml/test_node_encoding.rb +69 -0
- data/test/xml/test_node_inheritance.rb +32 -0
- data/test/xml/test_node_reparenting.rb +549 -0
- data/test/xml/test_node_set.rb +668 -9
- data/test/xml/test_parse_options.rb +64 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader.rb +589 -0
- data/test/xml/test_reader_encoding.rb +134 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +142 -0
- data/test/xml/test_syntax_error.rb +30 -0
- data/test/xml/test_text.rb +49 -2
- data/test/xml/test_unparented_node.rb +440 -0
- data/test/xml/test_xinclude.rb +83 -0
- data/test/xml/test_xpath.rb +445 -0
- data/test/xslt/test_custom_functions.rb +133 -0
- data/test/xslt/test_exception_handling.rb +37 -0
- data/test_all +107 -0
- metadata +459 -115
- data/History.txt +0 -6
- data/README.ja.txt +0 -86
- data/README.txt +0 -87
- data/ext/nokogiri/html_sax_parser.c +0 -32
- data/ext/nokogiri/html_sax_parser.h +0 -11
- data/ext/nokogiri/native.c +0 -40
- data/ext/nokogiri/native.h +0 -51
- data/ext/nokogiri/xml_xpath.c +0 -46
- data/ext/nokogiri/xml_xpath.h +0 -11
- data/lib/nokogiri/css/generated_parser.rb +0 -653
- data/lib/nokogiri/css/generated_tokenizer.rb +0 -159
- data/lib/nokogiri/decorators/hpricot/node.rb +0 -58
- data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -14
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -17
- data/lib/nokogiri/decorators/hpricot.rb +0 -3
- data/lib/nokogiri/decorators.rb +0 -1
- data/lib/nokogiri/hpricot.rb +0 -47
- data/lib/nokogiri/xml/after_handler.rb +0 -18
- data/lib/nokogiri/xml/before_handler.rb +0 -32
- data/lib/nokogiri/xml/element.rb +0 -6
- data/lib/nokogiri/xml/entity_declaration.rb +0 -9
- data/nokogiri.gemspec +0 -34
- data/test/hpricot/files/basic.xhtml +0 -17
- data/test/hpricot/files/boingboing.html +0 -2266
- data/test/hpricot/files/cy0.html +0 -3653
- data/test/hpricot/files/immob.html +0 -400
- data/test/hpricot/files/pace_application.html +0 -1320
- data/test/hpricot/files/tenderlove.html +0 -16
- data/test/hpricot/files/uswebgen.html +0 -220
- data/test/hpricot/files/utf8.html +0 -1054
- data/test/hpricot/files/week9.html +0 -1723
- data/test/hpricot/files/why.xml +0 -19
- data/test/hpricot/load_files.rb +0 -7
- data/test/hpricot/test_alter.rb +0 -67
- data/test/hpricot/test_builder.rb +0 -27
- data/test/hpricot/test_parser.rb +0 -423
- data/test/hpricot/test_paths.rb +0 -15
- data/test/hpricot/test_preserved.rb +0 -78
- data/test/hpricot/test_xml.rb +0 -30
- data/test/test_reader.rb +0 -222
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -25,6 +25,43 @@ static int has_attributes(xmlTextReaderPtr reader)
|
|
25
25
|
return(0);
|
26
26
|
}
|
27
27
|
|
28
|
+
static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
|
29
|
+
{
|
30
|
+
xmlNsPtr ns;
|
31
|
+
static char buffer[XMLNS_BUFFER_LEN] ;
|
32
|
+
char *key ;
|
33
|
+
size_t keylen ;
|
34
|
+
|
35
|
+
if (node->type != XML_ELEMENT_NODE) return ;
|
36
|
+
|
37
|
+
ns = node->nsDef;
|
38
|
+
while (ns != NULL) {
|
39
|
+
|
40
|
+
keylen = XMLNS_PREFIX_LEN + (ns->prefix ? (strlen((const char*)ns->prefix) + 1) : 0) ;
|
41
|
+
if (keylen > XMLNS_BUFFER_LEN) {
|
42
|
+
key = (char*)malloc(keylen) ;
|
43
|
+
} else {
|
44
|
+
key = buffer ;
|
45
|
+
}
|
46
|
+
|
47
|
+
if (ns->prefix) {
|
48
|
+
sprintf(key, "%s:%s", XMLNS_PREFIX, ns->prefix);
|
49
|
+
} else {
|
50
|
+
sprintf(key, "%s", XMLNS_PREFIX);
|
51
|
+
}
|
52
|
+
|
53
|
+
rb_hash_aset(attr_hash,
|
54
|
+
NOKOGIRI_STR_NEW2(key),
|
55
|
+
(ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil)
|
56
|
+
);
|
57
|
+
if (key != buffer) {
|
58
|
+
free(key);
|
59
|
+
}
|
60
|
+
ns = ns->next ;
|
61
|
+
}
|
62
|
+
}
|
63
|
+
|
64
|
+
|
28
65
|
/*
|
29
66
|
* call-seq:
|
30
67
|
* default?
|
@@ -34,8 +71,10 @@ static int has_attributes(xmlTextReaderPtr reader)
|
|
34
71
|
static VALUE default_eh(VALUE self)
|
35
72
|
{
|
36
73
|
xmlTextReaderPtr reader;
|
74
|
+
int eh;
|
75
|
+
|
37
76
|
Data_Get_Struct(self, xmlTextReader, reader);
|
38
|
-
|
77
|
+
eh = xmlTextReaderIsDefault(reader);
|
39
78
|
if(eh == 0) return Qfalse;
|
40
79
|
if(eh == 1) return Qtrue;
|
41
80
|
|
@@ -51,8 +90,10 @@ static VALUE default_eh(VALUE self)
|
|
51
90
|
static VALUE value_eh(VALUE self)
|
52
91
|
{
|
53
92
|
xmlTextReaderPtr reader;
|
93
|
+
int eh;
|
94
|
+
|
54
95
|
Data_Get_Struct(self, xmlTextReader, reader);
|
55
|
-
|
96
|
+
eh = xmlTextReaderHasValue(reader);
|
56
97
|
if(eh == 0) return Qfalse;
|
57
98
|
if(eh == 1) return Qtrue;
|
58
99
|
|
@@ -68,8 +109,10 @@ static VALUE value_eh(VALUE self)
|
|
68
109
|
static VALUE attributes_eh(VALUE self)
|
69
110
|
{
|
70
111
|
xmlTextReaderPtr reader;
|
112
|
+
int eh;
|
113
|
+
|
71
114
|
Data_Get_Struct(self, xmlTextReader, reader);
|
72
|
-
|
115
|
+
eh = has_attributes(reader);
|
73
116
|
if(eh == 0) return Qfalse;
|
74
117
|
if(eh == 1) return Qtrue;
|
75
118
|
|
@@ -78,13 +121,14 @@ static VALUE attributes_eh(VALUE self)
|
|
78
121
|
|
79
122
|
/*
|
80
123
|
* call-seq:
|
81
|
-
*
|
124
|
+
* namespaces
|
82
125
|
*
|
83
|
-
* Get a
|
126
|
+
* Get a hash of namespaces for this Node
|
84
127
|
*/
|
85
|
-
static VALUE
|
128
|
+
static VALUE namespaces(VALUE self)
|
86
129
|
{
|
87
130
|
xmlTextReaderPtr reader;
|
131
|
+
xmlNodePtr ptr;
|
88
132
|
VALUE attr ;
|
89
133
|
|
90
134
|
Data_Get_Struct(self, xmlTextReader, reader);
|
@@ -94,10 +138,36 @@ static VALUE attributes(VALUE self)
|
|
94
138
|
if (! has_attributes(reader))
|
95
139
|
return attr ;
|
96
140
|
|
97
|
-
|
141
|
+
ptr = xmlTextReaderExpand(reader);
|
98
142
|
if(ptr == NULL) return Qnil;
|
99
143
|
|
100
144
|
Nokogiri_xml_node_namespaces(ptr, attr);
|
145
|
+
|
146
|
+
return attr ;
|
147
|
+
}
|
148
|
+
|
149
|
+
/*
|
150
|
+
* call-seq:
|
151
|
+
* attribute_nodes
|
152
|
+
*
|
153
|
+
* Get a list of attributes for this Node
|
154
|
+
*/
|
155
|
+
static VALUE attribute_nodes(VALUE self)
|
156
|
+
{
|
157
|
+
xmlTextReaderPtr reader;
|
158
|
+
xmlNodePtr ptr;
|
159
|
+
VALUE attr ;
|
160
|
+
|
161
|
+
Data_Get_Struct(self, xmlTextReader, reader);
|
162
|
+
|
163
|
+
attr = rb_ary_new() ;
|
164
|
+
|
165
|
+
if (! has_attributes(reader))
|
166
|
+
return attr ;
|
167
|
+
|
168
|
+
ptr = xmlTextReaderExpand(reader);
|
169
|
+
if(ptr == NULL) return Qnil;
|
170
|
+
|
101
171
|
Nokogiri_xml_node_properties(ptr, attr);
|
102
172
|
|
103
173
|
return attr ;
|
@@ -112,18 +182,21 @@ static VALUE attributes(VALUE self)
|
|
112
182
|
static VALUE attribute_at(VALUE self, VALUE index)
|
113
183
|
{
|
114
184
|
xmlTextReaderPtr reader;
|
185
|
+
xmlChar *value;
|
186
|
+
VALUE rb_value;
|
187
|
+
|
115
188
|
Data_Get_Struct(self, xmlTextReader, reader);
|
116
189
|
|
117
|
-
if(index
|
118
|
-
index =
|
190
|
+
if(NIL_P(index)) return Qnil;
|
191
|
+
index = rb_Integer(index);
|
119
192
|
|
120
|
-
|
193
|
+
value = xmlTextReaderGetAttributeNo(
|
121
194
|
reader,
|
122
|
-
NUM2INT(index)
|
195
|
+
(int)NUM2INT(index)
|
123
196
|
);
|
124
197
|
if(value == NULL) return Qnil;
|
125
198
|
|
126
|
-
|
199
|
+
rb_value = NOKOGIRI_STR_NEW2(value);
|
127
200
|
xmlFree(value);
|
128
201
|
return rb_value;
|
129
202
|
}
|
@@ -138,27 +211,17 @@ static VALUE reader_attribute(VALUE self, VALUE name)
|
|
138
211
|
{
|
139
212
|
xmlTextReaderPtr reader;
|
140
213
|
xmlChar *value ;
|
214
|
+
VALUE rb_value;
|
215
|
+
|
141
216
|
Data_Get_Struct(self, xmlTextReader, reader);
|
142
217
|
|
143
|
-
if(name
|
218
|
+
if(NIL_P(name)) return Qnil;
|
144
219
|
name = StringValue(name) ;
|
145
220
|
|
146
|
-
value = xmlTextReaderGetAttribute(reader, (xmlChar*)
|
147
|
-
if(value == NULL) {
|
148
|
-
/* this section is an attempt to workaround older versions of libxml that
|
149
|
-
don't handle namespaces properly in all attribute-and-friends functions */
|
150
|
-
xmlChar *prefix = NULL ;
|
151
|
-
xmlChar *localname = xmlSplitQName2((xmlChar*)StringValuePtr(name), &prefix);
|
152
|
-
if (localname != NULL) {
|
153
|
-
value = xmlTextReaderLookupNamespace(reader, localname);
|
154
|
-
free(localname) ;
|
155
|
-
} else {
|
156
|
-
value = xmlTextReaderLookupNamespace(reader, prefix);
|
157
|
-
}
|
158
|
-
}
|
221
|
+
value = xmlTextReaderGetAttribute(reader, (xmlChar*)StringValueCStr(name));
|
159
222
|
if(value == NULL) return Qnil;
|
160
223
|
|
161
|
-
|
224
|
+
rb_value = NOKOGIRI_STR_NEW2(value);
|
162
225
|
xmlFree(value);
|
163
226
|
return rb_value;
|
164
227
|
}
|
@@ -172,11 +235,13 @@ static VALUE reader_attribute(VALUE self, VALUE name)
|
|
172
235
|
static VALUE attribute_count(VALUE self)
|
173
236
|
{
|
174
237
|
xmlTextReaderPtr reader;
|
238
|
+
int count;
|
239
|
+
|
175
240
|
Data_Get_Struct(self, xmlTextReader, reader);
|
176
|
-
|
241
|
+
count = xmlTextReaderAttributeCount(reader);
|
177
242
|
if(count == -1) return Qnil;
|
178
243
|
|
179
|
-
return INT2NUM(count);
|
244
|
+
return INT2NUM((long)count);
|
180
245
|
}
|
181
246
|
|
182
247
|
/*
|
@@ -188,27 +253,13 @@ static VALUE attribute_count(VALUE self)
|
|
188
253
|
static VALUE depth(VALUE self)
|
189
254
|
{
|
190
255
|
xmlTextReaderPtr reader;
|
191
|
-
|
192
|
-
int depth = xmlTextReaderDepth(reader);
|
193
|
-
if(depth == -1) return Qnil;
|
256
|
+
int depth;
|
194
257
|
|
195
|
-
return INT2NUM(depth);
|
196
|
-
}
|
197
|
-
|
198
|
-
/*
|
199
|
-
* call-seq:
|
200
|
-
* encoding
|
201
|
-
*
|
202
|
-
* Get the encoding for the document
|
203
|
-
*/
|
204
|
-
static VALUE encoding(VALUE self)
|
205
|
-
{
|
206
|
-
xmlTextReaderPtr reader;
|
207
258
|
Data_Get_Struct(self, xmlTextReader, reader);
|
208
|
-
|
209
|
-
if(
|
259
|
+
depth = xmlTextReaderDepth(reader);
|
260
|
+
if(depth == -1) return Qnil;
|
210
261
|
|
211
|
-
return
|
262
|
+
return INT2NUM((long)depth);
|
212
263
|
}
|
213
264
|
|
214
265
|
/*
|
@@ -220,11 +271,13 @@ static VALUE encoding(VALUE self)
|
|
220
271
|
static VALUE xml_version(VALUE self)
|
221
272
|
{
|
222
273
|
xmlTextReaderPtr reader;
|
274
|
+
const char *version;
|
275
|
+
|
223
276
|
Data_Get_Struct(self, xmlTextReader, reader);
|
224
|
-
|
277
|
+
version = (const char *)xmlTextReaderConstXmlVersion(reader);
|
225
278
|
if(version == NULL) return Qnil;
|
226
279
|
|
227
|
-
return
|
280
|
+
return NOKOGIRI_STR_NEW2(version);
|
228
281
|
}
|
229
282
|
|
230
283
|
/*
|
@@ -236,27 +289,31 @@ static VALUE xml_version(VALUE self)
|
|
236
289
|
static VALUE lang(VALUE self)
|
237
290
|
{
|
238
291
|
xmlTextReaderPtr reader;
|
292
|
+
const char *lang;
|
293
|
+
|
239
294
|
Data_Get_Struct(self, xmlTextReader, reader);
|
240
|
-
|
295
|
+
lang = (const char *)xmlTextReaderConstXmlLang(reader);
|
241
296
|
if(lang == NULL) return Qnil;
|
242
297
|
|
243
|
-
return
|
298
|
+
return NOKOGIRI_STR_NEW2(lang);
|
244
299
|
}
|
245
300
|
|
246
301
|
/*
|
247
302
|
* call-seq:
|
248
303
|
* value
|
249
304
|
*
|
250
|
-
* Get the text value of the node if present
|
305
|
+
* Get the text value of the node if present. Returns a utf-8 encoded string.
|
251
306
|
*/
|
252
307
|
static VALUE value(VALUE self)
|
253
308
|
{
|
254
309
|
xmlTextReaderPtr reader;
|
310
|
+
const char *value;
|
311
|
+
|
255
312
|
Data_Get_Struct(self, xmlTextReader, reader);
|
256
|
-
|
313
|
+
value = (const char *)xmlTextReaderConstValue(reader);
|
257
314
|
if(value == NULL) return Qnil;
|
258
315
|
|
259
|
-
return
|
316
|
+
return NOKOGIRI_STR_NEW2(value);
|
260
317
|
}
|
261
318
|
|
262
319
|
/*
|
@@ -268,11 +325,13 @@ static VALUE value(VALUE self)
|
|
268
325
|
static VALUE prefix(VALUE self)
|
269
326
|
{
|
270
327
|
xmlTextReaderPtr reader;
|
328
|
+
const char *prefix;
|
329
|
+
|
271
330
|
Data_Get_Struct(self, xmlTextReader, reader);
|
272
|
-
|
331
|
+
prefix = (const char *)xmlTextReaderConstPrefix(reader);
|
273
332
|
if(prefix == NULL) return Qnil;
|
274
333
|
|
275
|
-
return
|
334
|
+
return NOKOGIRI_STR_NEW2(prefix);
|
276
335
|
}
|
277
336
|
|
278
337
|
/*
|
@@ -284,11 +343,13 @@ static VALUE prefix(VALUE self)
|
|
284
343
|
static VALUE namespace_uri(VALUE self)
|
285
344
|
{
|
286
345
|
xmlTextReaderPtr reader;
|
346
|
+
const char *uri;
|
347
|
+
|
287
348
|
Data_Get_Struct(self, xmlTextReader, reader);
|
288
|
-
|
349
|
+
uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
|
289
350
|
if(uri == NULL) return Qnil;
|
290
351
|
|
291
|
-
return
|
352
|
+
return NOKOGIRI_STR_NEW2(uri);
|
292
353
|
}
|
293
354
|
|
294
355
|
/*
|
@@ -300,27 +361,49 @@ static VALUE namespace_uri(VALUE self)
|
|
300
361
|
static VALUE local_name(VALUE self)
|
301
362
|
{
|
302
363
|
xmlTextReaderPtr reader;
|
364
|
+
const char *name;
|
365
|
+
|
303
366
|
Data_Get_Struct(self, xmlTextReader, reader);
|
304
|
-
|
367
|
+
name = (const char *)xmlTextReaderConstLocalName(reader);
|
305
368
|
if(name == NULL) return Qnil;
|
306
369
|
|
307
|
-
return
|
370
|
+
return NOKOGIRI_STR_NEW2(name);
|
308
371
|
}
|
309
372
|
|
310
373
|
/*
|
311
374
|
* call-seq:
|
312
375
|
* name
|
313
376
|
*
|
314
|
-
* Get the name of the node
|
377
|
+
* Get the name of the node. Returns a utf-8 encoded string.
|
315
378
|
*/
|
316
379
|
static VALUE name(VALUE self)
|
317
380
|
{
|
318
381
|
xmlTextReaderPtr reader;
|
382
|
+
const char *name;
|
383
|
+
|
319
384
|
Data_Get_Struct(self, xmlTextReader, reader);
|
320
|
-
|
385
|
+
name = (const char *)xmlTextReaderConstName(reader);
|
321
386
|
if(name == NULL) return Qnil;
|
322
387
|
|
323
|
-
return
|
388
|
+
return NOKOGIRI_STR_NEW2(name);
|
389
|
+
}
|
390
|
+
|
391
|
+
/*
|
392
|
+
* call-seq:
|
393
|
+
* base_uri
|
394
|
+
*
|
395
|
+
* Get the xml:base of the node
|
396
|
+
*/
|
397
|
+
static VALUE base_uri(VALUE self)
|
398
|
+
{
|
399
|
+
xmlTextReaderPtr reader;
|
400
|
+
const char * base_uri;
|
401
|
+
|
402
|
+
Data_Get_Struct(self, xmlTextReader, reader);
|
403
|
+
base_uri = (const char *)xmlTextReaderBaseUri(reader);
|
404
|
+
if (base_uri == NULL) return Qnil;
|
405
|
+
|
406
|
+
return NOKOGIRI_STR_NEW2(base_uri);
|
324
407
|
}
|
325
408
|
|
326
409
|
/*
|
@@ -333,7 +416,20 @@ static VALUE state(VALUE self)
|
|
333
416
|
{
|
334
417
|
xmlTextReaderPtr reader;
|
335
418
|
Data_Get_Struct(self, xmlTextReader, reader);
|
336
|
-
return INT2NUM(xmlTextReaderReadState(reader));
|
419
|
+
return INT2NUM((long)xmlTextReaderReadState(reader));
|
420
|
+
}
|
421
|
+
|
422
|
+
/*
|
423
|
+
* call-seq:
|
424
|
+
* node_type
|
425
|
+
*
|
426
|
+
* Get the type of readers current node
|
427
|
+
*/
|
428
|
+
static VALUE node_type(VALUE self)
|
429
|
+
{
|
430
|
+
xmlTextReaderPtr reader;
|
431
|
+
Data_Get_Struct(self, xmlTextReader, reader);
|
432
|
+
return INT2NUM((long)xmlTextReaderNodeType(reader));
|
337
433
|
}
|
338
434
|
|
339
435
|
/*
|
@@ -345,13 +441,78 @@ static VALUE state(VALUE self)
|
|
345
441
|
static VALUE read_more(VALUE self)
|
346
442
|
{
|
347
443
|
xmlTextReaderPtr reader;
|
444
|
+
xmlErrorPtr error;
|
445
|
+
VALUE error_list;
|
446
|
+
int ret;
|
447
|
+
|
348
448
|
Data_Get_Struct(self, xmlTextReader, reader);
|
349
449
|
|
350
|
-
|
450
|
+
error_list = rb_funcall(self, rb_intern("errors"), 0);
|
451
|
+
|
452
|
+
xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
|
453
|
+
ret = xmlTextReaderRead(reader);
|
454
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
455
|
+
|
351
456
|
if(ret == 1) return self;
|
352
457
|
if(ret == 0) return Qnil;
|
353
458
|
|
354
|
-
|
459
|
+
error = xmlGetLastError();
|
460
|
+
if(error)
|
461
|
+
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
462
|
+
else
|
463
|
+
rb_raise(rb_eRuntimeError, "Error pulling: %d", ret);
|
464
|
+
|
465
|
+
return Qnil;
|
466
|
+
}
|
467
|
+
|
468
|
+
/*
|
469
|
+
* call-seq:
|
470
|
+
* inner_xml
|
471
|
+
*
|
472
|
+
* Read the contents of the current node, including child nodes and markup.
|
473
|
+
* Returns a utf-8 encoded string.
|
474
|
+
*/
|
475
|
+
static VALUE inner_xml(VALUE self)
|
476
|
+
{
|
477
|
+
xmlTextReaderPtr reader;
|
478
|
+
xmlChar* value;
|
479
|
+
VALUE str;
|
480
|
+
|
481
|
+
Data_Get_Struct(self, xmlTextReader, reader);
|
482
|
+
|
483
|
+
value = xmlTextReaderReadInnerXml(reader);
|
484
|
+
|
485
|
+
str = Qnil;
|
486
|
+
if(value) {
|
487
|
+
str = NOKOGIRI_STR_NEW2((char*)value);
|
488
|
+
xmlFree(value);
|
489
|
+
}
|
490
|
+
|
491
|
+
return str;
|
492
|
+
}
|
493
|
+
|
494
|
+
/*
|
495
|
+
* call-seq:
|
496
|
+
* outer_xml
|
497
|
+
*
|
498
|
+
* Read the current node and its contents, including child nodes and markup.
|
499
|
+
* Returns a utf-8 encoded string.
|
500
|
+
*/
|
501
|
+
static VALUE outer_xml(VALUE self)
|
502
|
+
{
|
503
|
+
xmlTextReaderPtr reader;
|
504
|
+
xmlChar *value;
|
505
|
+
VALUE str = Qnil;
|
506
|
+
|
507
|
+
Data_Get_Struct(self, xmlTextReader, reader);
|
508
|
+
|
509
|
+
value = xmlTextReaderReadOuterXml(reader);
|
510
|
+
|
511
|
+
if(value) {
|
512
|
+
str = NOKOGIRI_STR_NEW2((char*)value);
|
513
|
+
xmlFree(value);
|
514
|
+
}
|
515
|
+
return str;
|
355
516
|
}
|
356
517
|
|
357
518
|
/*
|
@@ -363,21 +524,22 @@ static VALUE read_more(VALUE self)
|
|
363
524
|
static VALUE from_memory(int argc, VALUE *argv, VALUE klass)
|
364
525
|
{
|
365
526
|
VALUE rb_buffer, rb_url, encoding, rb_options;
|
366
|
-
|
527
|
+
xmlTextReaderPtr reader;
|
367
528
|
const char * c_url = NULL;
|
368
529
|
const char * c_encoding = NULL;
|
369
|
-
int c_options = 0;
|
530
|
+
int c_options = 0;
|
531
|
+
VALUE rb_reader, args[3];
|
370
532
|
|
371
533
|
rb_scan_args(argc, argv, "13", &rb_buffer, &rb_url, &encoding, &rb_options);
|
372
534
|
|
373
|
-
|
374
|
-
if (RTEST(rb_url)) c_url =
|
375
|
-
if (RTEST(encoding)) c_encoding =
|
376
|
-
if (RTEST(rb_options)) c_options = NUM2INT(rb_options);
|
535
|
+
if (!RTEST(rb_buffer)) rb_raise(rb_eArgError, "string cannot be nil");
|
536
|
+
if (RTEST(rb_url)) c_url = StringValueCStr(rb_url);
|
537
|
+
if (RTEST(encoding)) c_encoding = StringValueCStr(encoding);
|
538
|
+
if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);
|
377
539
|
|
378
|
-
|
540
|
+
reader = xmlReaderForMemory(
|
379
541
|
StringValuePtr(rb_buffer),
|
380
|
-
|
542
|
+
(int)RSTRING_LEN(rb_buffer),
|
381
543
|
c_url,
|
382
544
|
c_encoding,
|
383
545
|
c_options
|
@@ -388,7 +550,76 @@ static VALUE from_memory(int argc, VALUE *argv, VALUE klass)
|
|
388
550
|
rb_raise(rb_eRuntimeError, "couldn't create a parser");
|
389
551
|
}
|
390
552
|
|
391
|
-
|
553
|
+
rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
|
554
|
+
args[0] = rb_buffer;
|
555
|
+
args[1] = rb_url;
|
556
|
+
args[2] = encoding;
|
557
|
+
rb_obj_call_init(rb_reader, 3, args);
|
558
|
+
|
559
|
+
return rb_reader;
|
560
|
+
}
|
561
|
+
|
562
|
+
/*
|
563
|
+
* call-seq:
|
564
|
+
* from_io(io, url = nil, encoding = nil, options = 0)
|
565
|
+
*
|
566
|
+
* Create a new reader that parses +io+
|
567
|
+
*/
|
568
|
+
static VALUE from_io(int argc, VALUE *argv, VALUE klass)
|
569
|
+
{
|
570
|
+
VALUE rb_io, rb_url, encoding, rb_options;
|
571
|
+
xmlTextReaderPtr reader;
|
572
|
+
const char * c_url = NULL;
|
573
|
+
const char * c_encoding = NULL;
|
574
|
+
int c_options = 0;
|
575
|
+
VALUE rb_reader, args[3];
|
576
|
+
|
577
|
+
rb_scan_args(argc, argv, "13", &rb_io, &rb_url, &encoding, &rb_options);
|
578
|
+
|
579
|
+
if (!RTEST(rb_io)) rb_raise(rb_eArgError, "io cannot be nil");
|
580
|
+
if (RTEST(rb_url)) c_url = StringValueCStr(rb_url);
|
581
|
+
if (RTEST(encoding)) c_encoding = StringValueCStr(encoding);
|
582
|
+
if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);
|
583
|
+
|
584
|
+
reader = xmlReaderForIO(
|
585
|
+
(xmlInputReadCallback)io_read_callback,
|
586
|
+
(xmlInputCloseCallback)io_close_callback,
|
587
|
+
(void *)rb_io,
|
588
|
+
c_url,
|
589
|
+
c_encoding,
|
590
|
+
c_options
|
591
|
+
);
|
592
|
+
|
593
|
+
if(reader == NULL) {
|
594
|
+
xmlFreeTextReader(reader);
|
595
|
+
rb_raise(rb_eRuntimeError, "couldn't create a parser");
|
596
|
+
}
|
597
|
+
|
598
|
+
rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
|
599
|
+
args[0] = rb_io;
|
600
|
+
args[1] = rb_url;
|
601
|
+
args[2] = encoding;
|
602
|
+
rb_obj_call_init(rb_reader, 3, args);
|
603
|
+
|
604
|
+
return rb_reader;
|
605
|
+
}
|
606
|
+
|
607
|
+
/*
|
608
|
+
* call-seq:
|
609
|
+
* reader.empty_element? # => true or false
|
610
|
+
*
|
611
|
+
* Returns true if the current node is empty, otherwise false.
|
612
|
+
*/
|
613
|
+
static VALUE empty_element_p(VALUE self)
|
614
|
+
{
|
615
|
+
xmlTextReaderPtr reader;
|
616
|
+
|
617
|
+
Data_Get_Struct(self, xmlTextReader, reader);
|
618
|
+
|
619
|
+
if(xmlTextReaderIsEmptyElement(reader))
|
620
|
+
return Qtrue;
|
621
|
+
|
622
|
+
return Qfalse;
|
392
623
|
}
|
393
624
|
|
394
625
|
VALUE cNokogiriXmlReader;
|
@@ -408,8 +639,13 @@ void init_xml_reader()
|
|
408
639
|
cNokogiriXmlReader = klass;
|
409
640
|
|
410
641
|
rb_define_singleton_method(klass, "from_memory", from_memory, -1);
|
642
|
+
rb_define_singleton_method(klass, "from_io", from_io, -1);
|
643
|
+
|
411
644
|
rb_define_method(klass, "read", read_more, 0);
|
645
|
+
rb_define_method(klass, "inner_xml", inner_xml, 0);
|
646
|
+
rb_define_method(klass, "outer_xml", outer_xml, 0);
|
412
647
|
rb_define_method(klass, "state", state, 0);
|
648
|
+
rb_define_method(klass, "node_type", node_type, 0);
|
413
649
|
rb_define_method(klass, "name", name, 0);
|
414
650
|
rb_define_method(klass, "local_name", local_name, 0);
|
415
651
|
rb_define_method(klass, "namespace_uri", namespace_uri, 0);
|
@@ -417,13 +653,16 @@ void init_xml_reader()
|
|
417
653
|
rb_define_method(klass, "value", value, 0);
|
418
654
|
rb_define_method(klass, "lang", lang, 0);
|
419
655
|
rb_define_method(klass, "xml_version", xml_version, 0);
|
420
|
-
rb_define_method(klass, "encoding", encoding, 0);
|
421
656
|
rb_define_method(klass, "depth", depth, 0);
|
422
657
|
rb_define_method(klass, "attribute_count", attribute_count, 0);
|
423
658
|
rb_define_method(klass, "attribute", reader_attribute, 1);
|
659
|
+
rb_define_method(klass, "namespaces", namespaces, 0);
|
424
660
|
rb_define_method(klass, "attribute_at", attribute_at, 1);
|
425
|
-
rb_define_method(klass, "
|
661
|
+
rb_define_method(klass, "empty_element?", empty_element_p, 0);
|
426
662
|
rb_define_method(klass, "attributes?", attributes_eh, 0);
|
427
663
|
rb_define_method(klass, "value?", value_eh, 0);
|
428
664
|
rb_define_method(klass, "default?", default_eh, 0);
|
665
|
+
rb_define_method(klass, "base_uri", base_uri, 0);
|
666
|
+
|
667
|
+
rb_define_private_method(klass, "attr_nodes", attribute_nodes, 0);
|
429
668
|
}
|