nokogiri 1.11.3 → 1.13.8
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/LICENSE-DEPENDENCIES.md +243 -22
- data/LICENSE.md +1 -1
- data/README.md +14 -11
- data/bin/nokogiri +63 -50
- data/dependencies.yml +13 -64
- data/ext/nokogiri/depend +35 -34
- data/ext/nokogiri/extconf.rb +237 -133
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
- data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +21 -19
- data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
- data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +8 -8
- data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
- data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
- data/ext/nokogiri/nokogiri.c +70 -38
- data/ext/nokogiri/nokogiri.h +27 -9
- data/ext/nokogiri/xml_attr.c +2 -2
- data/ext/nokogiri/xml_attribute_decl.c +3 -3
- data/ext/nokogiri/xml_cdata.c +1 -1
- data/ext/nokogiri/xml_document.c +50 -50
- data/ext/nokogiri/xml_document_fragment.c +0 -2
- data/ext/nokogiri/xml_dtd.c +10 -10
- data/ext/nokogiri/xml_element_content.c +2 -0
- data/ext/nokogiri/xml_element_decl.c +3 -3
- data/ext/nokogiri/xml_encoding_handler.c +31 -12
- data/ext/nokogiri/xml_entity_decl.c +5 -5
- data/ext/nokogiri/xml_namespace.c +4 -2
- data/ext/nokogiri/xml_node.c +833 -492
- data/ext/nokogiri/xml_node_set.c +24 -24
- data/ext/nokogiri/xml_reader.c +90 -11
- data/ext/nokogiri/xml_sax_parser.c +6 -6
- data/ext/nokogiri/xml_sax_parser_context.c +12 -3
- data/ext/nokogiri/xml_schema.c +5 -3
- data/ext/nokogiri/xml_text.c +1 -1
- data/ext/nokogiri/xml_xpath_context.c +110 -85
- data/ext/nokogiri/xslt_stylesheet.c +109 -10
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4875 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +9 -8
- data/lib/nokogiri/css/parser.rb +361 -342
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +22 -20
- data/lib/nokogiri/css/syntax_error.rb +2 -1
- data/lib/nokogiri/css/tokenizer.rb +4 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +179 -82
- data/lib/nokogiri/css.rb +49 -17
- data/lib/nokogiri/decorators/slop.rb +8 -7
- data/lib/nokogiri/extension.rb +8 -3
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +37 -27
- data/lib/nokogiri/{html → html4}/builder.rb +3 -2
- data/lib/nokogiri/{html → html4}/document.rb +92 -81
- data/lib/nokogiri/{html → html4}/document_fragment.rb +13 -9
- data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +3 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +16 -16
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +11 -11
- data/lib/nokogiri/html4.rb +46 -0
- data/lib/nokogiri/html5/document.rb +91 -0
- data/lib/nokogiri/html5/document_fragment.rb +83 -0
- data/lib/nokogiri/html5/node.rb +100 -0
- data/lib/nokogiri/html5.rb +478 -0
- data/lib/nokogiri/jruby/dependencies.rb +10 -9
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +2 -1
- data/lib/nokogiri/version/info.rb +31 -14
- data/lib/nokogiri/version.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +5 -3
- data/lib/nokogiri/xml/attribute_decl.rb +2 -1
- data/lib/nokogiri/xml/builder.rb +71 -31
- data/lib/nokogiri/xml/cdata.rb +2 -1
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +183 -96
- data/lib/nokogiri/xml/document_fragment.rb +41 -38
- data/lib/nokogiri/xml/dtd.rb +3 -2
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +2 -1
- data/lib/nokogiri/xml/entity_decl.rb +3 -2
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +2 -0
- data/lib/nokogiri/xml/node/save_options.rb +9 -5
- data/lib/nokogiri/xml/node.rb +525 -354
- data/lib/nokogiri/xml/node_set.rb +50 -54
- data/lib/nokogiri/xml/notation.rb +12 -0
- data/lib/nokogiri/xml/parse_options.rb +13 -6
- data/lib/nokogiri/xml/pp/character_data.rb +8 -6
- data/lib/nokogiri/xml/pp/node.rb +24 -26
- data/lib/nokogiri/xml/pp.rb +3 -2
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/reader.rb +20 -24
- data/lib/nokogiri/xml/relax_ng.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +44 -49
- data/lib/nokogiri/xml/sax/parser.rb +37 -34
- data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
- data/lib/nokogiri/xml/sax.rb +5 -4
- data/lib/nokogiri/xml/schema.rb +7 -6
- data/lib/nokogiri/xml/searchable.rb +93 -62
- data/lib/nokogiri/xml/syntax_error.rb +5 -4
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +13 -1
- data/lib/nokogiri/xml/xpath_context.rb +2 -3
- data/lib/nokogiri/xml.rb +37 -37
- data/lib/nokogiri/xslt/stylesheet.rb +2 -1
- data/lib/nokogiri/xslt.rb +28 -20
- data/lib/nokogiri.rb +48 -43
- data/lib/xsd/xmlparser/nokogiri.rb +25 -24
- data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
- data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
- data/patches/libxml2/{0008-use-glibc-strlen.patch → 0004-use-glibc-strlen.patch} +3 -3
- data/patches/libxml2/{0009-avoid-isnan-isinf.patch → 0005-avoid-isnan-isinf.patch} +4 -4
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
- data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
- metadata +204 -93
- data/lib/nokogiri/html/element_description_defaults.rb +0 -672
- data/lib/nokogiri/html/sax/parser_context.rb +0 -17
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +0 -73
- data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +0 -103
- data/patches/libxml2/0010-parser.c-shrink-the-input-buffer-when-appropriate.patch +0 -70
- data/patches/libxml2/0011-update-automake-files-for-arm64.patch +0 -2511
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
data/ext/nokogiri/xml_node_set.c
CHANGED
@@ -20,20 +20,20 @@ ruby_object_get(xmlNodePtr c_node)
|
|
20
20
|
{
|
21
21
|
/* see xmlElementType in libxml2 tree.h */
|
22
22
|
switch (c_node->type) {
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
23
|
+
case XML_NAMESPACE_DECL:
|
24
|
+
/* _private is later in the namespace struct */
|
25
|
+
return (VALUE)(((xmlNsPtr)c_node)->_private);
|
26
|
+
|
27
|
+
case XML_DOCUMENT_NODE:
|
28
|
+
case XML_HTML_DOCUMENT_NODE:
|
29
|
+
/* in documents we use _private to store a tuple */
|
30
|
+
if (DOC_RUBY_OBJECT_TEST(((xmlDocPtr)c_node))) {
|
31
|
+
return DOC_RUBY_OBJECT((xmlDocPtr)c_node);
|
32
|
+
}
|
33
|
+
return (VALUE)NULL;
|
34
|
+
|
35
|
+
default:
|
36
|
+
return (VALUE)(c_node->_private);
|
37
37
|
}
|
38
38
|
}
|
39
39
|
|
@@ -156,7 +156,7 @@ push(VALUE self, VALUE rb_node)
|
|
156
156
|
Check_Node_Set_Node_Type(rb_node);
|
157
157
|
|
158
158
|
Data_Get_Struct(self, xmlNodeSet, node_set);
|
159
|
-
|
159
|
+
Noko_Node_Get_Struct(rb_node, xmlNode, node);
|
160
160
|
|
161
161
|
xmlXPathNodeSetAdd(node_set, node);
|
162
162
|
|
@@ -179,7 +179,7 @@ delete (VALUE self, VALUE rb_node)
|
|
179
179
|
Check_Node_Set_Node_Type(rb_node);
|
180
180
|
|
181
181
|
Data_Get_Struct(self, xmlNodeSet, node_set);
|
182
|
-
|
182
|
+
Noko_Node_Get_Struct(rb_node, xmlNode, node);
|
183
183
|
|
184
184
|
if (xmlXPathNodeSetContains(node_set, node)) {
|
185
185
|
xpath_node_set_del(node_set, node);
|
@@ -228,7 +228,7 @@ include_eh(VALUE self, VALUE rb_node)
|
|
228
228
|
Check_Node_Set_Node_Type(rb_node);
|
229
229
|
|
230
230
|
Data_Get_Struct(self, xmlNodeSet, node_set);
|
231
|
-
|
231
|
+
Noko_Node_Get_Struct(rb_node, xmlNode, node);
|
232
232
|
|
233
233
|
return (xmlXPathNodeSetContains(node_set, node) ? Qtrue : Qfalse);
|
234
234
|
}
|
@@ -373,12 +373,12 @@ slice(int argc, VALUE *argv, VALUE self)
|
|
373
373
|
|
374
374
|
/* if arg is Range */
|
375
375
|
switch (rb_range_beg_len(arg, &beg, &len, (long)node_set->nodeNr, 0)) {
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
376
|
+
case Qfalse:
|
377
|
+
break;
|
378
|
+
case Qnil:
|
379
|
+
return Qnil;
|
380
|
+
default:
|
381
|
+
return subseq(self, beg, len);
|
382
382
|
}
|
383
383
|
|
384
384
|
return index_at(self, NUM2LONG(arg));
|
@@ -430,7 +430,7 @@ unlink_nodeset(VALUE self)
|
|
430
430
|
xmlNodePtr node_ptr;
|
431
431
|
node = noko_xml_node_wrap(Qnil, node_set->nodeTab[j]);
|
432
432
|
rb_funcall(node, rb_intern("unlink"), 0); /* modifies the C struct out from under the object */
|
433
|
-
|
433
|
+
Noko_Node_Get_Struct(node, xmlNode, node_ptr);
|
434
434
|
node_set->nodeTab[j] = node_ptr ;
|
435
435
|
}
|
436
436
|
}
|
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -31,6 +31,7 @@ has_attributes(xmlTextReaderPtr reader)
|
|
31
31
|
return (0);
|
32
32
|
}
|
33
33
|
|
34
|
+
// TODO: merge this function into the `namespaces` method implementation
|
34
35
|
static void
|
35
36
|
Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
|
36
37
|
{
|
@@ -148,9 +149,13 @@ namespaces(VALUE self)
|
|
148
149
|
}
|
149
150
|
|
150
151
|
/*
|
151
|
-
|
152
|
-
|
153
|
-
|
152
|
+
:call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
|
153
|
+
|
154
|
+
Get the attributes of the current node as an Array of XML::Attr
|
155
|
+
|
156
|
+
⚠ This method is deprecated and unsafe to use. It will be removed in a future version of Nokogiri.
|
157
|
+
|
158
|
+
See related: #attribute_hash, #attributes
|
154
159
|
*/
|
155
160
|
static VALUE
|
156
161
|
rb_xml_reader_attribute_nodes(VALUE rb_reader)
|
@@ -160,6 +165,10 @@ rb_xml_reader_attribute_nodes(VALUE rb_reader)
|
|
160
165
|
VALUE attr_nodes;
|
161
166
|
int j;
|
162
167
|
|
168
|
+
// TODO: deprecated, remove in Nokogiri v1.15, see https://github.com/sparklemotion/nokogiri/issues/2598
|
169
|
+
// After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c
|
170
|
+
NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead.");
|
171
|
+
|
163
172
|
Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
|
164
173
|
|
165
174
|
if (! has_attributes(c_reader)) {
|
@@ -181,6 +190,47 @@ rb_xml_reader_attribute_nodes(VALUE rb_reader)
|
|
181
190
|
return attr_nodes;
|
182
191
|
}
|
183
192
|
|
193
|
+
/*
|
194
|
+
:call-seq: attribute_hash() → Hash<String ⇒ String>
|
195
|
+
|
196
|
+
Get the attributes of the current node as a Hash of names and values.
|
197
|
+
|
198
|
+
See related: #attributes and #namespaces
|
199
|
+
*/
|
200
|
+
static VALUE
|
201
|
+
rb_xml_reader_attribute_hash(VALUE rb_reader)
|
202
|
+
{
|
203
|
+
VALUE rb_attributes = rb_hash_new();
|
204
|
+
xmlTextReaderPtr c_reader;
|
205
|
+
xmlNodePtr c_node;
|
206
|
+
xmlAttrPtr c_property;
|
207
|
+
|
208
|
+
Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
|
209
|
+
|
210
|
+
if (!has_attributes(c_reader)) {
|
211
|
+
return rb_attributes;
|
212
|
+
}
|
213
|
+
|
214
|
+
c_node = xmlTextReaderExpand(c_reader);
|
215
|
+
c_property = c_node->properties;
|
216
|
+
while (c_property != NULL) {
|
217
|
+
VALUE rb_name = NOKOGIRI_STR_NEW2(c_property->name);
|
218
|
+
VALUE rb_value = Qnil;
|
219
|
+
xmlChar *c_value = xmlNodeGetContent((xmlNode *)c_property);
|
220
|
+
|
221
|
+
if (c_value) {
|
222
|
+
rb_value = NOKOGIRI_STR_NEW2(c_value);
|
223
|
+
xmlFree(c_value);
|
224
|
+
}
|
225
|
+
|
226
|
+
rb_hash_aset(rb_attributes, rb_name, rb_value);
|
227
|
+
|
228
|
+
c_property = c_property->next;
|
229
|
+
}
|
230
|
+
|
231
|
+
return rb_attributes;
|
232
|
+
}
|
233
|
+
|
184
234
|
/*
|
185
235
|
* call-seq:
|
186
236
|
* attribute_at(index)
|
@@ -414,16 +464,23 @@ name(VALUE self)
|
|
414
464
|
* Get the xml:base of the node
|
415
465
|
*/
|
416
466
|
static VALUE
|
417
|
-
|
467
|
+
rb_xml_reader_base_uri(VALUE rb_reader)
|
418
468
|
{
|
419
|
-
|
420
|
-
|
469
|
+
VALUE rb_base_uri;
|
470
|
+
xmlTextReaderPtr c_reader;
|
471
|
+
xmlChar *c_base_uri;
|
421
472
|
|
422
|
-
Data_Get_Struct(
|
423
|
-
|
424
|
-
|
473
|
+
Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
|
474
|
+
|
475
|
+
c_base_uri = xmlTextReaderBaseUri(c_reader);
|
476
|
+
if (c_base_uri == NULL) {
|
477
|
+
return Qnil;
|
478
|
+
}
|
479
|
+
|
480
|
+
rb_base_uri = NOKOGIRI_STR_NEW2(c_base_uri);
|
481
|
+
xmlFree(c_base_uri);
|
425
482
|
|
426
|
-
return
|
483
|
+
return rb_base_uri;
|
427
484
|
}
|
428
485
|
|
429
486
|
/*
|
@@ -652,6 +709,24 @@ empty_element_p(VALUE self)
|
|
652
709
|
return Qfalse;
|
653
710
|
}
|
654
711
|
|
712
|
+
static VALUE
|
713
|
+
rb_xml_reader_encoding(VALUE rb_reader)
|
714
|
+
{
|
715
|
+
xmlTextReaderPtr c_reader;
|
716
|
+
const char *parser_encoding;
|
717
|
+
VALUE constructor_encoding;
|
718
|
+
|
719
|
+
constructor_encoding = rb_iv_get(rb_reader, "@encoding");
|
720
|
+
if (RTEST(constructor_encoding)) {
|
721
|
+
return constructor_encoding;
|
722
|
+
}
|
723
|
+
|
724
|
+
Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
|
725
|
+
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
726
|
+
if (parser_encoding == NULL) { return Qnil; }
|
727
|
+
return NOKOGIRI_STR_NEW2(parser_encoding);
|
728
|
+
}
|
729
|
+
|
655
730
|
void
|
656
731
|
noko_init_xml_reader()
|
657
732
|
{
|
@@ -662,6 +737,8 @@ noko_init_xml_reader()
|
|
662
737
|
*/
|
663
738
|
cNokogiriXmlReader = rb_define_class_under(mNokogiriXml, "Reader", rb_cObject);
|
664
739
|
|
740
|
+
rb_undef_alloc_func(cNokogiriXmlReader);
|
741
|
+
|
665
742
|
rb_define_singleton_method(cNokogiriXmlReader, "from_memory", from_memory, -1);
|
666
743
|
rb_define_singleton_method(cNokogiriXmlReader, "from_io", from_io, -1);
|
667
744
|
|
@@ -669,11 +746,13 @@ noko_init_xml_reader()
|
|
669
746
|
rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1);
|
670
747
|
rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
|
671
748
|
rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0);
|
749
|
+
rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0);
|
672
750
|
rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
|
673
|
-
rb_define_method(cNokogiriXmlReader, "base_uri",
|
751
|
+
rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
|
674
752
|
rb_define_method(cNokogiriXmlReader, "default?", default_eh, 0);
|
675
753
|
rb_define_method(cNokogiriXmlReader, "depth", depth, 0);
|
676
754
|
rb_define_method(cNokogiriXmlReader, "empty_element?", empty_element_p, 0);
|
755
|
+
rb_define_method(cNokogiriXmlReader, "encoding", rb_xml_reader_encoding, 0);
|
677
756
|
rb_define_method(cNokogiriXmlReader, "inner_xml", inner_xml, 0);
|
678
757
|
rb_define_method(cNokogiriXmlReader, "lang", lang, 0);
|
679
758
|
rb_define_method(cNokogiriXmlReader, "local_name", local_name, 0);
|
@@ -30,12 +30,12 @@ start_document(void *ctx)
|
|
30
30
|
version = ctxt->version ? NOKOGIRI_STR_NEW2(ctxt->version) : Qnil;
|
31
31
|
|
32
32
|
switch (ctxt->standalone) {
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
33
|
+
case 0:
|
34
|
+
standalone = NOKOGIRI_STR_NEW2("no");
|
35
|
+
break;
|
36
|
+
case 1:
|
37
|
+
standalone = NOKOGIRI_STR_NEW2("yes");
|
38
|
+
break;
|
39
39
|
}
|
40
40
|
|
41
41
|
rb_funcall(doc, id_xmldecl, 3, version, encoding, standalone);
|
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
VALUE cNokogiriXmlSaxParserContext ;
|
4
4
|
|
5
|
+
static ID id_read;
|
6
|
+
|
5
7
|
static void
|
6
8
|
deallocate(xmlParserCtxtPtr ctxt)
|
7
9
|
{
|
@@ -26,6 +28,10 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
|
|
26
28
|
xmlParserCtxtPtr ctxt;
|
27
29
|
xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
|
28
30
|
|
31
|
+
if (!rb_respond_to(io, id_read)) {
|
32
|
+
rb_raise(rb_eTypeError, "argument expected to respond to :read");
|
33
|
+
}
|
34
|
+
|
29
35
|
ctxt = xmlCreateIOParserCtxt(NULL, NULL,
|
30
36
|
(xmlInputReadCallback)noko_io_read,
|
31
37
|
(xmlInputCloseCallback)noko_io_close,
|
@@ -62,9 +68,8 @@ parse_memory(VALUE klass, VALUE data)
|
|
62
68
|
{
|
63
69
|
xmlParserCtxtPtr ctxt;
|
64
70
|
|
65
|
-
|
66
|
-
|
67
|
-
}
|
71
|
+
Check_Type(data, T_STRING);
|
72
|
+
|
68
73
|
if (!(int)RSTRING_LEN(data)) {
|
69
74
|
rb_raise(rb_eRuntimeError, "data cannot be empty");
|
70
75
|
}
|
@@ -265,6 +270,8 @@ noko_init_xml_sax_parser_context()
|
|
265
270
|
{
|
266
271
|
cNokogiriXmlSaxParserContext = rb_define_class_under(mNokogiriXmlSax, "ParserContext", rb_cObject);
|
267
272
|
|
273
|
+
rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
|
274
|
+
|
268
275
|
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "io", parse_io, 2);
|
269
276
|
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "memory", parse_memory, 1);
|
270
277
|
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "file", parse_file, 1);
|
@@ -276,4 +283,6 @@ noko_init_xml_sax_parser_context()
|
|
276
283
|
rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
|
277
284
|
rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
|
278
285
|
rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);
|
286
|
+
|
287
|
+
id_read = rb_intern("read");
|
279
288
|
}
|
data/ext/nokogiri/xml_schema.c
CHANGED
@@ -25,7 +25,7 @@ validate_document(VALUE self, VALUE document)
|
|
25
25
|
VALUE errors;
|
26
26
|
|
27
27
|
Data_Get_Struct(self, xmlSchema, schema);
|
28
|
-
|
28
|
+
Noko_Node_Get_Struct(document, xmlDoc, doc);
|
29
29
|
|
30
30
|
errors = rb_ary_new();
|
31
31
|
|
@@ -179,7 +179,7 @@ has_blank_nodes_p(VALUE cache)
|
|
179
179
|
for (i = 0; i < RARRAY_LEN(cache); i++) {
|
180
180
|
xmlNodePtr node;
|
181
181
|
VALUE element = rb_ary_entry(cache, i);
|
182
|
-
|
182
|
+
Noko_Node_Get_Struct(element, xmlNode, node);
|
183
183
|
if (xmlIsBlankNode(node)) {
|
184
184
|
return 1;
|
185
185
|
}
|
@@ -210,7 +210,7 @@ from_document(int argc, VALUE *argv, VALUE klass)
|
|
210
210
|
|
211
211
|
scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
|
212
212
|
|
213
|
-
|
213
|
+
Noko_Node_Get_Struct(document, xmlDoc, doc);
|
214
214
|
doc = doc->doc; /* In case someone passes us a node. ugh. */
|
215
215
|
|
216
216
|
if (scanned_args == 1) {
|
@@ -274,6 +274,8 @@ noko_init_xml_schema()
|
|
274
274
|
{
|
275
275
|
cNokogiriXmlSchema = rb_define_class_under(mNokogiriXml, "Schema", rb_cObject);
|
276
276
|
|
277
|
+
rb_undef_alloc_func(cNokogiriXmlSchema);
|
278
|
+
|
277
279
|
rb_define_singleton_method(cNokogiriXmlSchema, "read_memory", read_memory, -1);
|
278
280
|
rb_define_singleton_method(cNokogiriXmlSchema, "from_document", from_document, -1);
|
279
281
|
|
data/ext/nokogiri/xml_text.c
CHANGED
@@ -20,7 +20,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
20
20
|
|
21
21
|
rb_scan_args(argc, argv, "2*", &string, &document, &rest);
|
22
22
|
|
23
|
-
|
23
|
+
Noko_Node_Get_Struct(document, xmlDoc, doc);
|
24
24
|
|
25
25
|
node = xmlNewText((xmlChar *)StringValueCStr(string));
|
26
26
|
node->doc = doc->doc;
|
@@ -86,6 +86,26 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
|
|
86
86
|
xmlXPathFreeObject(needle);
|
87
87
|
}
|
88
88
|
|
89
|
+
|
90
|
+
/* xmlXPathFunction to select nodes whose local name matches, for HTML5 CSS queries that should ignore namespaces */
|
91
|
+
static void
|
92
|
+
xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
|
93
|
+
{
|
94
|
+
xmlXPathObjectPtr element_name;
|
95
|
+
|
96
|
+
assert(ctxt->context->node);
|
97
|
+
|
98
|
+
CHECK_ARITY(1);
|
99
|
+
CAST_TO_STRING;
|
100
|
+
CHECK_TYPE(XPATH_STRING);
|
101
|
+
element_name = valuePop(ctxt);
|
102
|
+
|
103
|
+
valuePush(ctxt, xmlXPathNewBoolean(xmlStrEqual(ctxt->context->node->name, element_name->stringval)));
|
104
|
+
|
105
|
+
xmlXPathFreeObject(element_name);
|
106
|
+
}
|
107
|
+
|
108
|
+
|
89
109
|
/*
|
90
110
|
* call-seq:
|
91
111
|
* register_ns(prefix, uri)
|
@@ -128,11 +148,44 @@ register_variable(VALUE self, VALUE name, VALUE value)
|
|
128
148
|
return self;
|
129
149
|
}
|
130
150
|
|
151
|
+
|
152
|
+
/*
|
153
|
+
* convert an XPath object into a Ruby object of the appropriate type.
|
154
|
+
* returns Qundef if no conversion was possible.
|
155
|
+
*/
|
156
|
+
static VALUE
|
157
|
+
xpath2ruby(xmlXPathObjectPtr xobj, xmlXPathContextPtr xctx)
|
158
|
+
{
|
159
|
+
VALUE retval;
|
160
|
+
|
161
|
+
assert(xctx->doc);
|
162
|
+
assert(DOC_RUBY_OBJECT_TEST(xctx->doc));
|
163
|
+
|
164
|
+
switch (xobj->type) {
|
165
|
+
case XPATH_STRING:
|
166
|
+
retval = NOKOGIRI_STR_NEW2(xobj->stringval);
|
167
|
+
xmlFree(xobj->stringval);
|
168
|
+
return retval;
|
169
|
+
|
170
|
+
case XPATH_NODESET:
|
171
|
+
return noko_xml_node_set_wrap(xobj->nodesetval,
|
172
|
+
DOC_RUBY_OBJECT(xctx->doc));
|
173
|
+
|
174
|
+
case XPATH_NUMBER:
|
175
|
+
return rb_float_new(xobj->floatval);
|
176
|
+
|
177
|
+
case XPATH_BOOLEAN:
|
178
|
+
return (xobj->boolval == 1) ? Qtrue : Qfalse;
|
179
|
+
|
180
|
+
default:
|
181
|
+
return Qundef;
|
182
|
+
}
|
183
|
+
}
|
184
|
+
|
131
185
|
void
|
132
186
|
Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
|
133
187
|
const char *function_name)
|
134
188
|
{
|
135
|
-
int i;
|
136
189
|
VALUE result, doc;
|
137
190
|
VALUE *argv;
|
138
191
|
VALUE node_set = Qnil;
|
@@ -143,81 +196,66 @@ Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, i
|
|
143
196
|
assert(DOC_RUBY_OBJECT_TEST(ctx->context->doc));
|
144
197
|
|
145
198
|
argv = (VALUE *)calloc((size_t)nargs, sizeof(VALUE));
|
146
|
-
for (
|
147
|
-
rb_gc_register_address(&argv[
|
199
|
+
for (int j = 0 ; j < nargs ; ++j) {
|
200
|
+
rb_gc_register_address(&argv[j]);
|
148
201
|
}
|
149
202
|
|
150
203
|
doc = DOC_RUBY_OBJECT(ctx->context->doc);
|
151
204
|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
break;
|
160
|
-
case XPATH_BOOLEAN:
|
161
|
-
argv[i] = obj->boolval == 1 ? Qtrue : Qfalse;
|
162
|
-
break;
|
163
|
-
case XPATH_NUMBER:
|
164
|
-
argv[i] = rb_float_new(obj->floatval);
|
165
|
-
break;
|
166
|
-
case XPATH_NODESET:
|
167
|
-
argv[i] = noko_xml_node_set_wrap(obj->nodesetval, doc);
|
168
|
-
break;
|
169
|
-
default:
|
170
|
-
argv[i] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj));
|
171
|
-
}
|
172
|
-
xmlXPathFreeNodeSetList(obj);
|
173
|
-
} while (i-- > 0);
|
205
|
+
for (int j = nargs - 1 ; j >= 0 ; --j) {
|
206
|
+
obj = valuePop(ctx);
|
207
|
+
argv[j] = xpath2ruby(obj, ctx->context);
|
208
|
+
if (argv[j] == Qundef) {
|
209
|
+
argv[j] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj));
|
210
|
+
}
|
211
|
+
xmlXPathFreeNodeSetList(obj);
|
174
212
|
}
|
175
213
|
|
176
214
|
result = rb_funcall2(handler, rb_intern((const char *)function_name), nargs, argv);
|
177
215
|
|
178
|
-
for (
|
179
|
-
rb_gc_unregister_address(&argv[
|
216
|
+
for (int j = 0 ; j < nargs ; ++j) {
|
217
|
+
rb_gc_unregister_address(&argv[j]);
|
180
218
|
}
|
181
219
|
free(argv);
|
182
220
|
|
183
221
|
switch (TYPE(result)) {
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
break;
|
195
|
-
case T_TRUE:
|
196
|
-
xmlXPathReturnTrue(ctx);
|
197
|
-
break;
|
198
|
-
case T_FALSE:
|
199
|
-
xmlXPathReturnFalse(ctx);
|
200
|
-
break;
|
201
|
-
case T_NIL:
|
202
|
-
break;
|
203
|
-
case T_ARRAY: {
|
204
|
-
VALUE args[2];
|
205
|
-
args[0] = doc;
|
206
|
-
args[1] = result;
|
207
|
-
node_set = rb_class_new_instance(2, args, cNokogiriXmlNodeSet);
|
208
|
-
Data_Get_Struct(node_set, xmlNodeSet, xml_node_set);
|
209
|
-
xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set));
|
210
|
-
}
|
211
|
-
break;
|
212
|
-
case T_DATA:
|
213
|
-
if (rb_obj_is_kind_of(result, cNokogiriXmlNodeSet)) {
|
214
|
-
Data_Get_Struct(result, xmlNodeSet, xml_node_set);
|
215
|
-
/* Copy the node set, otherwise it will get GC'd. */
|
216
|
-
xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set));
|
222
|
+
case T_FLOAT:
|
223
|
+
case T_BIGNUM:
|
224
|
+
case T_FIXNUM:
|
225
|
+
xmlXPathReturnNumber(ctx, NUM2DBL(result));
|
226
|
+
break;
|
227
|
+
case T_STRING:
|
228
|
+
xmlXPathReturnString(
|
229
|
+
ctx,
|
230
|
+
xmlCharStrdup(StringValueCStr(result))
|
231
|
+
);
|
217
232
|
break;
|
233
|
+
case T_TRUE:
|
234
|
+
xmlXPathReturnTrue(ctx);
|
235
|
+
break;
|
236
|
+
case T_FALSE:
|
237
|
+
xmlXPathReturnFalse(ctx);
|
238
|
+
break;
|
239
|
+
case T_NIL:
|
240
|
+
break;
|
241
|
+
case T_ARRAY: {
|
242
|
+
VALUE args[2];
|
243
|
+
args[0] = doc;
|
244
|
+
args[1] = result;
|
245
|
+
node_set = rb_class_new_instance(2, args, cNokogiriXmlNodeSet);
|
246
|
+
Data_Get_Struct(node_set, xmlNodeSet, xml_node_set);
|
247
|
+
xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set));
|
218
248
|
}
|
219
|
-
|
220
|
-
|
249
|
+
break;
|
250
|
+
case T_DATA:
|
251
|
+
if (rb_obj_is_kind_of(result, cNokogiriXmlNodeSet)) {
|
252
|
+
Data_Get_Struct(result, xmlNodeSet, xml_node_set);
|
253
|
+
/* Copy the node set, otherwise it will get GC'd. */
|
254
|
+
xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set));
|
255
|
+
break;
|
256
|
+
}
|
257
|
+
default:
|
258
|
+
rb_raise(rb_eRuntimeError, "Invalid return type");
|
221
259
|
}
|
222
260
|
}
|
223
261
|
|
@@ -275,7 +313,7 @@ static VALUE
|
|
275
313
|
evaluate(int argc, VALUE *argv, VALUE self)
|
276
314
|
{
|
277
315
|
VALUE search_path, xpath_handler;
|
278
|
-
VALUE
|
316
|
+
VALUE retval = Qnil;
|
279
317
|
xmlXPathContextPtr ctx;
|
280
318
|
xmlXPathObjectPtr xpath;
|
281
319
|
xmlChar *query;
|
@@ -310,31 +348,14 @@ evaluate(int argc, VALUE *argv, VALUE self)
|
|
310
348
|
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
311
349
|
}
|
312
350
|
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
switch (xpath->type) {
|
317
|
-
case XPATH_STRING:
|
318
|
-
thing = NOKOGIRI_STR_NEW2(xpath->stringval);
|
319
|
-
xmlFree(xpath->stringval);
|
320
|
-
break;
|
321
|
-
case XPATH_NODESET:
|
322
|
-
thing = noko_xml_node_set_wrap(xpath->nodesetval,
|
323
|
-
DOC_RUBY_OBJECT(ctx->doc));
|
324
|
-
break;
|
325
|
-
case XPATH_NUMBER:
|
326
|
-
thing = rb_float_new(xpath->floatval);
|
327
|
-
break;
|
328
|
-
case XPATH_BOOLEAN:
|
329
|
-
thing = xpath->boolval == 1 ? Qtrue : Qfalse;
|
330
|
-
break;
|
331
|
-
default:
|
332
|
-
thing = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc));
|
351
|
+
retval = xpath2ruby(xpath, ctx);
|
352
|
+
if (retval == Qundef) {
|
353
|
+
retval = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc));
|
333
354
|
}
|
334
355
|
|
335
356
|
xmlXPathFreeNodeSetList(xpath);
|
336
357
|
|
337
|
-
return
|
358
|
+
return retval;
|
338
359
|
}
|
339
360
|
|
340
361
|
/*
|
@@ -350,7 +371,7 @@ new (VALUE klass, VALUE nodeobj)
|
|
350
371
|
xmlXPathContextPtr ctx;
|
351
372
|
VALUE self;
|
352
373
|
|
353
|
-
|
374
|
+
Noko_Node_Get_Struct(nodeobj, xmlNode, node);
|
354
375
|
|
355
376
|
xmlXPathInit();
|
356
377
|
|
@@ -360,6 +381,8 @@ new (VALUE klass, VALUE nodeobj)
|
|
360
381
|
xmlXPathRegisterNs(ctx, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
|
361
382
|
xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI,
|
362
383
|
xpath_builtin_css_class);
|
384
|
+
xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"local-name-is", NOKOGIRI_BUILTIN_URI,
|
385
|
+
xpath_builtin_local_name_is);
|
363
386
|
|
364
387
|
self = Data_Wrap_Struct(klass, 0, deallocate, ctx);
|
365
388
|
return self;
|
@@ -373,6 +396,8 @@ noko_init_xml_xpath_context(void)
|
|
373
396
|
*/
|
374
397
|
cNokogiriXmlXpathContext = rb_define_class_under(mNokogiriXml, "XPathContext", rb_cObject);
|
375
398
|
|
399
|
+
rb_undef_alloc_func(cNokogiriXmlXpathContext);
|
400
|
+
|
376
401
|
rb_define_singleton_method(cNokogiriXmlXpathContext, "new", new, 1);
|
377
402
|
|
378
403
|
rb_define_method(cNokogiriXmlXpathContext, "evaluate", evaluate, -1);
|