nokogiri 1.0.0 → 1.6.8.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/.autotest +26 -0
- data/.cross_rubies +9 -0
- data/.editorconfig +17 -0
- data/.gemtest +0 -0
- data/.travis.yml +51 -0
- data/CHANGELOG.rdoc +1160 -0
- data/CONTRIBUTING.md +42 -0
- data/C_CODING_STYLE.rdoc +33 -0
- data/Gemfile +22 -0
- data/LICENSE.txt +31 -0
- data/Manifest.txt +284 -40
- data/README.md +166 -0
- data/ROADMAP.md +111 -0
- data/Rakefile +310 -199
- data/STANDARD_RESPONSES.md +47 -0
- data/Y_U_NO_GEMSPEC.md +155 -0
- data/appveyor.yml +22 -0
- data/bin/nokogiri +118 -0
- data/build_all +45 -0
- data/dependencies.yml +29 -0
- data/ext/nokogiri/depend +358 -0
- data/ext/nokogiri/extconf.rb +664 -34
- data/ext/nokogiri/html_document.c +120 -33
- data/ext/nokogiri/html_document.h +1 -1
- data/ext/nokogiri/html_element_description.c +279 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +116 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/html_sax_push_parser.c +87 -0
- data/ext/nokogiri/html_sax_push_parser.h +9 -0
- data/ext/nokogiri/nokogiri.c +145 -0
- data/ext/nokogiri/nokogiri.h +131 -0
- data/ext/nokogiri/xml_attr.c +94 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +23 -19
- data/ext/nokogiri/xml_cdata.h +1 -1
- data/ext/nokogiri/xml_comment.c +69 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +501 -54
- data/ext/nokogiri/xml_document.h +14 -1
- data/ext/nokogiri/xml_document_fragment.c +48 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +109 -24
- data/ext/nokogiri/xml_dtd.h +3 -1
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_encoding_handler.c +79 -0
- data/ext/nokogiri/xml_encoding_handler.h +8 -0
- data/ext/nokogiri/xml_entity_decl.c +110 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +52 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +60 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
- data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
- data/ext/nokogiri/xml_namespace.c +117 -0
- data/ext/nokogiri/xml_namespace.h +13 -0
- data/ext/nokogiri/xml_node.c +1285 -315
- data/ext/nokogiri/xml_node.h +4 -6
- data/ext/nokogiri/xml_node_set.c +415 -54
- data/ext/nokogiri/xml_node_set.h +6 -2
- data/ext/nokogiri/xml_processing_instruction.c +56 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +316 -77
- data/ext/nokogiri/xml_reader.h +1 -1
- data/ext/nokogiri/xml_relax_ng.c +161 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +215 -80
- data/ext/nokogiri/xml_sax_parser.h +30 -1
- data/ext/nokogiri/xml_sax_parser_context.c +262 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +115 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +205 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +45 -175
- data/ext/nokogiri/xml_syntax_error.h +4 -2
- data/ext/nokogiri/xml_text.c +37 -14
- data/ext/nokogiri/xml_text.h +1 -1
- data/ext/nokogiri/xml_xpath_context.c +230 -13
- data/ext/nokogiri/xml_xpath_context.h +2 -1
- data/ext/nokogiri/xslt_stylesheet.c +196 -34
- data/ext/nokogiri/xslt_stylesheet.h +6 -1
- data/lib/nokogiri/css/node.rb +18 -61
- data/lib/nokogiri/css/parser.rb +725 -17
- data/lib/nokogiri/css/parser.y +126 -63
- data/lib/nokogiri/css/parser_extras.rb +91 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +148 -5
- data/lib/nokogiri/css/tokenizer.rex +31 -39
- data/lib/nokogiri/css/xpath_visitor.rb +109 -51
- data/lib/nokogiri/css.rb +24 -3
- data/lib/nokogiri/decorators/slop.rb +42 -0
- data/lib/nokogiri/html/builder.rb +27 -1
- data/lib/nokogiri/html/document.rb +329 -3
- data/lib/nokogiri/html/document_fragment.rb +39 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/element_description_defaults.rb +671 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +35 -4
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/html/sax/push_parser.rb +36 -0
- data/lib/nokogiri/html.rb +18 -76
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +106 -1
- data/lib/nokogiri/xml/attr.rb +14 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +395 -31
- data/lib/nokogiri/xml/cdata.rb +4 -2
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +267 -12
- data/lib/nokogiri/xml/document_fragment.rb +149 -0
- data/lib/nokogiri/xml/dtd.rb +27 -1
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +19 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node/save_options.rb +61 -0
- data/lib/nokogiri/xml/node.rb +748 -109
- data/lib/nokogiri/xml/node_set.rb +200 -72
- data/lib/nokogiri/xml/parse_options.rb +120 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +102 -4
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax/document.rb +114 -2
- data/lib/nokogiri/xml/sax/parser.rb +97 -7
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/sax.rb +2 -7
- data/lib/nokogiri/xml/schema.rb +63 -0
- data/lib/nokogiri/xml/searchable.rb +221 -0
- data/lib/nokogiri/xml/syntax_error.rb +27 -1
- data/lib/nokogiri/xml/text.rb +4 -1
- data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
- data/lib/nokogiri/xml/xpath.rb +4 -0
- data/lib/nokogiri/xml/xpath_context.rb +3 -1
- data/lib/nokogiri/xml.rb +45 -38
- data/lib/nokogiri/xslt/stylesheet.rb +19 -0
- data/lib/nokogiri/xslt.rb +47 -2
- data/lib/nokogiri.rb +117 -24
- data/lib/xsd/xmlparser/nokogiri.rb +102 -0
- data/patches/sort-patches-by-date +25 -0
- data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
- data/suppressions/README.txt +1 -0
- data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
- data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
- data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
- data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
- data/tasks/test.rb +100 -0
- data/test/css/test_nthiness.rb +73 -6
- data/test/css/test_parser.rb +184 -39
- data/test/css/test_tokenizer.rb +72 -19
- data/test/css/test_xpath_visitor.rb +44 -2
- data/test/decorators/test_slop.rb +20 -0
- data/test/files/2ch.html +108 -0
- data/test/files/GH_1042.html +18 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/atom.xml +344 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/bogus.xml +0 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/encoding.html +82 -0
- data/test/files/encoding.xhtml +84 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/metacharset.html +10 -0
- data/test/files/namespace_pressure_test.xml +1684 -0
- data/test/files/noencoding.html +47 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/saml/saml20assertion_schema.xsd +283 -0
- data/test/files/saml/saml20protocol_schema.xsd +302 -0
- data/test/files/saml/xenc_schema.xsd +146 -0
- data/test/files/saml/xmldsig_schema.xsd +318 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/shift_jis_no_charset.html +9 -0
- data/test/files/slow-xpath.xml +25509 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/test_document_url/bar.xml +2 -0
- data/test/files/test_document_url/document.dtd +4 -0
- data/test/files/test_document_url/document.xml +6 -0
- data/test/files/tlm.html +2 -1
- data/test/files/to_be_xincluded.xml +2 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/files/xinclude.xml +4 -0
- data/test/helper.rb +124 -13
- data/test/html/sax/test_parser.rb +118 -4
- data/test/html/sax/test_parser_context.rb +46 -0
- data/test/html/sax/test_push_parser.rb +87 -0
- data/test/html/test_builder.rb +94 -8
- data/test/html/test_document.rb +626 -11
- data/test/html/test_document_encoding.rb +145 -0
- data/test/html/test_document_fragment.rb +301 -0
- data/test/html/test_element_description.rb +105 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +212 -0
- data/test/html/test_node_encoding.rb +85 -0
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
- data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
- data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
- data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
- data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +80 -0
- data/test/namespaces/test_namespaces_preservation.rb +31 -0
- data/test/test_convert_xpath.rb +2 -47
- data/test/test_css_cache.rb +45 -0
- data/test/test_encoding_handler.rb +48 -0
- data/test/test_memory_leak.rb +156 -0
- data/test/test_nokogiri.rb +103 -1
- data/test/test_soap4r_sax.rb +52 -0
- data/test/test_xslt_transforms.rb +293 -8
- data/test/xml/node/test_save_options.rb +28 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +309 -8
- data/test/xml/sax/test_parser_context.rb +115 -0
- data/test/xml/sax/test_push_parser.rb +157 -0
- data/test/xml/test_attr.rb +67 -0
- data/test/xml/test_attribute_decl.rb +86 -0
- data/test/xml/test_builder.rb +327 -2
- data/test/xml/test_c14n.rb +180 -0
- data/test/xml/test_cdata.rb +32 -2
- data/test/xml/test_comment.rb +40 -0
- data/test/xml/test_document.rb +846 -35
- data/test/xml/test_document_encoding.rb +31 -0
- data/test/xml/test_document_fragment.rb +271 -0
- data/test/xml/test_dtd.rb +153 -9
- data/test/xml/test_dtd_encoding.rb +31 -0
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +122 -0
- data/test/xml/test_entity_reference.rb +251 -0
- data/test/xml/test_namespace.rb +96 -0
- data/test/xml/test_node.rb +1126 -105
- data/test/xml/test_node_attributes.rb +115 -0
- data/test/xml/test_node_encoding.rb +69 -0
- data/test/xml/test_node_inheritance.rb +32 -0
- data/test/xml/test_node_reparenting.rb +549 -0
- data/test/xml/test_node_set.rb +668 -9
- data/test/xml/test_parse_options.rb +64 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader.rb +589 -0
- data/test/xml/test_reader_encoding.rb +134 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +142 -0
- data/test/xml/test_syntax_error.rb +30 -0
- data/test/xml/test_text.rb +49 -2
- data/test/xml/test_unparented_node.rb +440 -0
- data/test/xml/test_xinclude.rb +83 -0
- data/test/xml/test_xpath.rb +445 -0
- data/test/xslt/test_custom_functions.rb +133 -0
- data/test/xslt/test_exception_handling.rb +37 -0
- data/test_all +107 -0
- metadata +459 -115
- data/History.txt +0 -6
- data/README.ja.txt +0 -86
- data/README.txt +0 -87
- data/ext/nokogiri/html_sax_parser.c +0 -32
- data/ext/nokogiri/html_sax_parser.h +0 -11
- data/ext/nokogiri/native.c +0 -40
- data/ext/nokogiri/native.h +0 -51
- data/ext/nokogiri/xml_xpath.c +0 -46
- data/ext/nokogiri/xml_xpath.h +0 -11
- data/lib/nokogiri/css/generated_parser.rb +0 -653
- data/lib/nokogiri/css/generated_tokenizer.rb +0 -159
- data/lib/nokogiri/decorators/hpricot/node.rb +0 -58
- data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -14
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -17
- data/lib/nokogiri/decorators/hpricot.rb +0 -3
- data/lib/nokogiri/decorators.rb +0 -1
- data/lib/nokogiri/hpricot.rb +0 -47
- data/lib/nokogiri/xml/after_handler.rb +0 -18
- data/lib/nokogiri/xml/before_handler.rb +0 -32
- data/lib/nokogiri/xml/element.rb +0 -6
- data/lib/nokogiri/xml/entity_declaration.rb +0 -9
- data/nokogiri.gemspec +0 -34
- data/test/hpricot/files/basic.xhtml +0 -17
- data/test/hpricot/files/boingboing.html +0 -2266
- data/test/hpricot/files/cy0.html +0 -3653
- data/test/hpricot/files/immob.html +0 -400
- data/test/hpricot/files/pace_application.html +0 -1320
- data/test/hpricot/files/tenderlove.html +0 -16
- data/test/hpricot/files/uswebgen.html +0 -220
- data/test/hpricot/files/utf8.html +0 -1054
- data/test/hpricot/files/week9.html +0 -1723
- data/test/hpricot/files/why.xml +0 -19
- data/test/hpricot/load_files.rb +0 -7
- data/test/hpricot/test_alter.rb +0 -67
- data/test/hpricot/test_builder.rb +0 -27
- data/test/hpricot/test_parser.rb +0 -423
- data/test/hpricot/test_paths.rb +0 -15
- data/test/hpricot/test_preserved.rb +0 -78
- data/test/hpricot/test_xml.rb +0 -30
- data/test/test_reader.rb +0 -222
data/ext/nokogiri/xml_node.h
CHANGED
@@ -1,15 +1,13 @@
|
|
1
1
|
#ifndef NOKOGIRI_XML_NODE
|
2
2
|
#define NOKOGIRI_XML_NODE
|
3
3
|
|
4
|
-
#include <
|
4
|
+
#include <nokogiri.h>
|
5
5
|
|
6
6
|
void init_xml_node();
|
7
|
-
VALUE Nokogiri_wrap_xml_node(xmlNodePtr root);
|
8
7
|
|
9
8
|
extern VALUE cNokogiriXmlNode ;
|
10
|
-
VALUE
|
9
|
+
extern VALUE cNokogiriXmlElement ;
|
10
|
+
|
11
|
+
VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node) ;
|
11
12
|
void Nokogiri_xml_node_properties(xmlNodePtr node, VALUE attr_hash) ;
|
12
|
-
void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash) ;
|
13
|
-
int Nokogiri_xml_node_owned_get(xmlNodePtr node) ;
|
14
|
-
void Nokogiri_xml_node_owned_set(xmlNodePtr node) ;
|
15
13
|
#endif
|
data/ext/nokogiri/xml_node_set.c
CHANGED
@@ -1,5 +1,69 @@
|
|
1
1
|
#include <xml_node_set.h>
|
2
|
+
#include <xml_namespace.h>
|
2
3
|
#include <libxml/xpathInternals.h>
|
4
|
+
|
5
|
+
static ID decorate ;
|
6
|
+
static void xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val);
|
7
|
+
|
8
|
+
|
9
|
+
static void Check_Node_Set_Node_Type(VALUE node)
|
10
|
+
{
|
11
|
+
if (!(rb_obj_is_kind_of(node, cNokogiriXmlNode) ||
|
12
|
+
rb_obj_is_kind_of(node, cNokogiriXmlNamespace))) {
|
13
|
+
rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node or Nokogiri::XML::Namespace");
|
14
|
+
}
|
15
|
+
}
|
16
|
+
|
17
|
+
|
18
|
+
static void deallocate(xmlNodeSetPtr node_set)
|
19
|
+
{
|
20
|
+
/*
|
21
|
+
*
|
22
|
+
* since xpath queries return copies of the xmlNs structs,
|
23
|
+
* xmlXPathFreeNodeSet() frees those xmlNs structs that are in the
|
24
|
+
* NodeSet.
|
25
|
+
*
|
26
|
+
* this is bad if someone is still trying to use the Namespace object wrapped
|
27
|
+
* around the xmlNs, so we need to avoid that.
|
28
|
+
*
|
29
|
+
* here we reproduce xmlXPathFreeNodeSet() without the xmlNs logic.
|
30
|
+
*
|
31
|
+
* this doesn't cause a leak because Namespace objects that are in an XPath
|
32
|
+
* query NodeSet are given their own lifecycle in
|
33
|
+
* Nokogiri_wrap_xml_namespace().
|
34
|
+
*/
|
35
|
+
NOKOGIRI_DEBUG_START(node_set) ;
|
36
|
+
if (node_set->nodeTab != NULL)
|
37
|
+
xmlFree(node_set->nodeTab);
|
38
|
+
|
39
|
+
xmlFree(node_set);
|
40
|
+
NOKOGIRI_DEBUG_END(node_set) ;
|
41
|
+
}
|
42
|
+
|
43
|
+
static VALUE allocate(VALUE klass)
|
44
|
+
{
|
45
|
+
return Nokogiri_wrap_xml_node_set(xmlXPathNodeSetCreate(NULL), Qnil);
|
46
|
+
}
|
47
|
+
|
48
|
+
|
49
|
+
/*
|
50
|
+
* call-seq:
|
51
|
+
* dup
|
52
|
+
*
|
53
|
+
* Duplicate this node set
|
54
|
+
*/
|
55
|
+
static VALUE duplicate(VALUE self)
|
56
|
+
{
|
57
|
+
xmlNodeSetPtr node_set;
|
58
|
+
xmlNodeSetPtr dupl;
|
59
|
+
|
60
|
+
Data_Get_Struct(self, xmlNodeSet, node_set);
|
61
|
+
|
62
|
+
dupl = xmlXPathNodeSetMerge(NULL, node_set);
|
63
|
+
|
64
|
+
return Nokogiri_wrap_xml_node_set(dupl, rb_iv_get(self, "@document"));
|
65
|
+
}
|
66
|
+
|
3
67
|
/*
|
4
68
|
* call-seq:
|
5
69
|
* length
|
@@ -9,12 +73,10 @@
|
|
9
73
|
static VALUE length(VALUE self)
|
10
74
|
{
|
11
75
|
xmlNodeSetPtr node_set;
|
12
|
-
Data_Get_Struct(self, xmlNodeSet, node_set);
|
13
76
|
|
14
|
-
|
15
|
-
return INT2NUM(node_set->nodeNr);
|
77
|
+
Data_Get_Struct(self, xmlNodeSet, node_set);
|
16
78
|
|
17
|
-
return INT2NUM(0);
|
79
|
+
return node_set ? INT2NUM(node_set->nodeNr) : INT2NUM(0);
|
18
80
|
}
|
19
81
|
|
20
82
|
/*
|
@@ -28,97 +90,396 @@ static VALUE push(VALUE self, VALUE rb_node)
|
|
28
90
|
xmlNodeSetPtr node_set;
|
29
91
|
xmlNodePtr node;
|
30
92
|
|
93
|
+
Check_Node_Set_Node_Type(rb_node);
|
94
|
+
|
31
95
|
Data_Get_Struct(self, xmlNodeSet, node_set);
|
32
96
|
Data_Get_Struct(rb_node, xmlNode, node);
|
97
|
+
|
33
98
|
xmlXPathNodeSetAdd(node_set, node);
|
99
|
+
|
34
100
|
return self;
|
35
101
|
}
|
36
102
|
|
103
|
+
/*
|
104
|
+
* call-seq:
|
105
|
+
* delete(node)
|
106
|
+
*
|
107
|
+
* Delete +node+ from the NodeSet, if it is a member. Returns the deleted node
|
108
|
+
* if found, otherwise returns nil.
|
109
|
+
*/
|
110
|
+
static VALUE
|
111
|
+
delete(VALUE self, VALUE rb_node)
|
112
|
+
{
|
113
|
+
xmlNodeSetPtr node_set;
|
114
|
+
xmlNodePtr node;
|
115
|
+
|
116
|
+
Check_Node_Set_Node_Type(rb_node);
|
117
|
+
|
118
|
+
Data_Get_Struct(self, xmlNodeSet, node_set);
|
119
|
+
Data_Get_Struct(rb_node, xmlNode, node);
|
120
|
+
|
121
|
+
if (xmlXPathNodeSetContains(node_set, node)) {
|
122
|
+
xpath_node_set_del(node_set, node);
|
123
|
+
return rb_node;
|
124
|
+
}
|
125
|
+
return Qnil ;
|
126
|
+
}
|
127
|
+
|
128
|
+
|
129
|
+
/*
|
130
|
+
* call-seq:
|
131
|
+
* &(node_set)
|
132
|
+
*
|
133
|
+
* Set Intersection — Returns a new NodeSet containing nodes common to the two NodeSets.
|
134
|
+
*/
|
135
|
+
static VALUE intersection(VALUE self, VALUE rb_other)
|
136
|
+
{
|
137
|
+
xmlNodeSetPtr node_set, other ;
|
138
|
+
xmlNodeSetPtr intersection;
|
139
|
+
|
140
|
+
if(!rb_obj_is_kind_of(rb_other, cNokogiriXmlNodeSet))
|
141
|
+
rb_raise(rb_eArgError, "node_set must be a Nokogiri::XML::NodeSet");
|
142
|
+
|
143
|
+
Data_Get_Struct(self, xmlNodeSet, node_set);
|
144
|
+
Data_Get_Struct(rb_other, xmlNodeSet, other);
|
145
|
+
|
146
|
+
intersection = xmlXPathIntersection(node_set, other);
|
147
|
+
return Nokogiri_wrap_xml_node_set(intersection, rb_iv_get(self, "@document"));
|
148
|
+
}
|
149
|
+
|
150
|
+
|
37
151
|
/*
|
38
152
|
* call-seq:
|
39
|
-
*
|
153
|
+
* include?(node)
|
40
154
|
*
|
41
|
-
*
|
155
|
+
* Returns true if any member of node set equals +node+.
|
42
156
|
*/
|
43
|
-
static VALUE
|
157
|
+
static VALUE include_eh(VALUE self, VALUE rb_node)
|
44
158
|
{
|
45
|
-
int i = NUM2INT(number);
|
46
159
|
xmlNodeSetPtr node_set;
|
160
|
+
xmlNodePtr node;
|
161
|
+
|
162
|
+
Check_Node_Set_Node_Type(rb_node);
|
163
|
+
|
164
|
+
Data_Get_Struct(self, xmlNodeSet, node_set);
|
165
|
+
Data_Get_Struct(rb_node, xmlNode, node);
|
166
|
+
|
167
|
+
return (xmlXPathNodeSetContains(node_set, node) ? Qtrue : Qfalse);
|
168
|
+
}
|
169
|
+
|
170
|
+
|
171
|
+
/*
|
172
|
+
* call-seq:
|
173
|
+
* |(node_set)
|
174
|
+
*
|
175
|
+
* Returns a new set built by merging the set and the elements of the given
|
176
|
+
* set.
|
177
|
+
*/
|
178
|
+
static VALUE set_union(VALUE self, VALUE rb_other)
|
179
|
+
{
|
180
|
+
xmlNodeSetPtr node_set, other;
|
181
|
+
xmlNodeSetPtr new;
|
182
|
+
|
183
|
+
if(!rb_obj_is_kind_of(rb_other, cNokogiriXmlNodeSet))
|
184
|
+
rb_raise(rb_eArgError, "node_set must be a Nokogiri::XML::NodeSet");
|
185
|
+
|
47
186
|
Data_Get_Struct(self, xmlNodeSet, node_set);
|
187
|
+
Data_Get_Struct(rb_other, xmlNodeSet, other);
|
188
|
+
|
189
|
+
new = xmlXPathNodeSetMerge(NULL, node_set);
|
190
|
+
new = xmlXPathNodeSetMerge(new, other);
|
191
|
+
|
192
|
+
return Nokogiri_wrap_xml_node_set(new, rb_iv_get(self, "@document"));
|
193
|
+
}
|
194
|
+
|
195
|
+
/*
|
196
|
+
* call-seq:
|
197
|
+
* -(node_set)
|
198
|
+
*
|
199
|
+
* Difference - returns a new NodeSet that is a copy of this NodeSet, removing
|
200
|
+
* each item that also appears in +node_set+
|
201
|
+
*/
|
202
|
+
static VALUE minus(VALUE self, VALUE rb_other)
|
203
|
+
{
|
204
|
+
xmlNodeSetPtr node_set, other;
|
205
|
+
xmlNodeSetPtr new;
|
206
|
+
int j ;
|
48
207
|
|
49
|
-
if(
|
208
|
+
if(!rb_obj_is_kind_of(rb_other, cNokogiriXmlNodeSet))
|
209
|
+
rb_raise(rb_eArgError, "node_set must be a Nokogiri::XML::NodeSet");
|
210
|
+
|
211
|
+
Data_Get_Struct(self, xmlNodeSet, node_set);
|
212
|
+
Data_Get_Struct(rb_other, xmlNodeSet, other);
|
213
|
+
|
214
|
+
new = xmlXPathNodeSetMerge(NULL, node_set);
|
215
|
+
for (j = 0 ; j < other->nodeNr ; ++j) {
|
216
|
+
xpath_node_set_del(new, other->nodeTab[j]);
|
217
|
+
}
|
218
|
+
|
219
|
+
return Nokogiri_wrap_xml_node_set(new, rb_iv_get(self, "@document"));
|
220
|
+
}
|
221
|
+
|
222
|
+
|
223
|
+
static VALUE index_at(VALUE self, long offset)
|
224
|
+
{
|
225
|
+
xmlNodeSetPtr node_set;
|
226
|
+
|
227
|
+
Data_Get_Struct(self, xmlNodeSet, node_set);
|
228
|
+
|
229
|
+
if (offset >= node_set->nodeNr || abs((int)offset) > node_set->nodeNr) {
|
50
230
|
return Qnil;
|
231
|
+
}
|
51
232
|
|
52
|
-
if(
|
53
|
-
i = i + node_set->nodeNr;
|
233
|
+
if (offset < 0) { offset += node_set->nodeNr ; }
|
54
234
|
|
55
|
-
return
|
235
|
+
return Nokogiri_wrap_xml_node_set_node(node_set->nodeTab[offset], self);
|
56
236
|
}
|
57
237
|
|
58
|
-
static
|
238
|
+
static VALUE subseq(VALUE self, long beg, long len)
|
239
|
+
{
|
240
|
+
long j;
|
241
|
+
xmlNodeSetPtr node_set;
|
242
|
+
xmlNodeSetPtr new_set ;
|
243
|
+
|
244
|
+
Data_Get_Struct(self, xmlNodeSet, node_set);
|
245
|
+
|
246
|
+
if (beg > node_set->nodeNr) return Qnil ;
|
247
|
+
if (beg < 0 || len < 0) return Qnil ;
|
248
|
+
|
249
|
+
if ((beg + len) > node_set->nodeNr) {
|
250
|
+
len = node_set->nodeNr - beg ;
|
251
|
+
}
|
252
|
+
|
253
|
+
new_set = xmlXPathNodeSetCreate(NULL);
|
254
|
+
for (j = beg ; j < beg+len ; ++j) {
|
255
|
+
xmlXPathNodeSetAddUnique(new_set, node_set->nodeTab[j]);
|
256
|
+
}
|
257
|
+
return Nokogiri_wrap_xml_node_set(new_set, rb_iv_get(self, "@document"));
|
258
|
+
}
|
259
|
+
|
260
|
+
/*
|
261
|
+
* call-seq:
|
262
|
+
* [index] -> Node or nil
|
263
|
+
* [start, length] -> NodeSet or nil
|
264
|
+
* [range] -> NodeSet or nil
|
265
|
+
* slice(index) -> Node or nil
|
266
|
+
* slice(start, length) -> NodeSet or nil
|
267
|
+
* slice(range) -> NodeSet or nil
|
268
|
+
*
|
269
|
+
* Element reference - returns the node at +index+, or returns a NodeSet
|
270
|
+
* containing nodes starting at +start+ and continuing for +length+ elements, or
|
271
|
+
* returns a NodeSet containing nodes specified by +range+. Negative +indices+
|
272
|
+
* count backward from the end of the +node_set+ (-1 is the last node). Returns
|
273
|
+
* nil if the +index+ (or +start+) is out of range.
|
274
|
+
*/
|
275
|
+
static VALUE slice(int argc, VALUE *argv, VALUE self)
|
59
276
|
{
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
277
|
+
VALUE arg ;
|
278
|
+
long beg, len ;
|
279
|
+
xmlNodeSetPtr node_set;
|
280
|
+
|
281
|
+
Data_Get_Struct(self, xmlNodeSet, node_set);
|
282
|
+
|
283
|
+
if (argc == 2) {
|
284
|
+
beg = NUM2LONG(argv[0]);
|
285
|
+
len = NUM2LONG(argv[1]);
|
286
|
+
if (beg < 0) {
|
287
|
+
beg += node_set->nodeNr ;
|
64
288
|
}
|
289
|
+
return subseq(self, beg, len);
|
290
|
+
}
|
291
|
+
|
292
|
+
if (argc != 1) {
|
293
|
+
rb_scan_args(argc, argv, "11", NULL, NULL);
|
294
|
+
}
|
295
|
+
arg = argv[0];
|
296
|
+
|
297
|
+
if (FIXNUM_P(arg)) {
|
298
|
+
return index_at(self, FIX2LONG(arg));
|
299
|
+
}
|
300
|
+
|
301
|
+
/* if arg is Range */
|
302
|
+
switch (rb_range_beg_len(arg, &beg, &len, (long)node_set->nodeNr, 0)) {
|
303
|
+
case Qfalse:
|
304
|
+
break;
|
305
|
+
case Qnil:
|
306
|
+
return Qnil;
|
307
|
+
default:
|
308
|
+
return subseq(self, beg, len);
|
309
|
+
}
|
310
|
+
|
311
|
+
return index_at(self, NUM2LONG(arg));
|
65
312
|
}
|
66
313
|
|
67
|
-
|
314
|
+
|
315
|
+
/*
|
316
|
+
* call-seq:
|
317
|
+
* to_a
|
318
|
+
*
|
319
|
+
* Return this list as an Array
|
320
|
+
*/
|
321
|
+
static VALUE to_array(VALUE self, VALUE rb_node)
|
322
|
+
{
|
323
|
+
xmlNodeSetPtr node_set ;
|
324
|
+
VALUE list;
|
325
|
+
int i;
|
326
|
+
|
327
|
+
Data_Get_Struct(self, xmlNodeSet, node_set);
|
328
|
+
|
329
|
+
list = rb_ary_new2(node_set->nodeNr);
|
330
|
+
for(i = 0; i < node_set->nodeNr; i++) {
|
331
|
+
VALUE elt = Nokogiri_wrap_xml_node_set_node(node_set->nodeTab[i], self);
|
332
|
+
rb_ary_push( list, elt );
|
333
|
+
}
|
334
|
+
|
335
|
+
return list;
|
336
|
+
}
|
337
|
+
|
338
|
+
/*
|
339
|
+
* call-seq:
|
340
|
+
* unlink
|
341
|
+
*
|
342
|
+
* Unlink this NodeSet and all Node objects it contains from their current context.
|
343
|
+
*/
|
344
|
+
static VALUE unlink_nodeset(VALUE self)
|
345
|
+
{
|
346
|
+
xmlNodeSetPtr node_set;
|
347
|
+
int j, nodeNr ;
|
348
|
+
|
349
|
+
Data_Get_Struct(self, xmlNodeSet, node_set);
|
350
|
+
|
351
|
+
nodeNr = node_set->nodeNr ;
|
352
|
+
for (j = 0 ; j < nodeNr ; j++) {
|
353
|
+
if (! Nokogiri_namespace_eh(node_set->nodeTab[j])) {
|
354
|
+
VALUE node ;
|
355
|
+
xmlNodePtr node_ptr;
|
356
|
+
node = Nokogiri_wrap_xml_node(Qnil, node_set->nodeTab[j]);
|
357
|
+
rb_funcall(node, rb_intern("unlink"), 0); /* modifies the C struct out from under the object */
|
358
|
+
Data_Get_Struct(node, xmlNode, node_ptr);
|
359
|
+
node_set->nodeTab[j] = node_ptr ;
|
360
|
+
}
|
361
|
+
}
|
362
|
+
return self ;
|
363
|
+
}
|
364
|
+
|
365
|
+
|
366
|
+
static void reify_node_set_namespaces(VALUE self)
|
68
367
|
{
|
69
368
|
/*
|
70
|
-
*
|
71
|
-
*
|
72
|
-
* assumption lies in the operation where it dereferences nodeTab pointers
|
73
|
-
* while searching for namespace nodes to free.
|
74
|
-
*
|
75
|
-
* however, since Ruby's GC mechanism cannot guarantee the strict order in
|
76
|
-
* which ruby objects will be GC'd, nodes may be garbage collected before a
|
77
|
-
* nodeset containing pointers to those nodes. (this is true regardless of
|
78
|
-
* how we declare dependencies between objects with rb_gc_mark().)
|
369
|
+
* as mentioned in deallocate() above, xmlNs structs returned in an XPath
|
370
|
+
* NodeSet are duplicates, and we don't clean them up at deallocate() time.
|
79
371
|
*
|
80
|
-
* as a result,
|
81
|
-
*
|
372
|
+
* as a result, we need to make sure the Ruby manages this memory. we do this
|
373
|
+
* by forcing the creation of a Ruby object wrapped around the xmlNs.
|
82
374
|
*
|
83
|
-
*
|
84
|
-
*
|
375
|
+
* we also have to make sure that the NodeSet has a reference to the
|
376
|
+
* Namespace object, otherwise GC will kick in and the Namespace won't be
|
377
|
+
* marked.
|
85
378
|
*
|
86
|
-
*
|
87
|
-
*
|
88
|
-
* leak memory if we simply free the node set's memory directly. that's only
|
89
|
-
* quasi-evil!
|
90
|
-
*
|
91
|
-
* there's probably a lesson in here somewhere about intermingling, within a
|
92
|
-
* single array, structs with different memory-ownership semantics. or more
|
93
|
-
* generally, a lesson about building an API in C/C++ that does not contain
|
94
|
-
* assumptions about the strict order in which memory will be released. hey,
|
95
|
-
* that sounds like a great idea for a blog post! get to it!
|
96
|
-
*
|
97
|
-
* "In Valgrind We Trust." seriously.
|
379
|
+
* we *could* do this safely with *all* the nodes in the NodeSet, but we only
|
380
|
+
* *need* to do it for xmlNs structs, and so you get the code we have here.
|
98
381
|
*/
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
382
|
+
int j ;
|
383
|
+
xmlNodeSetPtr node_set ;
|
384
|
+
VALUE namespace_cache ;
|
385
|
+
|
386
|
+
Data_Get_Struct(self, xmlNodeSet, node_set);
|
387
|
+
|
388
|
+
namespace_cache = rb_iv_get(self, "@namespace_cache");
|
389
|
+
|
390
|
+
for (j = 0 ; j < node_set->nodeNr ; j++) {
|
391
|
+
if (Nokogiri_namespace_eh(node_set->nodeTab[j])) {
|
392
|
+
rb_ary_push(namespace_cache, Nokogiri_wrap_xml_node_set_node(node_set->nodeTab[j], self));
|
393
|
+
}
|
394
|
+
}
|
104
395
|
}
|
105
396
|
|
106
|
-
|
397
|
+
|
398
|
+
VALUE Nokogiri_wrap_xml_node_set(xmlNodeSetPtr node_set, VALUE document)
|
107
399
|
{
|
108
|
-
|
400
|
+
VALUE new_set ;
|
401
|
+
|
402
|
+
if (node_set == NULL) {
|
403
|
+
node_set = xmlXPathNodeSetCreate(NULL);
|
404
|
+
}
|
405
|
+
|
406
|
+
new_set = Data_Wrap_Struct(cNokogiriXmlNodeSet, 0, deallocate, node_set);
|
407
|
+
|
408
|
+
if (!NIL_P(document)) {
|
409
|
+
rb_iv_set(new_set, "@document", document);
|
410
|
+
rb_funcall(document, decorate, 1, new_set);
|
411
|
+
}
|
412
|
+
|
413
|
+
rb_iv_set(new_set, "@namespace_cache", rb_ary_new());
|
414
|
+
reify_node_set_namespaces(new_set);
|
415
|
+
|
416
|
+
return new_set ;
|
109
417
|
}
|
110
418
|
|
111
|
-
VALUE
|
419
|
+
VALUE Nokogiri_wrap_xml_node_set_node(xmlNodePtr node, VALUE node_set)
|
112
420
|
{
|
113
|
-
|
421
|
+
xmlDocPtr document ;
|
422
|
+
|
423
|
+
if (Nokogiri_namespace_eh(node)) {
|
424
|
+
Data_Get_Struct(rb_iv_get(node_set, "@document"), xmlDoc, document);
|
425
|
+
return Nokogiri_wrap_xml_namespace(document, (xmlNsPtr)node);
|
426
|
+
} else {
|
427
|
+
return Nokogiri_wrap_xml_node(Qnil, node);
|
428
|
+
}
|
114
429
|
}
|
115
430
|
|
431
|
+
|
432
|
+
static void xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val)
|
433
|
+
{
|
434
|
+
/*
|
435
|
+
* as mentioned a few times above, we do not want to free xmlNs structs
|
436
|
+
* outside of the Namespace lifecycle.
|
437
|
+
*
|
438
|
+
* xmlXPathNodeSetDel() frees xmlNs structs, and so here we reproduce that
|
439
|
+
* function with the xmlNs logic.
|
440
|
+
*/
|
441
|
+
int i;
|
442
|
+
|
443
|
+
if (cur == NULL) return;
|
444
|
+
if (val == NULL) return;
|
445
|
+
|
446
|
+
/*
|
447
|
+
* find node in nodeTab
|
448
|
+
*/
|
449
|
+
for (i = 0;i < cur->nodeNr;i++)
|
450
|
+
if (cur->nodeTab[i] == val) break;
|
451
|
+
|
452
|
+
if (i >= cur->nodeNr) { /* not found */
|
453
|
+
return;
|
454
|
+
}
|
455
|
+
cur->nodeNr--;
|
456
|
+
for (;i < cur->nodeNr;i++)
|
457
|
+
cur->nodeTab[i] = cur->nodeTab[i + 1];
|
458
|
+
cur->nodeTab[cur->nodeNr] = NULL;
|
459
|
+
}
|
460
|
+
|
461
|
+
|
116
462
|
VALUE cNokogiriXmlNodeSet ;
|
117
463
|
void init_xml_node_set(void)
|
118
464
|
{
|
119
|
-
VALUE
|
465
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
466
|
+
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
467
|
+
VALUE klass = rb_define_class_under(xml, "NodeSet", rb_cObject);
|
468
|
+
cNokogiriXmlNodeSet = klass;
|
469
|
+
|
120
470
|
rb_define_alloc_func(klass, allocate);
|
121
471
|
rb_define_method(klass, "length", length, 0);
|
122
|
-
rb_define_method(klass, "[]",
|
472
|
+
rb_define_method(klass, "[]", slice, -1);
|
473
|
+
rb_define_method(klass, "slice", slice, -1);
|
123
474
|
rb_define_method(klass, "push", push, 1);
|
475
|
+
rb_define_method(klass, "|", set_union, 1);
|
476
|
+
rb_define_method(klass, "-", minus, 1);
|
477
|
+
rb_define_method(klass, "unlink", unlink_nodeset, 0);
|
478
|
+
rb_define_method(klass, "to_a", to_array, 0);
|
479
|
+
rb_define_method(klass, "dup", duplicate, 0);
|
480
|
+
rb_define_method(klass, "delete", delete, 1);
|
481
|
+
rb_define_method(klass, "&", intersection, 1);
|
482
|
+
rb_define_method(klass, "include?", include_eh, 1);
|
483
|
+
|
484
|
+
decorate = rb_intern("decorate");
|
124
485
|
}
|
data/ext/nokogiri/xml_node_set.h
CHANGED
@@ -1,9 +1,13 @@
|
|
1
1
|
#ifndef NOKOGIRI_XML_NODE_SET
|
2
2
|
#define NOKOGIRI_XML_NODE_SET
|
3
3
|
|
4
|
-
#include <
|
4
|
+
#include <nokogiri.h>
|
5
5
|
void init_xml_node_set();
|
6
6
|
|
7
7
|
extern VALUE cNokogiriXmlNodeSet ;
|
8
|
-
VALUE Nokogiri_wrap_xml_node_set(xmlNodeSetPtr node_set) ;
|
8
|
+
VALUE Nokogiri_wrap_xml_node_set(xmlNodeSetPtr node_set, VALUE document) ;
|
9
|
+
VALUE Nokogiri_wrap_xml_node_set_node(xmlNodePtr node, VALUE node_set) ;
|
10
|
+
VALUE Nokogiri_wrap_xml_node_set_namespace(xmlNsPtr node, VALUE node_set) ;
|
11
|
+
int Nokogiri_namespace_eh(xmlNodePtr node) ;
|
12
|
+
|
9
13
|
#endif
|
@@ -0,0 +1,56 @@
|
|
1
|
+
#include <xml_processing_instruction.h>
|
2
|
+
|
3
|
+
/*
|
4
|
+
* call-seq:
|
5
|
+
* new(document, name, content)
|
6
|
+
*
|
7
|
+
* Create a new ProcessingInstruction element on the +document+ with +name+
|
8
|
+
* and +content+
|
9
|
+
*/
|
10
|
+
static VALUE new(int argc, VALUE *argv, VALUE klass)
|
11
|
+
{
|
12
|
+
xmlDocPtr xml_doc;
|
13
|
+
xmlNodePtr node;
|
14
|
+
VALUE document;
|
15
|
+
VALUE name;
|
16
|
+
VALUE content;
|
17
|
+
VALUE rest;
|
18
|
+
VALUE rb_node;
|
19
|
+
|
20
|
+
rb_scan_args(argc, argv, "3*", &document, &name, &content, &rest);
|
21
|
+
|
22
|
+
Data_Get_Struct(document, xmlDoc, xml_doc);
|
23
|
+
|
24
|
+
node = xmlNewDocPI(
|
25
|
+
xml_doc,
|
26
|
+
(const xmlChar *)StringValueCStr(name),
|
27
|
+
(const xmlChar *)StringValueCStr(content)
|
28
|
+
);
|
29
|
+
|
30
|
+
nokogiri_root_node(node);
|
31
|
+
|
32
|
+
rb_node = Nokogiri_wrap_xml_node(klass, node);
|
33
|
+
rb_obj_call_init(rb_node, argc, argv);
|
34
|
+
|
35
|
+
if(rb_block_given_p()) rb_yield(rb_node);
|
36
|
+
|
37
|
+
return rb_node;
|
38
|
+
}
|
39
|
+
|
40
|
+
VALUE cNokogiriXmlProcessingInstruction;
|
41
|
+
void init_xml_processing_instruction()
|
42
|
+
{
|
43
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
44
|
+
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
45
|
+
VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
|
46
|
+
|
47
|
+
/*
|
48
|
+
* ProcessingInstruction represents a ProcessingInstruction node in an xml
|
49
|
+
* document.
|
50
|
+
*/
|
51
|
+
VALUE klass = rb_define_class_under(xml, "ProcessingInstruction", node);
|
52
|
+
|
53
|
+
cNokogiriXmlProcessingInstruction = klass;
|
54
|
+
|
55
|
+
rb_define_singleton_method(klass, "new", new, -1);
|
56
|
+
}
|