superfeedr-nokogiri 1.4.0.20091116183308
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +27 -0
- data/CHANGELOG.ja.rdoc +330 -0
- data/CHANGELOG.rdoc +314 -0
- data/Manifest.txt +269 -0
- data/README.ja.rdoc +105 -0
- data/README.rdoc +118 -0
- data/Rakefile +244 -0
- data/bin/nokogiri +49 -0
- data/ext/nokogiri/extconf.rb +145 -0
- data/ext/nokogiri/html_document.c +145 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +272 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +92 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/nokogiri.c +89 -0
- data/ext/nokogiri/nokogiri.h +145 -0
- data/ext/nokogiri/xml_attr.c +92 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +67 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +54 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +52 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +388 -0
- data/ext/nokogiri/xml_document.h +24 -0
- data/ext/nokogiri/xml_document_fragment.c +46 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +192 -0
- data/ext/nokogiri/xml_dtd.h +10 -0
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_entity_decl.c +97 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +31 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_namespace.c +74 -0
- data/ext/nokogiri/xml_namespace.h +12 -0
- data/ext/nokogiri/xml_node.c +1060 -0
- data/ext/nokogiri/xml_node.h +13 -0
- data/ext/nokogiri/xml_node_set.c +397 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +593 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +159 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +286 -0
- data/ext/nokogiri/xml_sax_parser.h +43 -0
- data/ext/nokogiri/xml_sax_parser_context.c +155 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +114 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +156 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +261 -0
- data/ext/nokogiri/xml_syntax_error.h +13 -0
- data/ext/nokogiri/xml_text.c +48 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +53 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +239 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +131 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/lib/nokogiri.rb +116 -0
- data/lib/nokogiri/css.rb +25 -0
- data/lib/nokogiri/css/generated_parser.rb +646 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +142 -0
- data/lib/nokogiri/css/node.rb +99 -0
- data/lib/nokogiri/css/parser.rb +82 -0
- data/lib/nokogiri/css/parser.y +227 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rex +54 -0
- data/lib/nokogiri/css/xpath_visitor.rb +162 -0
- data/lib/nokogiri/decorators/slop.rb +33 -0
- data/lib/nokogiri/ffi/html/document.rb +28 -0
- data/lib/nokogiri/ffi/html/element_description.rb +85 -0
- data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
- data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
- data/lib/nokogiri/ffi/libxml.rb +356 -0
- data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
- data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
- data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
- data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
- data/lib/nokogiri/ffi/xml/attr.rb +41 -0
- data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
- data/lib/nokogiri/ffi/xml/comment.rb +18 -0
- data/lib/nokogiri/ffi/xml/document.rb +135 -0
- data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +69 -0
- data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
- data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
- data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
- data/lib/nokogiri/ffi/xml/node.rb +444 -0
- data/lib/nokogiri/ffi/xml/node_set.rb +133 -0
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
- data/lib/nokogiri/ffi/xml/reader.rb +227 -0
- data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
- data/lib/nokogiri/ffi/xml/sax/parser.rb +142 -0
- data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +39 -0
- data/lib/nokogiri/ffi/xml/schema.rb +92 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +91 -0
- data/lib/nokogiri/ffi/xml/text.rb +18 -0
- data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
- data/lib/nokogiri/html.rb +35 -0
- data/lib/nokogiri/html/builder.rb +35 -0
- data/lib/nokogiri/html/document.rb +88 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +48 -0
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +33 -0
- data/lib/nokogiri/version_warning.rb +11 -0
- data/lib/nokogiri/xml.rb +67 -0
- data/lib/nokogiri/xml/attr.rb +14 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +405 -0
- data/lib/nokogiri/xml/cdata.rb +11 -0
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +131 -0
- data/lib/nokogiri/xml/document_fragment.rb +69 -0
- data/lib/nokogiri/xml/dtd.rb +11 -0
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +15 -0
- data/lib/nokogiri/xml/fragment_handler.rb +71 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node.rb +665 -0
- data/lib/nokogiri/xml/node/save_options.rb +42 -0
- data/lib/nokogiri/xml/node_set.rb +307 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/parse_options.rb +85 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +74 -0
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +4 -0
- data/lib/nokogiri/xml/sax/document.rb +160 -0
- data/lib/nokogiri/xml/sax/parser.rb +115 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/schema.rb +61 -0
- data/lib/nokogiri/xml/syntax_error.rb +38 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xslt.rb +48 -0
- data/lib/nokogiri/xslt/stylesheet.rb +25 -0
- data/lib/xsd/xmlparser/nokogiri.rb +71 -0
- data/tasks/test.rb +100 -0
- data/test/css/test_nthiness.rb +159 -0
- data/test/css/test_parser.rb +277 -0
- data/test/css/test_tokenizer.rb +183 -0
- data/test/css/test_xpath_visitor.rb +76 -0
- data/test/ffi/test_document.rb +35 -0
- data/test/files/2ch.html +108 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/helper.rb +136 -0
- data/test/html/sax/test_parser.rb +64 -0
- data/test/html/sax/test_parser_context.rb +48 -0
- data/test/html/test_builder.rb +164 -0
- data/test/html/test_document.rb +390 -0
- data/test/html/test_document_encoding.rb +77 -0
- data/test/html/test_document_fragment.rb +132 -0
- data/test/html/test_element_description.rb +94 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +228 -0
- data/test/html/test_node_encoding.rb +27 -0
- data/test/test_convert_xpath.rb +135 -0
- data/test/test_css_cache.rb +45 -0
- data/test/test_gc.rb +15 -0
- data/test/test_memory_leak.rb +77 -0
- data/test/test_nokogiri.rb +134 -0
- data/test/test_reader.rb +358 -0
- data/test/test_xslt_transforms.rb +131 -0
- data/test/xml/node/test_save_options.rb +20 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +307 -0
- data/test/xml/sax/test_parser_context.rb +56 -0
- data/test/xml/sax/test_push_parser.rb +131 -0
- data/test/xml/test_attr.rb +38 -0
- data/test/xml/test_attribute_decl.rb +82 -0
- data/test/xml/test_builder.rb +167 -0
- data/test/xml/test_cdata.rb +38 -0
- data/test/xml/test_comment.rb +29 -0
- data/test/xml/test_document.rb +607 -0
- data/test/xml/test_document_encoding.rb +26 -0
- data/test/xml/test_document_fragment.rb +138 -0
- data/test/xml/test_dtd.rb +82 -0
- data/test/xml/test_dtd_encoding.rb +33 -0
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +83 -0
- data/test/xml/test_entity_reference.rb +21 -0
- data/test/xml/test_namespace.rb +68 -0
- data/test/xml/test_node.rb +889 -0
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +107 -0
- data/test/xml/test_node_set.rb +531 -0
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader_encoding.rb +126 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +89 -0
- data/test/xml/test_syntax_error.rb +27 -0
- data/test/xml/test_text.rb +30 -0
- data/test/xml/test_unparented_node.rb +381 -0
- data/test/xml/test_xpath.rb +106 -0
- metadata +430 -0
@@ -0,0 +1,388 @@
|
|
1
|
+
#include <xml_document.h>
|
2
|
+
|
3
|
+
/*
 * st_foreach callback applied to every entry of a document's unlinked-node
 * table while the document is being deallocated.
 *
 * key  - table key for the entry (not used here)
 * node - the node stored in the table
 * doc  - the document being torn down, passed through by st_foreach
 *
 * Attribute nodes are released directly with xmlFreePropList().  Any other
 * node that has no parent is attached to +doc+ with xmlAddChild(); presumably
 * so that the xmlFreeDoc() call in dealloc() releases it with the tree.
 *
 * Always returns ST_CONTINUE.
 */
static int dealloc_node_i(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
{
  if (node->type == XML_ATTRIBUTE_NODE) {
    xmlFreePropList((xmlAttrPtr)node);
  } else if (node->parent == NULL) {
    xmlAddChild((xmlNodePtr)doc, node);
  }

  return ST_CONTINUE;
}
|
16
|
+
|
17
|
+
/*
 * Free function handed to Data_Wrap_Struct for wrapped documents.
 *
 * Walks the document's unlinked-node table with dealloc_node_i, frees the
 * table and the tuple stored in doc->_private, and then frees the libxml2
 * document itself.
 */
static void dealloc(xmlDocPtr doc)
{
  NOKOGIRI_DEBUG_START(doc);

  st_table *unlinked = DOC_UNLINKED_NODE_HASH(doc);

  /* Clear the default deregister-node callback for the duration of the
   * teardown; the previous callback is returned so it can be put back. */
  xmlDeregisterNodeFunc saved_deregister = xmlDeregisterNodeDefault(NULL);

  st_foreach(unlinked, dealloc_node_i, (st_data_t)doc);
  st_free_table(unlinked);

  /* The tuple must go before the document: it hangs off doc->_private. */
  free(doc->_private);
  doc->_private = NULL;
  xmlFreeDoc(doc);

  xmlDeregisterNodeDefault(saved_deregister);

  NOKOGIRI_DEBUG_END(doc);
}
|
36
|
+
|
37
|
+
/*
 * Call xmlSetNs(node, NULL) on +node+ and then, depth first, on every node
 * reachable through its children lists.
 */
static void recursively_remove_namespaces_from_node(xmlNodePtr node)
{
  xmlNodePtr cur;

  xmlSetNs(node, NULL);

  cur = node->children;
  while (cur != NULL) {
    recursively_remove_namespaces_from_node(cur);
    cur = cur->next;
  }
}
|
46
|
+
|
47
|
+
/*
|
48
|
+
* call-seq:
|
49
|
+
* url
|
50
|
+
*
|
51
|
+
* Get the url name for this document.
|
52
|
+
*/
|
53
|
+
static VALUE url(VALUE self)
|
54
|
+
{
|
55
|
+
xmlDocPtr doc;
|
56
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
57
|
+
|
58
|
+
if(doc->URL) return NOKOGIRI_STR_NEW2(doc->URL);
|
59
|
+
|
60
|
+
return Qnil;
|
61
|
+
}
|
62
|
+
|
63
|
+
/*
|
64
|
+
* call-seq:
|
65
|
+
* root=
|
66
|
+
*
|
67
|
+
* Set the root element on this document
|
68
|
+
*/
|
69
|
+
static VALUE set_root(VALUE self, VALUE root)
|
70
|
+
{
|
71
|
+
xmlDocPtr doc;
|
72
|
+
xmlNodePtr new_root;
|
73
|
+
|
74
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
75
|
+
Data_Get_Struct(root, xmlNode, new_root);
|
76
|
+
|
77
|
+
xmlNodePtr old_root = NULL;
|
78
|
+
|
79
|
+
/* If the new root's document is not the same as the current document,
|
80
|
+
* then we need to dup the node in to this document. */
|
81
|
+
if(new_root->doc != doc) {
|
82
|
+
old_root = xmlDocGetRootElement(doc);
|
83
|
+
if (!(new_root = xmlDocCopyNode(new_root, doc, 1))) {
|
84
|
+
rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
|
85
|
+
}
|
86
|
+
}
|
87
|
+
|
88
|
+
xmlDocSetRootElement(doc, new_root);
|
89
|
+
if(old_root) NOKOGIRI_ROOT_NODE(old_root);
|
90
|
+
return root;
|
91
|
+
}
|
92
|
+
|
93
|
+
/*
|
94
|
+
* call-seq:
|
95
|
+
* root
|
96
|
+
*
|
97
|
+
* Get the root node for this document.
|
98
|
+
*/
|
99
|
+
static VALUE root(VALUE self)
|
100
|
+
{
|
101
|
+
xmlDocPtr doc;
|
102
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
103
|
+
|
104
|
+
xmlNodePtr root = xmlDocGetRootElement(doc);
|
105
|
+
|
106
|
+
if(!root) return Qnil;
|
107
|
+
return Nokogiri_wrap_xml_node(Qnil, root) ;
|
108
|
+
}
|
109
|
+
|
110
|
+
/*
|
111
|
+
* call-seq:
|
112
|
+
* encoding= encoding
|
113
|
+
*
|
114
|
+
* Set the encoding string for this Document
|
115
|
+
*/
|
116
|
+
static VALUE set_encoding(VALUE self, VALUE encoding)
|
117
|
+
{
|
118
|
+
xmlDocPtr doc;
|
119
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
120
|
+
|
121
|
+
doc->encoding = xmlStrdup((xmlChar *)StringValuePtr(encoding));
|
122
|
+
|
123
|
+
return encoding;
|
124
|
+
}
|
125
|
+
|
126
|
+
/*
|
127
|
+
* call-seq:
|
128
|
+
* encoding
|
129
|
+
*
|
130
|
+
* Get the encoding for this Document
|
131
|
+
*/
|
132
|
+
static VALUE encoding(VALUE self)
|
133
|
+
{
|
134
|
+
xmlDocPtr doc;
|
135
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
136
|
+
|
137
|
+
if(!doc->encoding) return Qnil;
|
138
|
+
return NOKOGIRI_STR_NEW2(doc->encoding);
|
139
|
+
}
|
140
|
+
|
141
|
+
/*
|
142
|
+
* call-seq:
|
143
|
+
* version
|
144
|
+
*
|
145
|
+
* Get the XML version for this Document
|
146
|
+
*/
|
147
|
+
static VALUE version(VALUE self)
|
148
|
+
{
|
149
|
+
xmlDocPtr doc;
|
150
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
151
|
+
|
152
|
+
if(!doc->version) return Qnil;
|
153
|
+
return NOKOGIRI_STR_NEW2(doc->version);
|
154
|
+
}
|
155
|
+
|
156
|
+
/*
|
157
|
+
* call-seq:
|
158
|
+
* read_io(io, url, encoding, options)
|
159
|
+
*
|
160
|
+
* Create a new document from an IO object
|
161
|
+
*/
|
162
|
+
static VALUE read_io( VALUE klass,
|
163
|
+
VALUE io,
|
164
|
+
VALUE url,
|
165
|
+
VALUE encoding,
|
166
|
+
VALUE options )
|
167
|
+
{
|
168
|
+
const char * c_url = NIL_P(url) ? NULL : StringValuePtr(url);
|
169
|
+
const char * c_enc = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
|
170
|
+
VALUE error_list = rb_ary_new();
|
171
|
+
|
172
|
+
xmlResetLastError();
|
173
|
+
xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
|
174
|
+
|
175
|
+
xmlDocPtr doc = xmlReadIO(
|
176
|
+
(xmlInputReadCallback)io_read_callback,
|
177
|
+
(xmlInputCloseCallback)io_close_callback,
|
178
|
+
(void *)io,
|
179
|
+
c_url,
|
180
|
+
c_enc,
|
181
|
+
(int)NUM2INT(options)
|
182
|
+
);
|
183
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
184
|
+
|
185
|
+
if(doc == NULL) {
|
186
|
+
xmlFreeDoc(doc);
|
187
|
+
|
188
|
+
xmlErrorPtr error = xmlGetLastError();
|
189
|
+
if(error)
|
190
|
+
rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
|
191
|
+
else
|
192
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
193
|
+
|
194
|
+
return Qnil;
|
195
|
+
}
|
196
|
+
|
197
|
+
VALUE document = Nokogiri_wrap_xml_document(klass, doc);
|
198
|
+
rb_iv_set(document, "@errors", error_list);
|
199
|
+
return document;
|
200
|
+
}
|
201
|
+
|
202
|
+
/*
|
203
|
+
* call-seq:
|
204
|
+
* read_memory(string, url, encoding, options)
|
205
|
+
*
|
206
|
+
* Create a new document from a String
|
207
|
+
*/
|
208
|
+
static VALUE read_memory( VALUE klass,
|
209
|
+
VALUE string,
|
210
|
+
VALUE url,
|
211
|
+
VALUE encoding,
|
212
|
+
VALUE options )
|
213
|
+
{
|
214
|
+
const char * c_buffer = StringValuePtr(string);
|
215
|
+
const char * c_url = NIL_P(url) ? NULL : StringValuePtr(url);
|
216
|
+
const char * c_enc = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
|
217
|
+
int len = RSTRING_LEN(string);
|
218
|
+
VALUE error_list = rb_ary_new();
|
219
|
+
|
220
|
+
xmlResetLastError();
|
221
|
+
xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
|
222
|
+
xmlDocPtr doc = xmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options));
|
223
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
224
|
+
|
225
|
+
if(doc == NULL) {
|
226
|
+
xmlFreeDoc(doc);
|
227
|
+
|
228
|
+
xmlErrorPtr error = xmlGetLastError();
|
229
|
+
if(error)
|
230
|
+
rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
|
231
|
+
else
|
232
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
233
|
+
|
234
|
+
return Qnil;
|
235
|
+
}
|
236
|
+
|
237
|
+
VALUE document = Nokogiri_wrap_xml_document(klass, doc);
|
238
|
+
rb_iv_set(document, "@errors", error_list);
|
239
|
+
return document;
|
240
|
+
}
|
241
|
+
|
242
|
+
/*
|
243
|
+
* call-seq:
|
244
|
+
* dup
|
245
|
+
*
|
246
|
+
* Copy this Document. An optional depth may be passed in, but it defaults
|
247
|
+
* to a deep copy. 0 is a shallow copy, 1 is a deep copy.
|
248
|
+
*/
|
249
|
+
static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
|
250
|
+
{
|
251
|
+
VALUE level;
|
252
|
+
|
253
|
+
if(rb_scan_args(argc, argv, "01", &level) == 0)
|
254
|
+
level = INT2NUM((long)1);
|
255
|
+
|
256
|
+
xmlDocPtr doc, dup;
|
257
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
258
|
+
|
259
|
+
dup = xmlCopyDoc(doc, (int)NUM2INT(level));
|
260
|
+
if(dup == NULL) return Qnil;
|
261
|
+
|
262
|
+
dup->type = doc->type;
|
263
|
+
return Nokogiri_wrap_xml_document(RBASIC(self)->klass, dup);
|
264
|
+
}
|
265
|
+
|
266
|
+
/*
|
267
|
+
* call-seq:
|
268
|
+
* new(version = default)
|
269
|
+
*
|
270
|
+
* Create a new document with +version+ (defaults to "1.0")
|
271
|
+
*/
|
272
|
+
static VALUE new(int argc, VALUE *argv, VALUE klass)
|
273
|
+
{
|
274
|
+
VALUE version, rest, rb_doc ;
|
275
|
+
|
276
|
+
rb_scan_args(argc, argv, "0*", &rest);
|
277
|
+
version = rb_ary_entry(rest, (long)0);
|
278
|
+
if (NIL_P(Qnil)) {
|
279
|
+
version = rb_str_new2("1.0");
|
280
|
+
}
|
281
|
+
|
282
|
+
xmlDocPtr doc = xmlNewDoc((xmlChar *)StringValuePtr(version));
|
283
|
+
rb_doc = Nokogiri_wrap_xml_document(klass, doc);
|
284
|
+
rb_obj_call_init(rb_doc, argc, argv);
|
285
|
+
return rb_doc ;
|
286
|
+
}
|
287
|
+
|
288
|
+
/*
|
289
|
+
* call-seq:
|
290
|
+
* remove_namespaces!
|
291
|
+
*
|
292
|
+
* Remove all namespaces from all nodes in the document.
|
293
|
+
*
|
294
|
+
* This could be useful for developers who either don't understand namespaces
|
295
|
+
* or don't care about them.
|
296
|
+
*
|
297
|
+
* The following example shows a use case, and you can decide for yourself
|
298
|
+
* whether this is a good thing or not:
|
299
|
+
*
|
300
|
+
* doc = Nokogiri::XML <<-EOXML
|
301
|
+
* <root>
|
302
|
+
* <car xmlns:part="http://general-motors.com/">
|
303
|
+
* <part:tire>Michelin Model XGV</part:tire>
|
304
|
+
* </car>
|
305
|
+
* <bicycle xmlns:part="http://schwinn.com/">
|
306
|
+
* <part:tire>I'm a bicycle tire!</part:tire>
|
307
|
+
* </bicycle>
|
308
|
+
* </root>
|
309
|
+
* EOXML
|
310
|
+
*
|
311
|
+
* doc.xpath("//tire").to_s # => ""
|
312
|
+
* doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => "<part:tire>Michelin Model XGV</part:tire>"
|
313
|
+
* doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => "<part:tire>I'm a bicycle tire!</part:tire>"
|
314
|
+
*
|
315
|
+
* doc.remove_namespaces!
|
316
|
+
*
|
317
|
+
* doc.xpath("//tire").to_s # => "<tire>Michelin Model XGV</tire><tire>I'm a bicycle tire!</tire>"
|
318
|
+
* doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => ""
|
319
|
+
* doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => ""
|
320
|
+
*
|
321
|
+
* For more information on why this probably is *not* a good thing in general,
|
322
|
+
* please direct your browser to
|
323
|
+
* http://tenderlovemaking.com/2009/04/23/namespaces-in-xml/
|
324
|
+
*/
|
325
|
+
VALUE remove_namespaces_bang(VALUE self)
{
  xmlDocPtr doc ;
  Data_Get_Struct(self, xmlDoc, doc);

  /* The helper takes an xmlNodePtr; the document pointer is cast explicitly
   * rather than passed as an incompatible pointer type.  The walk starts at
   * the document and descends through its children. */
  recursively_remove_namespaces_from_node((xmlNodePtr)doc);

  /* Returns the receiver. */
  return self;
}
|
333
|
+
|
334
|
+
|
335
|
+
VALUE cNokogiriXmlDocument ;
|
336
|
+
void init_xml_document()
|
337
|
+
{
|
338
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
339
|
+
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
340
|
+
VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
|
341
|
+
|
342
|
+
/*
|
343
|
+
* Nokogiri::XML::Document wraps an xml document.
|
344
|
+
*/
|
345
|
+
VALUE klass = rb_define_class_under(xml, "Document", node);
|
346
|
+
|
347
|
+
cNokogiriXmlDocument = klass;
|
348
|
+
|
349
|
+
rb_define_singleton_method(klass, "read_memory", read_memory, 4);
|
350
|
+
rb_define_singleton_method(klass, "read_io", read_io, 4);
|
351
|
+
rb_define_singleton_method(klass, "new", new, -1);
|
352
|
+
|
353
|
+
rb_define_method(klass, "root", root, 0);
|
354
|
+
rb_define_method(klass, "root=", set_root, 1);
|
355
|
+
rb_define_method(klass, "encoding", encoding, 0);
|
356
|
+
rb_define_method(klass, "encoding=", set_encoding, 1);
|
357
|
+
rb_define_method(klass, "version", version, 0);
|
358
|
+
rb_define_method(klass, "dup", duplicate_node, -1);
|
359
|
+
rb_define_method(klass, "url", url, 0);
|
360
|
+
rb_define_method(klass, "remove_namespaces!", remove_namespaces_bang, 0);
|
361
|
+
}
|
362
|
+
|
363
|
+
|
364
|
+
/* this takes klass as a param because it's used for HtmlDocument, too. */
|
365
|
+
VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
|
366
|
+
{
|
367
|
+
nokogiriTuplePtr tuple = (nokogiriTuplePtr)malloc(sizeof(nokogiriTuple));
|
368
|
+
|
369
|
+
VALUE rb_doc = Data_Wrap_Struct(
|
370
|
+
klass ? klass : cNokogiriXmlDocument,
|
371
|
+
0,
|
372
|
+
dealloc,
|
373
|
+
doc
|
374
|
+
);
|
375
|
+
|
376
|
+
VALUE cache = rb_ary_new();
|
377
|
+
rb_iv_set(rb_doc, "@decorators", Qnil);
|
378
|
+
rb_iv_set(rb_doc, "@node_cache", cache);
|
379
|
+
|
380
|
+
tuple->doc = (void *)rb_doc;
|
381
|
+
tuple->unlinkedNodes = st_init_numtable_with_size(128);
|
382
|
+
tuple->node_cache = cache;
|
383
|
+
doc->_private = tuple ;
|
384
|
+
|
385
|
+
rb_obj_call_init(rb_doc, 0, NULL);
|
386
|
+
|
387
|
+
return rb_doc ;
|
388
|
+
}
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#ifndef NOKOGIRI_XML_DOCUMENT
|
2
|
+
#define NOKOGIRI_XML_DOCUMENT
|
3
|
+
|
4
|
+
#include <nokogiri.h>
|
5
|
+
#include "st.h"
|
6
|
+
|
7
|
+
struct _nokogiriTuple {
|
8
|
+
xmlDocPtr doc;
|
9
|
+
st_table *unlinkedNodes;
|
10
|
+
VALUE node_cache;
|
11
|
+
};
|
12
|
+
typedef struct _nokogiriTuple nokogiriTuple;
|
13
|
+
typedef nokogiriTuple * nokogiriTuplePtr;
|
14
|
+
|
15
|
+
void init_xml_document();
|
16
|
+
VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc);
|
17
|
+
|
18
|
+
/*
 * Accessors for the nokogiriTuple stored in the _private member of x.
 * The argument is parenthesised so that expression arguments (for example
 * `p + 1`) expand correctly.
 *
 *  DOC_RUBY_OBJECT_TEST   - the tuple pointer itself (may be NULL)
 *  DOC_RUBY_OBJECT        - the tuple's doc member, as a VALUE
 *  DOC_UNLINKED_NODE_HASH - the tuple's unlinkedNodes table
 *  DOC_NODE_CACHE         - the tuple's node_cache, as a VALUE
 */
#define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)((x)->_private))
#define DOC_RUBY_OBJECT(x) ((VALUE)((nokogiriTuplePtr)((x)->_private))->doc)
#define DOC_UNLINKED_NODE_HASH(x) (((nokogiriTuplePtr)((x)->_private))->unlinkedNodes)
#define DOC_NODE_CACHE(x) ((VALUE)((nokogiriTuplePtr)((x)->_private))->node_cache)
|
22
|
+
|
23
|
+
extern VALUE cNokogiriXmlDocument ;
|
24
|
+
#endif
|
@@ -0,0 +1,46 @@
|
|
1
|
+
#include <xml_document_fragment.h>
|
2
|
+
|
3
|
+
/*
|
4
|
+
* call-seq:
|
5
|
+
* new(document)
|
6
|
+
*
|
7
|
+
* Create a new DocumentFragment element on the +document+
|
8
|
+
*/
|
9
|
+
static VALUE new(int argc, VALUE *argv, VALUE klass)
|
10
|
+
{
|
11
|
+
xmlDocPtr xml_doc;
|
12
|
+
VALUE document;
|
13
|
+
VALUE rest;
|
14
|
+
|
15
|
+
rb_scan_args(argc, argv, "1*", &document, &rest);
|
16
|
+
|
17
|
+
Data_Get_Struct(document, xmlDoc, xml_doc);
|
18
|
+
|
19
|
+
xmlNodePtr node = xmlNewDocFragment(xml_doc->doc);
|
20
|
+
|
21
|
+
NOKOGIRI_ROOT_NODE(node);
|
22
|
+
|
23
|
+
VALUE rb_node = Nokogiri_wrap_xml_node(klass, node);
|
24
|
+
rb_obj_call_init(rb_node, argc, argv);
|
25
|
+
|
26
|
+
if(rb_block_given_p()) rb_yield(rb_node);
|
27
|
+
|
28
|
+
return rb_node;
|
29
|
+
}
|
30
|
+
|
31
|
+
VALUE cNokogiriXmlDocumentFragment;
|
32
|
+
void init_xml_document_fragment()
|
33
|
+
{
|
34
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
35
|
+
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
36
|
+
VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
|
37
|
+
|
38
|
+
/*
|
39
|
+
* DocumentFragment represents a DocumentFragment node in an xml document.
|
40
|
+
*/
|
41
|
+
VALUE klass = rb_define_class_under(xml, "DocumentFragment", node);
|
42
|
+
|
43
|
+
cNokogiriXmlDocumentFragment = klass;
|
44
|
+
|
45
|
+
rb_define_singleton_method(klass, "new", new, -1);
|
46
|
+
}
|