nokogiri 1.14.0.rc1-arm-linux
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +2224 -0
- data/LICENSE.md +9 -0
- data/README.md +287 -0
- data/bin/nokogiri +131 -0
- data/dependencies.yml +41 -0
- data/ext/nokogiri/depend +38 -0
- data/ext/nokogiri/extconf.rb +1082 -0
- data/ext/nokogiri/gumbo.c +594 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +114 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/include/libexslt/exslt.h +108 -0
- data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
- data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +306 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX.h +204 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX2.h +172 -0
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +128 -0
- data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
- data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/dict.h +81 -0
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +232 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +153 -0
- data/ext/nokogiri/include/libxml2/libxml/globals.h +499 -0
- data/ext/nokogiri/include/libxml2/libxml/hash.h +236 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
- data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +81 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +1244 -0
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +656 -0
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +100 -0
- data/ext/nokogiri/include/libxml2/libxml/relaxng.h +218 -0
- data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +958 -0
- data/ext/nokogiri/include/libxml2/libxml/schematron.h +142 -0
- data/ext/nokogiri/include/libxml2/libxml/threads.h +91 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +1312 -0
- data/ext/nokogiri/include/libxml2/libxml/uri.h +94 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +463 -0
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +129 -0
- data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +368 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +947 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +77 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +226 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +428 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +222 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +88 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +246 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +202 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +503 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +488 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +575 -0
- data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +632 -0
- data/ext/nokogiri/include/libxml2/libxml/xpointer.h +137 -0
- data/ext/nokogiri/include/libxslt/attributes.h +38 -0
- data/ext/nokogiri/include/libxslt/documents.h +93 -0
- data/ext/nokogiri/include/libxslt/extensions.h +262 -0
- data/ext/nokogiri/include/libxslt/extra.h +72 -0
- data/ext/nokogiri/include/libxslt/functions.h +78 -0
- data/ext/nokogiri/include/libxslt/imports.h +75 -0
- data/ext/nokogiri/include/libxslt/keys.h +53 -0
- data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
- data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
- data/ext/nokogiri/include/libxslt/pattern.h +84 -0
- data/ext/nokogiri/include/libxslt/preproc.h +43 -0
- data/ext/nokogiri/include/libxslt/security.h +104 -0
- data/ext/nokogiri/include/libxslt/templates.h +77 -0
- data/ext/nokogiri/include/libxslt/transform.h +207 -0
- data/ext/nokogiri/include/libxslt/variables.h +118 -0
- data/ext/nokogiri/include/libxslt/xslt.h +110 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +1982 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +179 -0
- data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
- data/ext/nokogiri/include/libxslt/xsltlocale.h +76 -0
- data/ext/nokogiri/include/libxslt/xsltutils.h +310 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +259 -0
- data/ext/nokogiri/nokogiri.h +235 -0
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_cdata.c +57 -0
- data/ext/nokogiri/xml_comment.c +62 -0
- data/ext/nokogiri/xml_document.c +689 -0
- data/ext/nokogiri/xml_document_fragment.c +44 -0
- data/ext/nokogiri/xml_dtd.c +208 -0
- data/ext/nokogiri/xml_element_content.c +128 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_encoding_handler.c +104 -0
- data/ext/nokogiri/xml_entity_decl.c +112 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_namespace.c +186 -0
- data/ext/nokogiri/xml_node.c +2425 -0
- data/ext/nokogiri/xml_node_set.c +496 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_reader.c +794 -0
- data/ext/nokogiri/xml_relax_ng.c +183 -0
- data/ext/nokogiri/xml_sax_parser.c +316 -0
- data/ext/nokogiri/xml_sax_parser_context.c +283 -0
- data/ext/nokogiri/xml_sax_push_parser.c +166 -0
- data/ext/nokogiri/xml_schema.c +282 -0
- data/ext/nokogiri/xml_syntax_error.c +85 -0
- data/ext/nokogiri/xml_text.c +48 -0
- data/ext/nokogiri/xml_xpath_context.c +413 -0
- data/ext/nokogiri/xslt_stylesheet.c +363 -0
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +111 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri/2.7/nokogiri.so +0 -0
- data/lib/nokogiri/3.0/nokogiri.so +0 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +54 -0
- data/lib/nokogiri/css/parser.rb +770 -0
- data/lib/nokogiri/css/parser.y +277 -0
- data/lib/nokogiri/css/parser_extras.rb +96 -0
- data/lib/nokogiri/css/syntax_error.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rb +155 -0
- data/lib/nokogiri/css/tokenizer.rex +56 -0
- data/lib/nokogiri/css/xpath_visitor.rb +359 -0
- data/lib/nokogiri/css.rb +66 -0
- data/lib/nokogiri/decorators/slop.rb +44 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +48 -0
- data/lib/nokogiri/html4/builder.rb +37 -0
- data/lib/nokogiri/html4/document.rb +214 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/html4/element_description.rb +25 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +572 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +15 -0
- data/lib/nokogiri/html4/sax/parser.rb +63 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +47 -0
- data/lib/nokogiri/html5/document.rb +168 -0
- data/lib/nokogiri/html5/document_fragment.rb +90 -0
- data/lib/nokogiri/html5/node.rb +98 -0
- data/lib/nokogiri/html5.rb +389 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +6 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +223 -0
- data/lib/nokogiri/version.rb +4 -0
- data/lib/nokogiri/xml/attr.rb +66 -0
- data/lib/nokogiri/xml/attribute_decl.rb +20 -0
- data/lib/nokogiri/xml/builder.rb +487 -0
- data/lib/nokogiri/xml/cdata.rb +13 -0
- data/lib/nokogiri/xml/character_data.rb +9 -0
- data/lib/nokogiri/xml/document.rb +471 -0
- data/lib/nokogiri/xml/document_fragment.rb +205 -0
- data/lib/nokogiri/xml/dtd.rb +34 -0
- data/lib/nokogiri/xml/element_content.rb +38 -0
- data/lib/nokogiri/xml/element_decl.rb +15 -0
- data/lib/nokogiri/xml/entity_decl.rb +21 -0
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +58 -0
- data/lib/nokogiri/xml/node/save_options.rb +68 -0
- data/lib/nokogiri/xml/node.rb +1563 -0
- data/lib/nokogiri/xml/node_set.rb +446 -0
- data/lib/nokogiri/xml/notation.rb +19 -0
- data/lib/nokogiri/xml/parse_options.rb +213 -0
- data/lib/nokogiri/xml/pp/character_data.rb +21 -0
- data/lib/nokogiri/xml/pp/node.rb +57 -0
- data/lib/nokogiri/xml/pp.rb +4 -0
- data/lib/nokogiri/xml/processing_instruction.rb +11 -0
- data/lib/nokogiri/xml/reader.rb +105 -0
- data/lib/nokogiri/xml/relax_ng.rb +38 -0
- data/lib/nokogiri/xml/sax/document.rb +167 -0
- data/lib/nokogiri/xml/sax/parser.rb +125 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +21 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
- data/lib/nokogiri/xml/sax.rb +6 -0
- data/lib/nokogiri/xml/schema.rb +73 -0
- data/lib/nokogiri/xml/searchable.rb +270 -0
- data/lib/nokogiri/xml/syntax_error.rb +72 -0
- data/lib/nokogiri/xml/text.rb +11 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
- data/lib/nokogiri/xml/xpath.rb +21 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xml.rb +76 -0
- data/lib/nokogiri/xslt/stylesheet.rb +27 -0
- data/lib/nokogiri/xslt.rb +65 -0
- data/lib/nokogiri.rb +120 -0
- data/lib/xsd/xmlparser/nokogiri.rb +104 -0
- metadata +317 -0
@@ -0,0 +1,689 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriXmlDocument ;
|
4
|
+
|
5
|
+
static int
|
6
|
+
dealloc_node_i2(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
|
7
|
+
{
|
8
|
+
switch (node->type) {
|
9
|
+
case XML_ATTRIBUTE_NODE:
|
10
|
+
xmlFreePropList((xmlAttrPtr)node);
|
11
|
+
break;
|
12
|
+
case XML_NAMESPACE_DECL:
|
13
|
+
xmlFreeNs((xmlNsPtr)node);
|
14
|
+
break;
|
15
|
+
case XML_DTD_NODE:
|
16
|
+
xmlFreeDtd((xmlDtdPtr)node);
|
17
|
+
break;
|
18
|
+
default:
|
19
|
+
if (node->parent == NULL) {
|
20
|
+
xmlAddChild((xmlNodePtr)doc, node);
|
21
|
+
}
|
22
|
+
}
|
23
|
+
return ST_CONTINUE;
|
24
|
+
}
|
25
|
+
|
26
|
+
static int
|
27
|
+
dealloc_node_i(st_data_t key, st_data_t node, st_data_t doc)
|
28
|
+
{
|
29
|
+
return dealloc_node_i2((xmlNodePtr)key, (xmlNodePtr)node, (xmlDocPtr)doc);
|
30
|
+
}
|
31
|
+
|
32
|
+
static void
|
33
|
+
remove_private(xmlNodePtr node)
|
34
|
+
{
|
35
|
+
xmlNodePtr child;
|
36
|
+
|
37
|
+
for (child = node->children; child; child = child->next) {
|
38
|
+
remove_private(child);
|
39
|
+
}
|
40
|
+
|
41
|
+
if ((node->type == XML_ELEMENT_NODE ||
|
42
|
+
node->type == XML_XINCLUDE_START ||
|
43
|
+
node->type == XML_XINCLUDE_END) &&
|
44
|
+
node->properties) {
|
45
|
+
for (child = (xmlNodePtr)node->properties; child; child = child->next) {
|
46
|
+
remove_private(child);
|
47
|
+
}
|
48
|
+
}
|
49
|
+
|
50
|
+
node->_private = NULL;
|
51
|
+
}
|
52
|
+
|
53
|
+
static void
|
54
|
+
mark(xmlDocPtr doc)
|
55
|
+
{
|
56
|
+
nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
|
57
|
+
if (tuple) {
|
58
|
+
rb_gc_mark(tuple->doc);
|
59
|
+
rb_gc_mark(tuple->node_cache);
|
60
|
+
}
|
61
|
+
}
|
62
|
+
|
63
|
+
static void
|
64
|
+
dealloc(xmlDocPtr doc)
|
65
|
+
{
|
66
|
+
st_table *node_hash;
|
67
|
+
|
68
|
+
node_hash = DOC_UNLINKED_NODE_HASH(doc);
|
69
|
+
|
70
|
+
st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);
|
71
|
+
st_free_table(node_hash);
|
72
|
+
|
73
|
+
ruby_xfree(doc->_private);
|
74
|
+
|
75
|
+
/* When both Nokogiri and libxml-ruby are loaded, make sure that all nodes
|
76
|
+
* have their _private pointers cleared. This is to avoid libxml-ruby's
|
77
|
+
* xmlDeregisterNode callback from accessing VALUE pointers from ruby's GC
|
78
|
+
* free context, which can result in segfaults.
|
79
|
+
*/
|
80
|
+
if (xmlDeregisterNodeDefaultValue) {
|
81
|
+
remove_private((xmlNodePtr)doc);
|
82
|
+
}
|
83
|
+
|
84
|
+
xmlFreeDoc(doc);
|
85
|
+
}
|
86
|
+
|
87
|
+
static void
|
88
|
+
recursively_remove_namespaces_from_node(xmlNodePtr node)
|
89
|
+
{
|
90
|
+
xmlNodePtr child ;
|
91
|
+
xmlAttrPtr property ;
|
92
|
+
|
93
|
+
xmlSetNs(node, NULL);
|
94
|
+
|
95
|
+
for (child = node->children ; child ; child = child->next) {
|
96
|
+
recursively_remove_namespaces_from_node(child);
|
97
|
+
}
|
98
|
+
|
99
|
+
if (((node->type == XML_ELEMENT_NODE) ||
|
100
|
+
(node->type == XML_XINCLUDE_START) ||
|
101
|
+
(node->type == XML_XINCLUDE_END)) &&
|
102
|
+
node->nsDef) {
|
103
|
+
xmlNsPtr curr = node->nsDef;
|
104
|
+
while (curr) {
|
105
|
+
noko_xml_document_pin_namespace(curr, node->doc);
|
106
|
+
curr = curr->next;
|
107
|
+
}
|
108
|
+
node->nsDef = NULL;
|
109
|
+
}
|
110
|
+
|
111
|
+
if (node->type == XML_ELEMENT_NODE && node->properties != NULL) {
|
112
|
+
property = node->properties ;
|
113
|
+
while (property != NULL) {
|
114
|
+
if (property->ns) { property->ns = NULL ; }
|
115
|
+
property = property->next ;
|
116
|
+
}
|
117
|
+
}
|
118
|
+
}
|
119
|
+
|
120
|
+
/*
|
121
|
+
* call-seq:
|
122
|
+
* url
|
123
|
+
*
|
124
|
+
* Get the url name for this document.
|
125
|
+
*/
|
126
|
+
static VALUE
|
127
|
+
url(VALUE self)
|
128
|
+
{
|
129
|
+
xmlDocPtr doc;
|
130
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
131
|
+
|
132
|
+
if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
|
133
|
+
|
134
|
+
return Qnil;
|
135
|
+
}
|
136
|
+
|
137
|
+
/*
|
138
|
+
* call-seq:
|
139
|
+
* root=
|
140
|
+
*
|
141
|
+
* Set the root element on this document
|
142
|
+
*/
|
143
|
+
static VALUE
|
144
|
+
rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
|
145
|
+
{
|
146
|
+
xmlDocPtr c_document;
|
147
|
+
xmlNodePtr c_new_root = NULL, c_current_root;
|
148
|
+
|
149
|
+
Data_Get_Struct(self, xmlDoc, c_document);
|
150
|
+
|
151
|
+
c_current_root = xmlDocGetRootElement(c_document);
|
152
|
+
if (c_current_root) {
|
153
|
+
xmlUnlinkNode(c_current_root);
|
154
|
+
noko_xml_document_pin_node(c_current_root);
|
155
|
+
}
|
156
|
+
|
157
|
+
if (!NIL_P(rb_new_root)) {
|
158
|
+
if (!rb_obj_is_kind_of(rb_new_root, cNokogiriXmlNode)) {
|
159
|
+
rb_raise(rb_eArgError,
|
160
|
+
"expected Nokogiri::XML::Node but received %"PRIsVALUE,
|
161
|
+
rb_obj_class(rb_new_root));
|
162
|
+
}
|
163
|
+
|
164
|
+
Noko_Node_Get_Struct(rb_new_root, xmlNode, c_new_root);
|
165
|
+
|
166
|
+
/* If the new root's document is not the same as the current document,
|
167
|
+
* then we need to dup the node in to this document. */
|
168
|
+
if (c_new_root->doc != c_document) {
|
169
|
+
c_new_root = xmlDocCopyNode(c_new_root, c_document, 1);
|
170
|
+
if (!c_new_root) {
|
171
|
+
rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
|
172
|
+
}
|
173
|
+
}
|
174
|
+
}
|
175
|
+
|
176
|
+
xmlDocSetRootElement(c_document, c_new_root);
|
177
|
+
|
178
|
+
return rb_new_root;
|
179
|
+
}
|
180
|
+
|
181
|
+
/*
|
182
|
+
* call-seq:
|
183
|
+
* root
|
184
|
+
*
|
185
|
+
* Get the root node for this document.
|
186
|
+
*/
|
187
|
+
static VALUE
|
188
|
+
rb_xml_document_root(VALUE self)
|
189
|
+
{
|
190
|
+
xmlDocPtr c_document;
|
191
|
+
xmlNodePtr c_root;
|
192
|
+
|
193
|
+
Data_Get_Struct(self, xmlDoc, c_document);
|
194
|
+
|
195
|
+
c_root = xmlDocGetRootElement(c_document);
|
196
|
+
if (!c_root) {
|
197
|
+
return Qnil;
|
198
|
+
}
|
199
|
+
|
200
|
+
return noko_xml_node_wrap(Qnil, c_root) ;
|
201
|
+
}
|
202
|
+
|
203
|
+
/*
|
204
|
+
* call-seq:
|
205
|
+
* encoding= encoding
|
206
|
+
*
|
207
|
+
* Set the encoding string for this Document
|
208
|
+
*/
|
209
|
+
static VALUE
|
210
|
+
set_encoding(VALUE self, VALUE encoding)
|
211
|
+
{
|
212
|
+
xmlDocPtr doc;
|
213
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
214
|
+
|
215
|
+
if (doc->encoding) {
|
216
|
+
xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
|
217
|
+
}
|
218
|
+
|
219
|
+
doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding));
|
220
|
+
|
221
|
+
return encoding;
|
222
|
+
}
|
223
|
+
|
224
|
+
/*
|
225
|
+
* call-seq:
|
226
|
+
* encoding
|
227
|
+
*
|
228
|
+
* Get the encoding for this Document
|
229
|
+
*/
|
230
|
+
static VALUE
|
231
|
+
encoding(VALUE self)
|
232
|
+
{
|
233
|
+
xmlDocPtr doc;
|
234
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
235
|
+
|
236
|
+
if (!doc->encoding) { return Qnil; }
|
237
|
+
return NOKOGIRI_STR_NEW2(doc->encoding);
|
238
|
+
}
|
239
|
+
|
240
|
+
/*
|
241
|
+
* call-seq:
|
242
|
+
* version
|
243
|
+
*
|
244
|
+
* Get the XML version for this Document
|
245
|
+
*/
|
246
|
+
static VALUE
|
247
|
+
version(VALUE self)
|
248
|
+
{
|
249
|
+
xmlDocPtr doc;
|
250
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
251
|
+
|
252
|
+
if (!doc->version) { return Qnil; }
|
253
|
+
return NOKOGIRI_STR_NEW2(doc->version);
|
254
|
+
}
|
255
|
+
|
256
|
+
/*
|
257
|
+
* call-seq:
|
258
|
+
* read_io(io, url, encoding, options)
|
259
|
+
*
|
260
|
+
* Create a new document from an IO object
|
261
|
+
*/
|
262
|
+
static VALUE
|
263
|
+
read_io(VALUE klass,
|
264
|
+
VALUE io,
|
265
|
+
VALUE url,
|
266
|
+
VALUE encoding,
|
267
|
+
VALUE options)
|
268
|
+
{
|
269
|
+
const char *c_url = NIL_P(url) ? NULL : StringValueCStr(url);
|
270
|
+
const char *c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
|
271
|
+
VALUE error_list = rb_ary_new();
|
272
|
+
VALUE document;
|
273
|
+
xmlDocPtr doc;
|
274
|
+
|
275
|
+
xmlResetLastError();
|
276
|
+
xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
|
277
|
+
|
278
|
+
doc = xmlReadIO(
|
279
|
+
(xmlInputReadCallback)noko_io_read,
|
280
|
+
(xmlInputCloseCallback)noko_io_close,
|
281
|
+
(void *)io,
|
282
|
+
c_url,
|
283
|
+
c_enc,
|
284
|
+
(int)NUM2INT(options)
|
285
|
+
);
|
286
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
287
|
+
|
288
|
+
if (doc == NULL) {
|
289
|
+
xmlErrorPtr error;
|
290
|
+
|
291
|
+
xmlFreeDoc(doc);
|
292
|
+
|
293
|
+
error = xmlGetLastError();
|
294
|
+
if (error) {
|
295
|
+
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
296
|
+
} else {
|
297
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
298
|
+
}
|
299
|
+
|
300
|
+
return Qnil;
|
301
|
+
}
|
302
|
+
|
303
|
+
document = noko_xml_document_wrap(klass, doc);
|
304
|
+
rb_iv_set(document, "@errors", error_list);
|
305
|
+
return document;
|
306
|
+
}
|
307
|
+
|
308
|
+
/*
|
309
|
+
* call-seq:
|
310
|
+
* read_memory(string, url, encoding, options)
|
311
|
+
*
|
312
|
+
* Create a new document from a String
|
313
|
+
*/
|
314
|
+
static VALUE
|
315
|
+
read_memory(VALUE klass,
|
316
|
+
VALUE string,
|
317
|
+
VALUE url,
|
318
|
+
VALUE encoding,
|
319
|
+
VALUE options)
|
320
|
+
{
|
321
|
+
const char *c_buffer = StringValuePtr(string);
|
322
|
+
const char *c_url = NIL_P(url) ? NULL : StringValueCStr(url);
|
323
|
+
const char *c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
|
324
|
+
int len = (int)RSTRING_LEN(string);
|
325
|
+
VALUE error_list = rb_ary_new();
|
326
|
+
VALUE document;
|
327
|
+
xmlDocPtr doc;
|
328
|
+
|
329
|
+
xmlResetLastError();
|
330
|
+
xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
|
331
|
+
doc = xmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options));
|
332
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
333
|
+
|
334
|
+
if (doc == NULL) {
|
335
|
+
xmlErrorPtr error;
|
336
|
+
|
337
|
+
xmlFreeDoc(doc);
|
338
|
+
|
339
|
+
error = xmlGetLastError();
|
340
|
+
if (error) {
|
341
|
+
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
342
|
+
} else {
|
343
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
344
|
+
}
|
345
|
+
|
346
|
+
return Qnil;
|
347
|
+
}
|
348
|
+
|
349
|
+
document = noko_xml_document_wrap(klass, doc);
|
350
|
+
rb_iv_set(document, "@errors", error_list);
|
351
|
+
return document;
|
352
|
+
}
|
353
|
+
|
354
|
+
/*
|
355
|
+
* call-seq:
|
356
|
+
* dup
|
357
|
+
*
|
358
|
+
* Copy this Document. An optional depth may be passed in, but it defaults
|
359
|
+
* to a deep copy. 0 is a shallow copy, 1 is a deep copy.
|
360
|
+
*/
|
361
|
+
static VALUE
|
362
|
+
duplicate_document(int argc, VALUE *argv, VALUE self)
|
363
|
+
{
|
364
|
+
xmlDocPtr doc, dup;
|
365
|
+
VALUE copy;
|
366
|
+
VALUE level;
|
367
|
+
|
368
|
+
if (rb_scan_args(argc, argv, "01", &level) == 0) {
|
369
|
+
level = INT2NUM((long)1);
|
370
|
+
}
|
371
|
+
|
372
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
373
|
+
|
374
|
+
dup = xmlCopyDoc(doc, (int)NUM2INT(level));
|
375
|
+
|
376
|
+
if (dup == NULL) { return Qnil; }
|
377
|
+
|
378
|
+
dup->type = doc->type;
|
379
|
+
copy = noko_xml_document_wrap(rb_obj_class(self), dup);
|
380
|
+
rb_iv_set(copy, "@errors", rb_iv_get(self, "@errors"));
|
381
|
+
return copy ;
|
382
|
+
}
|
383
|
+
|
384
|
+
/*
|
385
|
+
* call-seq:
|
386
|
+
* new(version = default)
|
387
|
+
*
|
388
|
+
* Create a new document with +version+ (defaults to "1.0")
|
389
|
+
*/
|
390
|
+
static VALUE
|
391
|
+
new (int argc, VALUE *argv, VALUE klass)
|
392
|
+
{
|
393
|
+
xmlDocPtr doc;
|
394
|
+
VALUE version, rest, rb_doc ;
|
395
|
+
|
396
|
+
rb_scan_args(argc, argv, "0*", &rest);
|
397
|
+
version = rb_ary_entry(rest, (long)0);
|
398
|
+
if (NIL_P(version)) { version = rb_str_new2("1.0"); }
|
399
|
+
|
400
|
+
doc = xmlNewDoc((xmlChar *)StringValueCStr(version));
|
401
|
+
rb_doc = noko_xml_document_wrap_with_init_args(klass, doc, argc, argv);
|
402
|
+
return rb_doc ;
|
403
|
+
}
|
404
|
+
|
405
|
+
/*
|
406
|
+
* call-seq:
|
407
|
+
* remove_namespaces!
|
408
|
+
*
|
409
|
+
* Remove all namespaces from all nodes in the document.
|
410
|
+
*
|
411
|
+
* This could be useful for developers who either don't understand namespaces
|
412
|
+
* or don't care about them.
|
413
|
+
*
|
414
|
+
* The following example shows a use case, and you can decide for yourself
|
415
|
+
* whether this is a good thing or not:
|
416
|
+
*
|
417
|
+
* doc = Nokogiri::XML <<-EOXML
|
418
|
+
* <root>
|
419
|
+
* <car xmlns:part="http://general-motors.com/">
|
420
|
+
* <part:tire>Michelin Model XGV</part:tire>
|
421
|
+
* </car>
|
422
|
+
* <bicycle xmlns:part="http://schwinn.com/">
|
423
|
+
* <part:tire>I'm a bicycle tire!</part:tire>
|
424
|
+
* </bicycle>
|
425
|
+
* </root>
|
426
|
+
* EOXML
|
427
|
+
*
|
428
|
+
* doc.xpath("//tire").to_s # => ""
|
429
|
+
* doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => "<part:tire>Michelin Model XGV</part:tire>"
|
430
|
+
* doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => "<part:tire>I'm a bicycle tire!</part:tire>"
|
431
|
+
*
|
432
|
+
* doc.remove_namespaces!
|
433
|
+
*
|
434
|
+
* doc.xpath("//tire").to_s # => "<tire>Michelin Model XGV</tire><tire>I'm a bicycle tire!</tire>"
|
435
|
+
* doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => ""
|
436
|
+
* doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => ""
|
437
|
+
*
|
438
|
+
* For more information on why this probably is *not* a good thing in general,
|
439
|
+
* please direct your browser to
|
440
|
+
* http://tenderlovemaking.com/2009/04/23/namespaces-in-xml.html
|
441
|
+
*/
|
442
|
+
static VALUE
|
443
|
+
remove_namespaces_bang(VALUE self)
|
444
|
+
{
|
445
|
+
xmlDocPtr doc ;
|
446
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
447
|
+
|
448
|
+
recursively_remove_namespaces_from_node((xmlNodePtr)doc);
|
449
|
+
return self;
|
450
|
+
}
|
451
|
+
|
452
|
+
/* call-seq: doc.create_entity(name, type, external_id, system_id, content)
|
453
|
+
*
|
454
|
+
* Create a new entity named +name+.
|
455
|
+
*
|
456
|
+
* +type+ is an integer representing the type of entity to be created, and it
|
457
|
+
* defaults to Nokogiri::XML::EntityDecl::INTERNAL_GENERAL. See
|
458
|
+
* the constants on Nokogiri::XML::EntityDecl for more information.
|
459
|
+
*
|
460
|
+
* +external_id+, +system_id+, and +content+ set the External ID, System ID,
|
461
|
+
* and content respectively. All of these parameters are optional.
|
462
|
+
*/
|
463
|
+
static VALUE
|
464
|
+
create_entity(int argc, VALUE *argv, VALUE self)
|
465
|
+
{
|
466
|
+
VALUE name;
|
467
|
+
VALUE type;
|
468
|
+
VALUE external_id;
|
469
|
+
VALUE system_id;
|
470
|
+
VALUE content;
|
471
|
+
xmlEntityPtr ptr;
|
472
|
+
xmlDocPtr doc ;
|
473
|
+
|
474
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
475
|
+
|
476
|
+
rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
|
477
|
+
&content);
|
478
|
+
|
479
|
+
xmlResetLastError();
|
480
|
+
ptr = xmlAddDocEntity(
|
481
|
+
doc,
|
482
|
+
(xmlChar *)(NIL_P(name) ? NULL : StringValueCStr(name)),
|
483
|
+
(int)(NIL_P(type) ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(type)),
|
484
|
+
(xmlChar *)(NIL_P(external_id) ? NULL : StringValueCStr(external_id)),
|
485
|
+
(xmlChar *)(NIL_P(system_id) ? NULL : StringValueCStr(system_id)),
|
486
|
+
(xmlChar *)(NIL_P(content) ? NULL : StringValueCStr(content))
|
487
|
+
);
|
488
|
+
|
489
|
+
if (NULL == ptr) {
|
490
|
+
xmlErrorPtr error = xmlGetLastError();
|
491
|
+
if (error) {
|
492
|
+
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
493
|
+
} else {
|
494
|
+
rb_raise(rb_eRuntimeError, "Could not create entity");
|
495
|
+
}
|
496
|
+
|
497
|
+
return Qnil;
|
498
|
+
}
|
499
|
+
|
500
|
+
return noko_xml_node_wrap(cNokogiriXmlEntityDecl, (xmlNodePtr)ptr);
|
501
|
+
}
|
502
|
+
|
503
|
+
static int
|
504
|
+
block_caller(void *ctx, xmlNodePtr c_node, xmlNodePtr c_parent_node)
|
505
|
+
{
|
506
|
+
VALUE block = (VALUE)ctx;
|
507
|
+
VALUE rb_node;
|
508
|
+
VALUE rb_parent_node;
|
509
|
+
VALUE ret;
|
510
|
+
|
511
|
+
if (c_node->type == XML_NAMESPACE_DECL) {
|
512
|
+
rb_node = noko_xml_namespace_wrap((xmlNsPtr)c_node, c_parent_node->doc);
|
513
|
+
} else {
|
514
|
+
rb_node = noko_xml_node_wrap(Qnil, c_node);
|
515
|
+
}
|
516
|
+
rb_parent_node = c_parent_node ? noko_xml_node_wrap(Qnil, c_parent_node) : Qnil;
|
517
|
+
|
518
|
+
ret = rb_funcall(block, rb_intern("call"), 2, rb_node, rb_parent_node);
|
519
|
+
|
520
|
+
return (Qfalse == ret || Qnil == ret) ? 0 : 1;
|
521
|
+
}
|
522
|
+
|
523
|
+
/* call-seq:
|
524
|
+
* doc.canonicalize(mode=XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
|
525
|
+
* doc.canonicalize { |obj, parent| ... }
|
526
|
+
*
|
527
|
+
* Canonicalize a document and return the results. Takes an optional block
|
528
|
+
* that takes two parameters: the +obj+ and that node's +parent+.
|
529
|
+
* The +obj+ will be either a Nokogiri::XML::Node, or a Nokogiri::XML::Namespace
|
530
|
+
* The block must return a non-nil, non-false value if the +obj+ passed in
|
531
|
+
* should be included in the canonicalized document.
|
532
|
+
*/
|
533
|
+
static VALUE
|
534
|
+
rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
|
535
|
+
{
|
536
|
+
VALUE rb_mode;
|
537
|
+
VALUE rb_namespaces;
|
538
|
+
VALUE rb_comments_p;
|
539
|
+
int c_mode = 0;
|
540
|
+
xmlChar **c_namespaces;
|
541
|
+
|
542
|
+
xmlDocPtr c_doc;
|
543
|
+
xmlOutputBufferPtr c_obuf;
|
544
|
+
xmlC14NIsVisibleCallback c_callback_wrapper = NULL;
|
545
|
+
void *rb_callback = NULL;
|
546
|
+
|
547
|
+
VALUE rb_cStringIO;
|
548
|
+
VALUE rb_io;
|
549
|
+
|
550
|
+
rb_scan_args(argc, argv, "03", &rb_mode, &rb_namespaces, &rb_comments_p);
|
551
|
+
if (!NIL_P(rb_mode)) {
|
552
|
+
Check_Type(rb_mode, T_FIXNUM);
|
553
|
+
c_mode = NUM2INT(rb_mode);
|
554
|
+
}
|
555
|
+
if (!NIL_P(rb_namespaces)) {
|
556
|
+
Check_Type(rb_namespaces, T_ARRAY);
|
557
|
+
if (c_mode == XML_C14N_1_0 || c_mode == XML_C14N_1_1) {
|
558
|
+
rb_raise(rb_eRuntimeError, "This canonicalizer does not support this operation");
|
559
|
+
}
|
560
|
+
}
|
561
|
+
|
562
|
+
Data_Get_Struct(self, xmlDoc, c_doc);
|
563
|
+
|
564
|
+
rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
|
565
|
+
rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
|
566
|
+
c_obuf = xmlAllocOutputBuffer(NULL);
|
567
|
+
|
568
|
+
c_obuf->writecallback = (xmlOutputWriteCallback)noko_io_write;
|
569
|
+
c_obuf->closecallback = (xmlOutputCloseCallback)noko_io_close;
|
570
|
+
c_obuf->context = (void *)rb_io;
|
571
|
+
|
572
|
+
if (rb_block_given_p()) {
|
573
|
+
c_callback_wrapper = block_caller;
|
574
|
+
rb_callback = (void *)rb_block_proc();
|
575
|
+
}
|
576
|
+
|
577
|
+
if (NIL_P(rb_namespaces)) {
|
578
|
+
c_namespaces = NULL;
|
579
|
+
} else {
|
580
|
+
long ns_len = RARRAY_LEN(rb_namespaces);
|
581
|
+
c_namespaces = ruby_xcalloc((size_t)ns_len + 1, sizeof(xmlChar *));
|
582
|
+
for (int j = 0 ; j < ns_len ; j++) {
|
583
|
+
VALUE entry = rb_ary_entry(rb_namespaces, j);
|
584
|
+
c_namespaces[j] = (xmlChar *)StringValueCStr(entry);
|
585
|
+
}
|
586
|
+
}
|
587
|
+
|
588
|
+
xmlC14NExecute(c_doc, c_callback_wrapper, rb_callback,
|
589
|
+
c_mode,
|
590
|
+
c_namespaces,
|
591
|
+
(int)RTEST(rb_comments_p),
|
592
|
+
c_obuf);
|
593
|
+
|
594
|
+
ruby_xfree(c_namespaces);
|
595
|
+
xmlOutputBufferClose(c_obuf);
|
596
|
+
|
597
|
+
return rb_funcall(rb_io, rb_intern("string"), 0);
|
598
|
+
}
|
599
|
+
|
600
|
+
VALUE
|
601
|
+
noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int argc, VALUE *argv)
|
602
|
+
{
|
603
|
+
VALUE rb_document;
|
604
|
+
nokogiriTuplePtr tuple;
|
605
|
+
|
606
|
+
if (!klass) {
|
607
|
+
klass = cNokogiriXmlDocument;
|
608
|
+
}
|
609
|
+
|
610
|
+
rb_document = Data_Wrap_Struct(klass, mark, dealloc, c_document);
|
611
|
+
|
612
|
+
tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
|
613
|
+
tuple->doc = rb_document;
|
614
|
+
tuple->unlinkedNodes = st_init_numtable_with_size(128);
|
615
|
+
tuple->node_cache = rb_ary_new();
|
616
|
+
|
617
|
+
c_document->_private = tuple ;
|
618
|
+
|
619
|
+
rb_iv_set(rb_document, "@decorators", Qnil);
|
620
|
+
rb_iv_set(rb_document, "@errors", Qnil);
|
621
|
+
rb_iv_set(rb_document, "@node_cache", tuple->node_cache);
|
622
|
+
|
623
|
+
rb_obj_call_init(rb_document, argc, argv);
|
624
|
+
|
625
|
+
return rb_document ;
|
626
|
+
}
|
627
|
+
|
628
|
+
|
629
|
+
/* deprecated. use noko_xml_document_wrap() instead. */
|
630
|
+
VALUE
|
631
|
+
Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
|
632
|
+
{
|
633
|
+
/* TODO: deprecate this method in v2.0 */
|
634
|
+
return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
|
635
|
+
}
|
636
|
+
|
637
|
+
VALUE
|
638
|
+
noko_xml_document_wrap(VALUE klass, xmlDocPtr doc)
|
639
|
+
{
|
640
|
+
return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
|
641
|
+
}
|
642
|
+
|
643
|
+
|
644
|
+
void
|
645
|
+
noko_xml_document_pin_node(xmlNodePtr node)
|
646
|
+
{
|
647
|
+
xmlDocPtr doc;
|
648
|
+
nokogiriTuplePtr tuple;
|
649
|
+
|
650
|
+
doc = node->doc;
|
651
|
+
tuple = (nokogiriTuplePtr)doc->_private;
|
652
|
+
st_insert(tuple->unlinkedNodes, (st_data_t)node, (st_data_t)node);
|
653
|
+
}
|
654
|
+
|
655
|
+
|
656
|
+
void
|
657
|
+
noko_xml_document_pin_namespace(xmlNsPtr ns, xmlDocPtr doc)
|
658
|
+
{
|
659
|
+
nokogiriTuplePtr tuple;
|
660
|
+
|
661
|
+
tuple = (nokogiriTuplePtr)doc->_private;
|
662
|
+
st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns);
|
663
|
+
}
|
664
|
+
|
665
|
+
|
666
|
+
void
|
667
|
+
noko_init_xml_document()
|
668
|
+
{
|
669
|
+
assert(cNokogiriXmlNode);
|
670
|
+
/*
|
671
|
+
* Nokogiri::XML::Document wraps an xml document.
|
672
|
+
*/
|
673
|
+
cNokogiriXmlDocument = rb_define_class_under(mNokogiriXml, "Document", cNokogiriXmlNode);
|
674
|
+
|
675
|
+
rb_define_singleton_method(cNokogiriXmlDocument, "read_memory", read_memory, 4);
|
676
|
+
rb_define_singleton_method(cNokogiriXmlDocument, "read_io", read_io, 4);
|
677
|
+
rb_define_singleton_method(cNokogiriXmlDocument, "new", new, -1);
|
678
|
+
|
679
|
+
rb_define_method(cNokogiriXmlDocument, "root", rb_xml_document_root, 0);
|
680
|
+
rb_define_method(cNokogiriXmlDocument, "root=", rb_xml_document_root_set, 1);
|
681
|
+
rb_define_method(cNokogiriXmlDocument, "encoding", encoding, 0);
|
682
|
+
rb_define_method(cNokogiriXmlDocument, "encoding=", set_encoding, 1);
|
683
|
+
rb_define_method(cNokogiriXmlDocument, "version", version, 0);
|
684
|
+
rb_define_method(cNokogiriXmlDocument, "canonicalize", rb_xml_document_canonicalize, -1);
|
685
|
+
rb_define_method(cNokogiriXmlDocument, "dup", duplicate_document, -1);
|
686
|
+
rb_define_method(cNokogiriXmlDocument, "url", url, 0);
|
687
|
+
rb_define_method(cNokogiriXmlDocument, "create_entity", create_entity, -1);
|
688
|
+
rb_define_method(cNokogiriXmlDocument, "remove_namespaces!", remove_namespaces_bang, 0);
|
689
|
+
}
|