nokogiri 1.10.9 → 1.18.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +1632 -1022
- data/LICENSE.md +1 -1
- data/README.md +190 -95
- data/bin/nokogiri +63 -50
- data/dependencies.yml +34 -66
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +909 -422
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +258 -105
- data/ext/nokogiri/nokogiri.h +207 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +18 -18
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +33 -33
- data/ext/nokogiri/xml_comment.c +19 -31
- data/ext/nokogiri/xml_document.c +499 -323
- data/ext/nokogiri/xml_document_fragment.c +17 -36
- data/ext/nokogiri/xml_dtd.c +65 -59
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +131 -61
- data/ext/nokogiri/xml_node.c +1429 -723
- data/ext/nokogiri/xml_node_set.c +257 -225
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +340 -231
- data/ext/nokogiri/xml_relax_ng.c +87 -99
- data/ext/nokogiri/xml_sax_parser.c +269 -176
- data/ext/nokogiri/xml_sax_parser_context.c +286 -152
- data/ext/nokogiri/xml_sax_push_parser.c +111 -64
- data/ext/nokogiri/xml_schema.c +132 -140
- data/ext/nokogiri/xml_syntax_error.c +52 -23
- data/ext/nokogiri/xml_text.c +37 -30
- data/ext/nokogiri/xml_xpath_context.c +373 -185
- data/ext/nokogiri/xslt_stylesheet.c +342 -191
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +658 -0
- data/gumbo-parser/src/error.h +152 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
- data/gumbo-parser/src/parser.c +4932 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3464 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +14 -8
- data/lib/nokogiri/css/parser.rb +399 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +16 -71
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +7 -5
- data/lib/nokogiri/css/tokenizer.rex +11 -9
- data/lib/nokogiri/css/xpath_visitor.rb +242 -96
- data/lib/nokogiri/css.rb +122 -17
- data/lib/nokogiri/decorators/slop.rb +11 -11
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +83 -35
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +359 -130
- data/lib/nokogiri/xml/document_fragment.rb +170 -54
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1168 -420
- data/lib/nokogiri/xml/node_set.rb +145 -67
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +47 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +68 -41
- data/lib/nokogiri/xml/relax_ng.rb +60 -17
- data/lib/nokogiri/xml/sax/document.rb +198 -111
- data/lib/nokogiri/xml/sax/parser.rb +144 -67
- data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
- data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
- data/lib/nokogiri/xml/sax.rb +54 -4
- data/lib/nokogiri/xml/schema.rb +116 -39
- data/lib/nokogiri/xml/searchable.rb +139 -95
- data/lib/nokogiri/xml/syntax_error.rb +29 -5
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +15 -4
- data/lib/nokogiri/xml.rb +45 -55
- data/lib/nokogiri/xslt/stylesheet.rb +32 -8
- data/lib/nokogiri/xslt.rb +103 -30
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +32 -29
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +123 -295
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser.rb +0 -62
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
data/ext/nokogiri/xml_document.c
CHANGED
@@ -1,91 +1,225 @@
|
|
1
|
-
#include <
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriXmlDocument ;
|
4
|
+
|
5
|
+
static int
|
6
|
+
dealloc_node_i2(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
|
7
|
+
{
|
8
|
+
switch (node->type) {
|
9
|
+
case XML_ATTRIBUTE_NODE:
|
10
|
+
xmlFreePropList((xmlAttrPtr)node);
|
11
|
+
break;
|
12
|
+
case XML_NAMESPACE_DECL:
|
13
|
+
xmlFreeNs((xmlNsPtr)node);
|
14
|
+
break;
|
15
|
+
case XML_DTD_NODE:
|
16
|
+
xmlFreeDtd((xmlDtdPtr)node);
|
17
|
+
break;
|
18
|
+
default:
|
19
|
+
if (node->parent == NULL) {
|
20
|
+
node->next = NULL;
|
21
|
+
node->prev = NULL;
|
22
|
+
xmlAddChild((xmlNodePtr)doc, node);
|
23
|
+
}
|
19
24
|
}
|
20
25
|
return ST_CONTINUE;
|
21
26
|
}
|
22
27
|
|
23
|
-
static
|
28
|
+
static int
|
29
|
+
dealloc_node_i(st_data_t key, st_data_t node, st_data_t doc)
|
30
|
+
{
|
31
|
+
return dealloc_node_i2((xmlNodePtr)key, (xmlNodePtr)node, (xmlDocPtr)doc);
|
32
|
+
}
|
33
|
+
|
34
|
+
static void
|
35
|
+
remove_private(xmlNodePtr node)
|
24
36
|
{
|
25
37
|
xmlNodePtr child;
|
26
38
|
|
27
|
-
for (child = node->children; child; child = child->next)
|
39
|
+
for (child = node->children; child; child = child->next) {
|
28
40
|
remove_private(child);
|
41
|
+
}
|
29
42
|
|
30
43
|
if ((node->type == XML_ELEMENT_NODE ||
|
31
44
|
node->type == XML_XINCLUDE_START ||
|
32
45
|
node->type == XML_XINCLUDE_END) &&
|
33
46
|
node->properties) {
|
34
|
-
for (child = (xmlNodePtr)node->properties; child; child = child->next)
|
47
|
+
for (child = (xmlNodePtr)node->properties; child; child = child->next) {
|
35
48
|
remove_private(child);
|
49
|
+
}
|
36
50
|
}
|
37
51
|
|
38
52
|
node->_private = NULL;
|
39
53
|
}
|
40
54
|
|
41
|
-
static void
|
55
|
+
static void
|
56
|
+
mark(void *data)
|
42
57
|
{
|
43
|
-
|
58
|
+
xmlDocPtr doc = (xmlDocPtr)data;
|
59
|
+
nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
|
60
|
+
if (tuple) {
|
61
|
+
rb_gc_mark(tuple->doc);
|
62
|
+
rb_gc_mark(tuple->node_cache);
|
63
|
+
}
|
64
|
+
}
|
44
65
|
|
45
|
-
|
66
|
+
static void
|
67
|
+
dealloc(void *data)
|
68
|
+
{
|
69
|
+
xmlDocPtr doc = (xmlDocPtr)data;
|
70
|
+
st_table *node_hash;
|
46
71
|
|
47
72
|
node_hash = DOC_UNLINKED_NODE_HASH(doc);
|
48
73
|
|
49
74
|
st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);
|
50
75
|
st_free_table(node_hash);
|
51
76
|
|
52
|
-
|
77
|
+
ruby_xfree(doc->_private);
|
53
78
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
79
|
+
#if defined(__GNUC__) && __GNUC__ >= 5
|
80
|
+
#pragma GCC diagnostic push
|
81
|
+
#pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
|
82
|
+
#endif
|
83
|
+
/*
|
84
|
+
* libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
|
85
|
+
* versions, the registered callback from libxml-ruby will access the _private pointers set by
|
86
|
+
* nokogiri, which will result in segfaults.
|
87
|
+
*
|
88
|
+
* To avoid this, we need to clear the _private pointers from all nodes in this document tree
|
89
|
+
* before that callback gets invoked.
|
90
|
+
*
|
91
|
+
* libxml-ruby 3.0.0 was released in 2017-02, so at some point we can probably safely remove this
|
92
|
+
* safeguard (though probably pairing with a runtime check on the libxml-ruby version).
|
58
93
|
*/
|
59
|
-
if (xmlDeregisterNodeDefaultValue)
|
94
|
+
if (xmlDeregisterNodeDefaultValue) {
|
60
95
|
remove_private((xmlNodePtr)doc);
|
96
|
+
}
|
97
|
+
#if defined(__GNUC__) && __GNUC__ >= 5
|
98
|
+
#pragma GCC diagnostic pop
|
99
|
+
#endif
|
61
100
|
|
62
101
|
xmlFreeDoc(doc);
|
102
|
+
}
|
63
103
|
|
64
|
-
|
104
|
+
static size_t
|
105
|
+
memsize_node(const xmlNodePtr node)
|
106
|
+
{
|
107
|
+
/* note we don't count namespace definitions, just going for a good-enough number here */
|
108
|
+
xmlNodePtr child;
|
109
|
+
xmlAttrPtr property;
|
110
|
+
size_t memsize = 0;
|
111
|
+
|
112
|
+
memsize += (size_t)xmlStrlen(node->name);
|
113
|
+
|
114
|
+
if (node->type == XML_ELEMENT_NODE) {
|
115
|
+
for (property = node->properties; property; property = property->next) {
|
116
|
+
memsize += sizeof(xmlAttr) + memsize_node((xmlNodePtr)property);
|
117
|
+
}
|
118
|
+
}
|
119
|
+
if (node->type == XML_TEXT_NODE) {
|
120
|
+
memsize += (size_t)xmlStrlen(node->content);
|
121
|
+
}
|
122
|
+
for (child = node->children; child; child = child->next) {
|
123
|
+
memsize += sizeof(xmlNode) + memsize_node(child);
|
124
|
+
}
|
125
|
+
return memsize;
|
126
|
+
}
|
127
|
+
|
128
|
+
static size_t
|
129
|
+
memsize(const void *data)
|
130
|
+
{
|
131
|
+
xmlDocPtr doc = (const xmlDocPtr)data;
|
132
|
+
size_t memsize = sizeof(xmlDoc);
|
133
|
+
/* This may not account for all memory use */
|
134
|
+
memsize += memsize_node((xmlNodePtr)doc);
|
135
|
+
return memsize;
|
65
136
|
}
|
66
137
|
|
67
|
-
static
|
138
|
+
static const rb_data_type_t xml_doc_type = {
|
139
|
+
.wrap_struct_name = "xmlDoc",
|
140
|
+
.function = {
|
141
|
+
.dmark = mark,
|
142
|
+
.dfree = dealloc,
|
143
|
+
.dsize = memsize,
|
144
|
+
},
|
145
|
+
// .flags = RUBY_TYPED_FREE_IMMEDIATELY, // TODO see https://github.com/sparklemotion/nokogiri/issues/2822
|
146
|
+
};
|
147
|
+
|
148
|
+
static VALUE
|
149
|
+
_xml_document_alloc(VALUE klass)
|
150
|
+
{
|
151
|
+
return TypedData_Wrap_Struct(klass, &xml_doc_type, NULL);
|
152
|
+
}
|
153
|
+
|
154
|
+
static void
|
155
|
+
_xml_document_data_ptr_set(VALUE rb_document, xmlDocPtr c_document)
|
156
|
+
{
|
157
|
+
nokogiriTuplePtr tuple;
|
158
|
+
|
159
|
+
assert(DATA_PTR(rb_document) == NULL);
|
160
|
+
assert(c_document->_private == NULL);
|
161
|
+
|
162
|
+
DATA_PTR(rb_document) = c_document;
|
163
|
+
|
164
|
+
tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
|
165
|
+
tuple->doc = rb_document;
|
166
|
+
tuple->unlinkedNodes = st_init_numtable_with_size(128);
|
167
|
+
tuple->node_cache = rb_ary_new();
|
168
|
+
|
169
|
+
c_document->_private = tuple ;
|
170
|
+
|
171
|
+
rb_iv_set(rb_document, "@node_cache", tuple->node_cache);
|
172
|
+
|
173
|
+
return;
|
174
|
+
}
|
175
|
+
|
176
|
+
/* :nodoc: */
|
177
|
+
static VALUE
|
178
|
+
rb_xml_document_initialize_copy_with_args(VALUE rb_self, VALUE rb_other, VALUE rb_level)
|
179
|
+
{
|
180
|
+
xmlDocPtr c_other, c_self;
|
181
|
+
int c_level;
|
182
|
+
|
183
|
+
c_other = noko_xml_document_unwrap(rb_other);
|
184
|
+
c_level = (int)NUM2INT(rb_level);
|
185
|
+
|
186
|
+
c_self = xmlCopyDoc(c_other, c_level);
|
187
|
+
if (c_self == NULL) { return Qnil; }
|
188
|
+
|
189
|
+
c_self->type = c_other->type;
|
190
|
+
_xml_document_data_ptr_set(rb_self, c_self);
|
191
|
+
|
192
|
+
return rb_self ;
|
193
|
+
}
|
194
|
+
|
195
|
+
static void
|
196
|
+
recursively_remove_namespaces_from_node(xmlNodePtr node)
|
68
197
|
{
|
69
198
|
xmlNodePtr child ;
|
70
199
|
xmlAttrPtr property ;
|
71
200
|
|
72
201
|
xmlSetNs(node, NULL);
|
73
202
|
|
74
|
-
for (child = node->children ; child ; child = child->next)
|
203
|
+
for (child = node->children ; child ; child = child->next) {
|
75
204
|
recursively_remove_namespaces_from_node(child);
|
205
|
+
}
|
76
206
|
|
77
207
|
if (((node->type == XML_ELEMENT_NODE) ||
|
78
208
|
(node->type == XML_XINCLUDE_START) ||
|
79
209
|
(node->type == XML_XINCLUDE_END)) &&
|
80
210
|
node->nsDef) {
|
81
|
-
|
211
|
+
xmlNsPtr curr = node->nsDef;
|
212
|
+
while (curr) {
|
213
|
+
noko_xml_document_pin_namespace(curr, node->doc);
|
214
|
+
curr = curr->next;
|
215
|
+
}
|
82
216
|
node->nsDef = NULL;
|
83
217
|
}
|
84
218
|
|
85
219
|
if (node->type == XML_ELEMENT_NODE && node->properties != NULL) {
|
86
220
|
property = node->properties ;
|
87
221
|
while (property != NULL) {
|
88
|
-
if (property->ns) property->ns = NULL ;
|
222
|
+
if (property->ns) { property->ns = NULL ; }
|
89
223
|
property = property->next ;
|
90
224
|
}
|
91
225
|
}
|
@@ -97,12 +231,12 @@ static void recursively_remove_namespaces_from_node(xmlNodePtr node)
|
|
97
231
|
*
|
98
232
|
* Get the url name for this document.
|
99
233
|
*/
|
100
|
-
static VALUE
|
234
|
+
static VALUE
|
235
|
+
url(VALUE self)
|
101
236
|
{
|
102
|
-
xmlDocPtr doc;
|
103
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
237
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
104
238
|
|
105
|
-
if(doc->URL) return NOKOGIRI_STR_NEW2(doc->URL);
|
239
|
+
if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
|
106
240
|
|
107
241
|
return Qnil;
|
108
242
|
}
|
@@ -113,42 +247,42 @@ static VALUE url(VALUE self)
|
|
113
247
|
*
|
114
248
|
* Set the root element on this document
|
115
249
|
*/
|
116
|
-
static VALUE
|
250
|
+
static VALUE
|
251
|
+
rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
|
117
252
|
{
|
118
|
-
xmlDocPtr
|
119
|
-
xmlNodePtr
|
120
|
-
xmlNodePtr old_root;
|
121
|
-
|
122
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
123
|
-
|
124
|
-
old_root = NULL;
|
253
|
+
xmlDocPtr c_document;
|
254
|
+
xmlNodePtr c_new_root = NULL, c_current_root;
|
125
255
|
|
126
|
-
|
127
|
-
old_root = xmlDocGetRootElement(doc);
|
256
|
+
c_document = noko_xml_document_unwrap(self);
|
128
257
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
return root;
|
258
|
+
c_current_root = xmlDocGetRootElement(c_document);
|
259
|
+
if (c_current_root) {
|
260
|
+
xmlUnlinkNode(c_current_root);
|
261
|
+
noko_xml_document_pin_node(c_current_root);
|
135
262
|
}
|
136
263
|
|
137
|
-
|
264
|
+
if (!NIL_P(rb_new_root)) {
|
265
|
+
if (!rb_obj_is_kind_of(rb_new_root, cNokogiriXmlNode)) {
|
266
|
+
rb_raise(rb_eArgError,
|
267
|
+
"expected Nokogiri::XML::Node but received %"PRIsVALUE,
|
268
|
+
rb_obj_class(rb_new_root));
|
269
|
+
}
|
138
270
|
|
271
|
+
Noko_Node_Get_Struct(rb_new_root, xmlNode, c_new_root);
|
139
272
|
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
273
|
+
/* If the new root's document is not the same as the current document,
|
274
|
+
* then we need to dup the node in to this document. */
|
275
|
+
if (c_new_root->doc != c_document) {
|
276
|
+
c_new_root = xmlDocCopyNode(c_new_root, c_document, 1);
|
277
|
+
if (!c_new_root) {
|
278
|
+
rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
|
279
|
+
}
|
146
280
|
}
|
147
281
|
}
|
148
282
|
|
149
|
-
xmlDocSetRootElement(
|
150
|
-
|
151
|
-
return
|
283
|
+
xmlDocSetRootElement(c_document, c_new_root);
|
284
|
+
|
285
|
+
return rb_new_root;
|
152
286
|
}
|
153
287
|
|
154
288
|
/*
|
@@ -157,17 +291,20 @@ static VALUE set_root(VALUE self, VALUE root)
|
|
157
291
|
*
|
158
292
|
* Get the root node for this document.
|
159
293
|
*/
|
160
|
-
static VALUE
|
294
|
+
static VALUE
|
295
|
+
rb_xml_document_root(VALUE self)
|
161
296
|
{
|
162
|
-
xmlDocPtr
|
163
|
-
xmlNodePtr
|
297
|
+
xmlDocPtr c_document;
|
298
|
+
xmlNodePtr c_root;
|
164
299
|
|
165
|
-
|
300
|
+
c_document = noko_xml_document_unwrap(self);
|
166
301
|
|
167
|
-
|
302
|
+
c_root = xmlDocGetRootElement(c_document);
|
303
|
+
if (!c_root) {
|
304
|
+
return Qnil;
|
305
|
+
}
|
168
306
|
|
169
|
-
|
170
|
-
return Nokogiri_wrap_xml_node(Qnil, root) ;
|
307
|
+
return noko_xml_node_wrap(Qnil, c_root) ;
|
171
308
|
}
|
172
309
|
|
173
310
|
/*
|
@@ -176,13 +313,14 @@ static VALUE root(VALUE self)
|
|
176
313
|
*
|
177
314
|
* Set the encoding string for this Document
|
178
315
|
*/
|
179
|
-
static VALUE
|
316
|
+
static VALUE
|
317
|
+
set_encoding(VALUE self, VALUE encoding)
|
180
318
|
{
|
181
|
-
xmlDocPtr doc;
|
182
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
319
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
183
320
|
|
184
|
-
if (doc->encoding)
|
185
|
-
|
321
|
+
if (doc->encoding) {
|
322
|
+
xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
|
323
|
+
}
|
186
324
|
|
187
325
|
doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding));
|
188
326
|
|
@@ -195,12 +333,12 @@ static VALUE set_encoding(VALUE self, VALUE encoding)
|
|
195
333
|
*
|
196
334
|
* Get the encoding for this Document
|
197
335
|
*/
|
198
|
-
static VALUE
|
336
|
+
static VALUE
|
337
|
+
encoding(VALUE self)
|
199
338
|
{
|
200
|
-
xmlDocPtr doc;
|
201
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
339
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
202
340
|
|
203
|
-
if(!doc->encoding) return Qnil;
|
341
|
+
if (!doc->encoding) { return Qnil; }
|
204
342
|
return NOKOGIRI_STR_NEW2(doc->encoding);
|
205
343
|
}
|
206
344
|
|
@@ -210,12 +348,12 @@ static VALUE encoding(VALUE self)
|
|
210
348
|
*
|
211
349
|
* Get the XML version for this Document
|
212
350
|
*/
|
213
|
-
static VALUE
|
351
|
+
static VALUE
|
352
|
+
version(VALUE self)
|
214
353
|
{
|
215
|
-
xmlDocPtr doc;
|
216
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
354
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
217
355
|
|
218
|
-
if(!doc->version) return Qnil;
|
356
|
+
if (!doc->version) { return Qnil; }
|
219
357
|
return NOKOGIRI_STR_NEW2(doc->version);
|
220
358
|
}
|
221
359
|
|
@@ -225,48 +363,47 @@ static VALUE version(VALUE self)
|
|
225
363
|
*
|
226
364
|
* Create a new document from an IO object
|
227
365
|
*/
|
228
|
-
static VALUE
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
xmlFreeDoc(
|
257
|
-
|
258
|
-
|
259
|
-
if(
|
260
|
-
rb_exc_raise(
|
261
|
-
else
|
366
|
+
static VALUE
|
367
|
+
noko_xml_document_s_read_io(VALUE rb_class,
|
368
|
+
VALUE rb_io,
|
369
|
+
VALUE rb_url,
|
370
|
+
VALUE rb_encoding,
|
371
|
+
VALUE rb_options)
|
372
|
+
{
|
373
|
+
/* TODO: deprecate this method, parse should be the preferred entry point. then we can make this
|
374
|
+
private. */
|
375
|
+
libxmlStructuredErrorHandlerState handler_state;
|
376
|
+
VALUE rb_errors = rb_ary_new();
|
377
|
+
|
378
|
+
noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
|
379
|
+
|
380
|
+
const char *c_url = NIL_P(rb_url) ? NULL : StringValueCStr(rb_url);
|
381
|
+
const char *c_enc = NIL_P(rb_encoding) ? NULL : StringValueCStr(rb_encoding);
|
382
|
+
xmlDocPtr c_document = xmlReadIO(
|
383
|
+
(xmlInputReadCallback)noko_io_read,
|
384
|
+
(xmlInputCloseCallback)noko_io_close,
|
385
|
+
(void *)rb_io,
|
386
|
+
c_url,
|
387
|
+
c_enc,
|
388
|
+
(int)NUM2INT(rb_options)
|
389
|
+
);
|
390
|
+
|
391
|
+
noko__structured_error_func_restore(&handler_state);
|
392
|
+
|
393
|
+
if (c_document == NULL) {
|
394
|
+
xmlFreeDoc(c_document);
|
395
|
+
|
396
|
+
VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
|
397
|
+
if (RB_TEST(exception)) {
|
398
|
+
rb_exc_raise(exception);
|
399
|
+
} else {
|
262
400
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
263
|
-
|
264
|
-
return Qnil;
|
401
|
+
}
|
265
402
|
}
|
266
403
|
|
267
|
-
|
268
|
-
rb_iv_set(
|
269
|
-
return
|
404
|
+
VALUE rb_document = noko_xml_document_wrap(rb_class, c_document);
|
405
|
+
rb_iv_set(rb_document, "@errors", rb_errors);
|
406
|
+
return rb_document;
|
270
407
|
}
|
271
408
|
|
272
409
|
/*
|
@@ -275,92 +412,58 @@ static VALUE read_io( VALUE klass,
|
|
275
412
|
*
|
276
413
|
* Create a new document from a String
|
277
414
|
*/
|
278
|
-
static VALUE
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
VALUE document;
|
290
|
-
xmlDocPtr doc;
|
291
|
-
|
292
|
-
xmlResetLastError();
|
293
|
-
xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
|
294
|
-
doc = xmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options));
|
295
|
-
xmlSetStructuredErrorFunc(NULL, NULL);
|
415
|
+
static VALUE
|
416
|
+
noko_xml_document_s_read_memory(VALUE rb_class,
|
417
|
+
VALUE rb_input,
|
418
|
+
VALUE rb_url,
|
419
|
+
VALUE rb_encoding,
|
420
|
+
VALUE rb_options)
|
421
|
+
{
|
422
|
+
/* TODO: deprecate this method, parse should be the preferred entry point. then we can make this
|
423
|
+
private. */
|
424
|
+
VALUE rb_errors = rb_ary_new();
|
425
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
|
296
426
|
|
297
|
-
|
298
|
-
|
427
|
+
const char *c_buffer = StringValuePtr(rb_input);
|
428
|
+
const char *c_url = NIL_P(rb_url) ? NULL : StringValueCStr(rb_url);
|
429
|
+
const char *c_enc = NIL_P(rb_encoding) ? NULL : StringValueCStr(rb_encoding);
|
430
|
+
int c_buffer_len = (int)RSTRING_LEN(rb_input);
|
431
|
+
xmlDocPtr c_document = xmlReadMemory(c_buffer, c_buffer_len, c_url, c_enc, (int)NUM2INT(rb_options));
|
299
432
|
|
300
|
-
|
433
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
301
434
|
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
435
|
+
if (c_document == NULL) {
|
436
|
+
VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
|
437
|
+
if (RB_TEST(exception)) {
|
438
|
+
rb_exc_raise(exception);
|
439
|
+
} else {
|
306
440
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
307
|
-
|
308
|
-
return Qnil;
|
441
|
+
}
|
309
442
|
}
|
310
443
|
|
311
|
-
document =
|
312
|
-
rb_iv_set(document, "@errors",
|
444
|
+
VALUE document = noko_xml_document_wrap(rb_class, c_document);
|
445
|
+
rb_iv_set(document, "@errors", rb_errors);
|
313
446
|
return document;
|
314
447
|
}
|
315
448
|
|
316
449
|
/*
|
317
450
|
* call-seq:
|
318
|
-
*
|
451
|
+
* new(version = "1.0")
|
319
452
|
*
|
320
|
-
*
|
321
|
-
* to a deep copy. 0 is a shallow copy, 1 is a deep copy.
|
453
|
+
* Create a new empty document declaring XML version +version+.
|
322
454
|
*/
|
323
|
-
static VALUE
|
324
|
-
|
325
|
-
xmlDocPtr doc, dup;
|
326
|
-
VALUE copy;
|
327
|
-
VALUE level;
|
328
|
-
VALUE error_list;
|
329
|
-
|
330
|
-
if(rb_scan_args(argc, argv, "01", &level) == 0)
|
331
|
-
level = INT2NUM((long)1);
|
332
|
-
|
333
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
334
|
-
|
335
|
-
dup = xmlCopyDoc(doc, (int)NUM2INT(level));
|
336
|
-
|
337
|
-
if(dup == NULL) return Qnil;
|
338
|
-
|
339
|
-
dup->type = doc->type;
|
340
|
-
copy = Nokogiri_wrap_xml_document(rb_obj_class(self), dup);
|
341
|
-
error_list = rb_iv_get(self, "@errors");
|
342
|
-
rb_iv_set(copy, "@errors", error_list);
|
343
|
-
return copy ;
|
344
|
-
}
|
345
|
-
|
346
|
-
/*
|
347
|
-
* call-seq:
|
348
|
-
* new(version = default)
|
349
|
-
*
|
350
|
-
* Create a new document with +version+ (defaults to "1.0")
|
351
|
-
*/
|
352
|
-
static VALUE new(int argc, VALUE *argv, VALUE klass)
|
455
|
+
static VALUE
|
456
|
+
new (int argc, VALUE *argv, VALUE klass)
|
353
457
|
{
|
354
458
|
xmlDocPtr doc;
|
355
459
|
VALUE version, rest, rb_doc ;
|
356
460
|
|
357
461
|
rb_scan_args(argc, argv, "0*", &rest);
|
358
462
|
version = rb_ary_entry(rest, (long)0);
|
359
|
-
if (NIL_P(version)) version = rb_str_new2("1.0");
|
463
|
+
if (NIL_P(version)) { version = rb_str_new2("1.0"); }
|
360
464
|
|
361
465
|
doc = xmlNewDoc((xmlChar *)StringValueCStr(version));
|
362
|
-
rb_doc =
|
363
|
-
rb_obj_call_init(rb_doc, argc, argv);
|
466
|
+
rb_doc = noko_xml_document_wrap_with_init_args(klass, doc, argc, argv);
|
364
467
|
return rb_doc ;
|
365
468
|
}
|
366
469
|
|
@@ -401,85 +504,87 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
|
|
401
504
|
* please direct your browser to
|
402
505
|
* http://tenderlovemaking.com/2009/04/23/namespaces-in-xml.html
|
403
506
|
*/
|
404
|
-
|
507
|
+
static VALUE
|
508
|
+
remove_namespaces_bang(VALUE self)
|
405
509
|
{
|
406
|
-
xmlDocPtr doc ;
|
407
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
510
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
408
511
|
|
409
512
|
recursively_remove_namespaces_from_node((xmlNodePtr)doc);
|
410
513
|
return self;
|
411
514
|
}
|
412
515
|
|
413
|
-
/* call-seq:
|
516
|
+
/* call-seq:
|
517
|
+
* doc.create_entity(name, type, external_id, system_id, content)
|
414
518
|
*
|
415
519
|
* Create a new entity named +name+.
|
416
520
|
*
|
417
|
-
* +type+ is an integer representing the type of entity to be created, and it
|
418
|
-
*
|
419
|
-
*
|
521
|
+
* +type+ is an integer representing the type of entity to be created, and it defaults to
|
522
|
+
* +Nokogiri::XML::EntityDecl::INTERNAL_GENERAL+. See the constants on Nokogiri::XML::EntityDecl for
|
523
|
+
* more information.
|
420
524
|
*
|
421
525
|
* +external_id+, +system_id+, and +content+ set the External ID, System ID,
|
422
526
|
* and content respectively. All of these parameters are optional.
|
423
527
|
*/
|
424
|
-
static VALUE
|
425
|
-
|
426
|
-
|
427
|
-
VALUE
|
428
|
-
VALUE
|
429
|
-
VALUE
|
430
|
-
VALUE
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
528
|
+
static VALUE
|
529
|
+
noko_xml_document__create_entity(int argc, VALUE *argv, VALUE rb_document)
|
530
|
+
{
|
531
|
+
VALUE rb_name;
|
532
|
+
VALUE rb_type;
|
533
|
+
VALUE rb_ext_id;
|
534
|
+
VALUE rb_sys_id;
|
535
|
+
VALUE rb_content;
|
536
|
+
|
537
|
+
rb_scan_args(argc, argv, "14",
|
538
|
+
&rb_name,
|
539
|
+
&rb_type, &rb_ext_id, &rb_sys_id, &rb_content);
|
540
|
+
|
541
|
+
xmlDocPtr c_document = noko_xml_document_unwrap(rb_document);
|
542
|
+
|
543
|
+
libxmlStructuredErrorHandlerState handler_state;
|
544
|
+
VALUE rb_errors = rb_ary_new();
|
545
|
+
noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
|
546
|
+
|
547
|
+
xmlEntityPtr c_entity = xmlAddDocEntity(
|
548
|
+
c_document,
|
549
|
+
(xmlChar *)(NIL_P(rb_name) ? NULL : StringValueCStr(rb_name)),
|
550
|
+
(int)(NIL_P(rb_type) ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(rb_type)),
|
551
|
+
(xmlChar *)(NIL_P(rb_ext_id) ? NULL : StringValueCStr(rb_ext_id)),
|
552
|
+
(xmlChar *)(NIL_P(rb_sys_id) ? NULL : StringValueCStr(rb_sys_id)),
|
553
|
+
(xmlChar *)(NIL_P(rb_content) ? NULL : StringValueCStr(rb_content))
|
554
|
+
);
|
555
|
+
|
556
|
+
noko__structured_error_func_restore(&handler_state);
|
557
|
+
|
558
|
+
if (NULL == c_entity) {
|
559
|
+
VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
|
560
|
+
if (RB_TEST(exception)) {
|
561
|
+
rb_exc_raise(exception);
|
562
|
+
} else {
|
454
563
|
rb_raise(rb_eRuntimeError, "Could not create entity");
|
455
|
-
|
456
|
-
return Qnil;
|
564
|
+
}
|
457
565
|
}
|
458
566
|
|
459
|
-
return
|
567
|
+
return noko_xml_node_wrap(cNokogiriXmlEntityDecl, (xmlNodePtr)c_entity);
|
460
568
|
}
|
461
569
|
|
462
|
-
static int
|
570
|
+
static int
|
571
|
+
block_caller(void *ctx, xmlNodePtr c_node, xmlNodePtr c_parent_node)
|
463
572
|
{
|
464
|
-
VALUE block;
|
465
|
-
VALUE
|
466
|
-
VALUE
|
573
|
+
VALUE block = (VALUE)ctx;
|
574
|
+
VALUE rb_node;
|
575
|
+
VALUE rb_parent_node;
|
467
576
|
VALUE ret;
|
468
577
|
|
469
|
-
if(
|
470
|
-
|
471
|
-
}
|
472
|
-
|
473
|
-
node = Nokogiri_wrap_xml_node(Qnil, _node);
|
578
|
+
if (c_node->type == XML_NAMESPACE_DECL) {
|
579
|
+
rb_node = noko_xml_namespace_wrap((xmlNsPtr)c_node, c_parent_node->doc);
|
580
|
+
} else {
|
581
|
+
rb_node = noko_xml_node_wrap(Qnil, c_node);
|
474
582
|
}
|
475
|
-
|
476
|
-
block = (VALUE)ctx;
|
477
|
-
|
478
|
-
ret = rb_funcall(block, rb_intern("call"), 2, node, parent);
|
583
|
+
rb_parent_node = c_parent_node ? noko_xml_node_wrap(Qnil, c_parent_node) : Qnil;
|
479
584
|
|
480
|
-
|
585
|
+
ret = rb_funcall(block, rb_intern("call"), 2, rb_node, rb_parent_node);
|
481
586
|
|
482
|
-
return 1;
|
587
|
+
return (Qfalse == ret || Qnil == ret) ? 0 : 1;
|
483
588
|
}
|
484
589
|
|
485
590
|
/* call-seq:
|
@@ -492,117 +597,188 @@ static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
|
|
492
597
|
* The block must return a non-nil, non-false value if the +obj+ passed in
|
493
598
|
* should be included in the canonicalized document.
|
494
599
|
*/
|
495
|
-
static VALUE
|
600
|
+
static VALUE
|
601
|
+
rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
|
496
602
|
{
|
497
|
-
VALUE
|
498
|
-
VALUE
|
499
|
-
VALUE
|
500
|
-
|
501
|
-
|
603
|
+
VALUE rb_mode;
|
604
|
+
VALUE rb_namespaces;
|
605
|
+
VALUE rb_comments_p;
|
606
|
+
int c_mode = 0;
|
607
|
+
xmlChar **c_namespaces;
|
502
608
|
|
503
|
-
xmlDocPtr
|
504
|
-
xmlOutputBufferPtr
|
505
|
-
xmlC14NIsVisibleCallback
|
506
|
-
void *
|
609
|
+
xmlDocPtr c_doc;
|
610
|
+
xmlOutputBufferPtr c_obuf;
|
611
|
+
xmlC14NIsVisibleCallback c_callback_wrapper = NULL;
|
612
|
+
void *rb_callback = NULL;
|
507
613
|
|
508
614
|
VALUE rb_cStringIO;
|
509
|
-
VALUE
|
615
|
+
VALUE rb_io;
|
510
616
|
|
511
|
-
rb_scan_args(argc, argv, "03", &
|
617
|
+
rb_scan_args(argc, argv, "03", &rb_mode, &rb_namespaces, &rb_comments_p);
|
618
|
+
if (!NIL_P(rb_mode)) {
|
619
|
+
Check_Type(rb_mode, T_FIXNUM);
|
620
|
+
c_mode = NUM2INT(rb_mode);
|
621
|
+
}
|
622
|
+
if (!NIL_P(rb_namespaces)) {
|
623
|
+
Check_Type(rb_namespaces, T_ARRAY);
|
624
|
+
if (c_mode == XML_C14N_1_0 || c_mode == XML_C14N_1_1) {
|
625
|
+
rb_raise(rb_eRuntimeError, "This canonicalizer does not support this operation");
|
626
|
+
}
|
627
|
+
}
|
512
628
|
|
513
|
-
|
629
|
+
c_doc = noko_xml_document_unwrap(self);
|
514
630
|
|
515
631
|
rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
|
516
|
-
|
517
|
-
|
632
|
+
rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
|
633
|
+
c_obuf = xmlAllocOutputBuffer(NULL);
|
518
634
|
|
519
|
-
|
520
|
-
|
521
|
-
|
635
|
+
c_obuf->writecallback = (xmlOutputWriteCallback)noko_io_write;
|
636
|
+
c_obuf->closecallback = (xmlOutputCloseCallback)noko_io_close;
|
637
|
+
c_obuf->context = (void *)rb_io;
|
522
638
|
|
523
|
-
if(rb_block_given_p()) {
|
524
|
-
|
525
|
-
|
639
|
+
if (rb_block_given_p()) {
|
640
|
+
c_callback_wrapper = block_caller;
|
641
|
+
rb_callback = (void *)rb_block_proc();
|
526
642
|
}
|
527
643
|
|
528
|
-
if(NIL_P(
|
529
|
-
|
530
|
-
}
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
VALUE entry = rb_ary_entry(incl_ns, i);
|
537
|
-
ns[i] = (xmlChar*)StringValueCStr(entry);
|
644
|
+
if (NIL_P(rb_namespaces)) {
|
645
|
+
c_namespaces = NULL;
|
646
|
+
} else {
|
647
|
+
long ns_len = RARRAY_LEN(rb_namespaces);
|
648
|
+
c_namespaces = ruby_xcalloc((size_t)ns_len + 1, sizeof(xmlChar *));
|
649
|
+
for (int j = 0 ; j < ns_len ; j++) {
|
650
|
+
VALUE entry = rb_ary_entry(rb_namespaces, j);
|
651
|
+
c_namespaces[j] = (xmlChar *)StringValueCStr(entry);
|
538
652
|
}
|
539
653
|
}
|
540
654
|
|
655
|
+
xmlC14NExecute(c_doc, c_callback_wrapper, rb_callback,
|
656
|
+
c_mode,
|
657
|
+
c_namespaces,
|
658
|
+
(int)RTEST(rb_comments_p),
|
659
|
+
c_obuf);
|
541
660
|
|
542
|
-
|
543
|
-
|
544
|
-
ns,
|
545
|
-
(int) RTEST(with_comments),
|
546
|
-
buf);
|
661
|
+
ruby_xfree(c_namespaces);
|
662
|
+
xmlOutputBufferClose(c_obuf);
|
547
663
|
|
548
|
-
|
664
|
+
return rb_funcall(rb_io, rb_intern("string"), 0);
|
665
|
+
}
|
549
666
|
|
550
|
-
|
667
|
+
VALUE
|
668
|
+
noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int argc, VALUE *argv)
|
669
|
+
{
|
670
|
+
VALUE rb_document;
|
671
|
+
|
672
|
+
if (!klass) {
|
673
|
+
klass = cNokogiriXmlDocument;
|
674
|
+
}
|
675
|
+
|
676
|
+
rb_document = _xml_document_alloc(klass);
|
677
|
+
_xml_document_data_ptr_set(rb_document, c_document);
|
678
|
+
|
679
|
+
rb_iv_set(rb_document, "@decorators", Qnil);
|
680
|
+
rb_iv_set(rb_document, "@errors", Qnil);
|
681
|
+
|
682
|
+
rb_obj_call_init(rb_document, argc, argv);
|
683
|
+
|
684
|
+
return rb_document ;
|
551
685
|
}
|
552
686
|
|
553
|
-
|
554
|
-
|
687
|
+
|
688
|
+
/* deprecated. use noko_xml_document_wrap() instead. */
|
689
|
+
VALUE
|
690
|
+
Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
|
555
691
|
{
|
556
|
-
|
557
|
-
|
558
|
-
|
692
|
+
/* TODO: deprecate this method in v2.0 */
|
693
|
+
return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
|
694
|
+
}
|
559
695
|
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
696
|
+
VALUE
|
697
|
+
noko_xml_document_wrap(VALUE klass, xmlDocPtr doc)
|
698
|
+
{
|
699
|
+
return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
|
700
|
+
}
|
701
|
+
|
702
|
+
xmlDocPtr
|
703
|
+
noko_xml_document_unwrap(VALUE rb_document)
|
704
|
+
{
|
705
|
+
xmlDocPtr c_document;
|
706
|
+
TypedData_Get_Struct(rb_document, xmlDoc, &xml_doc_type, c_document);
|
707
|
+
return c_document;
|
708
|
+
}
|
709
|
+
|
710
|
+
/* Schema creation will remove and deallocate "blank" nodes.
|
711
|
+
* If those blank nodes have been exposed to Ruby, they could get freed
|
712
|
+
* out from under the VALUE pointer. This function checks to see if any of
|
713
|
+
* those nodes have been exposed to Ruby, and if so we should raise an exception.
|
714
|
+
*/
|
715
|
+
int
|
716
|
+
noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document)
|
717
|
+
{
|
718
|
+
VALUE cache = DOC_NODE_CACHE(c_document);
|
719
|
+
|
720
|
+
if (NIL_P(cache)) {
|
721
|
+
return 0;
|
722
|
+
}
|
723
|
+
|
724
|
+
for (long jnode = 0; jnode < RARRAY_LEN(cache); jnode++) {
|
725
|
+
xmlNodePtr node;
|
726
|
+
VALUE element = rb_ary_entry(cache, jnode);
|
727
|
+
|
728
|
+
Noko_Node_Get_Struct(element, xmlNode, node);
|
729
|
+
if (xmlIsBlankNode(node)) {
|
730
|
+
return 1;
|
731
|
+
}
|
732
|
+
}
|
733
|
+
|
734
|
+
return 0;
|
735
|
+
}
|
564
736
|
|
565
|
-
|
737
|
+
void
|
738
|
+
noko_xml_document_pin_node(xmlNodePtr node)
|
739
|
+
{
|
740
|
+
xmlDocPtr doc;
|
741
|
+
nokogiriTuplePtr tuple;
|
742
|
+
|
743
|
+
doc = node->doc;
|
744
|
+
tuple = (nokogiriTuplePtr)doc->_private;
|
745
|
+
st_insert(tuple->unlinkedNodes, (st_data_t)node, (st_data_t)node);
|
746
|
+
}
|
566
747
|
|
567
|
-
rb_define_singleton_method(klass, "read_memory", read_memory, 4);
|
568
|
-
rb_define_singleton_method(klass, "read_io", read_io, 4);
|
569
|
-
rb_define_singleton_method(klass, "new", new, -1);
|
570
748
|
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
rb_define_method(klass, "url", url, 0);
|
579
|
-
rb_define_method(klass, "create_entity", create_entity, -1);
|
580
|
-
rb_define_method(klass, "remove_namespaces!", remove_namespaces_bang, 0);
|
749
|
+
void
|
750
|
+
noko_xml_document_pin_namespace(xmlNsPtr ns, xmlDocPtr doc)
|
751
|
+
{
|
752
|
+
nokogiriTuplePtr tuple;
|
753
|
+
|
754
|
+
tuple = (nokogiriTuplePtr)doc->_private;
|
755
|
+
st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns);
|
581
756
|
}
|
582
757
|
|
583
758
|
|
584
|
-
|
585
|
-
|
759
|
+
void
|
760
|
+
noko_init_xml_document(void)
|
586
761
|
{
|
587
|
-
|
762
|
+
assert(cNokogiriXmlNode);
|
588
763
|
|
589
|
-
|
590
|
-
klass ? klass : cNokogiriXmlDocument,
|
591
|
-
0,
|
592
|
-
dealloc,
|
593
|
-
doc
|
594
|
-
);
|
764
|
+
cNokogiriXmlDocument = rb_define_class_under(mNokogiriXml, "Document", cNokogiriXmlNode);
|
595
765
|
|
596
|
-
|
597
|
-
rb_iv_set(rb_doc, "@decorators", Qnil);
|
598
|
-
rb_iv_set(rb_doc, "@node_cache", cache);
|
766
|
+
rb_define_alloc_func(cNokogiriXmlDocument, _xml_document_alloc);
|
599
767
|
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
doc->_private = tuple ;
|
768
|
+
rb_define_singleton_method(cNokogiriXmlDocument, "read_memory", noko_xml_document_s_read_memory, 4);
|
769
|
+
rb_define_singleton_method(cNokogiriXmlDocument, "read_io", noko_xml_document_s_read_io, 4);
|
770
|
+
rb_define_singleton_method(cNokogiriXmlDocument, "new", new, -1);
|
604
771
|
|
605
|
-
|
772
|
+
rb_define_method(cNokogiriXmlDocument, "root", rb_xml_document_root, 0);
|
773
|
+
rb_define_method(cNokogiriXmlDocument, "root=", rb_xml_document_root_set, 1);
|
774
|
+
rb_define_method(cNokogiriXmlDocument, "encoding", encoding, 0);
|
775
|
+
rb_define_method(cNokogiriXmlDocument, "encoding=", set_encoding, 1);
|
776
|
+
rb_define_method(cNokogiriXmlDocument, "version", version, 0);
|
777
|
+
rb_define_method(cNokogiriXmlDocument, "canonicalize", rb_xml_document_canonicalize, -1);
|
778
|
+
rb_define_method(cNokogiriXmlDocument, "url", url, 0);
|
779
|
+
rb_define_method(cNokogiriXmlDocument, "create_entity", noko_xml_document__create_entity, -1);
|
780
|
+
rb_define_method(cNokogiriXmlDocument, "remove_namespaces!", remove_namespaces_bang, 0);
|
606
781
|
|
607
|
-
|
782
|
+
rb_define_protected_method(cNokogiriXmlDocument, "initialize_copy_with_args", rb_xml_document_initialize_copy_with_args,
|
783
|
+
2);
|
608
784
|
}
|