nokogiri 1.18.0-aarch64-linux-gnu

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (203) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +39 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +293 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +42 -0
  8. data/ext/nokogiri/depend +38 -0
  9. data/ext/nokogiri/extconf.rb +1173 -0
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/include/libexslt/exslt.h +108 -0
  18. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  19. data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
  20. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
  21. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  22. data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
  23. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
  24. data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
  25. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  26. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  27. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  28. data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
  29. data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
  30. data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
  31. data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
  32. data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
  33. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  34. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
  35. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
  36. data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
  37. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
  38. data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
  39. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
  40. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
  41. data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
  42. data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
  43. data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
  44. data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
  45. data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
  46. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
  47. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  48. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
  65. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
  66. data/ext/nokogiri/include/libxslt/attributes.h +39 -0
  67. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  68. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  69. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  70. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  71. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  72. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  73. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  74. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  75. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  76. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  77. data/ext/nokogiri/include/libxslt/security.h +104 -0
  78. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  79. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  80. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  81. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  82. data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
  83. data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
  84. data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
  85. data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
  86. data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
  87. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  88. data/ext/nokogiri/nokogiri.c +294 -0
  89. data/ext/nokogiri/nokogiri.h +238 -0
  90. data/ext/nokogiri/test_global_handlers.c +40 -0
  91. data/ext/nokogiri/xml_attr.c +103 -0
  92. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  93. data/ext/nokogiri/xml_cdata.c +62 -0
  94. data/ext/nokogiri/xml_comment.c +57 -0
  95. data/ext/nokogiri/xml_document.c +784 -0
  96. data/ext/nokogiri/xml_document_fragment.c +29 -0
  97. data/ext/nokogiri/xml_dtd.c +208 -0
  98. data/ext/nokogiri/xml_element_content.c +131 -0
  99. data/ext/nokogiri/xml_element_decl.c +69 -0
  100. data/ext/nokogiri/xml_encoding_handler.c +112 -0
  101. data/ext/nokogiri/xml_entity_decl.c +112 -0
  102. data/ext/nokogiri/xml_entity_reference.c +50 -0
  103. data/ext/nokogiri/xml_namespace.c +181 -0
  104. data/ext/nokogiri/xml_node.c +2459 -0
  105. data/ext/nokogiri/xml_node_set.c +518 -0
  106. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  107. data/ext/nokogiri/xml_reader.c +777 -0
  108. data/ext/nokogiri/xml_relax_ng.c +149 -0
  109. data/ext/nokogiri/xml_sax_parser.c +403 -0
  110. data/ext/nokogiri/xml_sax_parser_context.c +390 -0
  111. data/ext/nokogiri/xml_sax_push_parser.c +206 -0
  112. data/ext/nokogiri/xml_schema.c +226 -0
  113. data/ext/nokogiri/xml_syntax_error.c +93 -0
  114. data/ext/nokogiri/xml_text.c +59 -0
  115. data/ext/nokogiri/xml_xpath_context.c +486 -0
  116. data/ext/nokogiri/xslt_stylesheet.c +421 -0
  117. data/gumbo-parser/CHANGES.md +63 -0
  118. data/gumbo-parser/Makefile +129 -0
  119. data/gumbo-parser/THANKS +27 -0
  120. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  121. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  122. data/lib/nokogiri/3.3/nokogiri.so +0 -0
  123. data/lib/nokogiri/3.4/nokogiri.so +0 -0
  124. data/lib/nokogiri/class_resolver.rb +67 -0
  125. data/lib/nokogiri/css/node.rb +58 -0
  126. data/lib/nokogiri/css/parser.rb +772 -0
  127. data/lib/nokogiri/css/parser.y +277 -0
  128. data/lib/nokogiri/css/parser_extras.rb +36 -0
  129. data/lib/nokogiri/css/selector_cache.rb +38 -0
  130. data/lib/nokogiri/css/syntax_error.rb +9 -0
  131. data/lib/nokogiri/css/tokenizer.rb +155 -0
  132. data/lib/nokogiri/css/tokenizer.rex +57 -0
  133. data/lib/nokogiri/css/xpath_visitor.rb +375 -0
  134. data/lib/nokogiri/css.rb +132 -0
  135. data/lib/nokogiri/decorators/slop.rb +42 -0
  136. data/lib/nokogiri/encoding_handler.rb +57 -0
  137. data/lib/nokogiri/extension.rb +32 -0
  138. data/lib/nokogiri/gumbo.rb +15 -0
  139. data/lib/nokogiri/html.rb +48 -0
  140. data/lib/nokogiri/html4/builder.rb +37 -0
  141. data/lib/nokogiri/html4/document.rb +235 -0
  142. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  143. data/lib/nokogiri/html4/element_description.rb +25 -0
  144. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  145. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  146. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  147. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  148. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  149. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  150. data/lib/nokogiri/html4.rb +42 -0
  151. data/lib/nokogiri/html5/builder.rb +40 -0
  152. data/lib/nokogiri/html5/document.rb +199 -0
  153. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  154. data/lib/nokogiri/html5/node.rb +103 -0
  155. data/lib/nokogiri/html5.rb +368 -0
  156. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  157. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  158. data/lib/nokogiri/syntax_error.rb +6 -0
  159. data/lib/nokogiri/version/constant.rb +6 -0
  160. data/lib/nokogiri/version/info.rb +224 -0
  161. data/lib/nokogiri/version.rb +4 -0
  162. data/lib/nokogiri/xml/attr.rb +66 -0
  163. data/lib/nokogiri/xml/attribute_decl.rb +22 -0
  164. data/lib/nokogiri/xml/builder.rb +494 -0
  165. data/lib/nokogiri/xml/cdata.rb +13 -0
  166. data/lib/nokogiri/xml/character_data.rb +9 -0
  167. data/lib/nokogiri/xml/document.rb +514 -0
  168. data/lib/nokogiri/xml/document_fragment.rb +276 -0
  169. data/lib/nokogiri/xml/dtd.rb +34 -0
  170. data/lib/nokogiri/xml/element_content.rb +46 -0
  171. data/lib/nokogiri/xml/element_decl.rb +17 -0
  172. data/lib/nokogiri/xml/entity_decl.rb +23 -0
  173. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  174. data/lib/nokogiri/xml/namespace.rb +57 -0
  175. data/lib/nokogiri/xml/node/save_options.rb +76 -0
  176. data/lib/nokogiri/xml/node.rb +1650 -0
  177. data/lib/nokogiri/xml/node_set.rb +449 -0
  178. data/lib/nokogiri/xml/notation.rb +19 -0
  179. data/lib/nokogiri/xml/parse_options.rb +213 -0
  180. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  181. data/lib/nokogiri/xml/pp/node.rb +73 -0
  182. data/lib/nokogiri/xml/pp.rb +4 -0
  183. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  184. data/lib/nokogiri/xml/reader.rb +139 -0
  185. data/lib/nokogiri/xml/relax_ng.rb +75 -0
  186. data/lib/nokogiri/xml/sax/document.rb +258 -0
  187. data/lib/nokogiri/xml/sax/parser.rb +199 -0
  188. data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
  189. data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
  190. data/lib/nokogiri/xml/sax.rb +54 -0
  191. data/lib/nokogiri/xml/schema.rb +140 -0
  192. data/lib/nokogiri/xml/searchable.rb +274 -0
  193. data/lib/nokogiri/xml/syntax_error.rb +94 -0
  194. data/lib/nokogiri/xml/text.rb +11 -0
  195. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  196. data/lib/nokogiri/xml/xpath.rb +21 -0
  197. data/lib/nokogiri/xml/xpath_context.rb +27 -0
  198. data/lib/nokogiri/xml.rb +65 -0
  199. data/lib/nokogiri/xslt/stylesheet.rb +49 -0
  200. data/lib/nokogiri/xslt.rb +129 -0
  201. data/lib/nokogiri.rb +128 -0
  202. data/lib/xsd/xmlparser/nokogiri.rb +105 -0
  203. metadata +321 -0
@@ -0,0 +1,784 @@
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriXmlDocument ;
4
+
5
+ static int
6
+ dealloc_node_i2(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
7
+ {
8
+ switch (node->type) {
9
+ case XML_ATTRIBUTE_NODE:
10
+ xmlFreePropList((xmlAttrPtr)node);
11
+ break;
12
+ case XML_NAMESPACE_DECL:
13
+ xmlFreeNs((xmlNsPtr)node);
14
+ break;
15
+ case XML_DTD_NODE:
16
+ xmlFreeDtd((xmlDtdPtr)node);
17
+ break;
18
+ default:
19
+ if (node->parent == NULL) {
20
+ node->next = NULL;
21
+ node->prev = NULL;
22
+ xmlAddChild((xmlNodePtr)doc, node);
23
+ }
24
+ }
25
+ return ST_CONTINUE;
26
+ }
27
+
28
+ static int
29
+ dealloc_node_i(st_data_t key, st_data_t node, st_data_t doc)
30
+ {
31
+ return dealloc_node_i2((xmlNodePtr)key, (xmlNodePtr)node, (xmlDocPtr)doc);
32
+ }
33
+
34
+ static void
35
+ remove_private(xmlNodePtr node)
36
+ {
37
+ xmlNodePtr child;
38
+
39
+ for (child = node->children; child; child = child->next) {
40
+ remove_private(child);
41
+ }
42
+
43
+ if ((node->type == XML_ELEMENT_NODE ||
44
+ node->type == XML_XINCLUDE_START ||
45
+ node->type == XML_XINCLUDE_END) &&
46
+ node->properties) {
47
+ for (child = (xmlNodePtr)node->properties; child; child = child->next) {
48
+ remove_private(child);
49
+ }
50
+ }
51
+
52
+ node->_private = NULL;
53
+ }
54
+
55
+ static void
56
+ mark(void *data)
57
+ {
58
+ xmlDocPtr doc = (xmlDocPtr)data;
59
+ nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
60
+ if (tuple) {
61
+ rb_gc_mark(tuple->doc);
62
+ rb_gc_mark(tuple->node_cache);
63
+ }
64
+ }
65
+
66
+ static void
67
+ dealloc(void *data)
68
+ {
69
+ xmlDocPtr doc = (xmlDocPtr)data;
70
+ st_table *node_hash;
71
+
72
+ node_hash = DOC_UNLINKED_NODE_HASH(doc);
73
+
74
+ st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);
75
+ st_free_table(node_hash);
76
+
77
+ ruby_xfree(doc->_private);
78
+
79
+ #if defined(__GNUC__) && __GNUC__ >= 5
80
+ #pragma GCC diagnostic push
81
+ #pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
82
+ #endif
83
+ /*
84
+ * libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
85
+ * versions, the registered callback from libxml-ruby will access the _private pointers set by
86
+ * nokogiri, which will result in segfaults.
87
+ *
88
+ * To avoid this, we need to clear the _private pointers from all nodes in this document tree
89
+ * before that callback gets invoked.
90
+ *
91
+ * libxml-ruby 3.0.0 was released in 2017-02, so at some point we can probably safely remove this
92
+ * safeguard (though probably pairing with a runtime check on the libxml-ruby version).
93
+ */
94
+ if (xmlDeregisterNodeDefaultValue) {
95
+ remove_private((xmlNodePtr)doc);
96
+ }
97
+ #if defined(__GNUC__) && __GNUC__ >= 5
98
+ #pragma GCC diagnostic pop
99
+ #endif
100
+
101
+ xmlFreeDoc(doc);
102
+ }
103
+
104
+ static size_t
105
+ memsize_node(const xmlNodePtr node)
106
+ {
107
+ /* note we don't count namespace definitions, just going for a good-enough number here */
108
+ xmlNodePtr child;
109
+ xmlAttrPtr property;
110
+ size_t memsize = 0;
111
+
112
+ memsize += (size_t)xmlStrlen(node->name);
113
+
114
+ if (node->type == XML_ELEMENT_NODE) {
115
+ for (property = node->properties; property; property = property->next) {
116
+ memsize += sizeof(xmlAttr) + memsize_node((xmlNodePtr)property);
117
+ }
118
+ }
119
+ if (node->type == XML_TEXT_NODE) {
120
+ memsize += (size_t)xmlStrlen(node->content);
121
+ }
122
+ for (child = node->children; child; child = child->next) {
123
+ memsize += sizeof(xmlNode) + memsize_node(child);
124
+ }
125
+ return memsize;
126
+ }
127
+
128
+ static size_t
129
+ memsize(const void *data)
130
+ {
131
+ xmlDocPtr doc = (const xmlDocPtr)data;
132
+ size_t memsize = sizeof(xmlDoc);
133
+ /* This may not account for all memory use */
134
+ memsize += memsize_node((xmlNodePtr)doc);
135
+ return memsize;
136
+ }
137
+
138
+ static const rb_data_type_t xml_doc_type = {
139
+ .wrap_struct_name = "xmlDoc",
140
+ .function = {
141
+ .dmark = mark,
142
+ .dfree = dealloc,
143
+ .dsize = memsize,
144
+ },
145
+ // .flags = RUBY_TYPED_FREE_IMMEDIATELY, // TODO see https://github.com/sparklemotion/nokogiri/issues/2822
146
+ };
147
+
148
+ static VALUE
149
+ _xml_document_alloc(VALUE klass)
150
+ {
151
+ return TypedData_Wrap_Struct(klass, &xml_doc_type, NULL);
152
+ }
153
+
154
+ static void
155
+ _xml_document_data_ptr_set(VALUE rb_document, xmlDocPtr c_document)
156
+ {
157
+ nokogiriTuplePtr tuple;
158
+
159
+ assert(DATA_PTR(rb_document) == NULL);
160
+ assert(c_document->_private == NULL);
161
+
162
+ DATA_PTR(rb_document) = c_document;
163
+
164
+ tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
165
+ tuple->doc = rb_document;
166
+ tuple->unlinkedNodes = st_init_numtable_with_size(128);
167
+ tuple->node_cache = rb_ary_new();
168
+
169
+ c_document->_private = tuple ;
170
+
171
+ rb_iv_set(rb_document, "@node_cache", tuple->node_cache);
172
+
173
+ return;
174
+ }
175
+
176
+ /* :nodoc: */
177
+ static VALUE
178
+ rb_xml_document_initialize_copy_with_args(VALUE rb_self, VALUE rb_other, VALUE rb_level)
179
+ {
180
+ xmlDocPtr c_other, c_self;
181
+ int c_level;
182
+
183
+ c_other = noko_xml_document_unwrap(rb_other);
184
+ c_level = (int)NUM2INT(rb_level);
185
+
186
+ c_self = xmlCopyDoc(c_other, c_level);
187
+ if (c_self == NULL) { return Qnil; }
188
+
189
+ c_self->type = c_other->type;
190
+ _xml_document_data_ptr_set(rb_self, c_self);
191
+
192
+ return rb_self ;
193
+ }
194
+
195
+ static void
196
+ recursively_remove_namespaces_from_node(xmlNodePtr node)
197
+ {
198
+ xmlNodePtr child ;
199
+ xmlAttrPtr property ;
200
+
201
+ xmlSetNs(node, NULL);
202
+
203
+ for (child = node->children ; child ; child = child->next) {
204
+ recursively_remove_namespaces_from_node(child);
205
+ }
206
+
207
+ if (((node->type == XML_ELEMENT_NODE) ||
208
+ (node->type == XML_XINCLUDE_START) ||
209
+ (node->type == XML_XINCLUDE_END)) &&
210
+ node->nsDef) {
211
+ xmlNsPtr curr = node->nsDef;
212
+ while (curr) {
213
+ noko_xml_document_pin_namespace(curr, node->doc);
214
+ curr = curr->next;
215
+ }
216
+ node->nsDef = NULL;
217
+ }
218
+
219
+ if (node->type == XML_ELEMENT_NODE && node->properties != NULL) {
220
+ property = node->properties ;
221
+ while (property != NULL) {
222
+ if (property->ns) { property->ns = NULL ; }
223
+ property = property->next ;
224
+ }
225
+ }
226
+ }
227
+
228
+ /*
229
+ * call-seq:
230
+ * url
231
+ *
232
+ * Get the url name for this document.
233
+ */
234
+ static VALUE
235
+ url(VALUE self)
236
+ {
237
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
238
+
239
+ if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
240
+
241
+ return Qnil;
242
+ }
243
+
244
+ /*
245
+ * call-seq:
246
+ * root=
247
+ *
248
+ * Set the root element on this document
249
+ */
250
+ static VALUE
251
+ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
252
+ {
253
+ xmlDocPtr c_document;
254
+ xmlNodePtr c_new_root = NULL, c_current_root;
255
+
256
+ c_document = noko_xml_document_unwrap(self);
257
+
258
+ c_current_root = xmlDocGetRootElement(c_document);
259
+ if (c_current_root) {
260
+ xmlUnlinkNode(c_current_root);
261
+ noko_xml_document_pin_node(c_current_root);
262
+ }
263
+
264
+ if (!NIL_P(rb_new_root)) {
265
+ if (!rb_obj_is_kind_of(rb_new_root, cNokogiriXmlNode)) {
266
+ rb_raise(rb_eArgError,
267
+ "expected Nokogiri::XML::Node but received %"PRIsVALUE,
268
+ rb_obj_class(rb_new_root));
269
+ }
270
+
271
+ Noko_Node_Get_Struct(rb_new_root, xmlNode, c_new_root);
272
+
273
+ /* If the new root's document is not the same as the current document,
274
+ * then we need to dup the node in to this document. */
275
+ if (c_new_root->doc != c_document) {
276
+ c_new_root = xmlDocCopyNode(c_new_root, c_document, 1);
277
+ if (!c_new_root) {
278
+ rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
279
+ }
280
+ }
281
+ }
282
+
283
+ xmlDocSetRootElement(c_document, c_new_root);
284
+
285
+ return rb_new_root;
286
+ }
287
+
288
+ /*
289
+ * call-seq:
290
+ * root
291
+ *
292
+ * Get the root node for this document.
293
+ */
294
+ static VALUE
295
+ rb_xml_document_root(VALUE self)
296
+ {
297
+ xmlDocPtr c_document;
298
+ xmlNodePtr c_root;
299
+
300
+ c_document = noko_xml_document_unwrap(self);
301
+
302
+ c_root = xmlDocGetRootElement(c_document);
303
+ if (!c_root) {
304
+ return Qnil;
305
+ }
306
+
307
+ return noko_xml_node_wrap(Qnil, c_root) ;
308
+ }
309
+
310
+ /*
311
+ * call-seq:
312
+ * encoding= encoding
313
+ *
314
+ * Set the encoding string for this Document
315
+ */
316
+ static VALUE
317
+ set_encoding(VALUE self, VALUE encoding)
318
+ {
319
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
320
+
321
+ if (doc->encoding) {
322
+ xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
323
+ }
324
+
325
+ doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding));
326
+
327
+ return encoding;
328
+ }
329
+
330
+ /*
331
+ * call-seq:
332
+ * encoding
333
+ *
334
+ * Get the encoding for this Document
335
+ */
336
+ static VALUE
337
+ encoding(VALUE self)
338
+ {
339
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
340
+
341
+ if (!doc->encoding) { return Qnil; }
342
+ return NOKOGIRI_STR_NEW2(doc->encoding);
343
+ }
344
+
345
+ /*
346
+ * call-seq:
347
+ * version
348
+ *
349
+ * Get the XML version for this Document
350
+ */
351
+ static VALUE
352
+ version(VALUE self)
353
+ {
354
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
355
+
356
+ if (!doc->version) { return Qnil; }
357
+ return NOKOGIRI_STR_NEW2(doc->version);
358
+ }
359
+
360
+ /*
361
+ * call-seq:
362
+ * read_io(io, url, encoding, options)
363
+ *
364
+ * Create a new document from an IO object
365
+ */
366
+ static VALUE
367
+ noko_xml_document_s_read_io(VALUE rb_class,
368
+ VALUE rb_io,
369
+ VALUE rb_url,
370
+ VALUE rb_encoding,
371
+ VALUE rb_options)
372
+ {
373
+ /* TODO: deprecate this method, parse should be the preferred entry point. then we can make this
374
+ private. */
375
+ libxmlStructuredErrorHandlerState handler_state;
376
+ VALUE rb_errors = rb_ary_new();
377
+
378
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
379
+
380
+ const char *c_url = NIL_P(rb_url) ? NULL : StringValueCStr(rb_url);
381
+ const char *c_enc = NIL_P(rb_encoding) ? NULL : StringValueCStr(rb_encoding);
382
+ xmlDocPtr c_document = xmlReadIO(
383
+ (xmlInputReadCallback)noko_io_read,
384
+ (xmlInputCloseCallback)noko_io_close,
385
+ (void *)rb_io,
386
+ c_url,
387
+ c_enc,
388
+ (int)NUM2INT(rb_options)
389
+ );
390
+
391
+ noko__structured_error_func_restore(&handler_state);
392
+
393
+ if (c_document == NULL) {
394
+ xmlFreeDoc(c_document);
395
+
396
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
397
+ if (RB_TEST(exception)) {
398
+ rb_exc_raise(exception);
399
+ } else {
400
+ rb_raise(rb_eRuntimeError, "Could not parse document");
401
+ }
402
+ }
403
+
404
+ VALUE rb_document = noko_xml_document_wrap(rb_class, c_document);
405
+ rb_iv_set(rb_document, "@errors", rb_errors);
406
+ return rb_document;
407
+ }
408
+
409
+ /*
410
+ * call-seq:
411
+ * read_memory(string, url, encoding, options)
412
+ *
413
+ * Create a new document from a String
414
+ */
415
+ static VALUE
416
+ noko_xml_document_s_read_memory(VALUE rb_class,
417
+ VALUE rb_input,
418
+ VALUE rb_url,
419
+ VALUE rb_encoding,
420
+ VALUE rb_options)
421
+ {
422
+ /* TODO: deprecate this method, parse should be the preferred entry point. then we can make this
423
+ private. */
424
+ VALUE rb_errors = rb_ary_new();
425
+ xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
426
+
427
+ const char *c_buffer = StringValuePtr(rb_input);
428
+ const char *c_url = NIL_P(rb_url) ? NULL : StringValueCStr(rb_url);
429
+ const char *c_enc = NIL_P(rb_encoding) ? NULL : StringValueCStr(rb_encoding);
430
+ int c_buffer_len = (int)RSTRING_LEN(rb_input);
431
+ xmlDocPtr c_document = xmlReadMemory(c_buffer, c_buffer_len, c_url, c_enc, (int)NUM2INT(rb_options));
432
+
433
+ xmlSetStructuredErrorFunc(NULL, NULL);
434
+
435
+ if (c_document == NULL) {
436
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
437
+ if (RB_TEST(exception)) {
438
+ rb_exc_raise(exception);
439
+ } else {
440
+ rb_raise(rb_eRuntimeError, "Could not parse document");
441
+ }
442
+ }
443
+
444
+ VALUE document = noko_xml_document_wrap(rb_class, c_document);
445
+ rb_iv_set(document, "@errors", rb_errors);
446
+ return document;
447
+ }
448
+
449
+ /*
450
+ * call-seq:
451
+ * new(version = "1.0")
452
+ *
453
+ * Create a new empty document declaring XML version +version+.
454
+ */
455
+ static VALUE
456
+ new (int argc, VALUE *argv, VALUE klass)
457
+ {
458
+ xmlDocPtr doc;
459
+ VALUE version, rest, rb_doc ;
460
+
461
+ rb_scan_args(argc, argv, "0*", &rest);
462
+ version = rb_ary_entry(rest, (long)0);
463
+ if (NIL_P(version)) { version = rb_str_new2("1.0"); }
464
+
465
+ doc = xmlNewDoc((xmlChar *)StringValueCStr(version));
466
+ rb_doc = noko_xml_document_wrap_with_init_args(klass, doc, argc, argv);
467
+ return rb_doc ;
468
+ }
469
+
470
+ /*
471
+ * call-seq:
472
+ * remove_namespaces!
473
+ *
474
+ * Remove all namespaces from all nodes in the document.
475
+ *
476
+ * This could be useful for developers who either don't understand namespaces
477
+ * or don't care about them.
478
+ *
479
+ * The following example shows a use case, and you can decide for yourself
480
+ * whether this is a good thing or not:
481
+ *
482
+ * doc = Nokogiri::XML <<-EOXML
483
+ * <root>
484
+ * <car xmlns:part="http://general-motors.com/">
485
+ * <part:tire>Michelin Model XGV</part:tire>
486
+ * </car>
487
+ * <bicycle xmlns:part="http://schwinn.com/">
488
+ * <part:tire>I'm a bicycle tire!</part:tire>
489
+ * </bicycle>
490
+ * </root>
491
+ * EOXML
492
+ *
493
+ * doc.xpath("//tire").to_s # => ""
494
+ * doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => "<part:tire>Michelin Model XGV</part:tire>"
495
+ * doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => "<part:tire>I'm a bicycle tire!</part:tire>"
496
+ *
497
+ * doc.remove_namespaces!
498
+ *
499
+ * doc.xpath("//tire").to_s # => "<tire>Michelin Model XGV</tire><tire>I'm a bicycle tire!</tire>"
500
+ * doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => ""
501
+ * doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => ""
502
+ *
503
+ * For more information on why this probably is *not* a good thing in general,
504
+ * please direct your browser to
505
+ * http://tenderlovemaking.com/2009/04/23/namespaces-in-xml.html
506
+ */
507
+ static VALUE
508
+ remove_namespaces_bang(VALUE self)
509
+ {
510
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
511
+
512
+ recursively_remove_namespaces_from_node((xmlNodePtr)doc);
513
+ return self;
514
+ }
515
+
516
+ /* call-seq:
517
+ * doc.create_entity(name, type, external_id, system_id, content)
518
+ *
519
+ * Create a new entity named +name+.
520
+ *
521
+ * +type+ is an integer representing the type of entity to be created, and it defaults to
522
+ * +Nokogiri::XML::EntityDecl::INTERNAL_GENERAL+. See the constants on Nokogiri::XML::EntityDecl for
523
+ * more information.
524
+ *
525
+ * +external_id+, +system_id+, and +content+ set the External ID, System ID,
526
+ * and content respectively. All of these parameters are optional.
527
+ */
528
+ static VALUE
529
+ noko_xml_document__create_entity(int argc, VALUE *argv, VALUE rb_document)
530
+ {
531
+ VALUE rb_name;
532
+ VALUE rb_type;
533
+ VALUE rb_ext_id;
534
+ VALUE rb_sys_id;
535
+ VALUE rb_content;
536
+
537
+ rb_scan_args(argc, argv, "14",
538
+ &rb_name,
539
+ &rb_type, &rb_ext_id, &rb_sys_id, &rb_content);
540
+
541
+ xmlDocPtr c_document = noko_xml_document_unwrap(rb_document);
542
+
543
+ libxmlStructuredErrorHandlerState handler_state;
544
+ VALUE rb_errors = rb_ary_new();
545
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
546
+
547
+ xmlEntityPtr c_entity = xmlAddDocEntity(
548
+ c_document,
549
+ (xmlChar *)(NIL_P(rb_name) ? NULL : StringValueCStr(rb_name)),
550
+ (int)(NIL_P(rb_type) ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(rb_type)),
551
+ (xmlChar *)(NIL_P(rb_ext_id) ? NULL : StringValueCStr(rb_ext_id)),
552
+ (xmlChar *)(NIL_P(rb_sys_id) ? NULL : StringValueCStr(rb_sys_id)),
553
+ (xmlChar *)(NIL_P(rb_content) ? NULL : StringValueCStr(rb_content))
554
+ );
555
+
556
+ noko__structured_error_func_restore(&handler_state);
557
+
558
+ if (NULL == c_entity) {
559
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
560
+ if (RB_TEST(exception)) {
561
+ rb_exc_raise(exception);
562
+ } else {
563
+ rb_raise(rb_eRuntimeError, "Could not create entity");
564
+ }
565
+ }
566
+
567
+ return noko_xml_node_wrap(cNokogiriXmlEntityDecl, (xmlNodePtr)c_entity);
568
+ }
569
+
570
+ static int
571
+ block_caller(void *ctx, xmlNodePtr c_node, xmlNodePtr c_parent_node)
572
+ {
573
+ VALUE block = (VALUE)ctx;
574
+ VALUE rb_node;
575
+ VALUE rb_parent_node;
576
+ VALUE ret;
577
+
578
+ if (c_node->type == XML_NAMESPACE_DECL) {
579
+ rb_node = noko_xml_namespace_wrap((xmlNsPtr)c_node, c_parent_node->doc);
580
+ } else {
581
+ rb_node = noko_xml_node_wrap(Qnil, c_node);
582
+ }
583
+ rb_parent_node = c_parent_node ? noko_xml_node_wrap(Qnil, c_parent_node) : Qnil;
584
+
585
+ ret = rb_funcall(block, rb_intern("call"), 2, rb_node, rb_parent_node);
586
+
587
+ return (Qfalse == ret || Qnil == ret) ? 0 : 1;
588
+ }
589
+
590
+ /* call-seq:
591
+ * doc.canonicalize(mode=XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
592
+ * doc.canonicalize { |obj, parent| ... }
593
+ *
594
+ * Canonicalize a document and return the results. Takes an optional block
595
+ * that takes two parameters: the +obj+ and that node's +parent+.
596
+ * The +obj+ will be either a Nokogiri::XML::Node, or a Nokogiri::XML::Namespace
597
+ * The block must return a non-nil, non-false value if the +obj+ passed in
598
+ * should be included in the canonicalized document.
599
+ */
600
+ static VALUE
601
+ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
602
+ {
603
+ VALUE rb_mode;
604
+ VALUE rb_namespaces;
605
+ VALUE rb_comments_p;
606
+ int c_mode = 0;
607
+ xmlChar **c_namespaces;
608
+
609
+ xmlDocPtr c_doc;
610
+ xmlOutputBufferPtr c_obuf;
611
+ xmlC14NIsVisibleCallback c_callback_wrapper = NULL;
612
+ void *rb_callback = NULL;
613
+
614
+ VALUE rb_cStringIO;
615
+ VALUE rb_io;
616
+
617
+ rb_scan_args(argc, argv, "03", &rb_mode, &rb_namespaces, &rb_comments_p);
618
+ if (!NIL_P(rb_mode)) {
619
+ Check_Type(rb_mode, T_FIXNUM);
620
+ c_mode = NUM2INT(rb_mode);
621
+ }
622
+ if (!NIL_P(rb_namespaces)) {
623
+ Check_Type(rb_namespaces, T_ARRAY);
624
+ if (c_mode == XML_C14N_1_0 || c_mode == XML_C14N_1_1) {
625
+ rb_raise(rb_eRuntimeError, "This canonicalizer does not support this operation");
626
+ }
627
+ }
628
+
629
+ c_doc = noko_xml_document_unwrap(self);
630
+
631
+ rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
632
+ rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
633
+ c_obuf = xmlAllocOutputBuffer(NULL);
634
+
635
+ c_obuf->writecallback = (xmlOutputWriteCallback)noko_io_write;
636
+ c_obuf->closecallback = (xmlOutputCloseCallback)noko_io_close;
637
+ c_obuf->context = (void *)rb_io;
638
+
639
+ if (rb_block_given_p()) {
640
+ c_callback_wrapper = block_caller;
641
+ rb_callback = (void *)rb_block_proc();
642
+ }
643
+
644
+ if (NIL_P(rb_namespaces)) {
645
+ c_namespaces = NULL;
646
+ } else {
647
+ long ns_len = RARRAY_LEN(rb_namespaces);
648
+ c_namespaces = ruby_xcalloc((size_t)ns_len + 1, sizeof(xmlChar *));
649
+ for (int j = 0 ; j < ns_len ; j++) {
650
+ VALUE entry = rb_ary_entry(rb_namespaces, j);
651
+ c_namespaces[j] = (xmlChar *)StringValueCStr(entry);
652
+ }
653
+ }
654
+
655
+ xmlC14NExecute(c_doc, c_callback_wrapper, rb_callback,
656
+ c_mode,
657
+ c_namespaces,
658
+ (int)RTEST(rb_comments_p),
659
+ c_obuf);
660
+
661
+ ruby_xfree(c_namespaces);
662
+ xmlOutputBufferClose(c_obuf);
663
+
664
+ return rb_funcall(rb_io, rb_intern("string"), 0);
665
+ }
666
+
667
+ VALUE
668
+ noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int argc, VALUE *argv)
669
+ {
670
+ VALUE rb_document;
671
+
672
+ if (!klass) {
673
+ klass = cNokogiriXmlDocument;
674
+ }
675
+
676
+ rb_document = _xml_document_alloc(klass);
677
+ _xml_document_data_ptr_set(rb_document, c_document);
678
+
679
+ rb_iv_set(rb_document, "@decorators", Qnil);
680
+ rb_iv_set(rb_document, "@errors", Qnil);
681
+
682
+ rb_obj_call_init(rb_document, argc, argv);
683
+
684
+ return rb_document ;
685
+ }
686
+
687
+
688
+ /* deprecated. use noko_xml_document_wrap() instead. */
689
+ VALUE
690
+ Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
691
+ {
692
+ /* TODO: deprecate this method in v2.0 */
693
+ return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
694
+ }
695
+
696
+ VALUE
697
+ noko_xml_document_wrap(VALUE klass, xmlDocPtr doc)
698
+ {
699
+ return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
700
+ }
701
+
702
+ xmlDocPtr
703
+ noko_xml_document_unwrap(VALUE rb_document)
704
+ {
705
+ xmlDocPtr c_document;
706
+ TypedData_Get_Struct(rb_document, xmlDoc, &xml_doc_type, c_document);
707
+ return c_document;
708
+ }
709
+
710
+ /* Schema creation will remove and deallocate "blank" nodes.
711
+ * If those blank nodes have been exposed to Ruby, they could get freed
712
+ * out from under the VALUE pointer. This function checks to see if any of
713
+ * those nodes have been exposed to Ruby, and if so we should raise an exception.
714
+ */
715
+ int
716
+ noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document)
717
+ {
718
+ VALUE cache = DOC_NODE_CACHE(c_document);
719
+
720
+ if (NIL_P(cache)) {
721
+ return 0;
722
+ }
723
+
724
+ for (long jnode = 0; jnode < RARRAY_LEN(cache); jnode++) {
725
+ xmlNodePtr node;
726
+ VALUE element = rb_ary_entry(cache, jnode);
727
+
728
+ Noko_Node_Get_Struct(element, xmlNode, node);
729
+ if (xmlIsBlankNode(node)) {
730
+ return 1;
731
+ }
732
+ }
733
+
734
+ return 0;
735
+ }
736
+
737
+ void
738
+ noko_xml_document_pin_node(xmlNodePtr node)
739
+ {
740
+ xmlDocPtr doc;
741
+ nokogiriTuplePtr tuple;
742
+
743
+ doc = node->doc;
744
+ tuple = (nokogiriTuplePtr)doc->_private;
745
+ st_insert(tuple->unlinkedNodes, (st_data_t)node, (st_data_t)node);
746
+ }
747
+
748
+
749
+ void
750
+ noko_xml_document_pin_namespace(xmlNsPtr ns, xmlDocPtr doc)
751
+ {
752
+ nokogiriTuplePtr tuple;
753
+
754
+ tuple = (nokogiriTuplePtr)doc->_private;
755
+ st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns);
756
+ }
757
+
758
+
759
+ void
760
+ noko_init_xml_document(void)
761
+ {
762
+ assert(cNokogiriXmlNode);
763
+
764
+ cNokogiriXmlDocument = rb_define_class_under(mNokogiriXml, "Document", cNokogiriXmlNode);
765
+
766
+ rb_define_alloc_func(cNokogiriXmlDocument, _xml_document_alloc);
767
+
768
+ rb_define_singleton_method(cNokogiriXmlDocument, "read_memory", noko_xml_document_s_read_memory, 4);
769
+ rb_define_singleton_method(cNokogiriXmlDocument, "read_io", noko_xml_document_s_read_io, 4);
770
+ rb_define_singleton_method(cNokogiriXmlDocument, "new", new, -1);
771
+
772
+ rb_define_method(cNokogiriXmlDocument, "root", rb_xml_document_root, 0);
773
+ rb_define_method(cNokogiriXmlDocument, "root=", rb_xml_document_root_set, 1);
774
+ rb_define_method(cNokogiriXmlDocument, "encoding", encoding, 0);
775
+ rb_define_method(cNokogiriXmlDocument, "encoding=", set_encoding, 1);
776
+ rb_define_method(cNokogiriXmlDocument, "version", version, 0);
777
+ rb_define_method(cNokogiriXmlDocument, "canonicalize", rb_xml_document_canonicalize, -1);
778
+ rb_define_method(cNokogiriXmlDocument, "url", url, 0);
779
+ rb_define_method(cNokogiriXmlDocument, "create_entity", noko_xml_document__create_entity, -1);
780
+ rb_define_method(cNokogiriXmlDocument, "remove_namespaces!", remove_namespaces_bang, 0);
781
+
782
+ rb_define_protected_method(cNokogiriXmlDocument, "initialize_copy_with_args", rb_xml_document_initialize_copy_with_args,
783
+ 2);
784
+ }