nokogiri 1.14.0.rc1-arm-linux

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (200) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +287 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +41 -0
  8. data/ext/nokogiri/depend +38 -0
  9. data/ext/nokogiri/extconf.rb +1082 -0
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +114 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/include/libexslt/exslt.h +108 -0
  17. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  18. data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
  19. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +306 -0
  20. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  21. data/ext/nokogiri/include/libxml2/libxml/SAX.h +204 -0
  22. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +172 -0
  23. data/ext/nokogiri/include/libxml2/libxml/c14n.h +128 -0
  24. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  25. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  26. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  27. data/ext/nokogiri/include/libxml2/libxml/dict.h +81 -0
  28. data/ext/nokogiri/include/libxml2/libxml/encoding.h +232 -0
  29. data/ext/nokogiri/include/libxml2/libxml/entities.h +153 -0
  30. data/ext/nokogiri/include/libxml2/libxml/globals.h +499 -0
  31. data/ext/nokogiri/include/libxml2/libxml/hash.h +236 -0
  32. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  33. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
  34. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +81 -0
  35. data/ext/nokogiri/include/libxml2/libxml/parser.h +1244 -0
  36. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +656 -0
  37. data/ext/nokogiri/include/libxml2/libxml/pattern.h +100 -0
  38. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +218 -0
  39. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +958 -0
  40. data/ext/nokogiri/include/libxml2/libxml/schematron.h +142 -0
  41. data/ext/nokogiri/include/libxml2/libxml/threads.h +91 -0
  42. data/ext/nokogiri/include/libxml2/libxml/tree.h +1312 -0
  43. data/ext/nokogiri/include/libxml2/libxml/uri.h +94 -0
  44. data/ext/nokogiri/include/libxml2/libxml/valid.h +463 -0
  45. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +129 -0
  46. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  47. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +368 -0
  48. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +947 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +77 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +226 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +428 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +222 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +88 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +246 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +202 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +503 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +488 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xpath.h +575 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +632 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +137 -0
  65. data/ext/nokogiri/include/libxslt/attributes.h +38 -0
  66. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  67. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  68. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  69. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  70. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  71. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  72. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  73. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  74. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  75. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  76. data/ext/nokogiri/include/libxslt/security.h +104 -0
  77. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  78. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  79. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  80. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  81. data/ext/nokogiri/include/libxslt/xsltInternals.h +1982 -0
  82. data/ext/nokogiri/include/libxslt/xsltconfig.h +179 -0
  83. data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
  84. data/ext/nokogiri/include/libxslt/xsltlocale.h +76 -0
  85. data/ext/nokogiri/include/libxslt/xsltutils.h +310 -0
  86. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  87. data/ext/nokogiri/nokogiri.c +259 -0
  88. data/ext/nokogiri/nokogiri.h +235 -0
  89. data/ext/nokogiri/test_global_handlers.c +40 -0
  90. data/ext/nokogiri/xml_attr.c +103 -0
  91. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  92. data/ext/nokogiri/xml_cdata.c +57 -0
  93. data/ext/nokogiri/xml_comment.c +62 -0
  94. data/ext/nokogiri/xml_document.c +689 -0
  95. data/ext/nokogiri/xml_document_fragment.c +44 -0
  96. data/ext/nokogiri/xml_dtd.c +208 -0
  97. data/ext/nokogiri/xml_element_content.c +128 -0
  98. data/ext/nokogiri/xml_element_decl.c +69 -0
  99. data/ext/nokogiri/xml_encoding_handler.c +104 -0
  100. data/ext/nokogiri/xml_entity_decl.c +112 -0
  101. data/ext/nokogiri/xml_entity_reference.c +50 -0
  102. data/ext/nokogiri/xml_namespace.c +186 -0
  103. data/ext/nokogiri/xml_node.c +2425 -0
  104. data/ext/nokogiri/xml_node_set.c +496 -0
  105. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  106. data/ext/nokogiri/xml_reader.c +794 -0
  107. data/ext/nokogiri/xml_relax_ng.c +183 -0
  108. data/ext/nokogiri/xml_sax_parser.c +316 -0
  109. data/ext/nokogiri/xml_sax_parser_context.c +283 -0
  110. data/ext/nokogiri/xml_sax_push_parser.c +166 -0
  111. data/ext/nokogiri/xml_schema.c +282 -0
  112. data/ext/nokogiri/xml_syntax_error.c +85 -0
  113. data/ext/nokogiri/xml_text.c +48 -0
  114. data/ext/nokogiri/xml_xpath_context.c +413 -0
  115. data/ext/nokogiri/xslt_stylesheet.c +363 -0
  116. data/gumbo-parser/CHANGES.md +63 -0
  117. data/gumbo-parser/Makefile +111 -0
  118. data/gumbo-parser/THANKS +27 -0
  119. data/lib/nokogiri/2.7/nokogiri.so +0 -0
  120. data/lib/nokogiri/3.0/nokogiri.so +0 -0
  121. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  122. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  123. data/lib/nokogiri/class_resolver.rb +67 -0
  124. data/lib/nokogiri/css/node.rb +54 -0
  125. data/lib/nokogiri/css/parser.rb +770 -0
  126. data/lib/nokogiri/css/parser.y +277 -0
  127. data/lib/nokogiri/css/parser_extras.rb +96 -0
  128. data/lib/nokogiri/css/syntax_error.rb +9 -0
  129. data/lib/nokogiri/css/tokenizer.rb +155 -0
  130. data/lib/nokogiri/css/tokenizer.rex +56 -0
  131. data/lib/nokogiri/css/xpath_visitor.rb +359 -0
  132. data/lib/nokogiri/css.rb +66 -0
  133. data/lib/nokogiri/decorators/slop.rb +44 -0
  134. data/lib/nokogiri/encoding_handler.rb +57 -0
  135. data/lib/nokogiri/extension.rb +32 -0
  136. data/lib/nokogiri/gumbo.rb +15 -0
  137. data/lib/nokogiri/html.rb +48 -0
  138. data/lib/nokogiri/html4/builder.rb +37 -0
  139. data/lib/nokogiri/html4/document.rb +214 -0
  140. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  141. data/lib/nokogiri/html4/element_description.rb +25 -0
  142. data/lib/nokogiri/html4/element_description_defaults.rb +572 -0
  143. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  144. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  145. data/lib/nokogiri/html4/sax/parser.rb +63 -0
  146. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  147. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  148. data/lib/nokogiri/html4.rb +47 -0
  149. data/lib/nokogiri/html5/document.rb +168 -0
  150. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  151. data/lib/nokogiri/html5/node.rb +98 -0
  152. data/lib/nokogiri/html5.rb +389 -0
  153. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  154. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  155. data/lib/nokogiri/syntax_error.rb +6 -0
  156. data/lib/nokogiri/version/constant.rb +6 -0
  157. data/lib/nokogiri/version/info.rb +223 -0
  158. data/lib/nokogiri/version.rb +4 -0
  159. data/lib/nokogiri/xml/attr.rb +66 -0
  160. data/lib/nokogiri/xml/attribute_decl.rb +20 -0
  161. data/lib/nokogiri/xml/builder.rb +487 -0
  162. data/lib/nokogiri/xml/cdata.rb +13 -0
  163. data/lib/nokogiri/xml/character_data.rb +9 -0
  164. data/lib/nokogiri/xml/document.rb +471 -0
  165. data/lib/nokogiri/xml/document_fragment.rb +205 -0
  166. data/lib/nokogiri/xml/dtd.rb +34 -0
  167. data/lib/nokogiri/xml/element_content.rb +38 -0
  168. data/lib/nokogiri/xml/element_decl.rb +15 -0
  169. data/lib/nokogiri/xml/entity_decl.rb +21 -0
  170. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  171. data/lib/nokogiri/xml/namespace.rb +58 -0
  172. data/lib/nokogiri/xml/node/save_options.rb +68 -0
  173. data/lib/nokogiri/xml/node.rb +1563 -0
  174. data/lib/nokogiri/xml/node_set.rb +446 -0
  175. data/lib/nokogiri/xml/notation.rb +19 -0
  176. data/lib/nokogiri/xml/parse_options.rb +213 -0
  177. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  178. data/lib/nokogiri/xml/pp/node.rb +57 -0
  179. data/lib/nokogiri/xml/pp.rb +4 -0
  180. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  181. data/lib/nokogiri/xml/reader.rb +105 -0
  182. data/lib/nokogiri/xml/relax_ng.rb +38 -0
  183. data/lib/nokogiri/xml/sax/document.rb +167 -0
  184. data/lib/nokogiri/xml/sax/parser.rb +125 -0
  185. data/lib/nokogiri/xml/sax/parser_context.rb +21 -0
  186. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  187. data/lib/nokogiri/xml/sax.rb +6 -0
  188. data/lib/nokogiri/xml/schema.rb +73 -0
  189. data/lib/nokogiri/xml/searchable.rb +270 -0
  190. data/lib/nokogiri/xml/syntax_error.rb +72 -0
  191. data/lib/nokogiri/xml/text.rb +11 -0
  192. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  193. data/lib/nokogiri/xml/xpath.rb +21 -0
  194. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  195. data/lib/nokogiri/xml.rb +76 -0
  196. data/lib/nokogiri/xslt/stylesheet.rb +27 -0
  197. data/lib/nokogiri/xslt.rb +65 -0
  198. data/lib/nokogiri.rb +120 -0
  199. data/lib/xsd/xmlparser/nokogiri.rb +104 -0
  200. metadata +317 -0
@@ -0,0 +1,689 @@
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriXmlDocument ;
4
+
5
+ static int
6
+ dealloc_node_i2(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
7
+ {
8
+ switch (node->type) {
9
+ case XML_ATTRIBUTE_NODE:
10
+ xmlFreePropList((xmlAttrPtr)node);
11
+ break;
12
+ case XML_NAMESPACE_DECL:
13
+ xmlFreeNs((xmlNsPtr)node);
14
+ break;
15
+ case XML_DTD_NODE:
16
+ xmlFreeDtd((xmlDtdPtr)node);
17
+ break;
18
+ default:
19
+ if (node->parent == NULL) {
20
+ xmlAddChild((xmlNodePtr)doc, node);
21
+ }
22
+ }
23
+ return ST_CONTINUE;
24
+ }
25
+
26
+ static int
27
+ dealloc_node_i(st_data_t key, st_data_t node, st_data_t doc)
28
+ {
29
+ return dealloc_node_i2((xmlNodePtr)key, (xmlNodePtr)node, (xmlDocPtr)doc);
30
+ }
31
+
32
+ static void
33
+ remove_private(xmlNodePtr node)
34
+ {
35
+ xmlNodePtr child;
36
+
37
+ for (child = node->children; child; child = child->next) {
38
+ remove_private(child);
39
+ }
40
+
41
+ if ((node->type == XML_ELEMENT_NODE ||
42
+ node->type == XML_XINCLUDE_START ||
43
+ node->type == XML_XINCLUDE_END) &&
44
+ node->properties) {
45
+ for (child = (xmlNodePtr)node->properties; child; child = child->next) {
46
+ remove_private(child);
47
+ }
48
+ }
49
+
50
+ node->_private = NULL;
51
+ }
52
+
53
+ static void
54
+ mark(xmlDocPtr doc)
55
+ {
56
+ nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
57
+ if (tuple) {
58
+ rb_gc_mark(tuple->doc);
59
+ rb_gc_mark(tuple->node_cache);
60
+ }
61
+ }
62
+
63
+ static void
64
+ dealloc(xmlDocPtr doc)
65
+ {
66
+ st_table *node_hash;
67
+
68
+ node_hash = DOC_UNLINKED_NODE_HASH(doc);
69
+
70
+ st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);
71
+ st_free_table(node_hash);
72
+
73
+ ruby_xfree(doc->_private);
74
+
75
+ /* When both Nokogiri and libxml-ruby are loaded, make sure that all nodes
76
+ * have their _private pointers cleared. This is to avoid libxml-ruby's
77
+ * xmlDeregisterNode callback from accessing VALUE pointers from ruby's GC
78
+ * free context, which can result in segfaults.
79
+ */
80
+ if (xmlDeregisterNodeDefaultValue) {
81
+ remove_private((xmlNodePtr)doc);
82
+ }
83
+
84
+ xmlFreeDoc(doc);
85
+ }
86
+
87
+ static void
88
+ recursively_remove_namespaces_from_node(xmlNodePtr node)
89
+ {
90
+ xmlNodePtr child ;
91
+ xmlAttrPtr property ;
92
+
93
+ xmlSetNs(node, NULL);
94
+
95
+ for (child = node->children ; child ; child = child->next) {
96
+ recursively_remove_namespaces_from_node(child);
97
+ }
98
+
99
+ if (((node->type == XML_ELEMENT_NODE) ||
100
+ (node->type == XML_XINCLUDE_START) ||
101
+ (node->type == XML_XINCLUDE_END)) &&
102
+ node->nsDef) {
103
+ xmlNsPtr curr = node->nsDef;
104
+ while (curr) {
105
+ noko_xml_document_pin_namespace(curr, node->doc);
106
+ curr = curr->next;
107
+ }
108
+ node->nsDef = NULL;
109
+ }
110
+
111
+ if (node->type == XML_ELEMENT_NODE && node->properties != NULL) {
112
+ property = node->properties ;
113
+ while (property != NULL) {
114
+ if (property->ns) { property->ns = NULL ; }
115
+ property = property->next ;
116
+ }
117
+ }
118
+ }
119
+
120
+ /*
121
+ * call-seq:
122
+ * url
123
+ *
124
+ * Get the url name for this document.
125
+ */
126
+ static VALUE
127
+ url(VALUE self)
128
+ {
129
+ xmlDocPtr doc;
130
+ Data_Get_Struct(self, xmlDoc, doc);
131
+
132
+ if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
133
+
134
+ return Qnil;
135
+ }
136
+
137
+ /*
138
+ * call-seq:
139
+ * root=
140
+ *
141
+ * Set the root element on this document
142
+ */
143
+ static VALUE
144
+ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
145
+ {
146
+ xmlDocPtr c_document;
147
+ xmlNodePtr c_new_root = NULL, c_current_root;
148
+
149
+ Data_Get_Struct(self, xmlDoc, c_document);
150
+
151
+ c_current_root = xmlDocGetRootElement(c_document);
152
+ if (c_current_root) {
153
+ xmlUnlinkNode(c_current_root);
154
+ noko_xml_document_pin_node(c_current_root);
155
+ }
156
+
157
+ if (!NIL_P(rb_new_root)) {
158
+ if (!rb_obj_is_kind_of(rb_new_root, cNokogiriXmlNode)) {
159
+ rb_raise(rb_eArgError,
160
+ "expected Nokogiri::XML::Node but received %"PRIsVALUE,
161
+ rb_obj_class(rb_new_root));
162
+ }
163
+
164
+ Noko_Node_Get_Struct(rb_new_root, xmlNode, c_new_root);
165
+
166
+ /* If the new root's document is not the same as the current document,
167
+ * then we need to dup the node in to this document. */
168
+ if (c_new_root->doc != c_document) {
169
+ c_new_root = xmlDocCopyNode(c_new_root, c_document, 1);
170
+ if (!c_new_root) {
171
+ rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
172
+ }
173
+ }
174
+ }
175
+
176
+ xmlDocSetRootElement(c_document, c_new_root);
177
+
178
+ return rb_new_root;
179
+ }
180
+
181
+ /*
182
+ * call-seq:
183
+ * root
184
+ *
185
+ * Get the root node for this document.
186
+ */
187
+ static VALUE
188
+ rb_xml_document_root(VALUE self)
189
+ {
190
+ xmlDocPtr c_document;
191
+ xmlNodePtr c_root;
192
+
193
+ Data_Get_Struct(self, xmlDoc, c_document);
194
+
195
+ c_root = xmlDocGetRootElement(c_document);
196
+ if (!c_root) {
197
+ return Qnil;
198
+ }
199
+
200
+ return noko_xml_node_wrap(Qnil, c_root) ;
201
+ }
202
+
203
+ /*
204
+ * call-seq:
205
+ * encoding= encoding
206
+ *
207
+ * Set the encoding string for this Document
208
+ */
209
+ static VALUE
210
+ set_encoding(VALUE self, VALUE encoding)
211
+ {
212
+ xmlDocPtr doc;
213
+ Data_Get_Struct(self, xmlDoc, doc);
214
+
215
+ if (doc->encoding) {
216
+ xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
217
+ }
218
+
219
+ doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding));
220
+
221
+ return encoding;
222
+ }
223
+
224
+ /*
225
+ * call-seq:
226
+ * encoding
227
+ *
228
+ * Get the encoding for this Document
229
+ */
230
+ static VALUE
231
+ encoding(VALUE self)
232
+ {
233
+ xmlDocPtr doc;
234
+ Data_Get_Struct(self, xmlDoc, doc);
235
+
236
+ if (!doc->encoding) { return Qnil; }
237
+ return NOKOGIRI_STR_NEW2(doc->encoding);
238
+ }
239
+
240
+ /*
241
+ * call-seq:
242
+ * version
243
+ *
244
+ * Get the XML version for this Document
245
+ */
246
+ static VALUE
247
+ version(VALUE self)
248
+ {
249
+ xmlDocPtr doc;
250
+ Data_Get_Struct(self, xmlDoc, doc);
251
+
252
+ if (!doc->version) { return Qnil; }
253
+ return NOKOGIRI_STR_NEW2(doc->version);
254
+ }
255
+
256
+ /*
257
+ * call-seq:
258
+ * read_io(io, url, encoding, options)
259
+ *
260
+ * Create a new document from an IO object
261
+ */
262
+ static VALUE
263
+ read_io(VALUE klass,
264
+ VALUE io,
265
+ VALUE url,
266
+ VALUE encoding,
267
+ VALUE options)
268
+ {
269
+ const char *c_url = NIL_P(url) ? NULL : StringValueCStr(url);
270
+ const char *c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
271
+ VALUE error_list = rb_ary_new();
272
+ VALUE document;
273
+ xmlDocPtr doc;
274
+
275
+ xmlResetLastError();
276
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
277
+
278
+ doc = xmlReadIO(
279
+ (xmlInputReadCallback)noko_io_read,
280
+ (xmlInputCloseCallback)noko_io_close,
281
+ (void *)io,
282
+ c_url,
283
+ c_enc,
284
+ (int)NUM2INT(options)
285
+ );
286
+ xmlSetStructuredErrorFunc(NULL, NULL);
287
+
288
+ if (doc == NULL) {
289
+ xmlErrorPtr error;
290
+
291
+ xmlFreeDoc(doc);
292
+
293
+ error = xmlGetLastError();
294
+ if (error) {
295
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
296
+ } else {
297
+ rb_raise(rb_eRuntimeError, "Could not parse document");
298
+ }
299
+
300
+ return Qnil;
301
+ }
302
+
303
+ document = noko_xml_document_wrap(klass, doc);
304
+ rb_iv_set(document, "@errors", error_list);
305
+ return document;
306
+ }
307
+
308
+ /*
309
+ * call-seq:
310
+ * read_memory(string, url, encoding, options)
311
+ *
312
+ * Create a new document from a String
313
+ */
314
+ static VALUE
315
+ read_memory(VALUE klass,
316
+ VALUE string,
317
+ VALUE url,
318
+ VALUE encoding,
319
+ VALUE options)
320
+ {
321
+ const char *c_buffer = StringValuePtr(string);
322
+ const char *c_url = NIL_P(url) ? NULL : StringValueCStr(url);
323
+ const char *c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
324
+ int len = (int)RSTRING_LEN(string);
325
+ VALUE error_list = rb_ary_new();
326
+ VALUE document;
327
+ xmlDocPtr doc;
328
+
329
+ xmlResetLastError();
330
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
331
+ doc = xmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options));
332
+ xmlSetStructuredErrorFunc(NULL, NULL);
333
+
334
+ if (doc == NULL) {
335
+ xmlErrorPtr error;
336
+
337
+ xmlFreeDoc(doc);
338
+
339
+ error = xmlGetLastError();
340
+ if (error) {
341
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
342
+ } else {
343
+ rb_raise(rb_eRuntimeError, "Could not parse document");
344
+ }
345
+
346
+ return Qnil;
347
+ }
348
+
349
+ document = noko_xml_document_wrap(klass, doc);
350
+ rb_iv_set(document, "@errors", error_list);
351
+ return document;
352
+ }
353
+
354
+ /*
355
+ * call-seq:
356
+ * dup
357
+ *
358
+ * Copy this Document. An optional depth may be passed in, but it defaults
359
+ * to a deep copy. 0 is a shallow copy, 1 is a deep copy.
360
+ */
361
+ static VALUE
362
+ duplicate_document(int argc, VALUE *argv, VALUE self)
363
+ {
364
+ xmlDocPtr doc, dup;
365
+ VALUE copy;
366
+ VALUE level;
367
+
368
+ if (rb_scan_args(argc, argv, "01", &level) == 0) {
369
+ level = INT2NUM((long)1);
370
+ }
371
+
372
+ Data_Get_Struct(self, xmlDoc, doc);
373
+
374
+ dup = xmlCopyDoc(doc, (int)NUM2INT(level));
375
+
376
+ if (dup == NULL) { return Qnil; }
377
+
378
+ dup->type = doc->type;
379
+ copy = noko_xml_document_wrap(rb_obj_class(self), dup);
380
+ rb_iv_set(copy, "@errors", rb_iv_get(self, "@errors"));
381
+ return copy ;
382
+ }
383
+
384
+ /*
385
+ * call-seq:
386
+ * new(version = default)
387
+ *
388
+ * Create a new document with +version+ (defaults to "1.0")
389
+ */
390
+ static VALUE
391
+ new (int argc, VALUE *argv, VALUE klass)
392
+ {
393
+ xmlDocPtr doc;
394
+ VALUE version, rest, rb_doc ;
395
+
396
+ rb_scan_args(argc, argv, "0*", &rest);
397
+ version = rb_ary_entry(rest, (long)0);
398
+ if (NIL_P(version)) { version = rb_str_new2("1.0"); }
399
+
400
+ doc = xmlNewDoc((xmlChar *)StringValueCStr(version));
401
+ rb_doc = noko_xml_document_wrap_with_init_args(klass, doc, argc, argv);
402
+ return rb_doc ;
403
+ }
404
+
405
+ /*
406
+ * call-seq:
407
+ * remove_namespaces!
408
+ *
409
+ * Remove all namespaces from all nodes in the document.
410
+ *
411
+ * This could be useful for developers who either don't understand namespaces
412
+ * or don't care about them.
413
+ *
414
+ * The following example shows a use case, and you can decide for yourself
415
+ * whether this is a good thing or not:
416
+ *
417
+ * doc = Nokogiri::XML <<-EOXML
418
+ * <root>
419
+ * <car xmlns:part="http://general-motors.com/">
420
+ * <part:tire>Michelin Model XGV</part:tire>
421
+ * </car>
422
+ * <bicycle xmlns:part="http://schwinn.com/">
423
+ * <part:tire>I'm a bicycle tire!</part:tire>
424
+ * </bicycle>
425
+ * </root>
426
+ * EOXML
427
+ *
428
+ * doc.xpath("//tire").to_s # => ""
429
+ * doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => "<part:tire>Michelin Model XGV</part:tire>"
430
+ * doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => "<part:tire>I'm a bicycle tire!</part:tire>"
431
+ *
432
+ * doc.remove_namespaces!
433
+ *
434
+ * doc.xpath("//tire").to_s # => "<tire>Michelin Model XGV</tire><tire>I'm a bicycle tire!</tire>"
435
+ * doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => ""
436
+ * doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => ""
437
+ *
438
+ * For more information on why this probably is *not* a good thing in general,
439
+ * please direct your browser to
440
+ * http://tenderlovemaking.com/2009/04/23/namespaces-in-xml.html
441
+ */
442
+ static VALUE
443
+ remove_namespaces_bang(VALUE self)
444
+ {
445
+ xmlDocPtr doc ;
446
+ Data_Get_Struct(self, xmlDoc, doc);
447
+
448
+ recursively_remove_namespaces_from_node((xmlNodePtr)doc);
449
+ return self;
450
+ }
451
+
452
+ /* call-seq: doc.create_entity(name, type, external_id, system_id, content)
453
+ *
454
+ * Create a new entity named +name+.
455
+ *
456
+ * +type+ is an integer representing the type of entity to be created, and it
457
+ * defaults to Nokogiri::XML::EntityDecl::INTERNAL_GENERAL. See
458
+ * the constants on Nokogiri::XML::EntityDecl for more information.
459
+ *
460
+ * +external_id+, +system_id+, and +content+ set the External ID, System ID,
461
+ * and content respectively. All of these parameters are optional.
462
+ */
463
+ static VALUE
464
+ create_entity(int argc, VALUE *argv, VALUE self)
465
+ {
466
+ VALUE name;
467
+ VALUE type;
468
+ VALUE external_id;
469
+ VALUE system_id;
470
+ VALUE content;
471
+ xmlEntityPtr ptr;
472
+ xmlDocPtr doc ;
473
+
474
+ Data_Get_Struct(self, xmlDoc, doc);
475
+
476
+ rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
477
+ &content);
478
+
479
+ xmlResetLastError();
480
+ ptr = xmlAddDocEntity(
481
+ doc,
482
+ (xmlChar *)(NIL_P(name) ? NULL : StringValueCStr(name)),
483
+ (int)(NIL_P(type) ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(type)),
484
+ (xmlChar *)(NIL_P(external_id) ? NULL : StringValueCStr(external_id)),
485
+ (xmlChar *)(NIL_P(system_id) ? NULL : StringValueCStr(system_id)),
486
+ (xmlChar *)(NIL_P(content) ? NULL : StringValueCStr(content))
487
+ );
488
+
489
+ if (NULL == ptr) {
490
+ xmlErrorPtr error = xmlGetLastError();
491
+ if (error) {
492
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
493
+ } else {
494
+ rb_raise(rb_eRuntimeError, "Could not create entity");
495
+ }
496
+
497
+ return Qnil;
498
+ }
499
+
500
+ return noko_xml_node_wrap(cNokogiriXmlEntityDecl, (xmlNodePtr)ptr);
501
+ }
502
+
503
+ static int
504
+ block_caller(void *ctx, xmlNodePtr c_node, xmlNodePtr c_parent_node)
505
+ {
506
+ VALUE block = (VALUE)ctx;
507
+ VALUE rb_node;
508
+ VALUE rb_parent_node;
509
+ VALUE ret;
510
+
511
+ if (c_node->type == XML_NAMESPACE_DECL) {
512
+ rb_node = noko_xml_namespace_wrap((xmlNsPtr)c_node, c_parent_node->doc);
513
+ } else {
514
+ rb_node = noko_xml_node_wrap(Qnil, c_node);
515
+ }
516
+ rb_parent_node = c_parent_node ? noko_xml_node_wrap(Qnil, c_parent_node) : Qnil;
517
+
518
+ ret = rb_funcall(block, rb_intern("call"), 2, rb_node, rb_parent_node);
519
+
520
+ return (Qfalse == ret || Qnil == ret) ? 0 : 1;
521
+ }
522
+
523
+ /* call-seq:
524
+ * doc.canonicalize(mode=XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
525
+ * doc.canonicalize { |obj, parent| ... }
526
+ *
527
+ * Canonicalize a document and return the results. Takes an optional block
528
+ * that takes two parameters: the +obj+ and that node's +parent+.
529
+ * The +obj+ will be either a Nokogiri::XML::Node, or a Nokogiri::XML::Namespace
530
+ * The block must return a non-nil, non-false value if the +obj+ passed in
531
+ * should be included in the canonicalized document.
532
+ */
533
+ static VALUE
534
+ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
535
+ {
536
+ VALUE rb_mode;
537
+ VALUE rb_namespaces;
538
+ VALUE rb_comments_p;
539
+ int c_mode = 0;
540
+ xmlChar **c_namespaces;
541
+
542
+ xmlDocPtr c_doc;
543
+ xmlOutputBufferPtr c_obuf;
544
+ xmlC14NIsVisibleCallback c_callback_wrapper = NULL;
545
+ void *rb_callback = NULL;
546
+
547
+ VALUE rb_cStringIO;
548
+ VALUE rb_io;
549
+
550
+ rb_scan_args(argc, argv, "03", &rb_mode, &rb_namespaces, &rb_comments_p);
551
+ if (!NIL_P(rb_mode)) {
552
+ Check_Type(rb_mode, T_FIXNUM);
553
+ c_mode = NUM2INT(rb_mode);
554
+ }
555
+ if (!NIL_P(rb_namespaces)) {
556
+ Check_Type(rb_namespaces, T_ARRAY);
557
+ if (c_mode == XML_C14N_1_0 || c_mode == XML_C14N_1_1) {
558
+ rb_raise(rb_eRuntimeError, "This canonicalizer does not support this operation");
559
+ }
560
+ }
561
+
562
+ Data_Get_Struct(self, xmlDoc, c_doc);
563
+
564
+ rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
565
+ rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
566
+ c_obuf = xmlAllocOutputBuffer(NULL);
567
+
568
+ c_obuf->writecallback = (xmlOutputWriteCallback)noko_io_write;
569
+ c_obuf->closecallback = (xmlOutputCloseCallback)noko_io_close;
570
+ c_obuf->context = (void *)rb_io;
571
+
572
+ if (rb_block_given_p()) {
573
+ c_callback_wrapper = block_caller;
574
+ rb_callback = (void *)rb_block_proc();
575
+ }
576
+
577
+ if (NIL_P(rb_namespaces)) {
578
+ c_namespaces = NULL;
579
+ } else {
580
+ long ns_len = RARRAY_LEN(rb_namespaces);
581
+ c_namespaces = ruby_xcalloc((size_t)ns_len + 1, sizeof(xmlChar *));
582
+ for (int j = 0 ; j < ns_len ; j++) {
583
+ VALUE entry = rb_ary_entry(rb_namespaces, j);
584
+ c_namespaces[j] = (xmlChar *)StringValueCStr(entry);
585
+ }
586
+ }
587
+
588
+ xmlC14NExecute(c_doc, c_callback_wrapper, rb_callback,
589
+ c_mode,
590
+ c_namespaces,
591
+ (int)RTEST(rb_comments_p),
592
+ c_obuf);
593
+
594
+ ruby_xfree(c_namespaces);
595
+ xmlOutputBufferClose(c_obuf);
596
+
597
+ return rb_funcall(rb_io, rb_intern("string"), 0);
598
+ }
599
+
600
+ VALUE
601
+ noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int argc, VALUE *argv)
602
+ {
603
+ VALUE rb_document;
604
+ nokogiriTuplePtr tuple;
605
+
606
+ if (!klass) {
607
+ klass = cNokogiriXmlDocument;
608
+ }
609
+
610
+ rb_document = Data_Wrap_Struct(klass, mark, dealloc, c_document);
611
+
612
+ tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
613
+ tuple->doc = rb_document;
614
+ tuple->unlinkedNodes = st_init_numtable_with_size(128);
615
+ tuple->node_cache = rb_ary_new();
616
+
617
+ c_document->_private = tuple ;
618
+
619
+ rb_iv_set(rb_document, "@decorators", Qnil);
620
+ rb_iv_set(rb_document, "@errors", Qnil);
621
+ rb_iv_set(rb_document, "@node_cache", tuple->node_cache);
622
+
623
+ rb_obj_call_init(rb_document, argc, argv);
624
+
625
+ return rb_document ;
626
+ }
627
+
628
+
629
+ /* deprecated. use noko_xml_document_wrap() instead. */
630
+ VALUE
631
+ Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
632
+ {
633
+ /* TODO: deprecate this method in v2.0 */
634
+ return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
635
+ }
636
+
637
+ VALUE
638
+ noko_xml_document_wrap(VALUE klass, xmlDocPtr doc)
639
+ {
640
+ return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
641
+ }
642
+
643
+
644
+ void
645
+ noko_xml_document_pin_node(xmlNodePtr node)
646
+ {
647
+ xmlDocPtr doc;
648
+ nokogiriTuplePtr tuple;
649
+
650
+ doc = node->doc;
651
+ tuple = (nokogiriTuplePtr)doc->_private;
652
+ st_insert(tuple->unlinkedNodes, (st_data_t)node, (st_data_t)node);
653
+ }
654
+
655
+
656
+ void
657
+ noko_xml_document_pin_namespace(xmlNsPtr ns, xmlDocPtr doc)
658
+ {
659
+ nokogiriTuplePtr tuple;
660
+
661
+ tuple = (nokogiriTuplePtr)doc->_private;
662
+ st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns);
663
+ }
664
+
665
+
666
+ void
667
+ noko_init_xml_document()
668
+ {
669
+ assert(cNokogiriXmlNode);
670
+ /*
671
+ * Nokogiri::XML::Document wraps an xml document.
672
+ */
673
+ cNokogiriXmlDocument = rb_define_class_under(mNokogiriXml, "Document", cNokogiriXmlNode);
674
+
675
+ rb_define_singleton_method(cNokogiriXmlDocument, "read_memory", read_memory, 4);
676
+ rb_define_singleton_method(cNokogiriXmlDocument, "read_io", read_io, 4);
677
+ rb_define_singleton_method(cNokogiriXmlDocument, "new", new, -1);
678
+
679
+ rb_define_method(cNokogiriXmlDocument, "root", rb_xml_document_root, 0);
680
+ rb_define_method(cNokogiriXmlDocument, "root=", rb_xml_document_root_set, 1);
681
+ rb_define_method(cNokogiriXmlDocument, "encoding", encoding, 0);
682
+ rb_define_method(cNokogiriXmlDocument, "encoding=", set_encoding, 1);
683
+ rb_define_method(cNokogiriXmlDocument, "version", version, 0);
684
+ rb_define_method(cNokogiriXmlDocument, "canonicalize", rb_xml_document_canonicalize, -1);
685
+ rb_define_method(cNokogiriXmlDocument, "dup", duplicate_document, -1);
686
+ rb_define_method(cNokogiriXmlDocument, "url", url, 0);
687
+ rb_define_method(cNokogiriXmlDocument, "create_entity", create_entity, -1);
688
+ rb_define_method(cNokogiriXmlDocument, "remove_namespaces!", remove_namespaces_bang, 0);
689
+ }