nokogiri 1.18.0.rc1-x86_64-linux-gnu

Sign up to get free protection for your applications and to get access to all the features.
Files changed (203) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +293 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +42 -0
  8. data/ext/nokogiri/depend +38 -0
  9. data/ext/nokogiri/extconf.rb +1173 -0
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/include/libexslt/exslt.h +108 -0
  18. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  19. data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
  20. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
  21. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  22. data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
  23. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
  24. data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
  25. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  26. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  27. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  28. data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
  29. data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
  30. data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
  31. data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
  32. data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
  33. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  34. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
  35. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
  36. data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
  37. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
  38. data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
  39. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
  40. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
  41. data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
  42. data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
  43. data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
  44. data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
  45. data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
  46. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
  47. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  48. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
  65. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
  66. data/ext/nokogiri/include/libxslt/attributes.h +39 -0
  67. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  68. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  69. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  70. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  71. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  72. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  73. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  74. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  75. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  76. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  77. data/ext/nokogiri/include/libxslt/security.h +104 -0
  78. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  79. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  80. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  81. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  82. data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
  83. data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
  84. data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
  85. data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
  86. data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
  87. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  88. data/ext/nokogiri/nokogiri.c +294 -0
  89. data/ext/nokogiri/nokogiri.h +238 -0
  90. data/ext/nokogiri/test_global_handlers.c +40 -0
  91. data/ext/nokogiri/xml_attr.c +103 -0
  92. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  93. data/ext/nokogiri/xml_cdata.c +62 -0
  94. data/ext/nokogiri/xml_comment.c +57 -0
  95. data/ext/nokogiri/xml_document.c +784 -0
  96. data/ext/nokogiri/xml_document_fragment.c +29 -0
  97. data/ext/nokogiri/xml_dtd.c +208 -0
  98. data/ext/nokogiri/xml_element_content.c +131 -0
  99. data/ext/nokogiri/xml_element_decl.c +69 -0
  100. data/ext/nokogiri/xml_encoding_handler.c +112 -0
  101. data/ext/nokogiri/xml_entity_decl.c +112 -0
  102. data/ext/nokogiri/xml_entity_reference.c +50 -0
  103. data/ext/nokogiri/xml_namespace.c +181 -0
  104. data/ext/nokogiri/xml_node.c +2459 -0
  105. data/ext/nokogiri/xml_node_set.c +518 -0
  106. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  107. data/ext/nokogiri/xml_reader.c +777 -0
  108. data/ext/nokogiri/xml_relax_ng.c +149 -0
  109. data/ext/nokogiri/xml_sax_parser.c +403 -0
  110. data/ext/nokogiri/xml_sax_parser_context.c +390 -0
  111. data/ext/nokogiri/xml_sax_push_parser.c +206 -0
  112. data/ext/nokogiri/xml_schema.c +226 -0
  113. data/ext/nokogiri/xml_syntax_error.c +93 -0
  114. data/ext/nokogiri/xml_text.c +59 -0
  115. data/ext/nokogiri/xml_xpath_context.c +502 -0
  116. data/ext/nokogiri/xslt_stylesheet.c +421 -0
  117. data/gumbo-parser/CHANGES.md +63 -0
  118. data/gumbo-parser/Makefile +129 -0
  119. data/gumbo-parser/THANKS +27 -0
  120. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  121. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  122. data/lib/nokogiri/3.3/nokogiri.so +0 -0
  123. data/lib/nokogiri/3.4/nokogiri.so +0 -0
  124. data/lib/nokogiri/class_resolver.rb +67 -0
  125. data/lib/nokogiri/css/node.rb +58 -0
  126. data/lib/nokogiri/css/parser.rb +772 -0
  127. data/lib/nokogiri/css/parser.y +277 -0
  128. data/lib/nokogiri/css/parser_extras.rb +36 -0
  129. data/lib/nokogiri/css/selector_cache.rb +38 -0
  130. data/lib/nokogiri/css/syntax_error.rb +9 -0
  131. data/lib/nokogiri/css/tokenizer.rb +155 -0
  132. data/lib/nokogiri/css/tokenizer.rex +57 -0
  133. data/lib/nokogiri/css/xpath_visitor.rb +375 -0
  134. data/lib/nokogiri/css.rb +132 -0
  135. data/lib/nokogiri/decorators/slop.rb +42 -0
  136. data/lib/nokogiri/encoding_handler.rb +57 -0
  137. data/lib/nokogiri/extension.rb +32 -0
  138. data/lib/nokogiri/gumbo.rb +15 -0
  139. data/lib/nokogiri/html.rb +48 -0
  140. data/lib/nokogiri/html4/builder.rb +37 -0
  141. data/lib/nokogiri/html4/document.rb +235 -0
  142. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  143. data/lib/nokogiri/html4/element_description.rb +25 -0
  144. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  145. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  146. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  147. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  148. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  149. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  150. data/lib/nokogiri/html4.rb +42 -0
  151. data/lib/nokogiri/html5/builder.rb +40 -0
  152. data/lib/nokogiri/html5/document.rb +199 -0
  153. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  154. data/lib/nokogiri/html5/node.rb +103 -0
  155. data/lib/nokogiri/html5.rb +368 -0
  156. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  157. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  158. data/lib/nokogiri/syntax_error.rb +6 -0
  159. data/lib/nokogiri/version/constant.rb +6 -0
  160. data/lib/nokogiri/version/info.rb +224 -0
  161. data/lib/nokogiri/version.rb +4 -0
  162. data/lib/nokogiri/xml/attr.rb +66 -0
  163. data/lib/nokogiri/xml/attribute_decl.rb +22 -0
  164. data/lib/nokogiri/xml/builder.rb +494 -0
  165. data/lib/nokogiri/xml/cdata.rb +13 -0
  166. data/lib/nokogiri/xml/character_data.rb +9 -0
  167. data/lib/nokogiri/xml/document.rb +514 -0
  168. data/lib/nokogiri/xml/document_fragment.rb +276 -0
  169. data/lib/nokogiri/xml/dtd.rb +34 -0
  170. data/lib/nokogiri/xml/element_content.rb +46 -0
  171. data/lib/nokogiri/xml/element_decl.rb +17 -0
  172. data/lib/nokogiri/xml/entity_decl.rb +23 -0
  173. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  174. data/lib/nokogiri/xml/namespace.rb +57 -0
  175. data/lib/nokogiri/xml/node/save_options.rb +76 -0
  176. data/lib/nokogiri/xml/node.rb +1650 -0
  177. data/lib/nokogiri/xml/node_set.rb +449 -0
  178. data/lib/nokogiri/xml/notation.rb +19 -0
  179. data/lib/nokogiri/xml/parse_options.rb +213 -0
  180. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  181. data/lib/nokogiri/xml/pp/node.rb +73 -0
  182. data/lib/nokogiri/xml/pp.rb +4 -0
  183. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  184. data/lib/nokogiri/xml/reader.rb +139 -0
  185. data/lib/nokogiri/xml/relax_ng.rb +75 -0
  186. data/lib/nokogiri/xml/sax/document.rb +258 -0
  187. data/lib/nokogiri/xml/sax/parser.rb +199 -0
  188. data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
  189. data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
  190. data/lib/nokogiri/xml/sax.rb +54 -0
  191. data/lib/nokogiri/xml/schema.rb +140 -0
  192. data/lib/nokogiri/xml/searchable.rb +297 -0
  193. data/lib/nokogiri/xml/syntax_error.rb +94 -0
  194. data/lib/nokogiri/xml/text.rb +11 -0
  195. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  196. data/lib/nokogiri/xml/xpath.rb +21 -0
  197. data/lib/nokogiri/xml/xpath_context.rb +49 -0
  198. data/lib/nokogiri/xml.rb +65 -0
  199. data/lib/nokogiri/xslt/stylesheet.rb +49 -0
  200. data/lib/nokogiri/xslt.rb +129 -0
  201. data/lib/nokogiri.rb +128 -0
  202. data/lib/xsd/xmlparser/nokogiri.rb +105 -0
  203. metadata +324 -0
@@ -0,0 +1,784 @@
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriXmlDocument ;
4
+
5
+ static int
6
+ dealloc_node_i2(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
7
+ {
8
+ switch (node->type) {
9
+ case XML_ATTRIBUTE_NODE:
10
+ xmlFreePropList((xmlAttrPtr)node);
11
+ break;
12
+ case XML_NAMESPACE_DECL:
13
+ xmlFreeNs((xmlNsPtr)node);
14
+ break;
15
+ case XML_DTD_NODE:
16
+ xmlFreeDtd((xmlDtdPtr)node);
17
+ break;
18
+ default:
19
+ if (node->parent == NULL) {
20
+ node->next = NULL;
21
+ node->prev = NULL;
22
+ xmlAddChild((xmlNodePtr)doc, node);
23
+ }
24
+ }
25
+ return ST_CONTINUE;
26
+ }
27
+
28
+ static int
29
+ dealloc_node_i(st_data_t key, st_data_t node, st_data_t doc)
30
+ {
31
+ return dealloc_node_i2((xmlNodePtr)key, (xmlNodePtr)node, (xmlDocPtr)doc);
32
+ }
33
+
34
+ static void
35
+ remove_private(xmlNodePtr node)
36
+ {
37
+ xmlNodePtr child;
38
+
39
+ for (child = node->children; child; child = child->next) {
40
+ remove_private(child);
41
+ }
42
+
43
+ if ((node->type == XML_ELEMENT_NODE ||
44
+ node->type == XML_XINCLUDE_START ||
45
+ node->type == XML_XINCLUDE_END) &&
46
+ node->properties) {
47
+ for (child = (xmlNodePtr)node->properties; child; child = child->next) {
48
+ remove_private(child);
49
+ }
50
+ }
51
+
52
+ node->_private = NULL;
53
+ }
54
+
55
+ static void
56
+ mark(void *data)
57
+ {
58
+ xmlDocPtr doc = (xmlDocPtr)data;
59
+ nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
60
+ if (tuple) {
61
+ rb_gc_mark(tuple->doc);
62
+ rb_gc_mark(tuple->node_cache);
63
+ }
64
+ }
65
+
66
+ static void
67
+ dealloc(void *data)
68
+ {
69
+ xmlDocPtr doc = (xmlDocPtr)data;
70
+ st_table *node_hash;
71
+
72
+ node_hash = DOC_UNLINKED_NODE_HASH(doc);
73
+
74
+ st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);
75
+ st_free_table(node_hash);
76
+
77
+ ruby_xfree(doc->_private);
78
+
79
+ #if defined(__GNUC__) && __GNUC__ >= 5
80
+ #pragma GCC diagnostic push
81
+ #pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
82
+ #endif
83
+ /*
84
+ * libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
85
+ * versions, the registered callback from libxml-ruby will access the _private pointers set by
86
+ * nokogiri, which will result in segfaults.
87
+ *
88
+ * To avoid this, we need to clear the _private pointers from all nodes in this document tree
89
+ * before that callback gets invoked.
90
+ *
91
+ * libxml-ruby 3.0.0 was released in 2017-02, so at some point we can probably safely remove this
92
+ * safeguard (though probably pairing with a runtime check on the libxml-ruby version).
93
+ */
94
+ if (xmlDeregisterNodeDefaultValue) {
95
+ remove_private((xmlNodePtr)doc);
96
+ }
97
+ #if defined(__GNUC__) && __GNUC__ >= 5
98
+ #pragma GCC diagnostic pop
99
+ #endif
100
+
101
+ xmlFreeDoc(doc);
102
+ }
103
+
104
+ static size_t
105
+ memsize_node(const xmlNodePtr node)
106
+ {
107
+ /* note we don't count namespace definitions, just going for a good-enough number here */
108
+ xmlNodePtr child;
109
+ xmlAttrPtr property;
110
+ size_t memsize = 0;
111
+
112
+ memsize += (size_t)xmlStrlen(node->name);
113
+
114
+ if (node->type == XML_ELEMENT_NODE) {
115
+ for (property = node->properties; property; property = property->next) {
116
+ memsize += sizeof(xmlAttr) + memsize_node((xmlNodePtr)property);
117
+ }
118
+ }
119
+ if (node->type == XML_TEXT_NODE) {
120
+ memsize += (size_t)xmlStrlen(node->content);
121
+ }
122
+ for (child = node->children; child; child = child->next) {
123
+ memsize += sizeof(xmlNode) + memsize_node(child);
124
+ }
125
+ return memsize;
126
+ }
127
+
128
+ static size_t
129
+ memsize(const void *data)
130
+ {
131
+ xmlDocPtr doc = (const xmlDocPtr)data;
132
+ size_t memsize = sizeof(xmlDoc);
133
+ /* This may not account for all memory use */
134
+ memsize += memsize_node((xmlNodePtr)doc);
135
+ return memsize;
136
+ }
137
+
138
+ static const rb_data_type_t xml_doc_type = {
139
+ .wrap_struct_name = "xmlDoc",
140
+ .function = {
141
+ .dmark = mark,
142
+ .dfree = dealloc,
143
+ .dsize = memsize,
144
+ },
145
+ // .flags = RUBY_TYPED_FREE_IMMEDIATELY, // TODO see https://github.com/sparklemotion/nokogiri/issues/2822
146
+ };
147
+
148
+ static VALUE
149
+ _xml_document_alloc(VALUE klass)
150
+ {
151
+ return TypedData_Wrap_Struct(klass, &xml_doc_type, NULL);
152
+ }
153
+
154
+ static void
155
+ _xml_document_data_ptr_set(VALUE rb_document, xmlDocPtr c_document)
156
+ {
157
+ nokogiriTuplePtr tuple;
158
+
159
+ assert(DATA_PTR(rb_document) == NULL);
160
+ assert(c_document->_private == NULL);
161
+
162
+ DATA_PTR(rb_document) = c_document;
163
+
164
+ tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
165
+ tuple->doc = rb_document;
166
+ tuple->unlinkedNodes = st_init_numtable_with_size(128);
167
+ tuple->node_cache = rb_ary_new();
168
+
169
+ c_document->_private = tuple ;
170
+
171
+ rb_iv_set(rb_document, "@node_cache", tuple->node_cache);
172
+
173
+ return;
174
+ }
175
+
176
+ /* :nodoc: */
177
+ static VALUE
178
+ rb_xml_document_initialize_copy_with_args(VALUE rb_self, VALUE rb_other, VALUE rb_level)
179
+ {
180
+ xmlDocPtr c_other, c_self;
181
+ int c_level;
182
+
183
+ c_other = noko_xml_document_unwrap(rb_other);
184
+ c_level = (int)NUM2INT(rb_level);
185
+
186
+ c_self = xmlCopyDoc(c_other, c_level);
187
+ if (c_self == NULL) { return Qnil; }
188
+
189
+ c_self->type = c_other->type;
190
+ _xml_document_data_ptr_set(rb_self, c_self);
191
+
192
+ return rb_self ;
193
+ }
194
+
195
+ static void
196
+ recursively_remove_namespaces_from_node(xmlNodePtr node)
197
+ {
198
+ xmlNodePtr child ;
199
+ xmlAttrPtr property ;
200
+
201
+ xmlSetNs(node, NULL);
202
+
203
+ for (child = node->children ; child ; child = child->next) {
204
+ recursively_remove_namespaces_from_node(child);
205
+ }
206
+
207
+ if (((node->type == XML_ELEMENT_NODE) ||
208
+ (node->type == XML_XINCLUDE_START) ||
209
+ (node->type == XML_XINCLUDE_END)) &&
210
+ node->nsDef) {
211
+ xmlNsPtr curr = node->nsDef;
212
+ while (curr) {
213
+ noko_xml_document_pin_namespace(curr, node->doc);
214
+ curr = curr->next;
215
+ }
216
+ node->nsDef = NULL;
217
+ }
218
+
219
+ if (node->type == XML_ELEMENT_NODE && node->properties != NULL) {
220
+ property = node->properties ;
221
+ while (property != NULL) {
222
+ if (property->ns) { property->ns = NULL ; }
223
+ property = property->next ;
224
+ }
225
+ }
226
+ }
227
+
228
+ /*
229
+ * call-seq:
230
+ * url
231
+ *
232
+ * Get the url name for this document.
233
+ */
234
+ static VALUE
235
+ url(VALUE self)
236
+ {
237
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
238
+
239
+ if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
240
+
241
+ return Qnil;
242
+ }
243
+
244
+ /*
245
+ * call-seq:
246
+ * root=
247
+ *
248
+ * Set the root element on this document
249
+ */
250
+ static VALUE
251
+ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
252
+ {
253
+ xmlDocPtr c_document;
254
+ xmlNodePtr c_new_root = NULL, c_current_root;
255
+
256
+ c_document = noko_xml_document_unwrap(self);
257
+
258
+ c_current_root = xmlDocGetRootElement(c_document);
259
+ if (c_current_root) {
260
+ xmlUnlinkNode(c_current_root);
261
+ noko_xml_document_pin_node(c_current_root);
262
+ }
263
+
264
+ if (!NIL_P(rb_new_root)) {
265
+ if (!rb_obj_is_kind_of(rb_new_root, cNokogiriXmlNode)) {
266
+ rb_raise(rb_eArgError,
267
+ "expected Nokogiri::XML::Node but received %"PRIsVALUE,
268
+ rb_obj_class(rb_new_root));
269
+ }
270
+
271
+ Noko_Node_Get_Struct(rb_new_root, xmlNode, c_new_root);
272
+
273
+ /* If the new root's document is not the same as the current document,
274
+ * then we need to dup the node in to this document. */
275
+ if (c_new_root->doc != c_document) {
276
+ c_new_root = xmlDocCopyNode(c_new_root, c_document, 1);
277
+ if (!c_new_root) {
278
+ rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
279
+ }
280
+ }
281
+ }
282
+
283
+ xmlDocSetRootElement(c_document, c_new_root);
284
+
285
+ return rb_new_root;
286
+ }
287
+
288
+ /*
289
+ * call-seq:
290
+ * root
291
+ *
292
+ * Get the root node for this document.
293
+ */
294
+ static VALUE
295
+ rb_xml_document_root(VALUE self)
296
+ {
297
+ xmlDocPtr c_document;
298
+ xmlNodePtr c_root;
299
+
300
+ c_document = noko_xml_document_unwrap(self);
301
+
302
+ c_root = xmlDocGetRootElement(c_document);
303
+ if (!c_root) {
304
+ return Qnil;
305
+ }
306
+
307
+ return noko_xml_node_wrap(Qnil, c_root) ;
308
+ }
309
+
310
+ /*
311
+ * call-seq:
312
+ * encoding= encoding
313
+ *
314
+ * Set the encoding string for this Document
315
+ */
316
+ static VALUE
317
+ set_encoding(VALUE self, VALUE encoding)
318
+ {
319
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
320
+
321
+ if (doc->encoding) {
322
+ xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
323
+ }
324
+
325
+ doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding));
326
+
327
+ return encoding;
328
+ }
329
+
330
+ /*
331
+ * call-seq:
332
+ * encoding
333
+ *
334
+ * Get the encoding for this Document
335
+ */
336
+ static VALUE
337
+ encoding(VALUE self)
338
+ {
339
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
340
+
341
+ if (!doc->encoding) { return Qnil; }
342
+ return NOKOGIRI_STR_NEW2(doc->encoding);
343
+ }
344
+
345
+ /*
346
+ * call-seq:
347
+ * version
348
+ *
349
+ * Get the XML version for this Document
350
+ */
351
+ static VALUE
352
+ version(VALUE self)
353
+ {
354
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
355
+
356
+ if (!doc->version) { return Qnil; }
357
+ return NOKOGIRI_STR_NEW2(doc->version);
358
+ }
359
+
360
+ /*
361
+ * call-seq:
362
+ * read_io(io, url, encoding, options)
363
+ *
364
+ * Create a new document from an IO object
365
+ */
366
+ static VALUE
367
+ noko_xml_document_s_read_io(VALUE rb_class,
368
+ VALUE rb_io,
369
+ VALUE rb_url,
370
+ VALUE rb_encoding,
371
+ VALUE rb_options)
372
+ {
373
+ /* TODO: deprecate this method, parse should be the preferred entry point. then we can make this
374
+ private. */
375
+ libxmlStructuredErrorHandlerState handler_state;
376
+ VALUE rb_errors = rb_ary_new();
377
+
378
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
379
+
380
+ const char *c_url = NIL_P(rb_url) ? NULL : StringValueCStr(rb_url);
381
+ const char *c_enc = NIL_P(rb_encoding) ? NULL : StringValueCStr(rb_encoding);
382
+ xmlDocPtr c_document = xmlReadIO(
383
+ (xmlInputReadCallback)noko_io_read,
384
+ (xmlInputCloseCallback)noko_io_close,
385
+ (void *)rb_io,
386
+ c_url,
387
+ c_enc,
388
+ (int)NUM2INT(rb_options)
389
+ );
390
+
391
+ noko__structured_error_func_restore(&handler_state);
392
+
393
+ if (c_document == NULL) {
394
+ xmlFreeDoc(c_document);
395
+
396
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
397
+ if (RB_TEST(exception)) {
398
+ rb_exc_raise(exception);
399
+ } else {
400
+ rb_raise(rb_eRuntimeError, "Could not parse document");
401
+ }
402
+ }
403
+
404
+ VALUE rb_document = noko_xml_document_wrap(rb_class, c_document);
405
+ rb_iv_set(rb_document, "@errors", rb_errors);
406
+ return rb_document;
407
+ }
408
+
409
+ /*
410
+ * call-seq:
411
+ * read_memory(string, url, encoding, options)
412
+ *
413
+ * Create a new document from a String
414
+ */
415
+ static VALUE
416
+ noko_xml_document_s_read_memory(VALUE rb_class,
417
+ VALUE rb_input,
418
+ VALUE rb_url,
419
+ VALUE rb_encoding,
420
+ VALUE rb_options)
421
+ {
422
+ /* TODO: deprecate this method, parse should be the preferred entry point. then we can make this
423
+ private. */
424
+ VALUE rb_errors = rb_ary_new();
425
+ xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
426
+
427
+ const char *c_buffer = StringValuePtr(rb_input);
428
+ const char *c_url = NIL_P(rb_url) ? NULL : StringValueCStr(rb_url);
429
+ const char *c_enc = NIL_P(rb_encoding) ? NULL : StringValueCStr(rb_encoding);
430
+ int c_buffer_len = (int)RSTRING_LEN(rb_input);
431
+ xmlDocPtr c_document = xmlReadMemory(c_buffer, c_buffer_len, c_url, c_enc, (int)NUM2INT(rb_options));
432
+
433
+ xmlSetStructuredErrorFunc(NULL, NULL);
434
+
435
+ if (c_document == NULL) {
436
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
437
+ if (RB_TEST(exception)) {
438
+ rb_exc_raise(exception);
439
+ } else {
440
+ rb_raise(rb_eRuntimeError, "Could not parse document");
441
+ }
442
+ }
443
+
444
+ VALUE document = noko_xml_document_wrap(rb_class, c_document);
445
+ rb_iv_set(document, "@errors", rb_errors);
446
+ return document;
447
+ }
448
+
449
+ /*
450
+ * call-seq:
451
+ * new(version = "1.0")
452
+ *
453
+ * Create a new empty document declaring XML version +version+.
454
+ */
455
+ static VALUE
456
+ new (int argc, VALUE *argv, VALUE klass)
457
+ {
458
+ xmlDocPtr doc;
459
+ VALUE version, rest, rb_doc ;
460
+
461
+ rb_scan_args(argc, argv, "0*", &rest);
462
+ version = rb_ary_entry(rest, (long)0);
463
+ if (NIL_P(version)) { version = rb_str_new2("1.0"); }
464
+
465
+ doc = xmlNewDoc((xmlChar *)StringValueCStr(version));
466
+ rb_doc = noko_xml_document_wrap_with_init_args(klass, doc, argc, argv);
467
+ return rb_doc ;
468
+ }
469
+
470
+ /*
471
+ * call-seq:
472
+ * remove_namespaces!
473
+ *
474
+ * Remove all namespaces from all nodes in the document.
475
+ *
476
+ * This could be useful for developers who either don't understand namespaces
477
+ * or don't care about them.
478
+ *
479
+ * The following example shows a use case, and you can decide for yourself
480
+ * whether this is a good thing or not:
481
+ *
482
+ * doc = Nokogiri::XML <<-EOXML
483
+ * <root>
484
+ * <car xmlns:part="http://general-motors.com/">
485
+ * <part:tire>Michelin Model XGV</part:tire>
486
+ * </car>
487
+ * <bicycle xmlns:part="http://schwinn.com/">
488
+ * <part:tire>I'm a bicycle tire!</part:tire>
489
+ * </bicycle>
490
+ * </root>
491
+ * EOXML
492
+ *
493
+ * doc.xpath("//tire").to_s # => ""
494
+ * doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => "<part:tire>Michelin Model XGV</part:tire>"
495
+ * doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => "<part:tire>I'm a bicycle tire!</part:tire>"
496
+ *
497
+ * doc.remove_namespaces!
498
+ *
499
+ * doc.xpath("//tire").to_s # => "<tire>Michelin Model XGV</tire><tire>I'm a bicycle tire!</tire>"
500
+ * doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => ""
501
+ * doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => ""
502
+ *
503
+ * For more information on why this probably is *not* a good thing in general,
504
+ * please direct your browser to
505
+ * http://tenderlovemaking.com/2009/04/23/namespaces-in-xml.html
506
+ */
507
+ static VALUE
508
+ remove_namespaces_bang(VALUE self)
509
+ {
510
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
511
+
512
+ recursively_remove_namespaces_from_node((xmlNodePtr)doc);
513
+ return self;
514
+ }
515
+
516
+ /* call-seq:
517
+ * doc.create_entity(name, type, external_id, system_id, content)
518
+ *
519
+ * Create a new entity named +name+.
520
+ *
521
+ * +type+ is an integer representing the type of entity to be created, and it defaults to
522
+ * +Nokogiri::XML::EntityDecl::INTERNAL_GENERAL+. See the constants on Nokogiri::XML::EntityDecl for
523
+ * more information.
524
+ *
525
+ * +external_id+, +system_id+, and +content+ set the External ID, System ID,
526
+ * and content respectively. All of these parameters are optional.
527
+ */
528
+ static VALUE
529
+ noko_xml_document__create_entity(int argc, VALUE *argv, VALUE rb_document)
530
+ {
531
+ VALUE rb_name;
532
+ VALUE rb_type;
533
+ VALUE rb_ext_id;
534
+ VALUE rb_sys_id;
535
+ VALUE rb_content;
536
+
537
+ rb_scan_args(argc, argv, "14",
538
+ &rb_name,
539
+ &rb_type, &rb_ext_id, &rb_sys_id, &rb_content);
540
+
541
+ xmlDocPtr c_document = noko_xml_document_unwrap(rb_document);
542
+
543
+ libxmlStructuredErrorHandlerState handler_state;
544
+ VALUE rb_errors = rb_ary_new();
545
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
546
+
547
+ xmlEntityPtr c_entity = xmlAddDocEntity(
548
+ c_document,
549
+ (xmlChar *)(NIL_P(rb_name) ? NULL : StringValueCStr(rb_name)),
550
+ (int)(NIL_P(rb_type) ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(rb_type)),
551
+ (xmlChar *)(NIL_P(rb_ext_id) ? NULL : StringValueCStr(rb_ext_id)),
552
+ (xmlChar *)(NIL_P(rb_sys_id) ? NULL : StringValueCStr(rb_sys_id)),
553
+ (xmlChar *)(NIL_P(rb_content) ? NULL : StringValueCStr(rb_content))
554
+ );
555
+
556
+ noko__structured_error_func_restore(&handler_state);
557
+
558
+ if (NULL == c_entity) {
559
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
560
+ if (RB_TEST(exception)) {
561
+ rb_exc_raise(exception);
562
+ } else {
563
+ rb_raise(rb_eRuntimeError, "Could not create entity");
564
+ }
565
+ }
566
+
567
+ return noko_xml_node_wrap(cNokogiriXmlEntityDecl, (xmlNodePtr)c_entity);
568
+ }
569
+
570
+ static int
571
+ block_caller(void *ctx, xmlNodePtr c_node, xmlNodePtr c_parent_node)
572
+ {
573
+ VALUE block = (VALUE)ctx;
574
+ VALUE rb_node;
575
+ VALUE rb_parent_node;
576
+ VALUE ret;
577
+
578
+ if (c_node->type == XML_NAMESPACE_DECL) {
579
+ rb_node = noko_xml_namespace_wrap((xmlNsPtr)c_node, c_parent_node->doc);
580
+ } else {
581
+ rb_node = noko_xml_node_wrap(Qnil, c_node);
582
+ }
583
+ rb_parent_node = c_parent_node ? noko_xml_node_wrap(Qnil, c_parent_node) : Qnil;
584
+
585
+ ret = rb_funcall(block, rb_intern("call"), 2, rb_node, rb_parent_node);
586
+
587
+ return (Qfalse == ret || Qnil == ret) ? 0 : 1;
588
+ }
589
+
590
+ /* call-seq:
591
+ * doc.canonicalize(mode=XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
592
+ * doc.canonicalize { |obj, parent| ... }
593
+ *
594
+ * Canonicalize a document and return the results. Takes an optional block
595
+ * that takes two parameters: the +obj+ and that node's +parent+.
596
+ * The +obj+ will be either a Nokogiri::XML::Node, or a Nokogiri::XML::Namespace
597
+ * The block must return a non-nil, non-false value if the +obj+ passed in
598
+ * should be included in the canonicalized document.
599
+ */
600
+ static VALUE
601
+ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
602
+ {
603
+ VALUE rb_mode;
604
+ VALUE rb_namespaces;
605
+ VALUE rb_comments_p;
606
+ int c_mode = 0;
607
+ xmlChar **c_namespaces;
608
+
609
+ xmlDocPtr c_doc;
610
+ xmlOutputBufferPtr c_obuf;
611
+ xmlC14NIsVisibleCallback c_callback_wrapper = NULL;
612
+ void *rb_callback = NULL;
613
+
614
+ VALUE rb_cStringIO;
615
+ VALUE rb_io;
616
+
617
+ rb_scan_args(argc, argv, "03", &rb_mode, &rb_namespaces, &rb_comments_p);
618
+ if (!NIL_P(rb_mode)) {
619
+ Check_Type(rb_mode, T_FIXNUM);
620
+ c_mode = NUM2INT(rb_mode);
621
+ }
622
+ if (!NIL_P(rb_namespaces)) {
623
+ Check_Type(rb_namespaces, T_ARRAY);
624
+ if (c_mode == XML_C14N_1_0 || c_mode == XML_C14N_1_1) {
625
+ rb_raise(rb_eRuntimeError, "This canonicalizer does not support this operation");
626
+ }
627
+ }
628
+
629
+ c_doc = noko_xml_document_unwrap(self);
630
+
631
+ rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
632
+ rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
633
+ c_obuf = xmlAllocOutputBuffer(NULL);
634
+
635
+ c_obuf->writecallback = (xmlOutputWriteCallback)noko_io_write;
636
+ c_obuf->closecallback = (xmlOutputCloseCallback)noko_io_close;
637
+ c_obuf->context = (void *)rb_io;
638
+
639
+ if (rb_block_given_p()) {
640
+ c_callback_wrapper = block_caller;
641
+ rb_callback = (void *)rb_block_proc();
642
+ }
643
+
644
+ if (NIL_P(rb_namespaces)) {
645
+ c_namespaces = NULL;
646
+ } else {
647
+ long ns_len = RARRAY_LEN(rb_namespaces);
648
+ c_namespaces = ruby_xcalloc((size_t)ns_len + 1, sizeof(xmlChar *));
649
+ for (int j = 0 ; j < ns_len ; j++) {
650
+ VALUE entry = rb_ary_entry(rb_namespaces, j);
651
+ c_namespaces[j] = (xmlChar *)StringValueCStr(entry);
652
+ }
653
+ }
654
+
655
+ xmlC14NExecute(c_doc, c_callback_wrapper, rb_callback,
656
+ c_mode,
657
+ c_namespaces,
658
+ (int)RTEST(rb_comments_p),
659
+ c_obuf);
660
+
661
+ ruby_xfree(c_namespaces);
662
+ xmlOutputBufferClose(c_obuf);
663
+
664
+ return rb_funcall(rb_io, rb_intern("string"), 0);
665
+ }
666
+
667
+ VALUE
668
+ noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int argc, VALUE *argv)
669
+ {
670
+ VALUE rb_document;
671
+
672
+ if (!klass) {
673
+ klass = cNokogiriXmlDocument;
674
+ }
675
+
676
+ rb_document = _xml_document_alloc(klass);
677
+ _xml_document_data_ptr_set(rb_document, c_document);
678
+
679
+ rb_iv_set(rb_document, "@decorators", Qnil);
680
+ rb_iv_set(rb_document, "@errors", Qnil);
681
+
682
+ rb_obj_call_init(rb_document, argc, argv);
683
+
684
+ return rb_document ;
685
+ }
686
+
687
+
688
+ /* deprecated. use noko_xml_document_wrap() instead. */
689
+ VALUE
690
+ Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
691
+ {
692
+ /* TODO: deprecate this method in v2.0 */
693
+ return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
694
+ }
695
+
696
+ VALUE
697
+ noko_xml_document_wrap(VALUE klass, xmlDocPtr doc)
698
+ {
699
+ return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
700
+ }
701
+
702
+ xmlDocPtr
703
+ noko_xml_document_unwrap(VALUE rb_document)
704
+ {
705
+ xmlDocPtr c_document;
706
+ TypedData_Get_Struct(rb_document, xmlDoc, &xml_doc_type, c_document);
707
+ return c_document;
708
+ }
709
+
710
+ /* Schema creation will remove and deallocate "blank" nodes.
711
+ * If those blank nodes have been exposed to Ruby, they could get freed
712
+ * out from under the VALUE pointer. This function checks to see if any of
713
+ * those nodes have been exposed to Ruby, and if so we should raise an exception.
714
+ */
715
+ int
716
+ noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document)
717
+ {
718
+ VALUE cache = DOC_NODE_CACHE(c_document);
719
+
720
+ if (NIL_P(cache)) {
721
+ return 0;
722
+ }
723
+
724
+ for (long jnode = 0; jnode < RARRAY_LEN(cache); jnode++) {
725
+ xmlNodePtr node;
726
+ VALUE element = rb_ary_entry(cache, jnode);
727
+
728
+ Noko_Node_Get_Struct(element, xmlNode, node);
729
+ if (xmlIsBlankNode(node)) {
730
+ return 1;
731
+ }
732
+ }
733
+
734
+ return 0;
735
+ }
736
+
737
+ void
738
+ noko_xml_document_pin_node(xmlNodePtr node)
739
+ {
740
+ xmlDocPtr doc;
741
+ nokogiriTuplePtr tuple;
742
+
743
+ doc = node->doc;
744
+ tuple = (nokogiriTuplePtr)doc->_private;
745
+ st_insert(tuple->unlinkedNodes, (st_data_t)node, (st_data_t)node);
746
+ }
747
+
748
+
749
+ void
750
+ noko_xml_document_pin_namespace(xmlNsPtr ns, xmlDocPtr doc)
751
+ {
752
+ nokogiriTuplePtr tuple;
753
+
754
+ tuple = (nokogiriTuplePtr)doc->_private;
755
+ st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns);
756
+ }
757
+
758
+
759
+ void
760
+ noko_init_xml_document(void)
761
+ {
762
+ assert(cNokogiriXmlNode);
763
+
764
+ cNokogiriXmlDocument = rb_define_class_under(mNokogiriXml, "Document", cNokogiriXmlNode);
765
+
766
+ rb_define_alloc_func(cNokogiriXmlDocument, _xml_document_alloc);
767
+
768
+ rb_define_singleton_method(cNokogiriXmlDocument, "read_memory", noko_xml_document_s_read_memory, 4);
769
+ rb_define_singleton_method(cNokogiriXmlDocument, "read_io", noko_xml_document_s_read_io, 4);
770
+ rb_define_singleton_method(cNokogiriXmlDocument, "new", new, -1);
771
+
772
+ rb_define_method(cNokogiriXmlDocument, "root", rb_xml_document_root, 0);
773
+ rb_define_method(cNokogiriXmlDocument, "root=", rb_xml_document_root_set, 1);
774
+ rb_define_method(cNokogiriXmlDocument, "encoding", encoding, 0);
775
+ rb_define_method(cNokogiriXmlDocument, "encoding=", set_encoding, 1);
776
+ rb_define_method(cNokogiriXmlDocument, "version", version, 0);
777
+ rb_define_method(cNokogiriXmlDocument, "canonicalize", rb_xml_document_canonicalize, -1);
778
+ rb_define_method(cNokogiriXmlDocument, "url", url, 0);
779
+ rb_define_method(cNokogiriXmlDocument, "create_entity", noko_xml_document__create_entity, -1);
780
+ rb_define_method(cNokogiriXmlDocument, "remove_namespaces!", remove_namespaces_bang, 0);
781
+
782
+ rb_define_protected_method(cNokogiriXmlDocument, "initialize_copy_with_args", rb_xml_document_initialize_copy_with_args,
783
+ 2);
784
+ }