nokogiri 1.14.0.rc1-arm-linux

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (200) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +287 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +41 -0
  8. data/ext/nokogiri/depend +38 -0
  9. data/ext/nokogiri/extconf.rb +1082 -0
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +114 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/include/libexslt/exslt.h +108 -0
  17. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  18. data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
  19. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +306 -0
  20. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  21. data/ext/nokogiri/include/libxml2/libxml/SAX.h +204 -0
  22. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +172 -0
  23. data/ext/nokogiri/include/libxml2/libxml/c14n.h +128 -0
  24. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  25. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  26. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  27. data/ext/nokogiri/include/libxml2/libxml/dict.h +81 -0
  28. data/ext/nokogiri/include/libxml2/libxml/encoding.h +232 -0
  29. data/ext/nokogiri/include/libxml2/libxml/entities.h +153 -0
  30. data/ext/nokogiri/include/libxml2/libxml/globals.h +499 -0
  31. data/ext/nokogiri/include/libxml2/libxml/hash.h +236 -0
  32. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  33. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
  34. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +81 -0
  35. data/ext/nokogiri/include/libxml2/libxml/parser.h +1244 -0
  36. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +656 -0
  37. data/ext/nokogiri/include/libxml2/libxml/pattern.h +100 -0
  38. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +218 -0
  39. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +958 -0
  40. data/ext/nokogiri/include/libxml2/libxml/schematron.h +142 -0
  41. data/ext/nokogiri/include/libxml2/libxml/threads.h +91 -0
  42. data/ext/nokogiri/include/libxml2/libxml/tree.h +1312 -0
  43. data/ext/nokogiri/include/libxml2/libxml/uri.h +94 -0
  44. data/ext/nokogiri/include/libxml2/libxml/valid.h +463 -0
  45. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +129 -0
  46. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  47. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +368 -0
  48. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +947 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +77 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +226 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +428 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +222 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +88 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +246 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +202 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +503 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +488 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xpath.h +575 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +632 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +137 -0
  65. data/ext/nokogiri/include/libxslt/attributes.h +38 -0
  66. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  67. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  68. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  69. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  70. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  71. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  72. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  73. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  74. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  75. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  76. data/ext/nokogiri/include/libxslt/security.h +104 -0
  77. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  78. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  79. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  80. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  81. data/ext/nokogiri/include/libxslt/xsltInternals.h +1982 -0
  82. data/ext/nokogiri/include/libxslt/xsltconfig.h +179 -0
  83. data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
  84. data/ext/nokogiri/include/libxslt/xsltlocale.h +76 -0
  85. data/ext/nokogiri/include/libxslt/xsltutils.h +310 -0
  86. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  87. data/ext/nokogiri/nokogiri.c +259 -0
  88. data/ext/nokogiri/nokogiri.h +235 -0
  89. data/ext/nokogiri/test_global_handlers.c +40 -0
  90. data/ext/nokogiri/xml_attr.c +103 -0
  91. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  92. data/ext/nokogiri/xml_cdata.c +57 -0
  93. data/ext/nokogiri/xml_comment.c +62 -0
  94. data/ext/nokogiri/xml_document.c +689 -0
  95. data/ext/nokogiri/xml_document_fragment.c +44 -0
  96. data/ext/nokogiri/xml_dtd.c +208 -0
  97. data/ext/nokogiri/xml_element_content.c +128 -0
  98. data/ext/nokogiri/xml_element_decl.c +69 -0
  99. data/ext/nokogiri/xml_encoding_handler.c +104 -0
  100. data/ext/nokogiri/xml_entity_decl.c +112 -0
  101. data/ext/nokogiri/xml_entity_reference.c +50 -0
  102. data/ext/nokogiri/xml_namespace.c +186 -0
  103. data/ext/nokogiri/xml_node.c +2425 -0
  104. data/ext/nokogiri/xml_node_set.c +496 -0
  105. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  106. data/ext/nokogiri/xml_reader.c +794 -0
  107. data/ext/nokogiri/xml_relax_ng.c +183 -0
  108. data/ext/nokogiri/xml_sax_parser.c +316 -0
  109. data/ext/nokogiri/xml_sax_parser_context.c +283 -0
  110. data/ext/nokogiri/xml_sax_push_parser.c +166 -0
  111. data/ext/nokogiri/xml_schema.c +282 -0
  112. data/ext/nokogiri/xml_syntax_error.c +85 -0
  113. data/ext/nokogiri/xml_text.c +48 -0
  114. data/ext/nokogiri/xml_xpath_context.c +413 -0
  115. data/ext/nokogiri/xslt_stylesheet.c +363 -0
  116. data/gumbo-parser/CHANGES.md +63 -0
  117. data/gumbo-parser/Makefile +111 -0
  118. data/gumbo-parser/THANKS +27 -0
  119. data/lib/nokogiri/2.7/nokogiri.so +0 -0
  120. data/lib/nokogiri/3.0/nokogiri.so +0 -0
  121. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  122. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  123. data/lib/nokogiri/class_resolver.rb +67 -0
  124. data/lib/nokogiri/css/node.rb +54 -0
  125. data/lib/nokogiri/css/parser.rb +770 -0
  126. data/lib/nokogiri/css/parser.y +277 -0
  127. data/lib/nokogiri/css/parser_extras.rb +96 -0
  128. data/lib/nokogiri/css/syntax_error.rb +9 -0
  129. data/lib/nokogiri/css/tokenizer.rb +155 -0
  130. data/lib/nokogiri/css/tokenizer.rex +56 -0
  131. data/lib/nokogiri/css/xpath_visitor.rb +359 -0
  132. data/lib/nokogiri/css.rb +66 -0
  133. data/lib/nokogiri/decorators/slop.rb +44 -0
  134. data/lib/nokogiri/encoding_handler.rb +57 -0
  135. data/lib/nokogiri/extension.rb +32 -0
  136. data/lib/nokogiri/gumbo.rb +15 -0
  137. data/lib/nokogiri/html.rb +48 -0
  138. data/lib/nokogiri/html4/builder.rb +37 -0
  139. data/lib/nokogiri/html4/document.rb +214 -0
  140. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  141. data/lib/nokogiri/html4/element_description.rb +25 -0
  142. data/lib/nokogiri/html4/element_description_defaults.rb +572 -0
  143. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  144. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  145. data/lib/nokogiri/html4/sax/parser.rb +63 -0
  146. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  147. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  148. data/lib/nokogiri/html4.rb +47 -0
  149. data/lib/nokogiri/html5/document.rb +168 -0
  150. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  151. data/lib/nokogiri/html5/node.rb +98 -0
  152. data/lib/nokogiri/html5.rb +389 -0
  153. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  154. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  155. data/lib/nokogiri/syntax_error.rb +6 -0
  156. data/lib/nokogiri/version/constant.rb +6 -0
  157. data/lib/nokogiri/version/info.rb +223 -0
  158. data/lib/nokogiri/version.rb +4 -0
  159. data/lib/nokogiri/xml/attr.rb +66 -0
  160. data/lib/nokogiri/xml/attribute_decl.rb +20 -0
  161. data/lib/nokogiri/xml/builder.rb +487 -0
  162. data/lib/nokogiri/xml/cdata.rb +13 -0
  163. data/lib/nokogiri/xml/character_data.rb +9 -0
  164. data/lib/nokogiri/xml/document.rb +471 -0
  165. data/lib/nokogiri/xml/document_fragment.rb +205 -0
  166. data/lib/nokogiri/xml/dtd.rb +34 -0
  167. data/lib/nokogiri/xml/element_content.rb +38 -0
  168. data/lib/nokogiri/xml/element_decl.rb +15 -0
  169. data/lib/nokogiri/xml/entity_decl.rb +21 -0
  170. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  171. data/lib/nokogiri/xml/namespace.rb +58 -0
  172. data/lib/nokogiri/xml/node/save_options.rb +68 -0
  173. data/lib/nokogiri/xml/node.rb +1563 -0
  174. data/lib/nokogiri/xml/node_set.rb +446 -0
  175. data/lib/nokogiri/xml/notation.rb +19 -0
  176. data/lib/nokogiri/xml/parse_options.rb +213 -0
  177. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  178. data/lib/nokogiri/xml/pp/node.rb +57 -0
  179. data/lib/nokogiri/xml/pp.rb +4 -0
  180. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  181. data/lib/nokogiri/xml/reader.rb +105 -0
  182. data/lib/nokogiri/xml/relax_ng.rb +38 -0
  183. data/lib/nokogiri/xml/sax/document.rb +167 -0
  184. data/lib/nokogiri/xml/sax/parser.rb +125 -0
  185. data/lib/nokogiri/xml/sax/parser_context.rb +21 -0
  186. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  187. data/lib/nokogiri/xml/sax.rb +6 -0
  188. data/lib/nokogiri/xml/schema.rb +73 -0
  189. data/lib/nokogiri/xml/searchable.rb +270 -0
  190. data/lib/nokogiri/xml/syntax_error.rb +72 -0
  191. data/lib/nokogiri/xml/text.rb +11 -0
  192. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  193. data/lib/nokogiri/xml/xpath.rb +21 -0
  194. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  195. data/lib/nokogiri/xml.rb +76 -0
  196. data/lib/nokogiri/xslt/stylesheet.rb +27 -0
  197. data/lib/nokogiri/xslt.rb +65 -0
  198. data/lib/nokogiri.rb +120 -0
  199. data/lib/xsd/xmlparser/nokogiri.rb +104 -0
  200. metadata +317 -0
@@ -0,0 +1,2425 @@
1
+ #include <nokogiri.h>
2
+
3
+ #include <stdbool.h>
4
+
5
+ // :stopdoc:
6
+
7
+ VALUE cNokogiriXmlNode ;
8
+ static ID id_decorate, id_decorate_bang;
9
+
10
+ typedef xmlNodePtr(*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr);
11
+
12
+ static void
13
+ _xml_node_mark(void *ptr)
14
+ {
15
+ xmlNodePtr node = ptr;
16
+
17
+ if (!DOC_RUBY_OBJECT_TEST(node->doc)) {
18
+ return;
19
+ }
20
+
21
+ xmlDocPtr doc = node->doc;
22
+ if (doc->type == XML_DOCUMENT_NODE || doc->type == XML_HTML_DOCUMENT_NODE) {
23
+ if (DOC_RUBY_OBJECT_TEST(doc)) {
24
+ rb_gc_mark(DOC_RUBY_OBJECT(doc));
25
+ }
26
+ } else if (node->doc->_private) {
27
+ rb_gc_mark((VALUE)doc->_private);
28
+ }
29
+ }
30
+
31
+ #ifdef HAVE_RB_GC_LOCATION
32
+ static void
33
+ _xml_node_update_references(void *ptr)
34
+ {
35
+ xmlNodePtr node = ptr;
36
+
37
+ if (node->_private) {
38
+ node->_private = (void *)rb_gc_location((VALUE)node->_private);
39
+ }
40
+ }
41
+ #else
42
+ # define _xml_node_update_references 0
43
+ #endif
44
+
45
+ static const rb_data_type_t nokogiri_node_type = {
46
+ "Nokogiri/XMLNode",
47
+ {_xml_node_mark, 0, 0, _xml_node_update_references},
48
+ 0, 0,
49
+ #ifdef RUBY_TYPED_FREE_IMMEDIATELY
50
+ RUBY_TYPED_FREE_IMMEDIATELY,
51
+ #endif
52
+ };
53
+
54
+ static void
55
+ relink_namespace(xmlNodePtr reparented)
56
+ {
57
+ xmlNodePtr child;
58
+ xmlAttrPtr attr;
59
+
60
+ if (reparented->type != XML_ATTRIBUTE_NODE &&
61
+ reparented->type != XML_ELEMENT_NODE) { return; }
62
+
63
+ if (reparented->ns == NULL || reparented->ns->prefix == NULL) {
64
+ xmlNsPtr ns = NULL;
65
+ xmlChar *name = NULL, *prefix = NULL;
66
+
67
+ name = xmlSplitQName2(reparented->name, &prefix);
68
+
69
+ if (reparented->type == XML_ATTRIBUTE_NODE) {
70
+ if (prefix == NULL || strcmp((char *)prefix, XMLNS_PREFIX) == 0) {
71
+ xmlFree(name);
72
+ xmlFree(prefix);
73
+ return;
74
+ }
75
+ }
76
+
77
+ ns = xmlSearchNs(reparented->doc, reparented, prefix);
78
+
79
+ if (ns != NULL) {
80
+ xmlNodeSetName(reparented, name);
81
+ xmlSetNs(reparented, ns);
82
+ }
83
+
84
+ xmlFree(name);
85
+ xmlFree(prefix);
86
+ }
87
+
88
+ /* Avoid segv when relinking against unlinked nodes. */
89
+ if (reparented->type != XML_ELEMENT_NODE || !reparented->parent) { return; }
90
+
91
+ /* Make sure that our reparented node has the correct namespaces */
92
+ if (!reparented->ns &&
93
+ (reparented->doc != (xmlDocPtr)reparented->parent) &&
94
+ (rb_iv_get(DOC_RUBY_OBJECT(reparented->doc), "@namespace_inheritance") == Qtrue)) {
95
+ xmlSetNs(reparented, reparented->parent->ns);
96
+ }
97
+
98
+ /* Search our parents for an existing definition */
99
+ if (reparented->nsDef) {
100
+ xmlNsPtr curr = reparented->nsDef;
101
+ xmlNsPtr prev = NULL;
102
+
103
+ while (curr) {
104
+ xmlNsPtr ns = xmlSearchNsByHref(
105
+ reparented->doc,
106
+ reparented->parent,
107
+ curr->href
108
+ );
109
+ /* If we find the namespace is already declared, remove it from this
110
+ * definition list. */
111
+ if (ns && ns != curr && xmlStrEqual(ns->prefix, curr->prefix)) {
112
+ if (prev) {
113
+ prev->next = curr->next;
114
+ } else {
115
+ reparented->nsDef = curr->next;
116
+ }
117
+ noko_xml_document_pin_namespace(curr, reparented->doc);
118
+ } else {
119
+ prev = curr;
120
+ }
121
+ curr = curr->next;
122
+ }
123
+ }
124
+
125
+ /*
126
+ * Search our parents for an existing definition of current namespace,
127
+ * because the definition it's pointing to may have just been removed from nsDef.
128
+ *
129
+ * And although that would technically probably be OK, I'd feel better if we
130
+ * referred to a namespace that's still present in a node's nsDef somewhere
131
+ * in the doc.
132
+ */
133
+ if (reparented->ns) {
134
+ xmlNsPtr ns = xmlSearchNs(reparented->doc, reparented, reparented->ns->prefix);
135
+ if (ns
136
+ && ns != reparented->ns
137
+ && xmlStrEqual(ns->prefix, reparented->ns->prefix)
138
+ && xmlStrEqual(ns->href, reparented->ns->href)
139
+ ) {
140
+ xmlSetNs(reparented, ns);
141
+ }
142
+ }
143
+
144
+ /* Only walk all children if there actually is a namespace we need to */
145
+ /* reparent. */
146
+ if (NULL == reparented->ns) { return; }
147
+
148
+ /* When a node gets reparented, walk its children to make sure that */
149
+ /* their namespaces are reparented as well. */
150
+ child = reparented->children;
151
+ while (NULL != child) {
152
+ relink_namespace(child);
153
+ child = child->next;
154
+ }
155
+
156
+ if (reparented->type == XML_ELEMENT_NODE) {
157
+ attr = reparented->properties;
158
+ while (NULL != attr) {
159
+ relink_namespace((xmlNodePtr)attr);
160
+ attr = attr->next;
161
+ }
162
+ }
163
+ }
164
+
165
+
166
+ /* internal function meant to wrap xmlReplaceNode
167
+ and fix some issues we have with libxml2 merging nodes */
168
+ static xmlNodePtr
169
+ xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node)
170
+ {
171
+ xmlNodePtr retval ;
172
+
173
+ retval = xmlReplaceNode(pivot, new_node) ;
174
+
175
+ if (retval == pivot) {
176
+ retval = new_node ; /* return semantics for reparent_node_with */
177
+ }
178
+
179
+ /* work around libxml2 issue: https://bugzilla.gnome.org/show_bug.cgi?id=615612 */
180
+ if (retval && retval->type == XML_TEXT_NODE) {
181
+ if (retval->prev && retval->prev->type == XML_TEXT_NODE) {
182
+ retval = xmlTextMerge(retval->prev, retval);
183
+ }
184
+ if (retval->next && retval->next->type == XML_TEXT_NODE) {
185
+ retval = xmlTextMerge(retval, retval->next);
186
+ }
187
+ }
188
+
189
+ return retval ;
190
+ }
191
+
192
+
193
+ static void
194
+ raise_if_ancestor_of_self(xmlNodePtr self)
195
+ {
196
+ for (xmlNodePtr ancestor = self->parent ; ancestor ; ancestor = ancestor->parent) {
197
+ if (self == ancestor) {
198
+ rb_raise(rb_eRuntimeError, "cycle detected: node '%s' is an ancestor of itself", self->name);
199
+ }
200
+ }
201
+ }
202
+
203
+
204
+ static VALUE
205
+ reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func prf)
206
+ {
207
+ VALUE reparented_obj ;
208
+ xmlNodePtr reparentee, original_reparentee, pivot, reparented, next_text, new_next_text, parent ;
209
+ int original_ns_prefix_is_default = 0 ;
210
+
211
+ if (!rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlNode)) {
212
+ rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
213
+ }
214
+ if (rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlDocument)) {
215
+ rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
216
+ }
217
+
218
+ Noko_Node_Get_Struct(reparentee_obj, xmlNode, reparentee);
219
+ Noko_Node_Get_Struct(pivot_obj, xmlNode, pivot);
220
+
221
+ /*
222
+ * Check if nodes given are appropriate to have a parent-child
223
+ * relationship, based on the DOM specification.
224
+ *
225
+ * cf. http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/core.html#ID-1590626202
226
+ */
227
+ if (prf == xmlAddChild) {
228
+ parent = pivot;
229
+ } else {
230
+ parent = pivot->parent;
231
+ }
232
+
233
+ if (parent) {
234
+ switch (parent->type) {
235
+ case XML_DOCUMENT_NODE:
236
+ case XML_HTML_DOCUMENT_NODE:
237
+ switch (reparentee->type) {
238
+ case XML_ELEMENT_NODE:
239
+ case XML_PI_NODE:
240
+ case XML_COMMENT_NODE:
241
+ case XML_DOCUMENT_TYPE_NODE:
242
+ /*
243
+ * The DOM specification says no to adding text-like nodes
244
+ * directly to a document, but we allow it for compatibility.
245
+ */
246
+ case XML_TEXT_NODE:
247
+ case XML_CDATA_SECTION_NODE:
248
+ case XML_ENTITY_REF_NODE:
249
+ goto ok;
250
+ default:
251
+ break;
252
+ }
253
+ break;
254
+ case XML_DOCUMENT_FRAG_NODE:
255
+ case XML_ENTITY_REF_NODE:
256
+ case XML_ELEMENT_NODE:
257
+ switch (reparentee->type) {
258
+ case XML_ELEMENT_NODE:
259
+ case XML_PI_NODE:
260
+ case XML_COMMENT_NODE:
261
+ case XML_TEXT_NODE:
262
+ case XML_CDATA_SECTION_NODE:
263
+ case XML_ENTITY_REF_NODE:
264
+ goto ok;
265
+ default:
266
+ break;
267
+ }
268
+ break;
269
+ case XML_ATTRIBUTE_NODE:
270
+ switch (reparentee->type) {
271
+ case XML_TEXT_NODE:
272
+ case XML_ENTITY_REF_NODE:
273
+ goto ok;
274
+ default:
275
+ break;
276
+ }
277
+ break;
278
+ case XML_TEXT_NODE:
279
+ /*
280
+ * xmlAddChild() breaks the DOM specification in that it allows
281
+ * adding a text node to another, in which case text nodes are
282
+ * coalesced, but since our JRuby version does not support such
283
+ * operation, we should inhibit it.
284
+ */
285
+ break;
286
+ default:
287
+ break;
288
+ }
289
+
290
+ rb_raise(rb_eArgError, "cannot reparent %s there", rb_obj_classname(reparentee_obj));
291
+ }
292
+
293
+ ok:
294
+ original_reparentee = reparentee;
295
+
296
+ if (reparentee->doc != pivot->doc || reparentee->type == XML_TEXT_NODE) {
297
+ /*
298
+ * if the reparentee is a text node, there's a very good chance it will be
299
+ * merged with an adjacent text node after being reparented, and in that case
300
+ * libxml will free the underlying C struct.
301
+ *
302
+ * since we clearly have a ruby object which references the underlying
303
+ * memory, we can't let the C struct get freed. let's pickle the original
304
+ * reparentee by rooting it; and then we'll reparent a duplicate of the
305
+ * node that we don't care about preserving.
306
+ *
307
+ * alternatively, if the reparentee is from a different document than the
308
+ * pivot node, libxml2 is going to get confused about which document's
309
+ * "dictionary" the node's strings belong to (this is an otherwise
310
+ * uninteresting libxml2 implementation detail). as a result, we cannot
311
+ * reparent the actual reparentee, so we reparent a duplicate.
312
+ */
313
+ if (reparentee->type == XML_TEXT_NODE && reparentee->_private) {
314
+ /*
315
+ * additionally, since we know this C struct isn't going to be related to
316
+ * a Ruby object anymore, let's break the relationship on this end as
317
+ * well.
318
+ *
319
+ * this is not absolutely necessary unless libxml-ruby is also in effect,
320
+ * in which case its global callback `rxml_node_deregisterNode` will try
321
+ * to do things to our data.
322
+ *
323
+ * for more details on this particular (and particularly nasty) edge
324
+ * case, see:
325
+ *
326
+ * https://github.com/sparklemotion/nokogiri/issues/1426
327
+ */
328
+ reparentee->_private = NULL ;
329
+ }
330
+
331
+ if (reparentee->ns != NULL && reparentee->ns->prefix == NULL) {
332
+ original_ns_prefix_is_default = 1;
333
+ }
334
+
335
+ noko_xml_document_pin_node(reparentee);
336
+
337
+ if (!(reparentee = xmlDocCopyNode(reparentee, pivot->doc, 1))) {
338
+ rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
339
+ }
340
+
341
+ if (original_ns_prefix_is_default && reparentee->ns != NULL && reparentee->ns->prefix != NULL) {
342
+ /*
343
+ * issue #391, where new node's prefix may become the string "default"
344
+ * see libxml2 tree.c xmlNewReconciliedNs which implements this behavior.
345
+ */
346
+ xmlFree(DISCARD_CONST_QUAL_XMLCHAR(reparentee->ns->prefix));
347
+ reparentee->ns->prefix = NULL;
348
+ }
349
+ }
350
+
351
+ xmlUnlinkNode(original_reparentee);
352
+
353
+ if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling
354
+ && reparentee->type == XML_TEXT_NODE && pivot->next && pivot->next->type == XML_TEXT_NODE) {
355
+ /*
356
+ * libxml merges text nodes in a right-to-left fashion, meaning that if
357
+ * there are two text nodes that would be adjacent, the right (or following,
358
+ * or next) node will be merged into the left (or preceding, or previous)
359
+ * node.
360
+ *
361
+ * and by "merged" I mean the string contents will be concatenated onto the
362
+ * left node's contents, and then the node will be freed.
363
+ *
364
+ * which means that if we have a ruby object wrapped around the right node,
365
+ * its memory would be freed out from under it.
366
+ *
367
+ * so, we detect this edge case and unlink-and-root the text node before it gets
368
+ * merged. then we dup the node and insert that duplicate back into the
369
+ * document where the real node was.
370
+ *
371
+ * yes, this is totally lame.
372
+ */
373
+ next_text = pivot->next ;
374
+ new_next_text = xmlDocCopyNode(next_text, pivot->doc, 1) ;
375
+
376
+ xmlUnlinkNode(next_text);
377
+ noko_xml_document_pin_node(next_text);
378
+
379
+ xmlAddNextSibling(pivot, new_next_text);
380
+ }
381
+
382
+ if (!(reparented = (*prf)(pivot, reparentee))) {
383
+ rb_raise(rb_eRuntimeError, "Could not reparent node");
384
+ }
385
+
386
+ /*
387
+ * make sure the ruby object is pointed at the just-reparented node, which
388
+ * might be a duplicate (see above) or might be the result of merging
389
+ * adjacent text nodes.
390
+ */
391
+ DATA_PTR(reparentee_obj) = reparented ;
392
+ reparented_obj = noko_xml_node_wrap(Qnil, reparented);
393
+
394
+ rb_funcall(reparented_obj, id_decorate_bang, 0);
395
+
396
+ /* if we've created a cycle, raise an exception */
397
+ raise_if_ancestor_of_self(reparented);
398
+
399
+ relink_namespace(reparented);
400
+
401
+ return reparented_obj ;
402
+ }
403
+
404
+ // :startdoc:
405
+
406
+ /*
407
+ * :call-seq:
408
+ * add_namespace_definition(prefix, href) → Nokogiri::XML::Namespace
409
+ * add_namespace(prefix, href) → Nokogiri::XML::Namespace
410
+ *
411
+ * :category: Manipulating Document Structure
412
+ *
413
+ * Adds a namespace definition to this node with +prefix+ using +href+ value, as if this node had
414
+ * included an attribute "xmlns:prefix=href".
415
+ *
416
+ * A default namespace definition for this node can be added by passing +nil+ for +prefix+.
417
+ *
418
+ * [Parameters]
419
+ * - +prefix+ (String, +nil+) An {XML Name}[https://www.w3.org/TR/xml-names/#ns-decl]
420
+ * - +href+ (String) The {URI reference}[https://www.w3.org/TR/xml-names/#sec-namespaces]
421
+ *
422
+ * [Returns] The new Nokogiri::XML::Namespace
423
+ *
424
+ * *Example:* adding a non-default namespace definition
425
+ *
426
+ * doc = Nokogiri::XML("<store><inventory></inventory></store>")
427
+ * inventory = doc.at_css("inventory")
428
+ * inventory.add_namespace_definition("automobile", "http://alices-autos.com/")
429
+ * inventory.add_namespace_definition("bicycle", "http://bobs-bikes.com/")
430
+ * inventory.add_child("<automobile:tire>Michelin model XGV, size 75R</automobile:tire>")
431
+ * doc.to_xml
432
+ * # => "<?xml version=\"1.0\"?>\n" +
433
+ * # "<store>\n" +
434
+ * # " <inventory xmlns:automobile=\"http://alices-autos.com/\" xmlns:bicycle=\"http://bobs-bikes.com/\">\n" +
435
+ * # " <automobile:tire>Michelin model XGV, size 75R</automobile:tire>\n" +
436
+ * # " </inventory>\n" +
437
+ * # "</store>\n"
438
+ *
439
+ * *Example:* adding a default namespace definition
440
+ *
441
+ * doc = Nokogiri::XML("<store><inventory><tire>Michelin model XGV, size 75R</tire></inventory></store>")
442
+ * doc.at_css("tire").add_namespace_definition(nil, "http://bobs-bikes.com/")
443
+ * doc.to_xml
444
+ * # => "<?xml version=\"1.0\"?>\n" +
445
+ * # "<store>\n" +
446
+ * # " <inventory>\n" +
447
+ * # " <tire xmlns=\"http://bobs-bikes.com/\">Michelin model XGV, size 75R</tire>\n" +
448
+ * # " </inventory>\n" +
449
+ * # "</store>\n"
450
+ *
451
+ */
452
+ static VALUE
453
+ rb_xml_node_add_namespace_definition(VALUE rb_node, VALUE rb_prefix, VALUE rb_href)
454
+ {
455
+ xmlNodePtr c_node, element;
456
+ xmlNsPtr c_namespace;
457
+ const xmlChar *c_prefix = (const xmlChar *)(NIL_P(rb_prefix) ? NULL : StringValueCStr(rb_prefix));
458
+
459
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
460
+ element = c_node ;
461
+
462
+ c_namespace = xmlSearchNs(c_node->doc, c_node, c_prefix);
463
+
464
+ if (!c_namespace) {
465
+ if (c_node->type != XML_ELEMENT_NODE) {
466
+ element = c_node->parent;
467
+ }
468
+ c_namespace = xmlNewNs(element, (const xmlChar *)StringValueCStr(rb_href), c_prefix);
469
+ }
470
+
471
+ if (!c_namespace) {
472
+ return Qnil ;
473
+ }
474
+
475
+ if (NIL_P(rb_prefix) || c_node != element) {
476
+ xmlSetNs(c_node, c_namespace);
477
+ }
478
+
479
+ return noko_xml_namespace_wrap(c_namespace, c_node->doc);
480
+ }
481
+
482
+
483
+ /*
484
+ * :call-seq: attribute(name) → Nokogiri::XML::Attr
485
+ *
486
+ * :category: Working With Node Attributes
487
+ *
488
+ * [Returns] Attribute (Nokogiri::XML::Attr) belonging to this node with name +name+.
489
+ *
490
+ * ⚠ Note that attribute namespaces are ignored and only the simple (non-namespace-prefixed) name is
491
+ * used to find a matching attribute. In case of a simple name collision, only one of the matching
492
+ * attributes will be returned. In this case, you will need to use #attribute_with_ns.
493
+ *
494
+ * *Example:*
495
+ *
496
+ * doc = Nokogiri::XML("<root><child size='large' class='big wide tall'/></root>")
497
+ * child = doc.at_css("child")
498
+ * child.attribute("size") # => #<Nokogiri::XML::Attr:0x550 name="size" value="large">
499
+ * child.attribute("class") # => #<Nokogiri::XML::Attr:0x564 name="class" value="big wide tall">
500
+ *
501
+ * *Example* showing that attribute namespaces are ignored when matching by simple name:
502
+ *
503
+ * ⚠ Note that only one of the two matching attributes is returned.
504
+ *
505
+ * doc = Nokogiri::XML(<<~EOF)
506
+ * <root xmlns:width='http://example.com/widths'
507
+ * xmlns:height='http://example.com/heights'>
508
+ * <child width:size='broad' height:size='tall'/>
509
+ * </root>
510
+ * EOF
511
+ * doc.at_css("child").attribute("size")
512
+ * # => #(Attr:0x550 {
513
+ * # name = "size",
514
+ * # namespace = #(Namespace:0x564 {
515
+ * # prefix = "width",
516
+ * # href = "http://example.com/widths"
517
+ * # }),
518
+ * # value = "broad"
519
+ * # })
520
+ */
521
+ static VALUE
522
+ rb_xml_node_attribute(VALUE self, VALUE name)
523
+ {
524
+ xmlNodePtr node;
525
+ xmlAttrPtr prop;
526
+ Noko_Node_Get_Struct(self, xmlNode, node);
527
+ prop = xmlHasProp(node, (xmlChar *)StringValueCStr(name));
528
+
529
+ if (! prop) { return Qnil; }
530
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)prop);
531
+ }
532
+
533
+
534
+ /*
535
+ * :call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
536
+ *
537
+ * :category: Working With Node Attributes
538
+ *
539
+ * [Returns] Attributes (an Array of Nokogiri::XML::Attr) belonging to this node.
540
+ *
541
+ * Note that this is the preferred alternative to #attributes when the simple
542
+ * (non-namespace-prefixed) attribute names may collide.
543
+ *
544
+ * *Example:*
545
+ *
546
+ * Contrast this with the colliding-name example from #attributes.
547
+ *
548
+ * doc = Nokogiri::XML(<<~EOF)
549
+ * <root xmlns:width='http://example.com/widths'
550
+ * xmlns:height='http://example.com/heights'>
551
+ * <child width:size='broad' height:size='tall'/>
552
+ * </root>
553
+ * EOF
554
+ * doc.at_css("child").attribute_nodes
555
+ * # => [#(Attr:0x550 {
556
+ * # name = "size",
557
+ * # namespace = #(Namespace:0x564 {
558
+ * # prefix = "width",
559
+ * # href = "http://example.com/widths"
560
+ * # }),
561
+ * # value = "broad"
562
+ * # }),
563
+ * # #(Attr:0x578 {
564
+ * # name = "size",
565
+ * # namespace = #(Namespace:0x58c {
566
+ * # prefix = "height",
567
+ * # href = "http://example.com/heights"
568
+ * # }),
569
+ * # value = "tall"
570
+ * # })]
571
+ */
572
+ static VALUE
573
+ rb_xml_node_attribute_nodes(VALUE rb_node)
574
+ {
575
+ xmlNodePtr c_node;
576
+
577
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
578
+
579
+ return noko_xml_node_attrs(c_node);
580
+ }
581
+
582
+
583
+ /*
584
+ * :call-seq: attribute_with_ns(name, namespace) → Nokogiri::XML::Attr
585
+ *
586
+ * :category: Working With Node Attributes
587
+ *
588
+ * [Returns]
589
+ * Attribute (Nokogiri::XML::Attr) belonging to this node with matching +name+ and +namespace+.
590
+ *
591
+ * [Parameters]
592
+ * - +name+ (String): the simple (non-namespace-prefixed) name of the attribute
593
+ * - +namespace+ (String): the URI of the attribute's namespace
594
+ *
595
+ * See related: #attribute
596
+ *
597
+ * *Example:*
598
+ *
599
+ * doc = Nokogiri::XML(<<~EOF)
600
+ * <root xmlns:width='http://example.com/widths'
601
+ * xmlns:height='http://example.com/heights'>
602
+ * <child width:size='broad' height:size='tall'/>
603
+ * </root>
604
+ * EOF
605
+ * doc.at_css("child").attribute_with_ns("size", "http://example.com/widths")
606
+ * # => #(Attr:0x550 {
607
+ * # name = "size",
608
+ * # namespace = #(Namespace:0x564 {
609
+ * # prefix = "width",
610
+ * # href = "http://example.com/widths"
611
+ * # }),
612
+ * # value = "broad"
613
+ * # })
614
+ * doc.at_css("child").attribute_with_ns("size", "http://example.com/heights")
615
+ * # => #(Attr:0x578 {
616
+ * # name = "size",
617
+ * # namespace = #(Namespace:0x58c {
618
+ * # prefix = "height",
619
+ * # href = "http://example.com/heights"
620
+ * # }),
621
+ * # value = "tall"
622
+ * # })
623
+ */
624
+ static VALUE
625
+ rb_xml_node_attribute_with_ns(VALUE self, VALUE name, VALUE namespace)
626
+ {
627
+ xmlNodePtr node;
628
+ xmlAttrPtr prop;
629
+ Noko_Node_Get_Struct(self, xmlNode, node);
630
+ prop = xmlHasNsProp(node, (xmlChar *)StringValueCStr(name),
631
+ NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace));
632
+
633
+ if (! prop) { return Qnil; }
634
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)prop);
635
+ }
636
+
637
+
638
+
639
+ /*
640
+ * call-seq: blank? → Boolean
641
+ *
642
+ * [Returns] +true+ if the node is an empty or whitespace-only text or cdata node, else +false+.
643
+ *
644
+ * *Example:*
645
+ *
646
+ * Nokogiri("<root><child/></root>").root.child.blank? # => false
647
+ * Nokogiri("<root>\t \n</root>").root.child.blank? # => true
648
+ * Nokogiri("<root><![CDATA[\t \n]]></root>").root.child.blank? # => true
649
+ * Nokogiri("<root>not-blank</root>").root.child
650
+ * .tap { |n| n.content = "" }.blank? # => true
651
+ */
652
+ static VALUE
653
+ rb_xml_node_blank_eh(VALUE self)
654
+ {
655
+ xmlNodePtr node;
656
+ Noko_Node_Get_Struct(self, xmlNode, node);
657
+ return (1 == xmlIsBlankNode(node)) ? Qtrue : Qfalse ;
658
+ }
659
+
660
+
661
+ /*
662
+ * :call-seq: child() → Nokogiri::XML::Node
663
+ *
664
+ * :category: Traversing Document Structure
665
+ *
666
+ * [Returns] First of this node's children, or +nil+ if there are no children
667
+ *
668
+ * This is a convenience method and is equivalent to:
669
+ *
670
+ * node.children.first
671
+ *
672
+ * See related: #children
673
+ */
674
+ static VALUE
675
+ rb_xml_node_child(VALUE self)
676
+ {
677
+ xmlNodePtr node, child;
678
+ Noko_Node_Get_Struct(self, xmlNode, node);
679
+
680
+ child = node->children;
681
+ if (!child) { return Qnil; }
682
+
683
+ return noko_xml_node_wrap(Qnil, child);
684
+ }
685
+
686
+
687
+ /*
688
+ * :call-seq: children() → Nokogiri::XML::NodeSet
689
+ *
690
+ * :category: Traversing Document Structure
691
+ *
692
+ * [Returns] Nokogiri::XML::NodeSet containing this node's children.
693
+ */
694
+ static VALUE
695
+ rb_xml_node_children(VALUE self)
696
+ {
697
+ xmlNodePtr node;
698
+ xmlNodePtr child;
699
+ xmlNodeSetPtr set;
700
+ VALUE document;
701
+ VALUE node_set;
702
+
703
+ Noko_Node_Get_Struct(self, xmlNode, node);
704
+
705
+ child = node->children;
706
+ set = xmlXPathNodeSetCreate(child);
707
+
708
+ document = DOC_RUBY_OBJECT(node->doc);
709
+
710
+ if (!child) { return noko_xml_node_set_wrap(set, document); }
711
+
712
+ child = child->next;
713
+ while (NULL != child) {
714
+ xmlXPathNodeSetAddUnique(set, child);
715
+ child = child->next;
716
+ }
717
+
718
+ node_set = noko_xml_node_set_wrap(set, document);
719
+
720
+ return node_set;
721
+ }
722
+
723
+
724
+ /*
725
+ * :call-seq:
726
+ * content() → String
727
+ * inner_text() → String
728
+ * text() → String
729
+ * to_str() → String
730
+ *
731
+ * [Returns]
732
+ * Contents of all the text nodes in this node's subtree, concatenated together into a single
733
+ * String.
734
+ *
735
+ * ⚠ Note that entities will _always_ be expanded in the returned String.
736
+ *
737
+ * See related: #inner_html
738
+ *
739
+ * *Example* of how entities are handled:
740
+ *
741
+ * Note that <tt>&lt;</tt> becomes <tt><</tt> in the returned String.
742
+ *
743
+ * doc = Nokogiri::XML.fragment("<child>a &lt; b</child>")
744
+ * doc.at_css("child").content
745
+ * # => "a < b"
746
+ *
747
+ * *Example* of how a subtree is handled:
748
+ *
749
+ * Note that the <tt><span></tt> tags are omitted and only the text node contents are returned,
750
+ * concatenated into a single string.
751
+ *
752
+ * doc = Nokogiri::XML.fragment("<child><span>first</span> <span>second</span></child>")
753
+ * doc.at_css("child").content
754
+ * # => "first second"
755
+ */
756
+ static VALUE
757
+ rb_xml_node_content(VALUE self)
758
+ {
759
+ xmlNodePtr node;
760
+ xmlChar *content;
761
+
762
+ Noko_Node_Get_Struct(self, xmlNode, node);
763
+
764
+ content = xmlNodeGetContent(node);
765
+ if (content) {
766
+ VALUE rval = NOKOGIRI_STR_NEW2(content);
767
+ xmlFree(content);
768
+ return rval;
769
+ }
770
+ return Qnil;
771
+ }
772
+
773
+
774
+ /*
775
+ * :call-seq: document() → Nokogiri::XML::Document
776
+ *
777
+ * :category: Traversing Document Structure
778
+ *
779
+ * [Returns] Parent Nokogiri::XML::Document for this node
780
+ */
781
+ static VALUE
782
+ rb_xml_node_document(VALUE self)
783
+ {
784
+ xmlNodePtr node;
785
+ Noko_Node_Get_Struct(self, xmlNode, node);
786
+ return DOC_RUBY_OBJECT(node->doc);
787
+ }
788
+
789
+ /*
790
+ * :call-seq: pointer_id() → Integer
791
+ *
792
+ * [Returns]
793
+ * A unique id for this node based on the internal memory structures. This method is used by #==
794
+ * to determine node identity.
795
+ */
796
+ static VALUE
797
+ rb_xml_node_pointer_id(VALUE self)
798
+ {
799
+ xmlNodePtr node;
800
+ Noko_Node_Get_Struct(self, xmlNode, node);
801
+
802
+ return rb_uint2inum((uintptr_t)(node));
803
+ }
804
+
805
+ /*
806
+ * :call-seq: encode_special_chars(string) → String
807
+ *
808
+ * Encode any special characters in +string+
809
+ */
810
+ static VALUE
811
+ encode_special_chars(VALUE self, VALUE string)
812
+ {
813
+ xmlNodePtr node;
814
+ xmlChar *encoded;
815
+ VALUE encoded_str;
816
+
817
+ Noko_Node_Get_Struct(self, xmlNode, node);
818
+ encoded = xmlEncodeSpecialChars(
819
+ node->doc,
820
+ (const xmlChar *)StringValueCStr(string)
821
+ );
822
+
823
+ encoded_str = NOKOGIRI_STR_NEW2(encoded);
824
+ xmlFree(encoded);
825
+
826
+ return encoded_str;
827
+ }
828
+
829
+ /*
830
+ * :call-seq:
831
+ * create_internal_subset(name, external_id, system_id)
832
+ *
833
+ * Create the internal subset of a document.
834
+ *
835
+ * doc.create_internal_subset("chapter", "-//OASIS//DTD DocBook XML//EN", "chapter.dtd")
836
+ * # => <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML//EN" "chapter.dtd">
837
+ *
838
+ * doc.create_internal_subset("chapter", nil, "chapter.dtd")
839
+ * # => <!DOCTYPE chapter SYSTEM "chapter.dtd">
840
+ */
841
+ static VALUE
842
+ create_internal_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id)
843
+ {
844
+ xmlNodePtr node;
845
+ xmlDocPtr doc;
846
+ xmlDtdPtr dtd;
847
+
848
+ Noko_Node_Get_Struct(self, xmlNode, node);
849
+
850
+ doc = node->doc;
851
+
852
+ if (xmlGetIntSubset(doc)) {
853
+ rb_raise(rb_eRuntimeError, "Document already has an internal subset");
854
+ }
855
+
856
+ dtd = xmlCreateIntSubset(
857
+ doc,
858
+ NIL_P(name) ? NULL : (const xmlChar *)StringValueCStr(name),
859
+ NIL_P(external_id) ? NULL : (const xmlChar *)StringValueCStr(external_id),
860
+ NIL_P(system_id) ? NULL : (const xmlChar *)StringValueCStr(system_id)
861
+ );
862
+
863
+ if (!dtd) { return Qnil; }
864
+
865
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
866
+ }
867
+
868
+ /*
869
+ * :call-seq:
870
+ * create_external_subset(name, external_id, system_id)
871
+ *
872
+ * Create an external subset
873
+ */
874
+ static VALUE
875
+ create_external_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id)
876
+ {
877
+ xmlNodePtr node;
878
+ xmlDocPtr doc;
879
+ xmlDtdPtr dtd;
880
+
881
+ Noko_Node_Get_Struct(self, xmlNode, node);
882
+
883
+ doc = node->doc;
884
+
885
+ if (doc->extSubset) {
886
+ rb_raise(rb_eRuntimeError, "Document already has an external subset");
887
+ }
888
+
889
+ dtd = xmlNewDtd(
890
+ doc,
891
+ NIL_P(name) ? NULL : (const xmlChar *)StringValueCStr(name),
892
+ NIL_P(external_id) ? NULL : (const xmlChar *)StringValueCStr(external_id),
893
+ NIL_P(system_id) ? NULL : (const xmlChar *)StringValueCStr(system_id)
894
+ );
895
+
896
+ if (!dtd) { return Qnil; }
897
+
898
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
899
+ }
900
+
901
+ /*
902
+ * :call-seq:
903
+ * external_subset()
904
+ *
905
+ * Get the external subset
906
+ */
907
+ static VALUE
908
+ external_subset(VALUE self)
909
+ {
910
+ xmlNodePtr node;
911
+ xmlDocPtr doc;
912
+ xmlDtdPtr dtd;
913
+
914
+ Noko_Node_Get_Struct(self, xmlNode, node);
915
+
916
+ if (!node->doc) { return Qnil; }
917
+
918
+ doc = node->doc;
919
+ dtd = doc->extSubset;
920
+
921
+ if (!dtd) { return Qnil; }
922
+
923
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
924
+ }
925
+
926
+ /*
927
+ * :call-seq:
928
+ * internal_subset()
929
+ *
930
+ * Get the internal subset
931
+ */
932
+ static VALUE
933
+ internal_subset(VALUE self)
934
+ {
935
+ xmlNodePtr node;
936
+ xmlDocPtr doc;
937
+ xmlDtdPtr dtd;
938
+
939
+ Noko_Node_Get_Struct(self, xmlNode, node);
940
+
941
+ if (!node->doc) { return Qnil; }
942
+
943
+ doc = node->doc;
944
+ dtd = xmlGetIntSubset(doc);
945
+
946
+ if (!dtd) { return Qnil; }
947
+
948
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
949
+ }
950
+
951
+ /*
952
+ * :call-seq:
953
+ * dup → Nokogiri::XML::Node
954
+ * dup(depth) → Nokogiri::XML::Node
955
+ * dup(depth, new_parent_doc) → Nokogiri::XML::Node
956
+ *
957
+ * Copy this node.
958
+ *
959
+ * [Parameters]
960
+ * - +depth+ 0 is a shallow copy, 1 (the default) is a deep copy.
961
+ * - +new_parent_doc+
962
+ * The new node's parent Document. Defaults to this node's document.
963
+ *
964
+ * [Returns] The new Nokogiri::XML::Node
965
+ */
966
+ static VALUE
967
+ duplicate_node(int argc, VALUE *argv, VALUE self)
968
+ {
969
+ VALUE r_level, r_new_parent_doc;
970
+ int level;
971
+ int n_args;
972
+ xmlDocPtr new_parent_doc;
973
+ xmlNodePtr node, dup;
974
+
975
+ Noko_Node_Get_Struct(self, xmlNode, node);
976
+
977
+ n_args = rb_scan_args(argc, argv, "02", &r_level, &r_new_parent_doc);
978
+
979
+ if (n_args < 1) {
980
+ r_level = INT2NUM((long)1);
981
+ }
982
+ level = (int)NUM2INT(r_level);
983
+
984
+ if (n_args < 2) {
985
+ new_parent_doc = node->doc;
986
+ } else {
987
+ Data_Get_Struct(r_new_parent_doc, xmlDoc, new_parent_doc);
988
+ }
989
+
990
+ dup = xmlDocCopyNode(node, new_parent_doc, level);
991
+ if (dup == NULL) { return Qnil; }
992
+
993
+ noko_xml_document_pin_node(dup);
994
+
995
+ return noko_xml_node_wrap(rb_obj_class(self), dup);
996
+ }
997
+
998
+ /*
999
+ * :call-seq:
1000
+ * unlink() → self
1001
+ *
1002
+ * Unlink this node from its current context.
1003
+ */
1004
+ static VALUE
1005
+ unlink_node(VALUE self)
1006
+ {
1007
+ xmlNodePtr node;
1008
+ Noko_Node_Get_Struct(self, xmlNode, node);
1009
+ xmlUnlinkNode(node);
1010
+ noko_xml_document_pin_node(node);
1011
+ return self;
1012
+ }
1013
+
1014
+
1015
+ /*
1016
+ * call-seq:
1017
+ * next_sibling
1018
+ *
1019
+ * Returns the next sibling node
1020
+ */
1021
+ static VALUE
1022
+ next_sibling(VALUE self)
1023
+ {
1024
+ xmlNodePtr node, sibling;
1025
+ Noko_Node_Get_Struct(self, xmlNode, node);
1026
+
1027
+ sibling = node->next;
1028
+ if (!sibling) { return Qnil; }
1029
+
1030
+ return noko_xml_node_wrap(Qnil, sibling) ;
1031
+ }
1032
+
1033
+ /*
1034
+ * call-seq:
1035
+ * previous_sibling
1036
+ *
1037
+ * Returns the previous sibling node
1038
+ */
1039
+ static VALUE
1040
+ previous_sibling(VALUE self)
1041
+ {
1042
+ xmlNodePtr node, sibling;
1043
+ Noko_Node_Get_Struct(self, xmlNode, node);
1044
+
1045
+ sibling = node->prev;
1046
+ if (!sibling) { return Qnil; }
1047
+
1048
+ return noko_xml_node_wrap(Qnil, sibling);
1049
+ }
1050
+
1051
+ /*
1052
+ * call-seq:
1053
+ * next_element
1054
+ *
1055
+ * Returns the next Nokogiri::XML::Element type sibling node.
1056
+ */
1057
+ static VALUE
1058
+ next_element(VALUE self)
1059
+ {
1060
+ xmlNodePtr node, sibling;
1061
+ Noko_Node_Get_Struct(self, xmlNode, node);
1062
+
1063
+ sibling = xmlNextElementSibling(node);
1064
+ if (!sibling) { return Qnil; }
1065
+
1066
+ return noko_xml_node_wrap(Qnil, sibling);
1067
+ }
1068
+
1069
+ /*
1070
+ * call-seq:
1071
+ * previous_element
1072
+ *
1073
+ * Returns the previous Nokogiri::XML::Element type sibling node.
1074
+ */
1075
+ static VALUE
1076
+ previous_element(VALUE self)
1077
+ {
1078
+ xmlNodePtr node, sibling;
1079
+ Noko_Node_Get_Struct(self, xmlNode, node);
1080
+
1081
+ /*
1082
+ * note that we don't use xmlPreviousElementSibling here because it's buggy pre-2.7.7.
1083
+ */
1084
+ sibling = node->prev;
1085
+ if (!sibling) { return Qnil; }
1086
+
1087
+ while (sibling && sibling->type != XML_ELEMENT_NODE) {
1088
+ sibling = sibling->prev;
1089
+ }
1090
+
1091
+ return sibling ? noko_xml_node_wrap(Qnil, sibling) : Qnil ;
1092
+ }
1093
+
1094
+ /* :nodoc: */
1095
+ static VALUE
1096
+ replace(VALUE self, VALUE new_node)
1097
+ {
1098
+ VALUE reparent = reparent_node_with(self, new_node, xmlReplaceNodeWrapper);
1099
+
1100
+ xmlNodePtr pivot;
1101
+ Noko_Node_Get_Struct(self, xmlNode, pivot);
1102
+ noko_xml_document_pin_node(pivot);
1103
+
1104
+ return reparent;
1105
+ }
1106
+
1107
+ /*
1108
+ * :call-seq:
1109
+ * element_children() → NodeSet
1110
+ * elements() → NodeSet
1111
+ *
1112
+ * [Returns]
1113
+ * The node's child elements as a NodeSet. Only children that are elements will be returned, which
1114
+ * notably excludes Text nodes.
1115
+ *
1116
+ * *Example:*
1117
+ *
1118
+ * Note that #children returns the Text node "hello" while #element_children does not.
1119
+ *
1120
+ * div = Nokogiri::HTML5("<div>hello<span>world</span>").at_css("div")
1121
+ * div.element_children
1122
+ * # => [#<Nokogiri::XML::Element:0x50 name="span" children=[#<Nokogiri::XML::Text:0x3c "world">]>]
1123
+ * div.children
1124
+ * # => [#<Nokogiri::XML::Text:0x64 "hello">,
1125
+ * # #<Nokogiri::XML::Element:0x50 name="span" children=[#<Nokogiri::XML::Text:0x3c "world">]>]
1126
+ */
1127
+ static VALUE
1128
+ rb_xml_node_element_children(VALUE self)
1129
+ {
1130
+ xmlNodePtr node;
1131
+ xmlNodePtr child;
1132
+ xmlNodeSetPtr set;
1133
+ VALUE document;
1134
+ VALUE node_set;
1135
+
1136
+ Noko_Node_Get_Struct(self, xmlNode, node);
1137
+
1138
+ child = xmlFirstElementChild(node);
1139
+ set = xmlXPathNodeSetCreate(child);
1140
+
1141
+ document = DOC_RUBY_OBJECT(node->doc);
1142
+
1143
+ if (!child) { return noko_xml_node_set_wrap(set, document); }
1144
+
1145
+ child = xmlNextElementSibling(child);
1146
+ while (NULL != child) {
1147
+ xmlXPathNodeSetAddUnique(set, child);
1148
+ child = xmlNextElementSibling(child);
1149
+ }
1150
+
1151
+ node_set = noko_xml_node_set_wrap(set, document);
1152
+
1153
+ return node_set;
1154
+ }
1155
+
1156
+ /*
1157
+ * :call-seq:
1158
+ * first_element_child() → Node
1159
+ *
1160
+ * [Returns] The first child Node that is an element.
1161
+ *
1162
+ * *Example:*
1163
+ *
1164
+ * Note that the "hello" child, which is a Text node, is skipped and the <tt><span></tt> element is
1165
+ * returned.
1166
+ *
1167
+ * div = Nokogiri::HTML5("<div>hello<span>world</span>").at_css("div")
1168
+ * div.first_element_child
1169
+ * # => #(Element:0x3c { name = "span", children = [ #(Text "world")] })
1170
+ */
1171
+ static VALUE
1172
+ rb_xml_node_first_element_child(VALUE self)
1173
+ {
1174
+ xmlNodePtr node, child;
1175
+ Noko_Node_Get_Struct(self, xmlNode, node);
1176
+
1177
+ child = xmlFirstElementChild(node);
1178
+ if (!child) { return Qnil; }
1179
+
1180
+ return noko_xml_node_wrap(Qnil, child);
1181
+ }
1182
+
1183
+ /*
1184
+ * :call-seq:
1185
+ * last_element_child() → Node
1186
+ *
1187
+ * [Returns] The last child Node that is an element.
1188
+ *
1189
+ * *Example:*
1190
+ *
1191
+ * Note that the "skip" child, which is a Text node, is skipped and the <tt><span>yes</span></tt>
1192
+ * element is returned.
1193
+ *
1194
+ * div = Nokogiri::HTML5("<div><span>no</span><span>yes</span>skip</div>").at_css("div")
1195
+ * div.last_element_child
1196
+ * # => #(Element:0x3c { name = "span", children = [ #(Text "yes")] })
1197
+ */
1198
+ static VALUE
1199
+ rb_xml_node_last_element_child(VALUE self)
1200
+ {
1201
+ xmlNodePtr node, child;
1202
+ Noko_Node_Get_Struct(self, xmlNode, node);
1203
+
1204
+ child = xmlLastElementChild(node);
1205
+ if (!child) { return Qnil; }
1206
+
1207
+ return noko_xml_node_wrap(Qnil, child);
1208
+ }
1209
+
1210
+ /*
1211
+ * call-seq:
1212
+ * key?(attribute)
1213
+ *
1214
+ * Returns true if +attribute+ is set
1215
+ */
1216
+ static VALUE
1217
+ key_eh(VALUE self, VALUE attribute)
1218
+ {
1219
+ xmlNodePtr node;
1220
+ Noko_Node_Get_Struct(self, xmlNode, node);
1221
+ if (xmlHasProp(node, (xmlChar *)StringValueCStr(attribute))) {
1222
+ return Qtrue;
1223
+ }
1224
+ return Qfalse;
1225
+ }
1226
+
1227
+ /*
1228
+ * call-seq:
1229
+ * namespaced_key?(attribute, namespace)
1230
+ *
1231
+ * Returns true if +attribute+ is set with +namespace+
1232
+ */
1233
+ static VALUE
1234
+ namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace)
1235
+ {
1236
+ xmlNodePtr node;
1237
+ Noko_Node_Get_Struct(self, xmlNode, node);
1238
+ if (xmlHasNsProp(node, (xmlChar *)StringValueCStr(attribute),
1239
+ NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace))) {
1240
+ return Qtrue;
1241
+ }
1242
+ return Qfalse;
1243
+ }
1244
+
1245
+ /*
1246
+ * call-seq:
1247
+ * []=(property, value)
1248
+ *
1249
+ * Set the +property+ to +value+
1250
+ */
1251
+ static VALUE
1252
+ set(VALUE self, VALUE property, VALUE value)
1253
+ {
1254
+ xmlNodePtr node, cur;
1255
+ xmlAttrPtr prop;
1256
+ Noko_Node_Get_Struct(self, xmlNode, node);
1257
+
1258
+ /* If a matching attribute node already exists, then xmlSetProp will destroy
1259
+ * the existing node's children. However, if Nokogiri has a node object
1260
+ * pointing to one of those children, we are left with a broken reference.
1261
+ *
1262
+ * We can avoid this by unlinking these nodes first.
1263
+ */
1264
+ if (node->type != XML_ELEMENT_NODE) {
1265
+ return (Qnil);
1266
+ }
1267
+ prop = xmlHasProp(node, (xmlChar *)StringValueCStr(property));
1268
+ if (prop && prop->children) {
1269
+ for (cur = prop->children; cur; cur = cur->next) {
1270
+ if (cur->_private) {
1271
+ noko_xml_document_pin_node(cur);
1272
+ xmlUnlinkNode(cur);
1273
+ }
1274
+ }
1275
+ }
1276
+
1277
+ xmlSetProp(node, (xmlChar *)StringValueCStr(property),
1278
+ (xmlChar *)StringValueCStr(value));
1279
+
1280
+ return value;
1281
+ }
1282
+
1283
+ /*
1284
+ * call-seq:
1285
+ * get(attribute)
1286
+ *
1287
+ * Get the value for +attribute+
1288
+ */
1289
+ static VALUE
1290
+ get(VALUE self, VALUE rattribute)
1291
+ {
1292
+ xmlNodePtr node;
1293
+ xmlChar *value = 0;
1294
+ VALUE rvalue;
1295
+ xmlChar *colon;
1296
+ xmlChar *attribute, *attr_name, *prefix;
1297
+ xmlNsPtr ns;
1298
+
1299
+ if (NIL_P(rattribute)) { return Qnil; }
1300
+
1301
+ Noko_Node_Get_Struct(self, xmlNode, node);
1302
+ attribute = xmlCharStrdup(StringValueCStr(rattribute));
1303
+
1304
+ colon = DISCARD_CONST_QUAL_XMLCHAR(xmlStrchr(attribute, (const xmlChar)':'));
1305
+ if (colon) {
1306
+ /* split the attribute string into separate prefix and name by
1307
+ * null-terminating the prefix at the colon */
1308
+ prefix = attribute;
1309
+ attr_name = colon + 1;
1310
+ (*colon) = 0;
1311
+
1312
+ ns = xmlSearchNs(node->doc, node, prefix);
1313
+ if (ns) {
1314
+ value = xmlGetNsProp(node, attr_name, ns->href);
1315
+ } else {
1316
+ value = xmlGetProp(node, (xmlChar *)StringValueCStr(rattribute));
1317
+ }
1318
+ } else {
1319
+ value = xmlGetNoNsProp(node, attribute);
1320
+ }
1321
+
1322
+ xmlFree((void *)attribute);
1323
+ if (!value) { return Qnil; }
1324
+
1325
+ rvalue = NOKOGIRI_STR_NEW2(value);
1326
+ xmlFree((void *)value);
1327
+
1328
+ return rvalue ;
1329
+ }
1330
+
1331
+ /*
1332
+ * call-seq:
1333
+ * set_namespace(namespace)
1334
+ *
1335
+ * Set the namespace to +namespace+
1336
+ */
1337
+ static VALUE
1338
+ set_namespace(VALUE self, VALUE namespace)
1339
+ {
1340
+ xmlNodePtr node;
1341
+ xmlNsPtr ns = NULL;
1342
+
1343
+ Noko_Node_Get_Struct(self, xmlNode, node);
1344
+
1345
+ if (!NIL_P(namespace)) {
1346
+ Noko_Namespace_Get_Struct(namespace, xmlNs, ns);
1347
+ }
1348
+
1349
+ xmlSetNs(node, ns);
1350
+
1351
+ return self;
1352
+ }
1353
+
1354
+ /*
1355
+ * :call-seq:
1356
+ * namespace() → Namespace
1357
+ *
1358
+ * [Returns] The Namespace of the element or attribute node, or +nil+ if there is no namespace.
1359
+ *
1360
+ * *Example:*
1361
+ *
1362
+ * doc = Nokogiri::XML(<<~EOF)
1363
+ * <root>
1364
+ * <first/>
1365
+ * <second xmlns="http://example.com/child"/>
1366
+ * <foo:third xmlns:foo="http://example.com/foo"/>
1367
+ * </root>
1368
+ * EOF
1369
+ * doc.at_xpath("//first").namespace
1370
+ * # => nil
1371
+ * doc.at_xpath("//xmlns:second", "xmlns" => "http://example.com/child").namespace
1372
+ * # => #(Namespace:0x3c { href = "http://example.com/child" })
1373
+ * doc.at_xpath("//foo:third", "foo" => "http://example.com/foo").namespace
1374
+ * # => #(Namespace:0x50 { prefix = "foo", href = "http://example.com/foo" })
1375
+ */
1376
+ static VALUE
1377
+ rb_xml_node_namespace(VALUE rb_node)
1378
+ {
1379
+ xmlNodePtr c_node ;
1380
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1381
+
1382
+ if (c_node->ns) {
1383
+ return noko_xml_namespace_wrap(c_node->ns, c_node->doc);
1384
+ }
1385
+
1386
+ return Qnil ;
1387
+ }
1388
+
1389
+ /*
1390
+ * :call-seq:
1391
+ * namespace_definitions() → Array<Nokogiri::XML::Namespace>
1392
+ *
1393
+ * [Returns]
1394
+ * Namespaces that are defined directly on this node, as an Array of Namespace objects. The array
1395
+ * will be empty if no namespaces are defined on this node.
1396
+ *
1397
+ * *Example:*
1398
+ *
1399
+ * doc = Nokogiri::XML(<<~EOF)
1400
+ * <root xmlns="http://example.com/root">
1401
+ * <first/>
1402
+ * <second xmlns="http://example.com/child" xmlns:unused="http://example.com/unused"/>
1403
+ * <foo:third xmlns:foo="http://example.com/foo"/>
1404
+ * </root>
1405
+ * EOF
1406
+ * doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_definitions
1407
+ * # => []
1408
+ * doc.at_xpath("//xmlns:second", "xmlns" => "http://example.com/child").namespace_definitions
1409
+ * # => [#(Namespace:0x3c { href = "http://example.com/child" }),
1410
+ * # #(Namespace:0x50 {
1411
+ * # prefix = "unused",
1412
+ * # href = "http://example.com/unused"
1413
+ * # })]
1414
+ * doc.at_xpath("//foo:third", "foo" => "http://example.com/foo").namespace_definitions
1415
+ * # => [#(Namespace:0x64 { prefix = "foo", href = "http://example.com/foo" })]
1416
+ */
1417
+ static VALUE
1418
+ namespace_definitions(VALUE rb_node)
1419
+ {
1420
+ /* this code in the mode of xmlHasProp() */
1421
+ xmlNodePtr c_node ;
1422
+ xmlNsPtr c_namespace;
1423
+ VALUE definitions = rb_ary_new();
1424
+
1425
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1426
+
1427
+ c_namespace = c_node->nsDef;
1428
+ if (!c_namespace) {
1429
+ return definitions;
1430
+ }
1431
+
1432
+ while (c_namespace != NULL) {
1433
+ rb_ary_push(definitions, noko_xml_namespace_wrap(c_namespace, c_node->doc));
1434
+ c_namespace = c_namespace->next;
1435
+ }
1436
+
1437
+ return definitions;
1438
+ }
1439
+
1440
+ /*
1441
+ * :call-seq:
1442
+ * namespace_scopes() → Array<Nokogiri::XML::Namespace>
1443
+ *
1444
+ * [Returns] Array of all the Namespaces on this node and its ancestors.
1445
+ *
1446
+ * See also #namespaces
1447
+ *
1448
+ * *Example:*
1449
+ *
1450
+ * doc = Nokogiri::XML(<<~EOF)
1451
+ * <root xmlns="http://example.com/root" xmlns:bar="http://example.com/bar">
1452
+ * <first/>
1453
+ * <second xmlns="http://example.com/child"/>
1454
+ * <third xmlns:foo="http://example.com/foo"/>
1455
+ * </root>
1456
+ * EOF
1457
+ * doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_scopes
1458
+ * # => [#(Namespace:0x3c { href = "http://example.com/root" }),
1459
+ * # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })]
1460
+ * doc.at_xpath("//child:second", "child" => "http://example.com/child").namespace_scopes
1461
+ * # => [#(Namespace:0x64 { href = "http://example.com/child" }),
1462
+ * # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })]
1463
+ * doc.at_xpath("//root:third", "root" => "http://example.com/root").namespace_scopes
1464
+ * # => [#(Namespace:0x78 { prefix = "foo", href = "http://example.com/foo" }),
1465
+ * # #(Namespace:0x3c { href = "http://example.com/root" }),
1466
+ * # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })]
1467
+ */
1468
+ static VALUE
1469
+ rb_xml_node_namespace_scopes(VALUE rb_node)
1470
+ {
1471
+ xmlNodePtr c_node ;
1472
+ xmlNsPtr *namespaces;
1473
+ VALUE scopes = rb_ary_new();
1474
+ int j;
1475
+
1476
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1477
+
1478
+ namespaces = xmlGetNsList(c_node->doc, c_node);
1479
+ if (!namespaces) {
1480
+ return scopes;
1481
+ }
1482
+
1483
+ for (j = 0 ; namespaces[j] != NULL ; ++j) {
1484
+ rb_ary_push(scopes, noko_xml_namespace_wrap(namespaces[j], c_node->doc));
1485
+ }
1486
+
1487
+ xmlFree(namespaces);
1488
+ return scopes;
1489
+ }
1490
+
1491
+ /*
1492
+ * call-seq:
1493
+ * node_type
1494
+ *
1495
+ * Get the type for this Node
1496
+ */
1497
+ static VALUE
1498
+ node_type(VALUE self)
1499
+ {
1500
+ xmlNodePtr node;
1501
+ Noko_Node_Get_Struct(self, xmlNode, node);
1502
+ return INT2NUM(node->type);
1503
+ }
1504
+
1505
+ /*
1506
+ * call-seq:
1507
+ * content=
1508
+ *
1509
+ * Set the content for this Node
1510
+ */
1511
+ static VALUE
1512
+ set_native_content(VALUE self, VALUE content)
1513
+ {
1514
+ xmlNodePtr node, child, next ;
1515
+ Noko_Node_Get_Struct(self, xmlNode, node);
1516
+
1517
+ child = node->children;
1518
+ while (NULL != child) {
1519
+ next = child->next ;
1520
+ xmlUnlinkNode(child) ;
1521
+ noko_xml_document_pin_node(child);
1522
+ child = next ;
1523
+ }
1524
+
1525
+ xmlNodeSetContent(node, (xmlChar *)StringValueCStr(content));
1526
+ return content;
1527
+ }
1528
+
1529
+ /*
1530
+ * call-seq:
1531
+ * lang=
1532
+ *
1533
+ * Set the language of a node, i.e. the values of the xml:lang attribute.
1534
+ */
1535
+ static VALUE
1536
+ set_lang(VALUE self_rb, VALUE lang_rb)
1537
+ {
1538
+ xmlNodePtr self ;
1539
+ xmlChar *lang ;
1540
+
1541
+ Noko_Node_Get_Struct(self_rb, xmlNode, self);
1542
+ lang = (xmlChar *)StringValueCStr(lang_rb);
1543
+
1544
+ xmlNodeSetLang(self, lang);
1545
+
1546
+ return Qnil ;
1547
+ }
1548
+
1549
+ /*
1550
+ * call-seq:
1551
+ * lang
1552
+ *
1553
+ * Searches the language of a node, i.e. the values of the xml:lang attribute or
1554
+ * the one carried by the nearest ancestor.
1555
+ */
1556
+ static VALUE
1557
+ get_lang(VALUE self_rb)
1558
+ {
1559
+ xmlNodePtr self ;
1560
+ xmlChar *lang ;
1561
+ VALUE lang_rb ;
1562
+
1563
+ Noko_Node_Get_Struct(self_rb, xmlNode, self);
1564
+
1565
+ lang = xmlNodeGetLang(self);
1566
+ if (lang) {
1567
+ lang_rb = NOKOGIRI_STR_NEW2(lang);
1568
+ xmlFree(lang);
1569
+ return lang_rb ;
1570
+ }
1571
+
1572
+ return Qnil ;
1573
+ }
1574
+
1575
+ /* :nodoc: */
1576
+ static VALUE
1577
+ add_child(VALUE self, VALUE new_child)
1578
+ {
1579
+ return reparent_node_with(self, new_child, xmlAddChild);
1580
+ }
1581
+
1582
+ /*
1583
+ * call-seq:
1584
+ * parent
1585
+ *
1586
+ * Get the parent Node for this Node
1587
+ */
1588
+ static VALUE
1589
+ get_parent(VALUE self)
1590
+ {
1591
+ xmlNodePtr node, parent;
1592
+ Noko_Node_Get_Struct(self, xmlNode, node);
1593
+
1594
+ parent = node->parent;
1595
+ if (!parent) { return Qnil; }
1596
+
1597
+ return noko_xml_node_wrap(Qnil, parent) ;
1598
+ }
1599
+
1600
+ /*
1601
+ * call-seq:
1602
+ * name=(new_name)
1603
+ *
1604
+ * Set the name for this Node
1605
+ */
1606
+ static VALUE
1607
+ set_name(VALUE self, VALUE new_name)
1608
+ {
1609
+ xmlNodePtr node;
1610
+ Noko_Node_Get_Struct(self, xmlNode, node);
1611
+ xmlNodeSetName(node, (xmlChar *)StringValueCStr(new_name));
1612
+ return new_name;
1613
+ }
1614
+
1615
+ /*
1616
+ * call-seq:
1617
+ * name
1618
+ *
1619
+ * Returns the name for this Node
1620
+ */
1621
+ static VALUE
1622
+ get_name(VALUE self)
1623
+ {
1624
+ xmlNodePtr node;
1625
+ Noko_Node_Get_Struct(self, xmlNode, node);
1626
+ if (node->name) {
1627
+ return NOKOGIRI_STR_NEW2(node->name);
1628
+ }
1629
+ return Qnil;
1630
+ }
1631
+
1632
+ /*
1633
+ * call-seq:
1634
+ * path
1635
+ *
1636
+ * Returns the path associated with this Node
1637
+ */
1638
+ static VALUE
1639
+ rb_xml_node_path(VALUE rb_node)
1640
+ {
1641
+ xmlNodePtr c_node;
1642
+ xmlChar *c_path ;
1643
+ VALUE rval;
1644
+
1645
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1646
+
1647
+ c_path = xmlGetNodePath(c_node);
1648
+ if (c_path == NULL) {
1649
+ // see https://github.com/sparklemotion/nokogiri/issues/2250
1650
+ // this behavior is clearly undesirable, but is what libxml <= 2.9.10 returned, and so we
1651
+ // do this for now to preserve the behavior across libxml2 versions.
1652
+ rval = NOKOGIRI_STR_NEW2("?");
1653
+ } else {
1654
+ rval = NOKOGIRI_STR_NEW2(c_path);
1655
+ xmlFree(c_path);
1656
+ }
1657
+
1658
+ return rval ;
1659
+ }
1660
+
1661
+ /* :nodoc: */
1662
+ static VALUE
1663
+ add_next_sibling(VALUE self, VALUE new_sibling)
1664
+ {
1665
+ return reparent_node_with(self, new_sibling, xmlAddNextSibling) ;
1666
+ }
1667
+
1668
+ /* :nodoc: */
1669
+ static VALUE
1670
+ add_previous_sibling(VALUE self, VALUE new_sibling)
1671
+ {
1672
+ return reparent_node_with(self, new_sibling, xmlAddPrevSibling) ;
1673
+ }
1674
+
1675
+ /*
1676
+ * call-seq:
1677
+ * native_write_to(io, encoding, indent_string, options)
1678
+ *
1679
+ * Write this Node to +io+ with +encoding+, +indent_string+, and +options+
1680
+ */
1681
+ static VALUE
1682
+ native_write_to(
1683
+ VALUE self,
1684
+ VALUE io,
1685
+ VALUE encoding,
1686
+ VALUE indent_string,
1687
+ VALUE options
1688
+ )
1689
+ {
1690
+ xmlNodePtr node;
1691
+ const char *before_indent;
1692
+ xmlSaveCtxtPtr savectx;
1693
+
1694
+ Noko_Node_Get_Struct(self, xmlNode, node);
1695
+
1696
+ xmlIndentTreeOutput = 1;
1697
+
1698
+ before_indent = xmlTreeIndentString;
1699
+
1700
+ xmlTreeIndentString = StringValueCStr(indent_string);
1701
+
1702
+ savectx = xmlSaveToIO(
1703
+ (xmlOutputWriteCallback)noko_io_write,
1704
+ (xmlOutputCloseCallback)noko_io_close,
1705
+ (void *)io,
1706
+ RTEST(encoding) ? StringValueCStr(encoding) : NULL,
1707
+ (int)NUM2INT(options)
1708
+ );
1709
+
1710
+ xmlSaveTree(savectx, node);
1711
+ xmlSaveClose(savectx);
1712
+
1713
+ xmlTreeIndentString = before_indent;
1714
+ return io;
1715
+ }
1716
+
1717
+
1718
+ static inline void
1719
+ output_partial_string(VALUE out, char const *str, size_t length)
1720
+ {
1721
+ if (length) {
1722
+ rb_enc_str_buf_cat(out, str, (long)length, rb_utf8_encoding());
1723
+ }
1724
+ }
1725
+
1726
+ static inline void
1727
+ output_char(VALUE out, char ch)
1728
+ {
1729
+ output_partial_string(out, &ch, 1);
1730
+ }
1731
+
1732
+ static inline void
1733
+ output_string(VALUE out, char const *str)
1734
+ {
1735
+ output_partial_string(out, str, strlen(str));
1736
+ }
1737
+
1738
+ static inline void
1739
+ output_tagname(VALUE out, xmlNodePtr elem)
1740
+ {
1741
+ // Elements in the HTML, MathML, and SVG namespaces do not use a namespace
1742
+ // prefix in the HTML syntax.
1743
+ char const *name = (char const *)elem->name;
1744
+ xmlNsPtr ns = elem->ns;
1745
+ if (ns && ns->href && ns->prefix
1746
+ && strcmp((char const *)ns->href, "http://www.w3.org/1999/xhtml")
1747
+ && strcmp((char const *)ns->href, "http://www.w3.org/1998/Math/MathML")
1748
+ && strcmp((char const *)ns->href, "http://www.w3.org/2000/svg")) {
1749
+ output_string(out, (char const *)elem->ns->prefix);
1750
+ output_char(out, ':');
1751
+ char const *colon = strchr(name, ':');
1752
+ if (colon) {
1753
+ name = colon + 1;
1754
+ }
1755
+ }
1756
+ output_string(out, name);
1757
+ }
1758
+
1759
+ static inline void
1760
+ output_attr_name(VALUE out, xmlAttrPtr attr)
1761
+ {
1762
+ xmlNsPtr ns = attr->ns;
1763
+ char const *name = (char const *)attr->name;
1764
+ if (ns && ns->href) {
1765
+ char const *uri = (char const *)ns->href;
1766
+ char const *localname = strchr(name, ':');
1767
+ if (localname) {
1768
+ ++localname;
1769
+ } else {
1770
+ localname = name;
1771
+ }
1772
+
1773
+ if (!strcmp(uri, "http://www.w3.org/XML/1998/namespace")) {
1774
+ output_string(out, "xml:");
1775
+ name = localname;
1776
+ } else if (!strcmp(uri, "http://www.w3.org/2000/xmlns/")) {
1777
+ // xmlns:xmlns -> xmlns
1778
+ // xmlns:foo -> xmlns:foo
1779
+ if (strcmp(localname, "xmlns")) {
1780
+ output_string(out, "xmlns:");
1781
+ }
1782
+ name = localname;
1783
+ } else if (!strcmp(uri, "http://www.w3.org/1999/xlink")) {
1784
+ output_string(out, "xlink:");
1785
+ name = localname;
1786
+ } else if (ns->prefix) {
1787
+ output_string(out, (char const *)ns->prefix);
1788
+ output_char(out, ':');
1789
+ name = localname;
1790
+ }
1791
+ }
1792
+ output_string(out, name);
1793
+ }
1794
+
1795
+ static void
1796
+ output_escaped_string(VALUE out, xmlChar const *start, bool attr)
1797
+ {
1798
+ xmlChar const *next = start;
1799
+ int ch;
1800
+
1801
+ while ((ch = *next) != 0) {
1802
+ char const *replacement = NULL;
1803
+ size_t replaced_bytes = 1;
1804
+ if (ch == '&') {
1805
+ replacement = "&amp;";
1806
+ } else if (ch == 0xC2 && next[1] == 0xA0) {
1807
+ // U+00A0 NO-BREAK SPACE has the UTF-8 encoding C2 A0.
1808
+ replacement = "&nbsp;";
1809
+ replaced_bytes = 2;
1810
+ } else if (attr && ch == '"') {
1811
+ replacement = "&quot;";
1812
+ } else if (!attr && ch == '<') {
1813
+ replacement = "&lt;";
1814
+ } else if (!attr && ch == '>') {
1815
+ replacement = "&gt;";
1816
+ } else {
1817
+ ++next;
1818
+ continue;
1819
+ }
1820
+ output_partial_string(out, (char const *)start, next - start);
1821
+ output_string(out, replacement);
1822
+ next += replaced_bytes;
1823
+ start = next;
1824
+ }
1825
+ output_partial_string(out, (char const *)start, next - start);
1826
+ }
1827
+
1828
+ static bool
1829
+ should_prepend_newline(xmlNodePtr node)
1830
+ {
1831
+ char const *name = (char const *)node->name;
1832
+ xmlNodePtr child = node->children;
1833
+
1834
+ if (!name || !child || (strcmp(name, "pre") && strcmp(name, "textarea") && strcmp(name, "listing"))) {
1835
+ return false;
1836
+ }
1837
+
1838
+ return child->type == XML_TEXT_NODE && child->content && child->content[0] == '\n';
1839
+ }
1840
+
1841
+ static VALUE
1842
+ rb_prepend_newline(VALUE self)
1843
+ {
1844
+ xmlNodePtr node;
1845
+ Noko_Node_Get_Struct(self, xmlNode, node);
1846
+ return should_prepend_newline(node) ? Qtrue : Qfalse;
1847
+ }
1848
+
1849
+ static bool
1850
+ is_one_of(xmlNodePtr node, char const *const *tagnames, size_t num_tagnames)
1851
+ {
1852
+ char const *name = (char const *)node->name;
1853
+ if (name == NULL) { // fragments don't have a name
1854
+ return false;
1855
+ }
1856
+ for (size_t idx = 0; idx < num_tagnames; ++idx) {
1857
+ if (!strcmp(name, tagnames[idx])) {
1858
+ return true;
1859
+ }
1860
+ }
1861
+ return false;
1862
+
1863
+ }
1864
+
1865
+ static void
1866
+ output_node(
1867
+ VALUE out,
1868
+ xmlNodePtr node,
1869
+ bool preserve_newline
1870
+ )
1871
+ {
1872
+ static char const *const VOID_ELEMENTS[] = {
1873
+ "area", "base", "basefont", "bgsound", "br", "col", "embed", "frame", "hr",
1874
+ "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr",
1875
+ };
1876
+
1877
+ static char const *const UNESCAPED_TEXT_ELEMENTS[] = {
1878
+ "style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript",
1879
+ };
1880
+
1881
+ switch (node->type) {
1882
+ case XML_ELEMENT_NODE:
1883
+ // Serialize the start tag.
1884
+ output_char(out, '<');
1885
+ output_tagname(out, node);
1886
+
1887
+ // Add attributes.
1888
+ for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
1889
+ output_char(out, ' ');
1890
+ output_attr_name(out, attr);
1891
+ if (attr->children) {
1892
+ output_string(out, "=\"");
1893
+ xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
1894
+ output_escaped_string(out, value, true);
1895
+ xmlFree(value);
1896
+ output_char(out, '"');
1897
+ } else {
1898
+ // Output name=""
1899
+ output_string(out, "=\"\"");
1900
+ }
1901
+ }
1902
+ output_char(out, '>');
1903
+
1904
+ // Add children and end tag if element is not void.
1905
+ if (!is_one_of(node, VOID_ELEMENTS, sizeof VOID_ELEMENTS / sizeof VOID_ELEMENTS[0])) {
1906
+ if (preserve_newline && should_prepend_newline(node)) {
1907
+ output_char(out, '\n');
1908
+ }
1909
+ for (xmlNodePtr child = node->children; child; child = child->next) {
1910
+ output_node(out, child, preserve_newline);
1911
+ }
1912
+ output_string(out, "</");
1913
+ output_tagname(out, node);
1914
+ output_char(out, '>');
1915
+ }
1916
+ break;
1917
+
1918
+ case XML_TEXT_NODE:
1919
+ if (node->parent
1920
+ && is_one_of(node->parent, UNESCAPED_TEXT_ELEMENTS,
1921
+ sizeof UNESCAPED_TEXT_ELEMENTS / sizeof UNESCAPED_TEXT_ELEMENTS[0])) {
1922
+ output_string(out, (char const *)node->content);
1923
+ } else {
1924
+ output_escaped_string(out, node->content, false);
1925
+ }
1926
+ break;
1927
+
1928
+ case XML_CDATA_SECTION_NODE:
1929
+ output_string(out, "<![CDATA[");
1930
+ output_string(out, (char const *)node->content);
1931
+ output_string(out, "]]>");
1932
+ break;
1933
+
1934
+ case XML_COMMENT_NODE:
1935
+ output_string(out, "<!--");
1936
+ output_string(out, (char const *)node->content);
1937
+ output_string(out, "-->");
1938
+ break;
1939
+
1940
+ case XML_PI_NODE:
1941
+ output_string(out, "<?");
1942
+ output_string(out, (char const *)node->content);
1943
+ output_char(out, '>');
1944
+ break;
1945
+
1946
+ case XML_DOCUMENT_TYPE_NODE:
1947
+ case XML_DTD_NODE:
1948
+ output_string(out, "<!DOCTYPE ");
1949
+ output_string(out, (char const *)node->name);
1950
+ output_string(out, ">");
1951
+ break;
1952
+
1953
+ case XML_DOCUMENT_NODE:
1954
+ case XML_DOCUMENT_FRAG_NODE:
1955
+ case XML_HTML_DOCUMENT_NODE:
1956
+ for (xmlNodePtr child = node->children; child; child = child->next) {
1957
+ output_node(out, child, preserve_newline);
1958
+ }
1959
+ break;
1960
+
1961
+ default:
1962
+ rb_raise(rb_eRuntimeError, "Unsupported document node (%d); this is a bug in Nokogiri", node->type);
1963
+ break;
1964
+ }
1965
+ }
1966
+
1967
+ static VALUE
1968
+ html_standard_serialize(
1969
+ VALUE self,
1970
+ VALUE preserve_newline
1971
+ )
1972
+ {
1973
+ xmlNodePtr node;
1974
+ Noko_Node_Get_Struct(self, xmlNode, node);
1975
+ VALUE output = rb_str_buf_new(4096);
1976
+ output_node(output, node, RTEST(preserve_newline));
1977
+ return output;
1978
+ }
1979
+
1980
+ /*
1981
+ * :call-seq:
1982
+ * line() → Integer
1983
+ *
1984
+ * [Returns] The line number of this Node.
1985
+ *
1986
+ * ---
1987
+ *
1988
+ * <b> ⚠ The CRuby and JRuby implementations differ in important ways! </b>
1989
+ *
1990
+ * Semantic differences:
1991
+ * - The CRuby method reflects the node's line number <i>in the parsed string</i>
1992
+ * - The JRuby method reflects the node's line number <i>in the final DOM structure</i> after
1993
+ * corrections have been applied
1994
+ *
1995
+ * Performance differences:
1996
+ * - The CRuby method is {O(1)}[https://en.wikipedia.org/wiki/Time_complexity#Constant_time]
1997
+ * (constant time)
1998
+ * - The JRuby method is {O(n)}[https://en.wikipedia.org/wiki/Time_complexity#Linear_time] (linear
1999
+ * time, where n is the number of nodes before/above the element in the DOM)
2000
+ *
2001
+ * If you'd like to help improve the JRuby implementation, please review these issues and reach out
2002
+ * to the maintainers:
2003
+ * - https://github.com/sparklemotion/nokogiri/issues/1223
2004
+ * - https://github.com/sparklemotion/nokogiri/pull/2177
2005
+ * - https://github.com/sparklemotion/nokogiri/issues/2380
2006
+ */
2007
+ static VALUE
2008
+ rb_xml_node_line(VALUE rb_node)
2009
+ {
2010
+ xmlNodePtr c_node;
2011
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
2012
+
2013
+ return LONG2NUM(xmlGetLineNo(c_node));
2014
+ }
2015
+
2016
+ /*
2017
+ * call-seq:
2018
+ * line=(num)
2019
+ *
2020
+ * Sets the line for this Node. num must be less than 65535.
2021
+ */
2022
+ static VALUE
2023
+ rb_xml_node_line_set(VALUE rb_node, VALUE rb_line_number)
2024
+ {
2025
+ xmlNodePtr c_node;
2026
+ int line_number = NUM2INT(rb_line_number);
2027
+
2028
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
2029
+
2030
+ // libxml2 optionally uses xmlNode.psvi to store longer line numbers, but only for text nodes.
2031
+ // search for "psvi" in SAX2.c and tree.c to learn more.
2032
+ if (line_number < 65535) {
2033
+ c_node->line = (short) line_number;
2034
+ } else {
2035
+ c_node->line = 65535;
2036
+ if (c_node->type == XML_TEXT_NODE) {
2037
+ c_node->psvi = (void *)(ptrdiff_t) line_number;
2038
+ }
2039
+ }
2040
+
2041
+ return rb_line_number;
2042
+ }
2043
+
2044
+ /* :nodoc: documented in lib/nokogiri/xml/node.rb */
2045
+ static VALUE
2046
+ rb_xml_node_new(int argc, VALUE *argv, VALUE klass)
2047
+ {
2048
+ xmlNodePtr c_document_node;
2049
+ xmlNodePtr c_node;
2050
+ VALUE rb_name;
2051
+ VALUE rb_document_node;
2052
+ VALUE rest;
2053
+ VALUE rb_node;
2054
+
2055
+ rb_scan_args(argc, argv, "2*", &rb_name, &rb_document_node, &rest);
2056
+
2057
+ if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlNode)) {
2058
+ rb_raise(rb_eArgError, "document must be a Nokogiri::XML::Node");
2059
+ }
2060
+ if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlDocument)) {
2061
+ // TODO: deprecate allowing Node
2062
+ NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in a future release of Nokogiri.");
2063
+ }
2064
+ Noko_Node_Get_Struct(rb_document_node, xmlNode, c_document_node);
2065
+
2066
+ c_node = xmlNewNode(NULL, (xmlChar *)StringValueCStr(rb_name));
2067
+ c_node->doc = c_document_node->doc;
2068
+ noko_xml_document_pin_node(c_node);
2069
+
2070
+ rb_node = noko_xml_node_wrap(
2071
+ klass == cNokogiriXmlNode ? (VALUE)NULL : klass,
2072
+ c_node
2073
+ );
2074
+ rb_obj_call_init(rb_node, argc, argv);
2075
+
2076
+ if (rb_block_given_p()) { rb_yield(rb_node); }
2077
+
2078
+ return rb_node;
2079
+ }
2080
+
2081
+ /*
2082
+ * call-seq:
2083
+ * dump_html
2084
+ *
2085
+ * Returns the Node as html.
2086
+ */
2087
+ static VALUE
2088
+ dump_html(VALUE self)
2089
+ {
2090
+ xmlBufferPtr buf ;
2091
+ xmlNodePtr node ;
2092
+ VALUE html;
2093
+
2094
+ Noko_Node_Get_Struct(self, xmlNode, node);
2095
+
2096
+ buf = xmlBufferCreate() ;
2097
+ htmlNodeDump(buf, node->doc, node);
2098
+ html = NOKOGIRI_STR_NEW2(buf->content);
2099
+ xmlBufferFree(buf);
2100
+ return html ;
2101
+ }
2102
+
2103
+ /*
2104
+ * call-seq:
2105
+ * compare(other)
2106
+ *
2107
+ * Compare this Node to +other+ with respect to their Document
2108
+ */
2109
+ static VALUE
2110
+ compare(VALUE self, VALUE _other)
2111
+ {
2112
+ xmlNodePtr node, other;
2113
+ Noko_Node_Get_Struct(self, xmlNode, node);
2114
+ Noko_Node_Get_Struct(_other, xmlNode, other);
2115
+
2116
+ return INT2NUM(xmlXPathCmpNodes(other, node));
2117
+ }
2118
+
2119
+
2120
+ /*
2121
+ * call-seq:
2122
+ * process_xincludes(options)
2123
+ *
2124
+ * Loads and substitutes all xinclude elements below the node. The
2125
+ * parser context will be initialized with +options+.
2126
+ */
2127
+ static VALUE
2128
+ process_xincludes(VALUE self, VALUE options)
2129
+ {
2130
+ int rcode ;
2131
+ xmlNodePtr node;
2132
+ VALUE error_list = rb_ary_new();
2133
+
2134
+ Noko_Node_Get_Struct(self, xmlNode, node);
2135
+
2136
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
2137
+ rcode = xmlXIncludeProcessTreeFlags(node, (int)NUM2INT(options));
2138
+ xmlSetStructuredErrorFunc(NULL, NULL);
2139
+
2140
+ if (rcode < 0) {
2141
+ xmlErrorPtr error;
2142
+
2143
+ error = xmlGetLastError();
2144
+ if (error) {
2145
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
2146
+ } else {
2147
+ rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
2148
+ }
2149
+ }
2150
+
2151
+ return self;
2152
+ }
2153
+
2154
+
2155
+ /* TODO: DOCUMENT ME */
2156
+ static VALUE
2157
+ in_context(VALUE self, VALUE _str, VALUE _options)
2158
+ {
2159
+ xmlNodePtr node, list = 0, tmp, child_iter, node_children, doc_children;
2160
+ xmlNodeSetPtr set;
2161
+ xmlParserErrors error;
2162
+ VALUE doc, err;
2163
+ int doc_is_empty;
2164
+
2165
+ Noko_Node_Get_Struct(self, xmlNode, node);
2166
+
2167
+ doc = DOC_RUBY_OBJECT(node->doc);
2168
+ err = rb_iv_get(doc, "@errors");
2169
+ doc_is_empty = (node->doc->children == NULL) ? 1 : 0;
2170
+ node_children = node->children;
2171
+ doc_children = node->doc->children;
2172
+
2173
+ xmlSetStructuredErrorFunc((void *)err, Nokogiri_error_array_pusher);
2174
+
2175
+ /* Twiddle global variable because of a bug in libxml2.
2176
+ * http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
2177
+ */
2178
+ #ifndef HTML_PARSE_NOIMPLIED
2179
+ htmlHandleOmittedElem(0);
2180
+ #endif
2181
+
2182
+ /* This function adds a fake node to the child of +node+. If the parser
2183
+ * does not exit cleanly with XML_ERR_OK, the list is freed. This can
2184
+ * leave the child pointers in a bad state if they were originally empty.
2185
+ *
2186
+ * http://git.gnome.org/browse/libxml2/tree/parser.c#n13177
2187
+ * */
2188
+ error = xmlParseInNodeContext(node, StringValuePtr(_str),
2189
+ (int)RSTRING_LEN(_str),
2190
+ (int)NUM2INT(_options), &list);
2191
+
2192
+ /* xmlParseInNodeContext should not mutate the original document or node,
2193
+ * so reassigning these pointers should be OK. The reason we're reassigning
2194
+ * is because if there were errors, it's possible for the child pointers
2195
+ * to be manipulated. */
2196
+ if (error != XML_ERR_OK) {
2197
+ node->doc->children = doc_children;
2198
+ node->children = node_children;
2199
+ }
2200
+
2201
+ /* make sure parent/child pointers are coherent so an unlink will work
2202
+ * properly (#331)
2203
+ */
2204
+ child_iter = node->doc->children ;
2205
+ while (child_iter) {
2206
+ child_iter->parent = (xmlNodePtr)node->doc;
2207
+ child_iter = child_iter->next;
2208
+ }
2209
+
2210
+ #ifndef HTML_PARSE_NOIMPLIED
2211
+ htmlHandleOmittedElem(1);
2212
+ #endif
2213
+
2214
+ xmlSetStructuredErrorFunc(NULL, NULL);
2215
+
2216
+ /*
2217
+ * Workaround for a libxml2 bug where a parsing error may leave a broken
2218
+ * node reference in node->doc->children.
2219
+ *
2220
+ * https://bugzilla.gnome.org/show_bug.cgi?id=668155
2221
+ *
2222
+ * This workaround is limited to when a parse error occurs, the document
2223
+ * went from having no children to having children, and the context node is
2224
+ * part of a document fragment.
2225
+ *
2226
+ * TODO: This was fixed in libxml 2.8.0 by 71a243d
2227
+ */
2228
+ if (error != XML_ERR_OK && doc_is_empty && node->doc->children != NULL) {
2229
+ child_iter = node;
2230
+ while (child_iter->parent) {
2231
+ child_iter = child_iter->parent;
2232
+ }
2233
+
2234
+ if (child_iter->type == XML_DOCUMENT_FRAG_NODE) {
2235
+ node->doc->children = NULL;
2236
+ }
2237
+ }
2238
+
2239
+ /* FIXME: This probably needs to handle more constants... */
2240
+ switch (error) {
2241
+ case XML_ERR_INTERNAL_ERROR:
2242
+ case XML_ERR_NO_MEMORY:
2243
+ rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error);
2244
+ break;
2245
+ default:
2246
+ break;
2247
+ }
2248
+
2249
+ set = xmlXPathNodeSetCreate(NULL);
2250
+
2251
+ while (list) {
2252
+ tmp = list->next;
2253
+ list->next = NULL;
2254
+ xmlXPathNodeSetAddUnique(set, list);
2255
+ noko_xml_document_pin_node(list);
2256
+ list = tmp;
2257
+ }
2258
+
2259
+ return noko_xml_node_set_wrap(set, doc);
2260
+ }
2261
+
2262
+ VALUE
2263
+ noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
2264
+ {
2265
+ VALUE rb_document, rb_node_cache, rb_node;
2266
+ nokogiriTuplePtr node_has_a_document;
2267
+ xmlDocPtr c_doc;
2268
+
2269
+ assert(c_node);
2270
+
2271
+ if (c_node->type == XML_DOCUMENT_NODE || c_node->type == XML_HTML_DOCUMENT_NODE) {
2272
+ return DOC_RUBY_OBJECT(c_node->doc);
2273
+ }
2274
+
2275
+ c_doc = c_node->doc;
2276
+
2277
+ // Nodes yielded from XML::Reader don't have a fully-realized Document
2278
+ node_has_a_document = DOC_RUBY_OBJECT_TEST(c_doc);
2279
+
2280
+ if (c_node->_private && node_has_a_document) {
2281
+ return (VALUE)c_node->_private;
2282
+ }
2283
+
2284
+ if (!RTEST(rb_class)) {
2285
+ switch (c_node->type) {
2286
+ case XML_ELEMENT_NODE:
2287
+ rb_class = cNokogiriXmlElement;
2288
+ break;
2289
+ case XML_TEXT_NODE:
2290
+ rb_class = cNokogiriXmlText;
2291
+ break;
2292
+ case XML_ATTRIBUTE_NODE:
2293
+ rb_class = cNokogiriXmlAttr;
2294
+ break;
2295
+ case XML_ENTITY_REF_NODE:
2296
+ rb_class = cNokogiriXmlEntityReference;
2297
+ break;
2298
+ case XML_COMMENT_NODE:
2299
+ rb_class = cNokogiriXmlComment;
2300
+ break;
2301
+ case XML_DOCUMENT_FRAG_NODE:
2302
+ rb_class = cNokogiriXmlDocumentFragment;
2303
+ break;
2304
+ case XML_PI_NODE:
2305
+ rb_class = cNokogiriXmlProcessingInstruction;
2306
+ break;
2307
+ case XML_ENTITY_DECL:
2308
+ rb_class = cNokogiriXmlEntityDecl;
2309
+ break;
2310
+ case XML_CDATA_SECTION_NODE:
2311
+ rb_class = cNokogiriXmlCData;
2312
+ break;
2313
+ case XML_DTD_NODE:
2314
+ rb_class = cNokogiriXmlDtd;
2315
+ break;
2316
+ case XML_ATTRIBUTE_DECL:
2317
+ rb_class = cNokogiriXmlAttributeDecl;
2318
+ break;
2319
+ case XML_ELEMENT_DECL:
2320
+ rb_class = cNokogiriXmlElementDecl;
2321
+ break;
2322
+ default:
2323
+ rb_class = cNokogiriXmlNode;
2324
+ }
2325
+ }
2326
+
2327
+ rb_node = TypedData_Wrap_Struct(rb_class, &nokogiri_node_type, c_node) ;
2328
+ c_node->_private = (void *)rb_node;
2329
+
2330
+ if (node_has_a_document) {
2331
+ rb_document = DOC_RUBY_OBJECT(c_doc);
2332
+ rb_node_cache = DOC_NODE_CACHE(c_doc);
2333
+ rb_ary_push(rb_node_cache, rb_node);
2334
+ rb_funcall(rb_document, id_decorate, 1, rb_node);
2335
+ }
2336
+
2337
+ return rb_node ;
2338
+ }
2339
+
2340
+
2341
+ /*
2342
+ * return Array<Nokogiri::XML::Attr> containing the node's attributes
2343
+ */
2344
+ VALUE
2345
+ noko_xml_node_attrs(xmlNodePtr c_node)
2346
+ {
2347
+ VALUE rb_properties = rb_ary_new();
2348
+ xmlAttrPtr c_property;
2349
+
2350
+ c_property = c_node->properties ;
2351
+ while (c_property != NULL) {
2352
+ rb_ary_push(rb_properties, noko_xml_node_wrap(Qnil, (xmlNodePtr)c_property));
2353
+ c_property = c_property->next ;
2354
+ }
2355
+
2356
+ return rb_properties;
2357
+ }
2358
+
2359
+ void
2360
+ noko_init_xml_node()
2361
+ {
2362
+ cNokogiriXmlNode = rb_define_class_under(mNokogiriXml, "Node", rb_cObject);
2363
+
2364
+ rb_undef_alloc_func(cNokogiriXmlNode);
2365
+
2366
+ rb_define_singleton_method(cNokogiriXmlNode, "new", rb_xml_node_new, -1);
2367
+
2368
+ rb_define_method(cNokogiriXmlNode, "add_namespace_definition", rb_xml_node_add_namespace_definition, 2);
2369
+ rb_define_method(cNokogiriXmlNode, "attribute", rb_xml_node_attribute, 1);
2370
+ rb_define_method(cNokogiriXmlNode, "attribute_nodes", rb_xml_node_attribute_nodes, 0);
2371
+ rb_define_method(cNokogiriXmlNode, "attribute_with_ns", rb_xml_node_attribute_with_ns, 2);
2372
+ rb_define_method(cNokogiriXmlNode, "blank?", rb_xml_node_blank_eh, 0);
2373
+ rb_define_method(cNokogiriXmlNode, "child", rb_xml_node_child, 0);
2374
+ rb_define_method(cNokogiriXmlNode, "children", rb_xml_node_children, 0);
2375
+ rb_define_method(cNokogiriXmlNode, "content", rb_xml_node_content, 0);
2376
+ rb_define_method(cNokogiriXmlNode, "create_external_subset", create_external_subset, 3);
2377
+ rb_define_method(cNokogiriXmlNode, "create_internal_subset", create_internal_subset, 3);
2378
+ rb_define_method(cNokogiriXmlNode, "document", rb_xml_node_document, 0);
2379
+ rb_define_method(cNokogiriXmlNode, "dup", duplicate_node, -1);
2380
+ rb_define_method(cNokogiriXmlNode, "element_children", rb_xml_node_element_children, 0);
2381
+ rb_define_method(cNokogiriXmlNode, "encode_special_chars", encode_special_chars, 1);
2382
+ rb_define_method(cNokogiriXmlNode, "external_subset", external_subset, 0);
2383
+ rb_define_method(cNokogiriXmlNode, "first_element_child", rb_xml_node_first_element_child, 0);
2384
+ rb_define_method(cNokogiriXmlNode, "internal_subset", internal_subset, 0);
2385
+ rb_define_method(cNokogiriXmlNode, "key?", key_eh, 1);
2386
+ rb_define_method(cNokogiriXmlNode, "lang", get_lang, 0);
2387
+ rb_define_method(cNokogiriXmlNode, "lang=", set_lang, 1);
2388
+ rb_define_method(cNokogiriXmlNode, "last_element_child", rb_xml_node_last_element_child, 0);
2389
+ rb_define_method(cNokogiriXmlNode, "line", rb_xml_node_line, 0);
2390
+ rb_define_method(cNokogiriXmlNode, "line=", rb_xml_node_line_set, 1);
2391
+ rb_define_method(cNokogiriXmlNode, "namespace", rb_xml_node_namespace, 0);
2392
+ rb_define_method(cNokogiriXmlNode, "namespace_definitions", namespace_definitions, 0);
2393
+ rb_define_method(cNokogiriXmlNode, "namespace_scopes", rb_xml_node_namespace_scopes, 0);
2394
+ rb_define_method(cNokogiriXmlNode, "namespaced_key?", namespaced_key_eh, 2);
2395
+ rb_define_method(cNokogiriXmlNode, "native_content=", set_native_content, 1);
2396
+ rb_define_method(cNokogiriXmlNode, "next_element", next_element, 0);
2397
+ rb_define_method(cNokogiriXmlNode, "next_sibling", next_sibling, 0);
2398
+ rb_define_method(cNokogiriXmlNode, "node_name", get_name, 0);
2399
+ rb_define_method(cNokogiriXmlNode, "node_name=", set_name, 1);
2400
+ rb_define_method(cNokogiriXmlNode, "node_type", node_type, 0);
2401
+ rb_define_method(cNokogiriXmlNode, "parent", get_parent, 0);
2402
+ rb_define_method(cNokogiriXmlNode, "path", rb_xml_node_path, 0);
2403
+ rb_define_method(cNokogiriXmlNode, "pointer_id", rb_xml_node_pointer_id, 0);
2404
+ rb_define_method(cNokogiriXmlNode, "previous_element", previous_element, 0);
2405
+ rb_define_method(cNokogiriXmlNode, "previous_sibling", previous_sibling, 0);
2406
+ rb_define_method(cNokogiriXmlNode, "unlink", unlink_node, 0);
2407
+
2408
+ rb_define_private_method(cNokogiriXmlNode, "add_child_node", add_child, 1);
2409
+ rb_define_private_method(cNokogiriXmlNode, "add_next_sibling_node", add_next_sibling, 1);
2410
+ rb_define_private_method(cNokogiriXmlNode, "add_previous_sibling_node", add_previous_sibling, 1);
2411
+ rb_define_private_method(cNokogiriXmlNode, "compare", compare, 1);
2412
+ rb_define_private_method(cNokogiriXmlNode, "dump_html", dump_html, 0);
2413
+ rb_define_private_method(cNokogiriXmlNode, "get", get, 1);
2414
+ rb_define_private_method(cNokogiriXmlNode, "in_context", in_context, 2);
2415
+ rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4);
2416
+ rb_define_private_method(cNokogiriXmlNode, "prepend_newline?", rb_prepend_newline, 0);
2417
+ rb_define_private_method(cNokogiriXmlNode, "html_standard_serialize", html_standard_serialize, 1);
2418
+ rb_define_private_method(cNokogiriXmlNode, "process_xincludes", process_xincludes, 1);
2419
+ rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1);
2420
+ rb_define_private_method(cNokogiriXmlNode, "set", set, 2);
2421
+ rb_define_private_method(cNokogiriXmlNode, "set_namespace", set_namespace, 1);
2422
+
2423
+ id_decorate = rb_intern("decorate");
2424
+ id_decorate_bang = rb_intern("decorate!");
2425
+ }