nokogiri 1.18.0.rc1-arm-linux-musl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (203) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +293 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +42 -0
  8. data/ext/nokogiri/depend +38 -0
  9. data/ext/nokogiri/extconf.rb +1173 -0
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/include/libexslt/exslt.h +108 -0
  18. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  19. data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
  20. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
  21. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  22. data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
  23. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
  24. data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
  25. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  26. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  27. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  28. data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
  29. data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
  30. data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
  31. data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
  32. data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
  33. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  34. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
  35. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
  36. data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
  37. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
  38. data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
  39. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
  40. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
  41. data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
  42. data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
  43. data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
  44. data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
  45. data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
  46. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
  47. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  48. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
  65. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
  66. data/ext/nokogiri/include/libxslt/attributes.h +39 -0
  67. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  68. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  69. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  70. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  71. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  72. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  73. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  74. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  75. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  76. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  77. data/ext/nokogiri/include/libxslt/security.h +104 -0
  78. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  79. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  80. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  81. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  82. data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
  83. data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
  84. data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
  85. data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
  86. data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
  87. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  88. data/ext/nokogiri/nokogiri.c +294 -0
  89. data/ext/nokogiri/nokogiri.h +238 -0
  90. data/ext/nokogiri/test_global_handlers.c +40 -0
  91. data/ext/nokogiri/xml_attr.c +103 -0
  92. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  93. data/ext/nokogiri/xml_cdata.c +62 -0
  94. data/ext/nokogiri/xml_comment.c +57 -0
  95. data/ext/nokogiri/xml_document.c +784 -0
  96. data/ext/nokogiri/xml_document_fragment.c +29 -0
  97. data/ext/nokogiri/xml_dtd.c +208 -0
  98. data/ext/nokogiri/xml_element_content.c +131 -0
  99. data/ext/nokogiri/xml_element_decl.c +69 -0
  100. data/ext/nokogiri/xml_encoding_handler.c +112 -0
  101. data/ext/nokogiri/xml_entity_decl.c +112 -0
  102. data/ext/nokogiri/xml_entity_reference.c +50 -0
  103. data/ext/nokogiri/xml_namespace.c +181 -0
  104. data/ext/nokogiri/xml_node.c +2459 -0
  105. data/ext/nokogiri/xml_node_set.c +518 -0
  106. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  107. data/ext/nokogiri/xml_reader.c +777 -0
  108. data/ext/nokogiri/xml_relax_ng.c +149 -0
  109. data/ext/nokogiri/xml_sax_parser.c +403 -0
  110. data/ext/nokogiri/xml_sax_parser_context.c +390 -0
  111. data/ext/nokogiri/xml_sax_push_parser.c +206 -0
  112. data/ext/nokogiri/xml_schema.c +226 -0
  113. data/ext/nokogiri/xml_syntax_error.c +93 -0
  114. data/ext/nokogiri/xml_text.c +59 -0
  115. data/ext/nokogiri/xml_xpath_context.c +502 -0
  116. data/ext/nokogiri/xslt_stylesheet.c +421 -0
  117. data/gumbo-parser/CHANGES.md +63 -0
  118. data/gumbo-parser/Makefile +129 -0
  119. data/gumbo-parser/THANKS +27 -0
  120. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  121. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  122. data/lib/nokogiri/3.3/nokogiri.so +0 -0
  123. data/lib/nokogiri/3.4/nokogiri.so +0 -0
  124. data/lib/nokogiri/class_resolver.rb +67 -0
  125. data/lib/nokogiri/css/node.rb +58 -0
  126. data/lib/nokogiri/css/parser.rb +772 -0
  127. data/lib/nokogiri/css/parser.y +277 -0
  128. data/lib/nokogiri/css/parser_extras.rb +36 -0
  129. data/lib/nokogiri/css/selector_cache.rb +38 -0
  130. data/lib/nokogiri/css/syntax_error.rb +9 -0
  131. data/lib/nokogiri/css/tokenizer.rb +155 -0
  132. data/lib/nokogiri/css/tokenizer.rex +57 -0
  133. data/lib/nokogiri/css/xpath_visitor.rb +375 -0
  134. data/lib/nokogiri/css.rb +132 -0
  135. data/lib/nokogiri/decorators/slop.rb +42 -0
  136. data/lib/nokogiri/encoding_handler.rb +57 -0
  137. data/lib/nokogiri/extension.rb +32 -0
  138. data/lib/nokogiri/gumbo.rb +15 -0
  139. data/lib/nokogiri/html.rb +48 -0
  140. data/lib/nokogiri/html4/builder.rb +37 -0
  141. data/lib/nokogiri/html4/document.rb +235 -0
  142. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  143. data/lib/nokogiri/html4/element_description.rb +25 -0
  144. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  145. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  146. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  147. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  148. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  149. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  150. data/lib/nokogiri/html4.rb +42 -0
  151. data/lib/nokogiri/html5/builder.rb +40 -0
  152. data/lib/nokogiri/html5/document.rb +199 -0
  153. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  154. data/lib/nokogiri/html5/node.rb +103 -0
  155. data/lib/nokogiri/html5.rb +368 -0
  156. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  157. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  158. data/lib/nokogiri/syntax_error.rb +6 -0
  159. data/lib/nokogiri/version/constant.rb +6 -0
  160. data/lib/nokogiri/version/info.rb +224 -0
  161. data/lib/nokogiri/version.rb +4 -0
  162. data/lib/nokogiri/xml/attr.rb +66 -0
  163. data/lib/nokogiri/xml/attribute_decl.rb +22 -0
  164. data/lib/nokogiri/xml/builder.rb +494 -0
  165. data/lib/nokogiri/xml/cdata.rb +13 -0
  166. data/lib/nokogiri/xml/character_data.rb +9 -0
  167. data/lib/nokogiri/xml/document.rb +514 -0
  168. data/lib/nokogiri/xml/document_fragment.rb +276 -0
  169. data/lib/nokogiri/xml/dtd.rb +34 -0
  170. data/lib/nokogiri/xml/element_content.rb +46 -0
  171. data/lib/nokogiri/xml/element_decl.rb +17 -0
  172. data/lib/nokogiri/xml/entity_decl.rb +23 -0
  173. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  174. data/lib/nokogiri/xml/namespace.rb +57 -0
  175. data/lib/nokogiri/xml/node/save_options.rb +76 -0
  176. data/lib/nokogiri/xml/node.rb +1650 -0
  177. data/lib/nokogiri/xml/node_set.rb +449 -0
  178. data/lib/nokogiri/xml/notation.rb +19 -0
  179. data/lib/nokogiri/xml/parse_options.rb +213 -0
  180. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  181. data/lib/nokogiri/xml/pp/node.rb +73 -0
  182. data/lib/nokogiri/xml/pp.rb +4 -0
  183. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  184. data/lib/nokogiri/xml/reader.rb +139 -0
  185. data/lib/nokogiri/xml/relax_ng.rb +75 -0
  186. data/lib/nokogiri/xml/sax/document.rb +258 -0
  187. data/lib/nokogiri/xml/sax/parser.rb +199 -0
  188. data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
  189. data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
  190. data/lib/nokogiri/xml/sax.rb +54 -0
  191. data/lib/nokogiri/xml/schema.rb +140 -0
  192. data/lib/nokogiri/xml/searchable.rb +297 -0
  193. data/lib/nokogiri/xml/syntax_error.rb +94 -0
  194. data/lib/nokogiri/xml/text.rb +11 -0
  195. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  196. data/lib/nokogiri/xml/xpath.rb +21 -0
  197. data/lib/nokogiri/xml/xpath_context.rb +49 -0
  198. data/lib/nokogiri/xml.rb +65 -0
  199. data/lib/nokogiri/xslt/stylesheet.rb +49 -0
  200. data/lib/nokogiri/xslt.rb +129 -0
  201. data/lib/nokogiri.rb +128 -0
  202. data/lib/xsd/xmlparser/nokogiri.rb +105 -0
  203. metadata +324 -0
@@ -0,0 +1,2459 @@
1
+ #include <nokogiri.h>
2
+
3
+ #include <stdbool.h>
4
+
5
+ // :stopdoc:
6
+
7
+ VALUE cNokogiriXmlNode ;
8
+ static ID id_decorate, id_decorate_bang;
9
+
10
+ typedef xmlNodePtr(*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr);
11
+
12
+ static void
13
+ _xml_node_mark(void *ptr)
14
+ {
15
+ xmlNodePtr node = ptr;
16
+
17
+ if (!DOC_RUBY_OBJECT_TEST(node->doc)) {
18
+ return;
19
+ }
20
+
21
+ xmlDocPtr doc = node->doc;
22
+ if (doc->type == XML_DOCUMENT_NODE || doc->type == XML_HTML_DOCUMENT_NODE) {
23
+ if (DOC_RUBY_OBJECT_TEST(doc)) {
24
+ rb_gc_mark(DOC_RUBY_OBJECT(doc));
25
+ }
26
+ } else if (node->doc->_private) {
27
+ rb_gc_mark((VALUE)doc->_private);
28
+ }
29
+ }
30
+
31
+ static void
32
+ _xml_node_update_references(void *ptr)
33
+ {
34
+ xmlNodePtr node = ptr;
35
+
36
+ if (node->_private) {
37
+ node->_private = (void *)rb_gc_location((VALUE)node->_private);
38
+ }
39
+ }
40
+
41
+ static const rb_data_type_t xml_node_type = {
42
+ .wrap_struct_name = "xmlNode",
43
+ .function = {
44
+ .dmark = _xml_node_mark,
45
+ .dcompact = _xml_node_update_references,
46
+ },
47
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
48
+ };
49
+
50
+ static VALUE
51
+ _xml_node_alloc(VALUE klass)
52
+ {
53
+ return TypedData_Wrap_Struct(klass, &xml_node_type, NULL);
54
+ }
55
+
56
+ static void
57
+ _xml_node_data_ptr_set(VALUE rb_node, xmlNodePtr c_node)
58
+ {
59
+ assert(DATA_PTR(rb_node) == NULL);
60
+ assert(c_node->_private == NULL);
61
+
62
+ DATA_PTR(rb_node) = c_node;
63
+ c_node->_private = (void *)rb_node;
64
+
65
+ return;
66
+ }
67
+
68
+ static void
69
+ relink_namespace(xmlNodePtr reparented)
70
+ {
71
+ xmlNodePtr child;
72
+ xmlAttrPtr attr;
73
+
74
+ if (reparented->type != XML_ATTRIBUTE_NODE &&
75
+ reparented->type != XML_ELEMENT_NODE) { return; }
76
+
77
+ if (reparented->ns == NULL || reparented->ns->prefix == NULL) {
78
+ xmlNsPtr ns = NULL;
79
+ xmlChar *name = NULL, *prefix = NULL;
80
+
81
+ name = xmlSplitQName2(reparented->name, &prefix);
82
+
83
+ if (reparented->type == XML_ATTRIBUTE_NODE) {
84
+ if (prefix == NULL || strcmp((char *)prefix, XMLNS_PREFIX) == 0) {
85
+ xmlFree(name);
86
+ xmlFree(prefix);
87
+ return;
88
+ }
89
+ }
90
+
91
+ ns = xmlSearchNs(reparented->doc, reparented, prefix);
92
+
93
+ if (ns != NULL) {
94
+ xmlNodeSetName(reparented, name);
95
+ xmlSetNs(reparented, ns);
96
+ }
97
+
98
+ xmlFree(name);
99
+ xmlFree(prefix);
100
+ }
101
+
102
+ /* Avoid segv when relinking against unlinked nodes. */
103
+ if (reparented->type != XML_ELEMENT_NODE || !reparented->parent) { return; }
104
+
105
+ /* Make sure that our reparented node has the correct namespaces */
106
+ if (!reparented->ns &&
107
+ (reparented->doc != (xmlDocPtr)reparented->parent) &&
108
+ (rb_iv_get(DOC_RUBY_OBJECT(reparented->doc), "@namespace_inheritance") == Qtrue)) {
109
+ xmlSetNs(reparented, reparented->parent->ns);
110
+ }
111
+
112
+ /* Search our parents for an existing definition */
113
+ if (reparented->nsDef) {
114
+ xmlNsPtr curr = reparented->nsDef;
115
+ xmlNsPtr prev = NULL;
116
+
117
+ while (curr) {
118
+ xmlNsPtr ns = xmlSearchNsByHref(
119
+ reparented->doc,
120
+ reparented->parent,
121
+ curr->href
122
+ );
123
+ /* If we find the namespace is already declared, remove it from this
124
+ * definition list. */
125
+ if (ns && ns != curr && xmlStrEqual(ns->prefix, curr->prefix)) {
126
+ if (prev) {
127
+ prev->next = curr->next;
128
+ } else {
129
+ reparented->nsDef = curr->next;
130
+ }
131
+ noko_xml_document_pin_namespace(curr, reparented->doc);
132
+ } else {
133
+ prev = curr;
134
+ }
135
+ curr = curr->next;
136
+ }
137
+ }
138
+
139
+ /*
140
+ * Search our parents for an existing definition of current namespace,
141
+ * because the definition it's pointing to may have just been removed nsDef.
142
+ *
143
+ * And although that would technically probably be OK, I'd feel better if we
144
+ * referred to a namespace that's still present in a node's nsDef somewhere
145
+ * in the doc.
146
+ */
147
+ if (reparented->ns) {
148
+ xmlNsPtr ns = xmlSearchNs(reparented->doc, reparented, reparented->ns->prefix);
149
+ if (ns
150
+ && ns != reparented->ns
151
+ && xmlStrEqual(ns->prefix, reparented->ns->prefix)
152
+ && xmlStrEqual(ns->href, reparented->ns->href)
153
+ ) {
154
+ xmlSetNs(reparented, ns);
155
+ }
156
+ }
157
+
158
+ /* Only walk all children if there actually is a namespace we need to */
159
+ /* reparent. */
160
+ if (NULL == reparented->ns) { return; }
161
+
162
+ /* When a node gets reparented, walk its children to make sure that */
163
+ /* their namespaces are reparented as well. */
164
+ child = reparented->children;
165
+ while (NULL != child) {
166
+ relink_namespace(child);
167
+ child = child->next;
168
+ }
169
+
170
+ if (reparented->type == XML_ELEMENT_NODE) {
171
+ attr = reparented->properties;
172
+ while (NULL != attr) {
173
+ relink_namespace((xmlNodePtr)attr);
174
+ attr = attr->next;
175
+ }
176
+ }
177
+ }
178
+
179
+
180
+ /* internal function meant to wrap xmlReplaceNode
181
+ and fix some issues we have with libxml2 merging nodes */
182
+ static xmlNodePtr
183
+ xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node)
184
+ {
185
+ xmlNodePtr retval ;
186
+
187
+ retval = xmlReplaceNode(pivot, new_node) ;
188
+
189
+ if (retval == pivot) {
190
+ retval = new_node ; /* return semantics for reparent_node_with */
191
+ }
192
+
193
+ /* work around libxml2 issue: https://bugzilla.gnome.org/show_bug.cgi?id=615612 */
194
+ if (retval && retval->type == XML_TEXT_NODE) {
195
+ if (retval->prev && retval->prev->type == XML_TEXT_NODE) {
196
+ retval = xmlTextMerge(retval->prev, retval);
197
+ }
198
+ if (retval->next && retval->next->type == XML_TEXT_NODE) {
199
+ retval = xmlTextMerge(retval, retval->next);
200
+ }
201
+ }
202
+
203
+ return retval ;
204
+ }
205
+
206
+
207
+ static void
208
+ raise_if_ancestor_of_self(xmlNodePtr self)
209
+ {
210
+ for (xmlNodePtr ancestor = self->parent ; ancestor ; ancestor = ancestor->parent) {
211
+ if (self == ancestor) {
212
+ rb_raise(rb_eRuntimeError, "cycle detected: node '%s' is an ancestor of itself", self->name);
213
+ }
214
+ }
215
+ }
216
+
217
+
218
+ static VALUE
219
+ reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func prf)
220
+ {
221
+ VALUE reparented_obj ;
222
+ xmlNodePtr reparentee, original_reparentee, pivot, reparented, next_text, new_next_text, parent ;
223
+ int original_ns_prefix_is_default = 0 ;
224
+
225
+ if (!rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlNode)) {
226
+ rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
227
+ }
228
+ if (rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlDocument)) {
229
+ rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
230
+ }
231
+
232
+ Noko_Node_Get_Struct(reparentee_obj, xmlNode, reparentee);
233
+ Noko_Node_Get_Struct(pivot_obj, xmlNode, pivot);
234
+
235
+ /*
236
+ * Check if nodes given are appropriate to have a parent-child
237
+ * relationship, based on the DOM specification.
238
+ *
239
+ * cf. http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/core.html#ID-1590626202
240
+ */
241
+ if (prf == xmlAddChild) {
242
+ parent = pivot;
243
+ } else {
244
+ parent = pivot->parent;
245
+ }
246
+
247
+ if (parent) {
248
+ switch (parent->type) {
249
+ case XML_DOCUMENT_NODE:
250
+ case XML_HTML_DOCUMENT_NODE:
251
+ switch (reparentee->type) {
252
+ case XML_ELEMENT_NODE:
253
+ case XML_PI_NODE:
254
+ case XML_COMMENT_NODE:
255
+ case XML_DOCUMENT_TYPE_NODE:
256
+ /*
257
+ * The DOM specification says no to adding text-like nodes
258
+ * directly to a document, but we allow it for compatibility.
259
+ */
260
+ case XML_TEXT_NODE:
261
+ case XML_CDATA_SECTION_NODE:
262
+ case XML_ENTITY_REF_NODE:
263
+ goto ok;
264
+ default:
265
+ break;
266
+ }
267
+ break;
268
+ case XML_DOCUMENT_FRAG_NODE:
269
+ case XML_ENTITY_REF_NODE:
270
+ case XML_ELEMENT_NODE:
271
+ switch (reparentee->type) {
272
+ case XML_ELEMENT_NODE:
273
+ case XML_PI_NODE:
274
+ case XML_COMMENT_NODE:
275
+ case XML_TEXT_NODE:
276
+ case XML_CDATA_SECTION_NODE:
277
+ case XML_ENTITY_REF_NODE:
278
+ goto ok;
279
+ default:
280
+ break;
281
+ }
282
+ break;
283
+ case XML_ATTRIBUTE_NODE:
284
+ switch (reparentee->type) {
285
+ case XML_TEXT_NODE:
286
+ case XML_ENTITY_REF_NODE:
287
+ goto ok;
288
+ default:
289
+ break;
290
+ }
291
+ break;
292
+ case XML_TEXT_NODE:
293
+ /*
294
+ * xmlAddChild() breaks the DOM specification in that it allows
295
+ * adding a text node to another, in which case text nodes are
296
+ * coalesced, but since our JRuby version does not support such
297
+ * operation, we should inhibit it.
298
+ */
299
+ break;
300
+ default:
301
+ break;
302
+ }
303
+
304
+ rb_raise(rb_eArgError, "cannot reparent %s there", rb_obj_classname(reparentee_obj));
305
+ }
306
+
307
+ ok:
308
+ original_reparentee = reparentee;
309
+
310
+ if (reparentee->doc != pivot->doc || reparentee->type == XML_TEXT_NODE) {
311
+ /*
312
+ * if the reparentee is a text node, there's a very good chance it will be
313
+ * merged with an adjacent text node after being reparented, and in that case
314
+ * libxml will free the underlying C struct.
315
+ *
316
+ * since we clearly have a ruby object which references the underlying
317
+ * memory, we can't let the C struct get freed. let's pickle the original
318
+ * reparentee by rooting it; and then we'll reparent a duplicate of the
319
+ * node that we don't care about preserving.
320
+ *
321
+ * alternatively, if the reparentee is from a different document than the
322
+ * pivot node, libxml2 is going to get confused about which document's
323
+ * "dictionary" the node's strings belong to (this is an otherwise
324
+ * uninteresting libxml2 implementation detail). as a result, we cannot
325
+ * reparent the actual reparentee, so we reparent a duplicate.
326
+ */
327
+ if (reparentee->type == XML_TEXT_NODE && reparentee->_private) {
328
+ /*
329
+ * additionally, since we know this C struct isn't going to be related to
330
+ * a Ruby object anymore, let's break the relationship on this end as
331
+ * well.
332
+ *
333
+ * this is not absolutely necessary unless libxml-ruby is also in effect,
334
+ * in which case its global callback `rxml_node_deregisterNode` will try
335
+ * to do things to our data.
336
+ *
337
+ * for more details on this particular (and particularly nasty) edge
338
+ * case, see:
339
+ *
340
+ * https://github.com/sparklemotion/nokogiri/issues/1426
341
+ */
342
+ reparentee->_private = NULL ;
343
+ }
344
+
345
+ if (reparentee->ns != NULL && reparentee->ns->prefix == NULL) {
346
+ original_ns_prefix_is_default = 1;
347
+ }
348
+
349
+ noko_xml_document_pin_node(reparentee);
350
+
351
+ if (!(reparentee = xmlDocCopyNode(reparentee, pivot->doc, 1))) {
352
+ rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
353
+ }
354
+
355
+ if (original_ns_prefix_is_default && reparentee->ns != NULL && reparentee->ns->prefix != NULL) {
356
+ /*
357
+ * issue #391, where new node's prefix may become the string "default"
358
+ * see libxml2 tree.c xmlNewReconciliedNs which implements this behavior.
359
+ */
360
+ xmlFree(DISCARD_CONST_QUAL_XMLCHAR(reparentee->ns->prefix));
361
+ reparentee->ns->prefix = NULL;
362
+ }
363
+ }
364
+
365
+ xmlUnlinkNode(original_reparentee);
366
+
367
+ if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling && prf != xmlAddChild
368
+ && reparentee->type == XML_TEXT_NODE && pivot->next && pivot->next->type == XML_TEXT_NODE) {
369
+ /*
370
+ * libxml merges text nodes in a right-to-left fashion, meaning that if
371
+ * there are two text nodes who would be adjacent, the right (or following,
372
+ * or next) node will be merged into the left (or preceding, or previous)
373
+ * node.
374
+ *
375
+ * and by "merged" I mean the string contents will be concatenated onto the
376
+ * left node's contents, and then the node will be freed.
377
+ *
378
+ * which means that if we have a ruby object wrapped around the right node,
379
+ * its memory would be freed out from under it.
380
+ *
381
+ * so, we detect this edge case and unlink-and-root the text node before it gets
382
+ * merged. then we dup the node and insert that duplicate back into the
383
+ * document where the real node was.
384
+ *
385
+ * yes, this is totally lame.
386
+ */
387
+ next_text = pivot->next ;
388
+ new_next_text = xmlDocCopyNode(next_text, pivot->doc, 1) ;
389
+
390
+ xmlUnlinkNode(next_text);
391
+ noko_xml_document_pin_node(next_text);
392
+
393
+ xmlAddNextSibling(pivot, new_next_text);
394
+ }
395
+
396
+ if (!(reparented = (*prf)(pivot, reparentee))) {
397
+ rb_raise(rb_eRuntimeError, "Could not reparent node");
398
+ }
399
+
400
+ /*
401
+ * make sure the ruby object is pointed at the just-reparented node, which
402
+ * might be a duplicate (see above) or might be the result of merging
403
+ * adjacent text nodes.
404
+ */
405
+ DATA_PTR(reparentee_obj) = reparented ;
406
+ reparented_obj = noko_xml_node_wrap(Qnil, reparented);
407
+
408
+ rb_funcall(reparented_obj, id_decorate_bang, 0);
409
+
410
+ /* if we've created a cycle, raise an exception */
411
+ raise_if_ancestor_of_self(reparented);
412
+
413
+ relink_namespace(reparented);
414
+
415
+ return reparented_obj ;
416
+ }
417
+
418
+ // :startdoc:
419
+
420
+ /*
421
+ * :call-seq:
422
+ * add_namespace_definition(prefix, href) → Nokogiri::XML::Namespace
423
+ * add_namespace(prefix, href) → Nokogiri::XML::Namespace
424
+ *
425
+ * :category: Manipulating Document Structure
426
+ *
427
+ * Adds a namespace definition to this node with +prefix+ using +href+ value, as if this node had
428
+ * included an attribute "xmlns:prefix=href".
429
+ *
430
+ * A default namespace definition for this node can be added by passing +nil+ for +prefix+.
431
+ *
432
+ * [Parameters]
433
+ * - +prefix+ (String, +nil+) An {XML Name}[https://www.w3.org/TR/xml-names/#ns-decl]
434
+ * - +href+ (String) The {URI reference}[https://www.w3.org/TR/xml-names/#sec-namespaces]
435
+ *
436
+ * [Returns] The new Nokogiri::XML::Namespace
437
+ *
438
+ * *Example:* adding a non-default namespace definition
439
+ *
440
+ * doc = Nokogiri::XML("<store><inventory></inventory></store>")
441
+ * inventory = doc.at_css("inventory")
442
+ * inventory.add_namespace_definition("automobile", "http://alices-autos.com/")
443
+ * inventory.add_namespace_definition("bicycle", "http://bobs-bikes.com/")
444
+ * inventory.add_child("<automobile:tire>Michelin model XGV, size 75R</automobile:tire>")
445
+ * doc.to_xml
446
+ * # => "<?xml version=\"1.0\"?>\n" +
447
+ * # "<store>\n" +
448
+ * # " <inventory xmlns:automobile=\"http://alices-autos.com/\" xmlns:bicycle=\"http://bobs-bikes.com/\">\n" +
449
+ * # " <automobile:tire>Michelin model XGV, size 75R</automobile:tire>\n" +
450
+ * # " </inventory>\n" +
451
+ * # "</store>\n"
452
+ *
453
+ * *Example:* adding a default namespace definition
454
+ *
455
+ * doc = Nokogiri::XML("<store><inventory><tire>Michelin model XGV, size 75R</tire></inventory></store>")
456
+ * doc.at_css("tire").add_namespace_definition(nil, "http://bobs-bikes.com/")
457
+ * doc.to_xml
458
+ * # => "<?xml version=\"1.0\"?>\n" +
459
+ * # "<store>\n" +
460
+ * # " <inventory>\n" +
461
+ * # " <tire xmlns=\"http://bobs-bikes.com/\">Michelin model XGV, size 75R</tire>\n" +
462
+ * # " </inventory>\n" +
463
+ * # "</store>\n"
464
+ *
465
+ */
466
+ static VALUE
467
+ rb_xml_node_add_namespace_definition(VALUE rb_node, VALUE rb_prefix, VALUE rb_href)
468
+ {
469
+ xmlNodePtr c_node, element;
470
+ xmlNsPtr c_namespace;
471
+ const xmlChar *c_prefix = (const xmlChar *)(NIL_P(rb_prefix) ? NULL : StringValueCStr(rb_prefix));
472
+
473
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
474
+ element = c_node ;
475
+
476
+ c_namespace = xmlSearchNs(c_node->doc, c_node, c_prefix);
477
+
478
+ if (!c_namespace) {
479
+ if (c_node->type != XML_ELEMENT_NODE) {
480
+ element = c_node->parent;
481
+ }
482
+ c_namespace = xmlNewNs(element, (const xmlChar *)StringValueCStr(rb_href), c_prefix);
483
+ }
484
+
485
+ if (!c_namespace) {
486
+ return Qnil ;
487
+ }
488
+
489
+ if (NIL_P(rb_prefix) || c_node != element) {
490
+ xmlSetNs(c_node, c_namespace);
491
+ }
492
+
493
+ return noko_xml_namespace_wrap(c_namespace, c_node->doc);
494
+ }
495
+
496
+
497
+ /*
498
+ * :call-seq: attribute(name) → Nokogiri::XML::Attr
499
+ *
500
+ * :category: Working With Node Attributes
501
+ *
502
+ * [Returns] Attribute (Nokogiri::XML::Attr) belonging to this node with name +name+.
503
+ *
504
+ * ⚠ Note that attribute namespaces are ignored and only the simple (non-namespace-prefixed) name is
505
+ * used to find a matching attribute. In case of a simple name collision, only one of the matching
506
+ * attributes will be returned. In this case, you will need to use #attribute_with_ns.
507
+ *
508
+ * *Example:*
509
+ *
510
+ * doc = Nokogiri::XML("<root><child size='large' class='big wide tall'/></root>")
511
+ * child = doc.at_css("child")
512
+ * child.attribute("size") # => #<Nokogiri::XML::Attr:0x550 name="size" value="large">
513
+ * child.attribute("class") # => #<Nokogiri::XML::Attr:0x564 name="class" value="big wide tall">
514
+ *
515
+ * *Example* showing that namespaced attributes will not be returned:
516
+ *
517
+ * ⚠ Note that only one of the two matching attributes is returned.
518
+ *
519
+ * doc = Nokogiri::XML(<<~EOF)
520
+ * <root xmlns:width='http://example.com/widths'
521
+ * xmlns:height='http://example.com/heights'>
522
+ * <child width:size='broad' height:size='tall'/>
523
+ * </root>
524
+ * EOF
525
+ * doc.at_css("child").attribute("size")
526
+ * # => #(Attr:0x550 {
527
+ * # name = "size",
528
+ * # namespace = #(Namespace:0x564 {
529
+ * # prefix = "width",
530
+ * # href = "http://example.com/widths"
531
+ * # }),
532
+ * # value = "broad"
533
+ * # })
534
+ */
535
+ static VALUE
536
+ rb_xml_node_attribute(VALUE self, VALUE name)
537
+ {
538
+ xmlNodePtr node;
539
+ xmlAttrPtr prop;
540
+ Noko_Node_Get_Struct(self, xmlNode, node);
541
+ prop = xmlHasProp(node, (xmlChar *)StringValueCStr(name));
542
+
543
+ if (! prop) { return Qnil; }
544
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)prop);
545
+ }
546
+
547
+
548
+ /*
549
+ * :call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
550
+ *
551
+ * :category: Working With Node Attributes
552
+ *
553
+ * [Returns] Attributes (an Array of Nokogiri::XML::Attr) belonging to this node.
554
+ *
555
+ * Note that this is the preferred alternative to #attributes when the simple
556
+ * (non-namespace-prefixed) attribute names may collide.
557
+ *
558
+ * *Example:*
559
+ *
560
+ * Contrast this with the colliding-name example from #attributes.
561
+ *
562
+ * doc = Nokogiri::XML(<<~EOF)
563
+ * <root xmlns:width='http://example.com/widths'
564
+ * xmlns:height='http://example.com/heights'>
565
+ * <child width:size='broad' height:size='tall'/>
566
+ * </root>
567
+ * EOF
568
+ * doc.at_css("child").attribute_nodes
569
+ * # => [#(Attr:0x550 {
570
+ * # name = "size",
571
+ * # namespace = #(Namespace:0x564 {
572
+ * # prefix = "width",
573
+ * # href = "http://example.com/widths"
574
+ * # }),
575
+ * # value = "broad"
576
+ * # }),
577
+ * # #(Attr:0x578 {
578
+ * # name = "size",
579
+ * # namespace = #(Namespace:0x58c {
580
+ * # prefix = "height",
581
+ * # href = "http://example.com/heights"
582
+ * # }),
583
+ * # value = "tall"
584
+ * # })]
585
+ */
586
+ static VALUE
587
+ rb_xml_node_attribute_nodes(VALUE rb_node)
588
+ {
589
+ xmlNodePtr c_node;
590
+
591
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
592
+
593
+ return noko_xml_node_attrs(c_node);
594
+ }
595
+
596
+
597
+ /*
598
+ * :call-seq: attribute_with_ns(name, namespace) → Nokogiri::XML::Attr
599
+ *
600
+ * :category: Working With Node Attributes
601
+ *
602
+ * [Returns]
603
+ * Attribute (Nokogiri::XML::Attr) belonging to this node with matching +name+ and +namespace+.
604
+ *
605
+ * [Parameters]
606
+ * - +name+ (String): the simple (non-namespace-prefixed) name of the attribute
607
+ * - +namespace+ (String): the URI of the attribute's namespace
608
+ *
609
+ * See related: #attribute
610
+ *
611
+ * *Example:*
612
+ *
613
+ * doc = Nokogiri::XML(<<~EOF)
614
+ * <root xmlns:width='http://example.com/widths'
615
+ * xmlns:height='http://example.com/heights'>
616
+ * <child width:size='broad' height:size='tall'/>
617
+ * </root>
618
+ * EOF
619
+ * doc.at_css("child").attribute_with_ns("size", "http://example.com/widths")
620
+ * # => #(Attr:0x550 {
621
+ * # name = "size",
622
+ * # namespace = #(Namespace:0x564 {
623
+ * # prefix = "width",
624
+ * # href = "http://example.com/widths"
625
+ * # }),
626
+ * # value = "broad"
627
+ * # })
628
+ * doc.at_css("child").attribute_with_ns("size", "http://example.com/heights")
629
+ * # => #(Attr:0x578 {
630
+ * # name = "size",
631
+ * # namespace = #(Namespace:0x58c {
632
+ * # prefix = "height",
633
+ * # href = "http://example.com/heights"
634
+ * # }),
635
+ * # value = "tall"
636
+ * # })
637
+ */
638
+ static VALUE
639
+ rb_xml_node_attribute_with_ns(VALUE self, VALUE name, VALUE namespace)
640
+ {
641
+ xmlNodePtr node;
642
+ xmlAttrPtr prop;
643
+ Noko_Node_Get_Struct(self, xmlNode, node);
644
+ prop = xmlHasNsProp(node, (xmlChar *)StringValueCStr(name),
645
+ NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace));
646
+
647
+ if (! prop) { return Qnil; }
648
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)prop);
649
+ }
650
+
651
+
652
+
653
+ /*
654
+ * call-seq: blank? → Boolean
655
+ *
656
+ * [Returns] +true+ if the node is an empty or whitespace-only text or cdata node, else +false+.
657
+ *
658
+ * *Example:*
659
+ *
660
+ * Nokogiri("<root><child/></root>").root.child.blank? # => false
661
+ * Nokogiri("<root>\t \n</root>").root.child.blank? # => true
662
+ * Nokogiri("<root><![CDATA[\t \n]]></root>").root.child.blank? # => true
663
+ * Nokogiri("<root>not-blank</root>").root.child
664
+ * .tap { |n| n.content = "" }.blank? # => true
665
+ */
666
+ static VALUE
667
+ rb_xml_node_blank_eh(VALUE self)
668
+ {
669
+ xmlNodePtr node;
670
+ Noko_Node_Get_Struct(self, xmlNode, node);
671
+ return (1 == xmlIsBlankNode(node)) ? Qtrue : Qfalse ;
672
+ }
673
+
674
+
675
+ /*
676
+ * :call-seq: child() → Nokogiri::XML::Node
677
+ *
678
+ * :category: Traversing Document Structure
679
+ *
680
+ * [Returns] First of this node's children, or +nil+ if there are no children
681
+ *
682
+ * This is a convenience method and is equivalent to:
683
+ *
684
+ * node.children.first
685
+ *
686
+ * See related: #children
687
+ */
688
+ static VALUE
689
+ rb_xml_node_child(VALUE self)
690
+ {
691
+ xmlNodePtr node, child;
692
+ Noko_Node_Get_Struct(self, xmlNode, node);
693
+
694
+ child = node->children;
695
+ if (!child) { return Qnil; }
696
+
697
+ return noko_xml_node_wrap(Qnil, child);
698
+ }
699
+
700
+
701
+ /*
702
+ * :call-seq: children() → Nokogiri::XML::NodeSet
703
+ *
704
+ * :category: Traversing Document Structure
705
+ *
706
+ * [Returns] Nokogiri::XML::NodeSet containing this node's children.
707
+ */
708
+ static VALUE
709
+ rb_xml_node_children(VALUE self)
710
+ {
711
+ xmlNodePtr node;
712
+ xmlNodePtr child;
713
+ xmlNodeSetPtr set;
714
+ VALUE document;
715
+ VALUE node_set;
716
+
717
+ Noko_Node_Get_Struct(self, xmlNode, node);
718
+
719
+ child = node->children;
720
+ set = xmlXPathNodeSetCreate(child);
721
+
722
+ document = DOC_RUBY_OBJECT(node->doc);
723
+
724
+ if (!child) { return noko_xml_node_set_wrap(set, document); }
725
+
726
+ child = child->next;
727
+ while (NULL != child) {
728
+ xmlXPathNodeSetAddUnique(set, child);
729
+ child = child->next;
730
+ }
731
+
732
+ node_set = noko_xml_node_set_wrap(set, document);
733
+
734
+ return node_set;
735
+ }
736
+
737
+
738
+ /*
739
+ * :call-seq:
740
+ * content() → String
741
+ * inner_text() → String
742
+ * text() → String
743
+ * to_str() → String
744
+ *
745
+ * [Returns]
746
+ * Contents of all the text nodes in this node's subtree, concatenated together into a single
747
+ * String.
748
+ *
749
+ * ⚠ Note that entities will _always_ be expanded in the returned String.
750
+ *
751
+ * See related: #inner_html
752
+ *
753
+ * *Example* of how entities are handled:
754
+ *
755
+ * Note that <tt>&lt;</tt> becomes <tt><</tt> in the returned String.
756
+ *
757
+ * doc = Nokogiri::XML.fragment("<child>a &lt; b</child>")
758
+ * doc.at_css("child").content
759
+ * # => "a < b"
760
+ *
761
+ * *Example* of how a subtree is handled:
762
+ *
763
+ * Note that the <tt><span></tt> tags are omitted and only the text node contents are returned,
764
+ * concatenated into a single string.
765
+ *
766
+ * doc = Nokogiri::XML.fragment("<child><span>first</span> <span>second</span></child>")
767
+ * doc.at_css("child").content
768
+ * # => "first second"
769
+ */
770
+ static VALUE
771
+ rb_xml_node_content(VALUE self)
772
+ {
773
+ xmlNodePtr node;
774
+ xmlChar *content;
775
+
776
+ Noko_Node_Get_Struct(self, xmlNode, node);
777
+
778
+ content = xmlNodeGetContent(node);
779
+ if (content) {
780
+ VALUE rval = NOKOGIRI_STR_NEW2(content);
781
+ xmlFree(content);
782
+ return rval;
783
+ }
784
+ return Qnil;
785
+ }
786
+
787
+
788
+ /*
789
+ * :call-seq: document() → Nokogiri::XML::Document
790
+ *
791
+ * :category: Traversing Document Structure
792
+ *
793
+ * [Returns] Parent Nokogiri::XML::Document for this node
794
+ */
795
+ static VALUE
796
+ rb_xml_node_document(VALUE self)
797
+ {
798
+ xmlNodePtr node;
799
+ Noko_Node_Get_Struct(self, xmlNode, node);
800
+ return DOC_RUBY_OBJECT(node->doc);
801
+ }
802
+
803
+ /*
804
+ * :call-seq: pointer_id() → Integer
805
+ *
806
+ * [Returns]
807
+ * A unique id for this node based on the internal memory structures. This method is used by #==
808
+ * to determine node identity.
809
+ */
810
+ static VALUE
811
+ rb_xml_node_pointer_id(VALUE self)
812
+ {
813
+ xmlNodePtr node;
814
+ Noko_Node_Get_Struct(self, xmlNode, node);
815
+
816
+ return rb_uint2inum((uintptr_t)(node));
817
+ }
818
+
819
+ /*
820
+ * :call-seq: encode_special_chars(string) → String
821
+ *
822
+ * Encode any special characters in +string+
823
+ */
824
+ static VALUE
825
+ encode_special_chars(VALUE self, VALUE string)
826
+ {
827
+ xmlNodePtr node;
828
+ xmlChar *encoded;
829
+ VALUE encoded_str;
830
+
831
+ Noko_Node_Get_Struct(self, xmlNode, node);
832
+ encoded = xmlEncodeSpecialChars(
833
+ node->doc,
834
+ (const xmlChar *)StringValueCStr(string)
835
+ );
836
+
837
+ encoded_str = NOKOGIRI_STR_NEW2(encoded);
838
+ xmlFree(encoded);
839
+
840
+ return encoded_str;
841
+ }
842
+
843
+ /*
844
+ * :call-seq:
845
+ * create_internal_subset(name, external_id, system_id)
846
+ *
847
+ * Create the internal subset of a document.
848
+ *
849
+ * doc.create_internal_subset("chapter", "-//OASIS//DTD DocBook XML//EN", "chapter.dtd")
850
+ * # => <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML//EN" "chapter.dtd">
851
+ *
852
+ * doc.create_internal_subset("chapter", nil, "chapter.dtd")
853
+ * # => <!DOCTYPE chapter SYSTEM "chapter.dtd">
854
+ */
855
+ static VALUE
856
+ create_internal_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id)
857
+ {
858
+ xmlNodePtr node;
859
+ xmlDocPtr doc;
860
+ xmlDtdPtr dtd;
861
+
862
+ Noko_Node_Get_Struct(self, xmlNode, node);
863
+
864
+ doc = node->doc;
865
+
866
+ if (xmlGetIntSubset(doc)) {
867
+ rb_raise(rb_eRuntimeError, "Document already has an internal subset");
868
+ }
869
+
870
+ dtd = xmlCreateIntSubset(
871
+ doc,
872
+ NIL_P(name) ? NULL : (const xmlChar *)StringValueCStr(name),
873
+ NIL_P(external_id) ? NULL : (const xmlChar *)StringValueCStr(external_id),
874
+ NIL_P(system_id) ? NULL : (const xmlChar *)StringValueCStr(system_id)
875
+ );
876
+
877
+ if (!dtd) { return Qnil; }
878
+
879
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
880
+ }
881
+
882
+ /*
883
+ * :call-seq:
884
+ * create_external_subset(name, external_id, system_id)
885
+ *
886
+ * Create an external subset
887
+ */
888
+ static VALUE
889
+ create_external_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id)
890
+ {
891
+ xmlNodePtr node;
892
+ xmlDocPtr doc;
893
+ xmlDtdPtr dtd;
894
+
895
+ Noko_Node_Get_Struct(self, xmlNode, node);
896
+
897
+ doc = node->doc;
898
+
899
+ if (doc->extSubset) {
900
+ rb_raise(rb_eRuntimeError, "Document already has an external subset");
901
+ }
902
+
903
+ dtd = xmlNewDtd(
904
+ doc,
905
+ NIL_P(name) ? NULL : (const xmlChar *)StringValueCStr(name),
906
+ NIL_P(external_id) ? NULL : (const xmlChar *)StringValueCStr(external_id),
907
+ NIL_P(system_id) ? NULL : (const xmlChar *)StringValueCStr(system_id)
908
+ );
909
+
910
+ if (!dtd) { return Qnil; }
911
+
912
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
913
+ }
914
+
915
+ /*
916
+ * :call-seq:
917
+ * external_subset()
918
+ *
919
+ * Get the external subset
920
+ */
921
+ static VALUE
922
+ external_subset(VALUE self)
923
+ {
924
+ xmlNodePtr node;
925
+ xmlDocPtr doc;
926
+ xmlDtdPtr dtd;
927
+
928
+ Noko_Node_Get_Struct(self, xmlNode, node);
929
+
930
+ if (!node->doc) { return Qnil; }
931
+
932
+ doc = node->doc;
933
+ dtd = doc->extSubset;
934
+
935
+ if (!dtd) { return Qnil; }
936
+
937
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
938
+ }
939
+
940
+ /*
941
+ * :call-seq:
942
+ * internal_subset()
943
+ *
944
+ * Get the internal subset
945
+ */
946
+ static VALUE
947
+ internal_subset(VALUE self)
948
+ {
949
+ xmlNodePtr node;
950
+ xmlDocPtr doc;
951
+ xmlDtdPtr dtd;
952
+
953
+ Noko_Node_Get_Struct(self, xmlNode, node);
954
+
955
+ if (!node->doc) { return Qnil; }
956
+
957
+ doc = node->doc;
958
+ dtd = xmlGetIntSubset(doc);
959
+
960
+ if (!dtd) { return Qnil; }
961
+
962
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
963
+ }
964
+
965
+ /* :nodoc: */
966
+ static VALUE
967
+ rb_xml_node_initialize_copy_with_args(VALUE rb_self, VALUE rb_other, VALUE rb_level, VALUE rb_new_parent_doc)
968
+ {
969
+ xmlNodePtr c_self, c_other;
970
+ int c_level;
971
+ xmlDocPtr c_new_parent_doc;
972
+ VALUE rb_node_cache;
973
+
974
+ Noko_Node_Get_Struct(rb_other, xmlNode, c_other);
975
+ c_level = (int)NUM2INT(rb_level);
976
+ c_new_parent_doc = noko_xml_document_unwrap(rb_new_parent_doc);
977
+
978
+ c_self = xmlDocCopyNode(c_other, c_new_parent_doc, c_level);
979
+ if (c_self == NULL) { return Qnil; }
980
+
981
+ _xml_node_data_ptr_set(rb_self, c_self);
982
+ noko_xml_document_pin_node(c_self);
983
+
984
+ rb_node_cache = DOC_NODE_CACHE(c_new_parent_doc);
985
+ rb_ary_push(rb_node_cache, rb_self);
986
+ rb_funcall(rb_new_parent_doc, id_decorate, 1, rb_self);
987
+
988
+ return rb_self;
989
+ }
990
+
991
+ /*
992
+ * :call-seq:
993
+ * unlink() → self
994
+ *
995
+ * Unlink this node from its current context.
996
+ */
997
+ static VALUE
998
+ unlink_node(VALUE self)
999
+ {
1000
+ xmlNodePtr node;
1001
+ Noko_Node_Get_Struct(self, xmlNode, node);
1002
+ xmlUnlinkNode(node);
1003
+ noko_xml_document_pin_node(node);
1004
+ return self;
1005
+ }
1006
+
1007
+
1008
+ /*
1009
+ * call-seq:
1010
+ * next_sibling
1011
+ *
1012
+ * Returns the next sibling node
1013
+ */
1014
+ static VALUE
1015
+ next_sibling(VALUE self)
1016
+ {
1017
+ xmlNodePtr node, sibling;
1018
+ Noko_Node_Get_Struct(self, xmlNode, node);
1019
+
1020
+ sibling = node->next;
1021
+ if (!sibling) { return Qnil; }
1022
+
1023
+ return noko_xml_node_wrap(Qnil, sibling) ;
1024
+ }
1025
+
1026
+ /*
1027
+ * call-seq:
1028
+ * previous_sibling
1029
+ *
1030
+ * Returns the previous sibling node
1031
+ */
1032
+ static VALUE
1033
+ previous_sibling(VALUE self)
1034
+ {
1035
+ xmlNodePtr node, sibling;
1036
+ Noko_Node_Get_Struct(self, xmlNode, node);
1037
+
1038
+ sibling = node->prev;
1039
+ if (!sibling) { return Qnil; }
1040
+
1041
+ return noko_xml_node_wrap(Qnil, sibling);
1042
+ }
1043
+
1044
+ /*
1045
+ * call-seq:
1046
+ * next_element
1047
+ *
1048
+ * Returns the next Nokogiri::XML::Element type sibling node.
1049
+ */
1050
+ static VALUE
1051
+ next_element(VALUE self)
1052
+ {
1053
+ xmlNodePtr node, sibling;
1054
+ Noko_Node_Get_Struct(self, xmlNode, node);
1055
+
1056
+ sibling = xmlNextElementSibling(node);
1057
+ if (!sibling) { return Qnil; }
1058
+
1059
+ return noko_xml_node_wrap(Qnil, sibling);
1060
+ }
1061
+
1062
+ /*
1063
+ * call-seq:
1064
+ * previous_element
1065
+ *
1066
+ * Returns the previous Nokogiri::XML::Element type sibling node.
1067
+ */
1068
+ static VALUE
1069
+ previous_element(VALUE self)
1070
+ {
1071
+ xmlNodePtr node, sibling;
1072
+ Noko_Node_Get_Struct(self, xmlNode, node);
1073
+
1074
+ sibling = xmlPreviousElementSibling(node);
1075
+ if (!sibling) { return Qnil; }
1076
+
1077
+ return noko_xml_node_wrap(Qnil, sibling);
1078
+ }
1079
+
1080
+ /* :nodoc: */
1081
+ static VALUE
1082
+ replace(VALUE self, VALUE new_node)
1083
+ {
1084
+ VALUE reparent = reparent_node_with(self, new_node, xmlReplaceNodeWrapper);
1085
+
1086
+ xmlNodePtr pivot;
1087
+ Noko_Node_Get_Struct(self, xmlNode, pivot);
1088
+ noko_xml_document_pin_node(pivot);
1089
+
1090
+ return reparent;
1091
+ }
1092
+
1093
+ /*
1094
+ * :call-seq:
1095
+ * element_children() → NodeSet
1096
+ * elements() → NodeSet
1097
+ *
1098
+ * [Returns]
1099
+ * The node's child elements as a NodeSet. Only children that are elements will be returned, which
1100
+ * notably excludes Text nodes.
1101
+ *
1102
+ * *Example:*
1103
+ *
1104
+ * Note that #children returns the Text node "hello" while #element_children does not.
1105
+ *
1106
+ * div = Nokogiri::HTML5("<div>hello<span>world</span>").at_css("div")
1107
+ * div.element_children
1108
+ * # => [#<Nokogiri::XML::Element:0x50 name="span" children=[#<Nokogiri::XML::Text:0x3c "world">]>]
1109
+ * div.children
1110
+ * # => [#<Nokogiri::XML::Text:0x64 "hello">,
1111
+ * # #<Nokogiri::XML::Element:0x50 name="span" children=[#<Nokogiri::XML::Text:0x3c "world">]>]
1112
+ */
1113
+ static VALUE
1114
+ rb_xml_node_element_children(VALUE self)
1115
+ {
1116
+ xmlNodePtr node;
1117
+ xmlNodePtr child;
1118
+ xmlNodeSetPtr set;
1119
+ VALUE document;
1120
+ VALUE node_set;
1121
+
1122
+ Noko_Node_Get_Struct(self, xmlNode, node);
1123
+
1124
+ child = xmlFirstElementChild(node);
1125
+ set = xmlXPathNodeSetCreate(child);
1126
+
1127
+ document = DOC_RUBY_OBJECT(node->doc);
1128
+
1129
+ if (!child) { return noko_xml_node_set_wrap(set, document); }
1130
+
1131
+ child = xmlNextElementSibling(child);
1132
+ while (NULL != child) {
1133
+ xmlXPathNodeSetAddUnique(set, child);
1134
+ child = xmlNextElementSibling(child);
1135
+ }
1136
+
1137
+ node_set = noko_xml_node_set_wrap(set, document);
1138
+
1139
+ return node_set;
1140
+ }
1141
+
1142
+ /*
1143
+ * :call-seq:
1144
+ * first_element_child() → Node
1145
+ *
1146
+ * [Returns] The first child Node that is an element.
1147
+ *
1148
+ * *Example:*
1149
+ *
1150
+ * Note that the "hello" child, which is a Text node, is skipped and the <tt><span></tt> element is
1151
+ * returned.
1152
+ *
1153
+ * div = Nokogiri::HTML5("<div>hello<span>world</span>").at_css("div")
1154
+ * div.first_element_child
1155
+ * # => #(Element:0x3c { name = "span", children = [ #(Text "world")] })
1156
+ */
1157
+ static VALUE
1158
+ rb_xml_node_first_element_child(VALUE self)
1159
+ {
1160
+ xmlNodePtr node, child;
1161
+ Noko_Node_Get_Struct(self, xmlNode, node);
1162
+
1163
+ child = xmlFirstElementChild(node);
1164
+ if (!child) { return Qnil; }
1165
+
1166
+ return noko_xml_node_wrap(Qnil, child);
1167
+ }
1168
+
1169
+ /*
1170
+ * :call-seq:
1171
+ * last_element_child() → Node
1172
+ *
1173
+ * [Returns] The last child Node that is an element.
1174
+ *
1175
+ * *Example:*
1176
+ *
1177
+ * Note that the "hello" child, which is a Text node, is skipped and the <tt><span>yes</span></tt>
1178
+ * element is returned.
1179
+ *
1180
+ * div = Nokogiri::HTML5("<div><span>no</span><span>yes</span>skip</div>").at_css("div")
1181
+ * div.last_element_child
1182
+ * # => #(Element:0x3c { name = "span", children = [ #(Text "yes")] })
1183
+ */
1184
+ static VALUE
1185
+ rb_xml_node_last_element_child(VALUE self)
1186
+ {
1187
+ xmlNodePtr node, child;
1188
+ Noko_Node_Get_Struct(self, xmlNode, node);
1189
+
1190
+ child = xmlLastElementChild(node);
1191
+ if (!child) { return Qnil; }
1192
+
1193
+ return noko_xml_node_wrap(Qnil, child);
1194
+ }
1195
+
1196
+ /*
1197
+ * call-seq:
1198
+ * key?(attribute)
1199
+ *
1200
+ * Returns true if +attribute+ is set
1201
+ */
1202
+ static VALUE
1203
+ key_eh(VALUE self, VALUE attribute)
1204
+ {
1205
+ xmlNodePtr node;
1206
+ Noko_Node_Get_Struct(self, xmlNode, node);
1207
+ if (xmlHasProp(node, (xmlChar *)StringValueCStr(attribute))) {
1208
+ return Qtrue;
1209
+ }
1210
+ return Qfalse;
1211
+ }
1212
+
1213
+ /*
1214
+ * call-seq:
1215
+ * namespaced_key?(attribute, namespace)
1216
+ *
1217
+ * Returns true if +attribute+ is set with +namespace+
1218
+ */
1219
+ static VALUE
1220
+ namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace)
1221
+ {
1222
+ xmlNodePtr node;
1223
+ Noko_Node_Get_Struct(self, xmlNode, node);
1224
+ if (xmlHasNsProp(node, (xmlChar *)StringValueCStr(attribute),
1225
+ NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace))) {
1226
+ return Qtrue;
1227
+ }
1228
+ return Qfalse;
1229
+ }
1230
+
1231
+ /*
1232
+ * call-seq:
1233
+ * []=(property, value)
1234
+ *
1235
+ * Set the +property+ to +value+
1236
+ */
1237
+ static VALUE
1238
+ set(VALUE self, VALUE property, VALUE value)
1239
+ {
1240
+ xmlNodePtr node, cur;
1241
+ xmlAttrPtr prop;
1242
+ Noko_Node_Get_Struct(self, xmlNode, node);
1243
+
1244
+ /* If a matching attribute node already exists, then xmlSetProp will destroy
1245
+ * the existing node's children. However, if Nokogiri has a node object
1246
+ * pointing to one of those children, we are left with a broken reference.
1247
+ *
1248
+ * We can avoid this by unlinking these nodes first.
1249
+ */
1250
+ if (node->type != XML_ELEMENT_NODE) {
1251
+ return (Qnil);
1252
+ }
1253
+ prop = xmlHasProp(node, (xmlChar *)StringValueCStr(property));
1254
+ if (prop && prop->children) {
1255
+ for (cur = prop->children; cur; cur = cur->next) {
1256
+ if (cur->_private) {
1257
+ noko_xml_document_pin_node(cur);
1258
+ xmlUnlinkNode(cur);
1259
+ }
1260
+ }
1261
+ }
1262
+
1263
+ xmlSetProp(node, (xmlChar *)StringValueCStr(property),
1264
+ (xmlChar *)StringValueCStr(value));
1265
+
1266
+ return value;
1267
+ }
1268
+
1269
+ /*
1270
+ * call-seq:
1271
+ * get(attribute)
1272
+ *
1273
+ * Get the value for +attribute+
1274
+ */
1275
+ static VALUE
1276
+ get(VALUE self, VALUE rattribute)
1277
+ {
1278
+ xmlNodePtr node;
1279
+ xmlChar *value = 0;
1280
+ VALUE rvalue;
1281
+ xmlChar *colon;
1282
+ xmlChar *attribute, *attr_name, *prefix;
1283
+ xmlNsPtr ns;
1284
+
1285
+ if (NIL_P(rattribute)) { return Qnil; }
1286
+
1287
+ Noko_Node_Get_Struct(self, xmlNode, node);
1288
+ attribute = xmlCharStrdup(StringValueCStr(rattribute));
1289
+
1290
+ colon = DISCARD_CONST_QUAL_XMLCHAR(xmlStrchr(attribute, (const xmlChar)':'));
1291
+ if (colon) {
1292
+ /* split the attribute string into separate prefix and name by
1293
+ * null-terminating the prefix at the colon */
1294
+ prefix = attribute;
1295
+ attr_name = colon + 1;
1296
+ (*colon) = 0;
1297
+
1298
+ ns = xmlSearchNs(node->doc, node, prefix);
1299
+ if (ns) {
1300
+ value = xmlGetNsProp(node, attr_name, ns->href);
1301
+ } else {
1302
+ value = xmlGetProp(node, (xmlChar *)StringValueCStr(rattribute));
1303
+ }
1304
+ } else {
1305
+ value = xmlGetNoNsProp(node, attribute);
1306
+ }
1307
+
1308
+ xmlFree((void *)attribute);
1309
+ if (!value) { return Qnil; }
1310
+
1311
+ rvalue = NOKOGIRI_STR_NEW2(value);
1312
+ xmlFree((void *)value);
1313
+
1314
+ return rvalue ;
1315
+ }
1316
+
1317
+ /*
1318
+ * call-seq:
1319
+ * set_namespace(namespace)
1320
+ *
1321
+ * Set the namespace to +namespace+
1322
+ */
1323
+ static VALUE
1324
+ set_namespace(VALUE self, VALUE namespace)
1325
+ {
1326
+ xmlNodePtr node;
1327
+ xmlNsPtr ns = NULL;
1328
+
1329
+ Noko_Node_Get_Struct(self, xmlNode, node);
1330
+
1331
+ if (!NIL_P(namespace)) {
1332
+ Noko_Namespace_Get_Struct(namespace, xmlNs, ns);
1333
+ }
1334
+
1335
+ xmlSetNs(node, ns);
1336
+
1337
+ return self;
1338
+ }
1339
+
1340
+ /*
1341
+ * :call-seq:
1342
+ * namespace() → Namespace
1343
+ *
1344
+ * [Returns] The Namespace of the element or attribute node, or +nil+ if there is no namespace.
1345
+ *
1346
+ * *Example:*
1347
+ *
1348
+ * doc = Nokogiri::XML(<<~EOF)
1349
+ * <root>
1350
+ * <first/>
1351
+ * <second xmlns="http://example.com/child"/>
1352
+ * <foo:third xmlns:foo="http://example.com/foo"/>
1353
+ * </root>
1354
+ * EOF
1355
+ * doc.at_xpath("//first").namespace
1356
+ * # => nil
1357
+ * doc.at_xpath("//xmlns:second", "xmlns" => "http://example.com/child").namespace
1358
+ * # => #(Namespace:0x3c { href = "http://example.com/child" })
1359
+ * doc.at_xpath("//foo:third", "foo" => "http://example.com/foo").namespace
1360
+ * # => #(Namespace:0x50 { prefix = "foo", href = "http://example.com/foo" })
1361
+ */
1362
+ static VALUE
1363
+ rb_xml_node_namespace(VALUE rb_node)
1364
+ {
1365
+ xmlNodePtr c_node ;
1366
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1367
+
1368
+ if (c_node->ns) {
1369
+ return noko_xml_namespace_wrap(c_node->ns, c_node->doc);
1370
+ }
1371
+
1372
+ return Qnil ;
1373
+ }
1374
+
1375
+ /*
1376
+ * :call-seq:
1377
+ * namespace_definitions() → Array<Nokogiri::XML::Namespace>
1378
+ *
1379
+ * [Returns]
1380
+ * Namespaces that are defined directly on this node, as an Array of Namespace objects. The array
1381
+ * will be empty if no namespaces are defined on this node.
1382
+ *
1383
+ * *Example:*
1384
+ *
1385
+ * doc = Nokogiri::XML(<<~EOF)
1386
+ * <root xmlns="http://example.com/root">
1387
+ * <first/>
1388
+ * <second xmlns="http://example.com/child" xmlns:unused="http://example.com/unused"/>
1389
+ * <foo:third xmlns:foo="http://example.com/foo"/>
1390
+ * </root>
1391
+ * EOF
1392
+ * doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_definitions
1393
+ * # => []
1394
+ * doc.at_xpath("//xmlns:second", "xmlns" => "http://example.com/child").namespace_definitions
1395
+ * # => [#(Namespace:0x3c { href = "http://example.com/child" }),
1396
+ * # #(Namespace:0x50 {
1397
+ * # prefix = "unused",
1398
+ * # href = "http://example.com/unused"
1399
+ * # })]
1400
+ * doc.at_xpath("//foo:third", "foo" => "http://example.com/foo").namespace_definitions
1401
+ * # => [#(Namespace:0x64 { prefix = "foo", href = "http://example.com/foo" })]
1402
+ */
1403
+ static VALUE
1404
+ namespace_definitions(VALUE rb_node)
1405
+ {
1406
+ /* this code in the mode of xmlHasProp() */
1407
+ xmlNodePtr c_node ;
1408
+ xmlNsPtr c_namespace;
1409
+ VALUE definitions = rb_ary_new();
1410
+
1411
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1412
+
1413
+ c_namespace = c_node->nsDef;
1414
+ if (!c_namespace) {
1415
+ return definitions;
1416
+ }
1417
+
1418
+ while (c_namespace != NULL) {
1419
+ rb_ary_push(definitions, noko_xml_namespace_wrap(c_namespace, c_node->doc));
1420
+ c_namespace = c_namespace->next;
1421
+ }
1422
+
1423
+ return definitions;
1424
+ }
1425
+
1426
+ /*
1427
+ * :call-seq:
1428
+ * namespace_scopes() → Array<Nokogiri::XML::Namespace>
1429
+ *
1430
+ * [Returns] Array of all the Namespaces on this node and its ancestors.
1431
+ *
1432
+ * See also #namespaces
1433
+ *
1434
+ * *Example:*
1435
+ *
1436
+ * doc = Nokogiri::XML(<<~EOF)
1437
+ * <root xmlns="http://example.com/root" xmlns:bar="http://example.com/bar">
1438
+ * <first/>
1439
+ * <second xmlns="http://example.com/child"/>
1440
+ * <third xmlns:foo="http://example.com/foo"/>
1441
+ * </root>
1442
+ * EOF
1443
+ * doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_scopes
1444
+ * # => [#(Namespace:0x3c { href = "http://example.com/root" }),
1445
+ * # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })]
1446
+ * doc.at_xpath("//child:second", "child" => "http://example.com/child").namespace_scopes
1447
+ * # => [#(Namespace:0x64 { href = "http://example.com/child" }),
1448
+ * # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })]
1449
+ * doc.at_xpath("//root:third", "root" => "http://example.com/root").namespace_scopes
1450
+ * # => [#(Namespace:0x78 { prefix = "foo", href = "http://example.com/foo" }),
1451
+ * # #(Namespace:0x3c { href = "http://example.com/root" }),
1452
+ * # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })]
1453
+ */
1454
+ static VALUE
1455
+ rb_xml_node_namespace_scopes(VALUE rb_node)
1456
+ {
1457
+ xmlNodePtr c_node ;
1458
+ xmlNsPtr *namespaces;
1459
+ VALUE scopes = rb_ary_new();
1460
+ int j;
1461
+
1462
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1463
+
1464
+ namespaces = xmlGetNsList(c_node->doc, c_node);
1465
+ if (!namespaces) {
1466
+ return scopes;
1467
+ }
1468
+
1469
+ for (j = 0 ; namespaces[j] != NULL ; ++j) {
1470
+ rb_ary_push(scopes, noko_xml_namespace_wrap(namespaces[j], c_node->doc));
1471
+ }
1472
+
1473
+ xmlFree(namespaces);
1474
+ return scopes;
1475
+ }
1476
+
1477
+ /*
1478
+ * call-seq:
1479
+ * node_type
1480
+ *
1481
+ * Get the type for this Node
1482
+ */
1483
+ static VALUE
1484
+ node_type(VALUE self)
1485
+ {
1486
+ xmlNodePtr node;
1487
+ Noko_Node_Get_Struct(self, xmlNode, node);
1488
+ return INT2NUM(node->type);
1489
+ }
1490
+
1491
+ /*
1492
+ * call-seq:
1493
+ * native_content=(input)
1494
+ *
1495
+ * Set the content of this node to +input+.
1496
+ *
1497
+ * [Parameters]
1498
+ * - +input+ (String) The new content for this node.
1499
+ *
1500
+ * ⚠ This method behaves differently depending on the node type. For Text, CDATA, Comment, and
1501
+ * ProcessingInstruction nodes, it treats the input as raw content, which means that the final DOM
1502
+ * will contain the entity-escaped version of the input (see example below). For Element and Attr
1503
+ * nodes, it treats the input as parsed content and expects it to be valid markup that is already
1504
+ * entity-escaped.
1505
+ *
1506
+ * 💡 Use Node#content= for a more consistent API across node types.
1507
+ *
1508
+ * [Example]
1509
+ * Note the behavior differences of this method between Text and Element nodes:
1510
+ *
1511
+ * doc = Nokogiri::HTML::Document.parse(<<~HTML)
1512
+ * <html>
1513
+ * <body>
1514
+ * <div id="first">asdf</div>
1515
+ * <div id="second">asdf</div>
1516
+ * HTML
1517
+ *
1518
+ * text_node = doc.at_css("div#first").children.first
1519
+ * div_node = doc.at_css("div#second")
1520
+ *
1521
+ * value = "You &amp; Me"
1522
+ *
1523
+ * text_node.native_content = value
1524
+ * div_node.native_content = value
1525
+ *
1526
+ * doc.css("div").to_html
1527
+ * # => "<div id=\"first\">You &amp;amp; Me</div>
1528
+ * # <div id=\"second\">You &amp; Me</div>"
1529
+ *
1530
+ * See also: #content=
1531
+ */
1532
+ static VALUE
1533
+ set_native_content(VALUE self, VALUE content)
1534
+ {
1535
+ xmlNodePtr node, child, next ;
1536
+ Noko_Node_Get_Struct(self, xmlNode, node);
1537
+
1538
+ child = node->children;
1539
+ while (NULL != child) {
1540
+ next = child->next ;
1541
+ xmlUnlinkNode(child) ;
1542
+ noko_xml_document_pin_node(child);
1543
+ child = next ;
1544
+ }
1545
+
1546
+ xmlNodeSetContent(node, (xmlChar *)StringValueCStr(content));
1547
+ return content;
1548
+ }
1549
+
1550
+ /*
1551
+ * call-seq:
1552
+ * lang=
1553
+ *
1554
+ * Set the language of a node, i.e. the values of the xml:lang attribute.
1555
+ */
1556
+ static VALUE
1557
+ set_lang(VALUE self_rb, VALUE lang_rb)
1558
+ {
1559
+ xmlNodePtr self ;
1560
+ xmlChar *lang ;
1561
+
1562
+ Noko_Node_Get_Struct(self_rb, xmlNode, self);
1563
+ lang = (xmlChar *)StringValueCStr(lang_rb);
1564
+
1565
+ xmlNodeSetLang(self, lang);
1566
+
1567
+ return Qnil ;
1568
+ }
1569
+
1570
+ /*
1571
+ * call-seq:
1572
+ * lang
1573
+ *
1574
+ * Searches the language of a node, i.e. the values of the xml:lang attribute or
1575
+ * the one carried by the nearest ancestor.
1576
+ */
1577
+ static VALUE
1578
+ get_lang(VALUE self_rb)
1579
+ {
1580
+ xmlNodePtr self ;
1581
+ xmlChar *lang ;
1582
+ VALUE lang_rb ;
1583
+
1584
+ Noko_Node_Get_Struct(self_rb, xmlNode, self);
1585
+
1586
+ lang = xmlNodeGetLang(self);
1587
+ if (lang) {
1588
+ lang_rb = NOKOGIRI_STR_NEW2(lang);
1589
+ xmlFree(lang);
1590
+ return lang_rb ;
1591
+ }
1592
+
1593
+ return Qnil ;
1594
+ }
1595
+
1596
+ /* :nodoc: */
1597
+ static VALUE
1598
+ add_child(VALUE self, VALUE new_child)
1599
+ {
1600
+ return reparent_node_with(self, new_child, xmlAddChild);
1601
+ }
1602
+
1603
+ /*
1604
+ * call-seq:
1605
+ * parent
1606
+ *
1607
+ * Get the parent Node for this Node
1608
+ */
1609
+ static VALUE
1610
+ get_parent(VALUE self)
1611
+ {
1612
+ xmlNodePtr node, parent;
1613
+ Noko_Node_Get_Struct(self, xmlNode, node);
1614
+
1615
+ parent = node->parent;
1616
+ if (!parent) { return Qnil; }
1617
+
1618
+ return noko_xml_node_wrap(Qnil, parent) ;
1619
+ }
1620
+
1621
+ /*
1622
+ * call-seq:
1623
+ * name=(new_name)
1624
+ *
1625
+ * Set the name for this Node
1626
+ */
1627
+ static VALUE
1628
+ set_name(VALUE self, VALUE new_name)
1629
+ {
1630
+ xmlNodePtr node;
1631
+ Noko_Node_Get_Struct(self, xmlNode, node);
1632
+ xmlNodeSetName(node, (xmlChar *)StringValueCStr(new_name));
1633
+ return new_name;
1634
+ }
1635
+
1636
+ /*
1637
+ * call-seq:
1638
+ * name
1639
+ *
1640
+ * Returns the name for this Node
1641
+ */
1642
+ static VALUE
1643
+ get_name(VALUE self)
1644
+ {
1645
+ xmlNodePtr node;
1646
+ Noko_Node_Get_Struct(self, xmlNode, node);
1647
+ if (node->name) {
1648
+ return NOKOGIRI_STR_NEW2(node->name);
1649
+ }
1650
+ return Qnil;
1651
+ }
1652
+
1653
+ /*
1654
+ * call-seq:
1655
+ * path
1656
+ *
1657
+ * Returns the path associated with this Node
1658
+ */
1659
+ static VALUE
1660
+ rb_xml_node_path(VALUE rb_node)
1661
+ {
1662
+ xmlNodePtr c_node;
1663
+ xmlChar *c_path ;
1664
+ VALUE rval;
1665
+
1666
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1667
+
1668
+ c_path = xmlGetNodePath(c_node);
1669
+ if (c_path == NULL) {
1670
+ // see https://github.com/sparklemotion/nokogiri/issues/2250
1671
+ // this behavior is clearly undesirable, but is what libxml <= 2.9.10 returned, and so we
1672
+ // do this for now to preserve the behavior across libxml2 versions.
1673
+ rval = NOKOGIRI_STR_NEW2("?");
1674
+ } else {
1675
+ rval = NOKOGIRI_STR_NEW2(c_path);
1676
+ xmlFree(c_path);
1677
+ }
1678
+
1679
+ return rval ;
1680
+ }
1681
+
1682
+ /* :nodoc: */
1683
+ static VALUE
1684
+ add_next_sibling(VALUE self, VALUE new_sibling)
1685
+ {
1686
+ return reparent_node_with(self, new_sibling, xmlAddNextSibling) ;
1687
+ }
1688
+
1689
+ /* :nodoc: */
1690
+ static VALUE
1691
+ add_previous_sibling(VALUE self, VALUE new_sibling)
1692
+ {
1693
+ return reparent_node_with(self, new_sibling, xmlAddPrevSibling) ;
1694
+ }
1695
+
1696
+ /*
1697
+ * call-seq:
1698
+ * native_write_to(io, encoding, indent_string, options)
1699
+ *
1700
+ * Write this Node to +io+ with +encoding+, +indent_string+, and +options+
1701
+ */
1702
+ static VALUE
1703
+ native_write_to(
1704
+ VALUE self,
1705
+ VALUE io,
1706
+ VALUE encoding,
1707
+ VALUE indent_string,
1708
+ VALUE options
1709
+ )
1710
+ {
1711
+ xmlNodePtr node;
1712
+ const char *before_indent;
1713
+ xmlSaveCtxtPtr savectx;
1714
+
1715
+ Noko_Node_Get_Struct(self, xmlNode, node);
1716
+
1717
+ xmlIndentTreeOutput = 1;
1718
+
1719
+ before_indent = xmlTreeIndentString;
1720
+
1721
+ xmlTreeIndentString = StringValueCStr(indent_string);
1722
+
1723
+ savectx = xmlSaveToIO(
1724
+ (xmlOutputWriteCallback)noko_io_write,
1725
+ (xmlOutputCloseCallback)noko_io_close,
1726
+ (void *)io,
1727
+ RTEST(encoding) ? StringValueCStr(encoding) : NULL,
1728
+ (int)NUM2INT(options)
1729
+ );
1730
+
1731
+ xmlSaveTree(savectx, node);
1732
+ xmlSaveClose(savectx);
1733
+
1734
+ xmlTreeIndentString = before_indent;
1735
+ return io;
1736
+ }
1737
+
1738
+
1739
+ static inline void
1740
+ output_partial_string(VALUE out, char const *str, size_t length)
1741
+ {
1742
+ if (length) {
1743
+ rb_enc_str_buf_cat(out, str, (long)length, rb_utf8_encoding());
1744
+ }
1745
+ }
1746
+
1747
+ static inline void
1748
+ output_char(VALUE out, char ch)
1749
+ {
1750
+ output_partial_string(out, &ch, 1);
1751
+ }
1752
+
1753
+ static inline void
1754
+ output_string(VALUE out, char const *str)
1755
+ {
1756
+ output_partial_string(out, str, strlen(str));
1757
+ }
1758
+
1759
+ static inline void
1760
+ output_tagname(VALUE out, xmlNodePtr elem)
1761
+ {
1762
+ // Elements in the HTML, MathML, and SVG namespaces do not use a namespace
1763
+ // prefix in the HTML syntax.
1764
+ char const *name = (char const *)elem->name;
1765
+ xmlNsPtr ns = elem->ns;
1766
+ if (ns && ns->href && ns->prefix
1767
+ && strcmp((char const *)ns->href, "http://www.w3.org/1999/xhtml")
1768
+ && strcmp((char const *)ns->href, "http://www.w3.org/1998/Math/MathML")
1769
+ && strcmp((char const *)ns->href, "http://www.w3.org/2000/svg")) {
1770
+ output_string(out, (char const *)elem->ns->prefix);
1771
+ output_char(out, ':');
1772
+ char const *colon = strchr(name, ':');
1773
+ if (colon) {
1774
+ name = colon + 1;
1775
+ }
1776
+ }
1777
+ output_string(out, name);
1778
+ }
1779
+
1780
+ static inline void
1781
+ output_attr_name(VALUE out, xmlAttrPtr attr)
1782
+ {
1783
+ xmlNsPtr ns = attr->ns;
1784
+ char const *name = (char const *)attr->name;
1785
+ if (ns && ns->href) {
1786
+ char const *uri = (char const *)ns->href;
1787
+ char const *localname = strchr(name, ':');
1788
+ if (localname) {
1789
+ ++localname;
1790
+ } else {
1791
+ localname = name;
1792
+ }
1793
+
1794
+ if (!strcmp(uri, "http://www.w3.org/XML/1998/namespace")) {
1795
+ output_string(out, "xml:");
1796
+ name = localname;
1797
+ } else if (!strcmp(uri, "http://www.w3.org/2000/xmlns/")) {
1798
+ // xmlns:xmlns -> xmlns
1799
+ // xmlns:foo -> xmlns:foo
1800
+ if (strcmp(localname, "xmlns")) {
1801
+ output_string(out, "xmlns:");
1802
+ }
1803
+ name = localname;
1804
+ } else if (!strcmp(uri, "http://www.w3.org/1999/xlink")) {
1805
+ output_string(out, "xlink:");
1806
+ name = localname;
1807
+ } else if (ns->prefix) {
1808
+ output_string(out, (char const *)ns->prefix);
1809
+ output_char(out, ':');
1810
+ name = localname;
1811
+ }
1812
+ }
1813
+ output_string(out, name);
1814
+ }
1815
+
1816
+ static void
1817
+ output_escaped_string(VALUE out, xmlChar const *start, bool attr)
1818
+ {
1819
+ xmlChar const *next = start;
1820
+ int ch;
1821
+
1822
+ while ((ch = *next) != 0) {
1823
+ char const *replacement = NULL;
1824
+ size_t replaced_bytes = 1;
1825
+ if (ch == '&') {
1826
+ replacement = "&amp;";
1827
+ } else if (ch == 0xC2 && next[1] == 0xA0) {
1828
+ // U+00A0 NO-BREAK SPACE has the UTF-8 encoding C2 A0.
1829
+ replacement = "&nbsp;";
1830
+ replaced_bytes = 2;
1831
+ } else if (attr && ch == '"') {
1832
+ replacement = "&quot;";
1833
+ } else if (!attr && ch == '<') {
1834
+ replacement = "&lt;";
1835
+ } else if (!attr && ch == '>') {
1836
+ replacement = "&gt;";
1837
+ } else {
1838
+ ++next;
1839
+ continue;
1840
+ }
1841
+ output_partial_string(out, (char const *)start, (size_t)(next - start));
1842
+ output_string(out, replacement);
1843
+ next += replaced_bytes;
1844
+ start = next;
1845
+ }
1846
+ output_partial_string(out, (char const *)start, (size_t)(next - start));
1847
+ }
1848
+
1849
+ static bool
1850
+ should_prepend_newline(xmlNodePtr node)
1851
+ {
1852
+ char const *name = (char const *)node->name;
1853
+ xmlNodePtr child = node->children;
1854
+
1855
+ if (!name || !child || (strcmp(name, "pre") && strcmp(name, "textarea") && strcmp(name, "listing"))) {
1856
+ return false;
1857
+ }
1858
+
1859
+ return child->type == XML_TEXT_NODE && child->content && child->content[0] == '\n';
1860
+ }
1861
+
1862
+ static VALUE
1863
+ rb_prepend_newline(VALUE self)
1864
+ {
1865
+ xmlNodePtr node;
1866
+ Noko_Node_Get_Struct(self, xmlNode, node);
1867
+ return should_prepend_newline(node) ? Qtrue : Qfalse;
1868
+ }
1869
+
1870
+ static bool
1871
+ is_one_of(xmlNodePtr node, char const *const *tagnames, size_t num_tagnames)
1872
+ {
1873
+ char const *name = (char const *)node->name;
1874
+ if (name == NULL) { // fragments don't have a name
1875
+ return false;
1876
+ }
1877
+
1878
+ if (node->ns != NULL) {
1879
+ // if the node has a namespace, it's in a foreign context and is not one of the HTML tags we're
1880
+ // matching against.
1881
+ return false;
1882
+ }
1883
+
1884
+ for (size_t idx = 0; idx < num_tagnames; ++idx) {
1885
+ if (!strcmp(name, tagnames[idx])) {
1886
+ return true;
1887
+ }
1888
+ }
1889
+ return false;
1890
+ }
1891
+
1892
+ static void
1893
+ output_node(
1894
+ VALUE out,
1895
+ xmlNodePtr node,
1896
+ bool preserve_newline
1897
+ )
1898
+ {
1899
+ static char const *const VOID_ELEMENTS[] = {
1900
+ "area", "base", "basefont", "bgsound", "br", "col", "embed", "frame", "hr",
1901
+ "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr",
1902
+ };
1903
+
1904
+ static char const *const UNESCAPED_TEXT_ELEMENTS[] = {
1905
+ "style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript",
1906
+ };
1907
+
1908
+ switch (node->type) {
1909
+ case XML_ELEMENT_NODE:
1910
+ // Serialize the start tag.
1911
+ output_char(out, '<');
1912
+ output_tagname(out, node);
1913
+
1914
+ // Add attributes.
1915
+ for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
1916
+ output_char(out, ' ');
1917
+ output_node(out, (xmlNodePtr)attr, preserve_newline);
1918
+ }
1919
+ output_char(out, '>');
1920
+
1921
+ // Add children and end tag if element is not void.
1922
+ if (!is_one_of(node, VOID_ELEMENTS, sizeof VOID_ELEMENTS / sizeof VOID_ELEMENTS[0])) {
1923
+ if (preserve_newline && should_prepend_newline(node)) {
1924
+ output_char(out, '\n');
1925
+ }
1926
+ for (xmlNodePtr child = node->children; child; child = child->next) {
1927
+ output_node(out, child, preserve_newline);
1928
+ }
1929
+ output_string(out, "</");
1930
+ output_tagname(out, node);
1931
+ output_char(out, '>');
1932
+ }
1933
+ break;
1934
+
1935
+ case XML_ATTRIBUTE_NODE: {
1936
+ xmlAttrPtr attr = (xmlAttrPtr)node;
1937
+ output_attr_name(out, attr);
1938
+ if (attr->children) {
1939
+ output_string(out, "=\"");
1940
+ xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
1941
+ output_escaped_string(out, value, true);
1942
+ xmlFree(value);
1943
+ output_char(out, '"');
1944
+ } else {
1945
+ // Output name=""
1946
+ output_string(out, "=\"\"");
1947
+ }
1948
+ }
1949
+ break;
1950
+
1951
+ case XML_TEXT_NODE:
1952
+ if (node->parent
1953
+ && is_one_of(node->parent, UNESCAPED_TEXT_ELEMENTS,
1954
+ sizeof UNESCAPED_TEXT_ELEMENTS / sizeof UNESCAPED_TEXT_ELEMENTS[0])) {
1955
+ output_string(out, (char const *)node->content);
1956
+ } else {
1957
+ output_escaped_string(out, node->content, false);
1958
+ }
1959
+ break;
1960
+
1961
+ case XML_CDATA_SECTION_NODE:
1962
+ output_string(out, "<![CDATA[");
1963
+ output_string(out, (char const *)node->content);
1964
+ output_string(out, "]]>");
1965
+ break;
1966
+
1967
+ case XML_COMMENT_NODE:
1968
+ output_string(out, "<!--");
1969
+ output_string(out, (char const *)node->content);
1970
+ output_string(out, "-->");
1971
+ break;
1972
+
1973
+ case XML_PI_NODE:
1974
+ output_string(out, "<?");
1975
+ output_string(out, (char const *)node->content);
1976
+ output_char(out, '>');
1977
+ break;
1978
+
1979
+ case XML_DOCUMENT_TYPE_NODE:
1980
+ case XML_DTD_NODE:
1981
+ output_string(out, "<!DOCTYPE ");
1982
+ output_string(out, (char const *)node->name);
1983
+ output_string(out, ">");
1984
+ break;
1985
+
1986
+ case XML_DOCUMENT_NODE:
1987
+ case XML_DOCUMENT_FRAG_NODE:
1988
+ case XML_HTML_DOCUMENT_NODE:
1989
+ for (xmlNodePtr child = node->children; child; child = child->next) {
1990
+ output_node(out, child, preserve_newline);
1991
+ }
1992
+ break;
1993
+
1994
+ default:
1995
+ rb_raise(rb_eRuntimeError, "Unsupported document node (%d); this is a bug in Nokogiri", node->type);
1996
+ break;
1997
+ }
1998
+ }
1999
+
2000
+ static VALUE
2001
+ html_standard_serialize(
2002
+ VALUE self,
2003
+ VALUE preserve_newline
2004
+ )
2005
+ {
2006
+ xmlNodePtr node;
2007
+ Noko_Node_Get_Struct(self, xmlNode, node);
2008
+ VALUE output = rb_str_buf_new(4096);
2009
+ output_node(output, node, RTEST(preserve_newline));
2010
+ return output;
2011
+ }
2012
+
2013
+ /*
2014
+ * :call-seq:
2015
+ * line() → Integer
2016
+ *
2017
+ * [Returns] The line number of this Node.
2018
+ *
2019
+ * ---
2020
+ *
2021
+ * <b> ⚠ The CRuby and JRuby implementations differ in important ways! </b>
2022
+ *
2023
+ * Semantic differences:
2024
+ * - The CRuby method reflects the node's line number <i>in the parsed string</i>
2025
+ * - The JRuby method reflects the node's line number <i>in the final DOM structure</i> after
2026
+ * corrections have been applied
2027
+ *
2028
+ * Performance differences:
2029
+ * - The CRuby method is {O(1)}[https://en.wikipedia.org/wiki/Time_complexity#Constant_time]
2030
+ * (constant time)
2031
+ * - The JRuby method is {O(n)}[https://en.wikipedia.org/wiki/Time_complexity#Linear_time] (linear
2032
+ * time, where n is the number of nodes before/above the element in the DOM)
2033
+ *
2034
+ * If you'd like to help improve the JRuby implementation, please review these issues and reach out
2035
+ * to the maintainers:
2036
+ * - https://github.com/sparklemotion/nokogiri/issues/1223
2037
+ * - https://github.com/sparklemotion/nokogiri/pull/2177
2038
+ * - https://github.com/sparklemotion/nokogiri/issues/2380
2039
+ */
2040
+ static VALUE
2041
+ rb_xml_node_line(VALUE rb_node)
2042
+ {
2043
+ xmlNodePtr c_node;
2044
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
2045
+
2046
+ return LONG2NUM(xmlGetLineNo(c_node));
2047
+ }
2048
+
2049
+ /*
2050
+ * call-seq:
2051
+ * line=(num)
2052
+ *
2053
+ * Sets the line for this Node. num must be less than 65535.
2054
+ */
2055
+ static VALUE
2056
+ rb_xml_node_line_set(VALUE rb_node, VALUE rb_line_number)
2057
+ {
2058
+ xmlNodePtr c_node;
2059
+ int line_number = NUM2INT(rb_line_number);
2060
+
2061
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
2062
+
2063
+ // libxml2 optionally uses xmlNode.psvi to store longer line numbers, but only for text nodes.
2064
+ // search for "psvi" in SAX2.c and tree.c to learn more.
2065
+ if (line_number < 65535) {
2066
+ c_node->line = (short unsigned)line_number;
2067
+ } else {
2068
+ c_node->line = 65535;
2069
+ if (c_node->type == XML_TEXT_NODE) {
2070
+ c_node->psvi = (void *)(ptrdiff_t)line_number;
2071
+ }
2072
+ }
2073
+
2074
+ return rb_line_number;
2075
+ }
2076
+
2077
+ /* :nodoc: documented in lib/nokogiri/xml/node.rb */
2078
+ static VALUE
2079
+ rb_xml_node_new(int argc, VALUE *argv, VALUE klass)
2080
+ {
2081
+ xmlNodePtr c_document_node;
2082
+ xmlNodePtr c_node;
2083
+ VALUE rb_name;
2084
+ VALUE rb_document_node;
2085
+ VALUE rest;
2086
+ VALUE rb_node;
2087
+
2088
+ rb_scan_args(argc, argv, "2*", &rb_name, &rb_document_node, &rest);
2089
+
2090
+ if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlNode)) {
2091
+ rb_raise(rb_eArgError, "document must be a Nokogiri::XML::Node");
2092
+ }
2093
+ if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlDocument)) {
2094
+ NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.13.0, remove in v1.17.0
2095
+ }
2096
+ Noko_Node_Get_Struct(rb_document_node, xmlNode, c_document_node);
2097
+
2098
+ c_node = xmlNewNode(NULL, (xmlChar *)StringValueCStr(rb_name));
2099
+ c_node->doc = c_document_node->doc;
2100
+ noko_xml_document_pin_node(c_node);
2101
+
2102
+ rb_node = noko_xml_node_wrap(
2103
+ klass == cNokogiriXmlNode ? (VALUE)NULL : klass,
2104
+ c_node
2105
+ );
2106
+ rb_obj_call_init(rb_node, argc, argv);
2107
+
2108
+ if (rb_block_given_p()) { rb_yield(rb_node); }
2109
+
2110
+ return rb_node;
2111
+ }
2112
+
2113
+ /*
2114
+ * call-seq:
2115
+ * dump_html
2116
+ *
2117
+ * Returns the Node as html.
2118
+ */
2119
+ static VALUE
2120
+ dump_html(VALUE self)
2121
+ {
2122
+ xmlBufferPtr buf ;
2123
+ xmlNodePtr node ;
2124
+ VALUE html;
2125
+
2126
+ Noko_Node_Get_Struct(self, xmlNode, node);
2127
+
2128
+ buf = xmlBufferCreate() ;
2129
+ htmlNodeDump(buf, node->doc, node);
2130
+ html = NOKOGIRI_STR_NEW2(xmlBufferContent(buf));
2131
+ xmlBufferFree(buf);
2132
+ return html ;
2133
+ }
2134
+
2135
+ /*
2136
+ * call-seq:
2137
+ * compare(other)
2138
+ *
2139
+ * Compare this Node to +other+ with respect to their Document
2140
+ */
2141
+ static VALUE
2142
+ compare(VALUE self, VALUE _other)
2143
+ {
2144
+ xmlNodePtr node, other;
2145
+ Noko_Node_Get_Struct(self, xmlNode, node);
2146
+ Noko_Node_Get_Struct(_other, xmlNode, other);
2147
+
2148
+ return INT2NUM(xmlXPathCmpNodes(other, node));
2149
+ }
2150
+
2151
+
2152
+ /*
2153
+ * call-seq:
2154
+ * process_xincludes(flags)
2155
+ *
2156
+ * Loads and substitutes all xinclude elements below the node. The
2157
+ * parser context will be initialized with +flags+.
2158
+ */
2159
+ static VALUE
2160
+ noko_xml_node__process_xincludes(VALUE rb_node, VALUE rb_flags)
2161
+ {
2162
+ int status ;
2163
+ xmlNodePtr c_node;
2164
+ VALUE rb_errors = rb_ary_new();
2165
+ libxmlStructuredErrorHandlerState handler_state;
2166
+
2167
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
2168
+
2169
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
2170
+
2171
+ status = xmlXIncludeProcessTreeFlags(c_node, (int)NUM2INT(rb_flags));
2172
+
2173
+ noko__structured_error_func_restore(&handler_state);
2174
+
2175
+ if (status < 0) {
2176
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
2177
+
2178
+ if (RB_TEST(exception)) {
2179
+ rb_exc_raise(exception);
2180
+ } else {
2181
+ rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
2182
+ }
2183
+ }
2184
+
2185
+ return rb_node;
2186
+ }
2187
+
2188
+
2189
+ /* TODO: DOCUMENT ME */
2190
+ static VALUE
2191
+ in_context(VALUE self, VALUE _str, VALUE _options)
2192
+ {
2193
+ xmlNodePtr node, list = 0, tmp, child_iter, node_children, doc_children;
2194
+ xmlNodeSetPtr set;
2195
+ xmlParserErrors error;
2196
+ VALUE doc, err;
2197
+ int doc_is_empty;
2198
+
2199
+ Noko_Node_Get_Struct(self, xmlNode, node);
2200
+
2201
+ doc = DOC_RUBY_OBJECT(node->doc);
2202
+ err = rb_iv_get(doc, "@errors");
2203
+ doc_is_empty = (node->doc->children == NULL) ? 1 : 0;
2204
+ node_children = node->children;
2205
+ doc_children = node->doc->children;
2206
+
2207
+ xmlSetStructuredErrorFunc((void *)err, noko__error_array_pusher);
2208
+
2209
+ /* This function adds a fake node to the child of +node+. If the parser
2210
+ * does not exit cleanly with XML_ERR_OK, the list is freed. This can
2211
+ * leave the child pointers in a bad state if they were originally empty.
2212
+ *
2213
+ * http://git.gnome.org/browse/libxml2/tree/parser.c#n13177
2214
+ * */
2215
+ error = xmlParseInNodeContext(node, StringValuePtr(_str),
2216
+ (int)RSTRING_LEN(_str),
2217
+ (int)NUM2INT(_options), &list);
2218
+
2219
+ /* xmlParseInNodeContext should not mutate the original document or node,
2220
+ * so reassigning these pointers should be OK. The reason we're reassigning
2221
+ * is because if there were errors, it's possible for the child pointers
2222
+ * to be manipulated. */
2223
+ if (error != XML_ERR_OK) {
2224
+ node->doc->children = doc_children;
2225
+ node->children = node_children;
2226
+ }
2227
+
2228
+ /* make sure parent/child pointers are coherent so an unlink will work
2229
+ * properly (#331)
2230
+ */
2231
+ child_iter = node->doc->children ;
2232
+ while (child_iter) {
2233
+ child_iter->parent = (xmlNodePtr)node->doc;
2234
+ child_iter = child_iter->next;
2235
+ }
2236
+
2237
+ xmlSetStructuredErrorFunc(NULL, NULL);
2238
+
2239
+ /*
2240
+ * Workaround for a libxml2 bug where a parsing error may leave a broken
2241
+ * node reference in node->doc->children.
2242
+ *
2243
+ * https://bugzilla.gnome.org/show_bug.cgi?id=668155
2244
+ *
2245
+ * This workaround is limited to when a parse error occurs, the document
2246
+ * went from having no children to having children, and the context node is
2247
+ * part of a document fragment.
2248
+ *
2249
+ * TODO: This was fixed in libxml 2.8.0 by 71a243d
2250
+ */
2251
+ if (error != XML_ERR_OK && doc_is_empty && node->doc->children != NULL) {
2252
+ child_iter = node;
2253
+ while (child_iter->parent) {
2254
+ child_iter = child_iter->parent;
2255
+ }
2256
+
2257
+ if (child_iter->type == XML_DOCUMENT_FRAG_NODE) {
2258
+ node->doc->children = NULL;
2259
+ }
2260
+ }
2261
+
2262
+ /* FIXME: This probably needs to handle more constants... */
2263
+ switch (error) {
2264
+ case XML_ERR_INTERNAL_ERROR:
2265
+ case XML_ERR_NO_MEMORY:
2266
+ rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error);
2267
+ break;
2268
+ default:
2269
+ break;
2270
+ }
2271
+
2272
+ set = xmlXPathNodeSetCreate(NULL);
2273
+
2274
+ while (list) {
2275
+ tmp = list->next;
2276
+ list->next = NULL;
2277
+ xmlXPathNodeSetAddUnique(set, list);
2278
+ noko_xml_document_pin_node(list);
2279
+ list = tmp;
2280
+ }
2281
+
2282
+ return noko_xml_node_set_wrap(set, doc);
2283
+ }
2284
+
2285
+ /* :nodoc: */
2286
+ VALUE
2287
+ rb_xml_node_data_ptr_eh(VALUE self)
2288
+ {
2289
+ xmlNodePtr c_node;
2290
+ Noko_Node_Get_Struct(self, xmlNode, c_node);
2291
+ return c_node ? Qtrue : Qfalse;
2292
+ }
2293
+
2294
+ VALUE
2295
+ noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
2296
+ {
2297
+ VALUE rb_document, rb_node_cache, rb_node;
2298
+ nokogiriTuplePtr node_has_a_document;
2299
+ xmlDocPtr c_doc;
2300
+
2301
+ assert(c_node);
2302
+
2303
+ if (c_node->type == XML_DOCUMENT_NODE || c_node->type == XML_HTML_DOCUMENT_NODE) {
2304
+ return DOC_RUBY_OBJECT(c_node->doc);
2305
+ }
2306
+
2307
+ c_doc = c_node->doc;
2308
+
2309
+ // Nodes yielded from XML::Reader don't have a fully-realized Document
2310
+ node_has_a_document = DOC_RUBY_OBJECT_TEST(c_doc);
2311
+
2312
+ if (c_node->_private && node_has_a_document) {
2313
+ return (VALUE)c_node->_private;
2314
+ }
2315
+
2316
+ if (!RTEST(rb_class)) {
2317
+ switch (c_node->type) {
2318
+ case XML_ELEMENT_NODE:
2319
+ rb_class = cNokogiriXmlElement;
2320
+ break;
2321
+ case XML_TEXT_NODE:
2322
+ rb_class = cNokogiriXmlText;
2323
+ break;
2324
+ case XML_ATTRIBUTE_NODE:
2325
+ rb_class = cNokogiriXmlAttr;
2326
+ break;
2327
+ case XML_ENTITY_REF_NODE:
2328
+ rb_class = cNokogiriXmlEntityReference;
2329
+ break;
2330
+ case XML_COMMENT_NODE:
2331
+ rb_class = cNokogiriXmlComment;
2332
+ break;
2333
+ case XML_DOCUMENT_FRAG_NODE:
2334
+ rb_class = cNokogiriXmlDocumentFragment;
2335
+ break;
2336
+ case XML_PI_NODE:
2337
+ rb_class = cNokogiriXmlProcessingInstruction;
2338
+ break;
2339
+ case XML_ENTITY_DECL:
2340
+ rb_class = cNokogiriXmlEntityDecl;
2341
+ break;
2342
+ case XML_CDATA_SECTION_NODE:
2343
+ rb_class = cNokogiriXmlCData;
2344
+ break;
2345
+ case XML_DTD_NODE:
2346
+ rb_class = cNokogiriXmlDtd;
2347
+ break;
2348
+ case XML_ATTRIBUTE_DECL:
2349
+ rb_class = cNokogiriXmlAttributeDecl;
2350
+ break;
2351
+ case XML_ELEMENT_DECL:
2352
+ rb_class = cNokogiriXmlElementDecl;
2353
+ break;
2354
+ default:
2355
+ rb_class = cNokogiriXmlNode;
2356
+ }
2357
+ }
2358
+
2359
+ rb_node = _xml_node_alloc(rb_class);
2360
+ _xml_node_data_ptr_set(rb_node, c_node);
2361
+
2362
+ if (node_has_a_document) {
2363
+ rb_document = DOC_RUBY_OBJECT(c_doc);
2364
+ rb_node_cache = DOC_NODE_CACHE(c_doc);
2365
+ rb_ary_push(rb_node_cache, rb_node);
2366
+ rb_funcall(rb_document, id_decorate, 1, rb_node);
2367
+ }
2368
+
2369
+ return rb_node ;
2370
+ }
2371
+
2372
+
2373
+ /*
2374
+ * return Array<Nokogiri::XML::Attr> containing the node's attributes
2375
+ */
2376
+ VALUE
2377
+ noko_xml_node_attrs(xmlNodePtr c_node)
2378
+ {
2379
+ VALUE rb_properties = rb_ary_new();
2380
+ xmlAttrPtr c_property;
2381
+
2382
+ c_property = c_node->properties ;
2383
+ while (c_property != NULL) {
2384
+ rb_ary_push(rb_properties, noko_xml_node_wrap(Qnil, (xmlNodePtr)c_property));
2385
+ c_property = c_property->next ;
2386
+ }
2387
+
2388
+ return rb_properties;
2389
+ }
2390
+
2391
+ void
2392
+ noko_init_xml_node(void)
2393
+ {
2394
+ cNokogiriXmlNode = rb_define_class_under(mNokogiriXml, "Node", rb_cObject);
2395
+
2396
+ rb_define_alloc_func(cNokogiriXmlNode, _xml_node_alloc);
2397
+
2398
+ rb_define_singleton_method(cNokogiriXmlNode, "new", rb_xml_node_new, -1);
2399
+
2400
+ rb_define_method(cNokogiriXmlNode, "add_namespace_definition", rb_xml_node_add_namespace_definition, 2);
2401
+ rb_define_method(cNokogiriXmlNode, "attribute", rb_xml_node_attribute, 1);
2402
+ rb_define_method(cNokogiriXmlNode, "attribute_nodes", rb_xml_node_attribute_nodes, 0);
2403
+ rb_define_method(cNokogiriXmlNode, "attribute_with_ns", rb_xml_node_attribute_with_ns, 2);
2404
+ rb_define_method(cNokogiriXmlNode, "blank?", rb_xml_node_blank_eh, 0);
2405
+ rb_define_method(cNokogiriXmlNode, "child", rb_xml_node_child, 0);
2406
+ rb_define_method(cNokogiriXmlNode, "children", rb_xml_node_children, 0);
2407
+ rb_define_method(cNokogiriXmlNode, "content", rb_xml_node_content, 0);
2408
+ rb_define_method(cNokogiriXmlNode, "create_external_subset", create_external_subset, 3);
2409
+ rb_define_method(cNokogiriXmlNode, "create_internal_subset", create_internal_subset, 3);
2410
+ rb_define_method(cNokogiriXmlNode, "data_ptr?", rb_xml_node_data_ptr_eh, 0);
2411
+ rb_define_method(cNokogiriXmlNode, "document", rb_xml_node_document, 0);
2412
+ rb_define_method(cNokogiriXmlNode, "element_children", rb_xml_node_element_children, 0);
2413
+ rb_define_method(cNokogiriXmlNode, "encode_special_chars", encode_special_chars, 1);
2414
+ rb_define_method(cNokogiriXmlNode, "external_subset", external_subset, 0);
2415
+ rb_define_method(cNokogiriXmlNode, "first_element_child", rb_xml_node_first_element_child, 0);
2416
+ rb_define_method(cNokogiriXmlNode, "internal_subset", internal_subset, 0);
2417
+ rb_define_method(cNokogiriXmlNode, "key?", key_eh, 1);
2418
+ rb_define_method(cNokogiriXmlNode, "lang", get_lang, 0);
2419
+ rb_define_method(cNokogiriXmlNode, "lang=", set_lang, 1);
2420
+ rb_define_method(cNokogiriXmlNode, "last_element_child", rb_xml_node_last_element_child, 0);
2421
+ rb_define_method(cNokogiriXmlNode, "line", rb_xml_node_line, 0);
2422
+ rb_define_method(cNokogiriXmlNode, "line=", rb_xml_node_line_set, 1);
2423
+ rb_define_method(cNokogiriXmlNode, "namespace", rb_xml_node_namespace, 0);
2424
+ rb_define_method(cNokogiriXmlNode, "namespace_definitions", namespace_definitions, 0);
2425
+ rb_define_method(cNokogiriXmlNode, "namespace_scopes", rb_xml_node_namespace_scopes, 0);
2426
+ rb_define_method(cNokogiriXmlNode, "namespaced_key?", namespaced_key_eh, 2);
2427
+ rb_define_method(cNokogiriXmlNode, "native_content=", set_native_content, 1);
2428
+ rb_define_method(cNokogiriXmlNode, "next_element", next_element, 0);
2429
+ rb_define_method(cNokogiriXmlNode, "next_sibling", next_sibling, 0);
2430
+ rb_define_method(cNokogiriXmlNode, "node_name", get_name, 0);
2431
+ rb_define_method(cNokogiriXmlNode, "node_name=", set_name, 1);
2432
+ rb_define_method(cNokogiriXmlNode, "node_type", node_type, 0);
2433
+ rb_define_method(cNokogiriXmlNode, "parent", get_parent, 0);
2434
+ rb_define_method(cNokogiriXmlNode, "path", rb_xml_node_path, 0);
2435
+ rb_define_method(cNokogiriXmlNode, "pointer_id", rb_xml_node_pointer_id, 0);
2436
+ rb_define_method(cNokogiriXmlNode, "previous_element", previous_element, 0);
2437
+ rb_define_method(cNokogiriXmlNode, "previous_sibling", previous_sibling, 0);
2438
+ rb_define_method(cNokogiriXmlNode, "unlink", unlink_node, 0);
2439
+
2440
+ rb_define_protected_method(cNokogiriXmlNode, "initialize_copy_with_args", rb_xml_node_initialize_copy_with_args, 3);
2441
+
2442
+ rb_define_private_method(cNokogiriXmlNode, "add_child_node", add_child, 1);
2443
+ rb_define_private_method(cNokogiriXmlNode, "add_next_sibling_node", add_next_sibling, 1);
2444
+ rb_define_private_method(cNokogiriXmlNode, "add_previous_sibling_node", add_previous_sibling, 1);
2445
+ rb_define_private_method(cNokogiriXmlNode, "compare", compare, 1);
2446
+ rb_define_private_method(cNokogiriXmlNode, "dump_html", dump_html, 0);
2447
+ rb_define_private_method(cNokogiriXmlNode, "get", get, 1);
2448
+ rb_define_private_method(cNokogiriXmlNode, "in_context", in_context, 2);
2449
+ rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4);
2450
+ rb_define_private_method(cNokogiriXmlNode, "prepend_newline?", rb_prepend_newline, 0);
2451
+ rb_define_private_method(cNokogiriXmlNode, "html_standard_serialize", html_standard_serialize, 1);
2452
+ rb_define_private_method(cNokogiriXmlNode, "process_xincludes", noko_xml_node__process_xincludes, 1);
2453
+ rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1);
2454
+ rb_define_private_method(cNokogiriXmlNode, "set", set, 2);
2455
+ rb_define_private_method(cNokogiriXmlNode, "set_namespace", set_namespace, 1);
2456
+
2457
+ id_decorate = rb_intern("decorate");
2458
+ id_decorate_bang = rb_intern("decorate!");
2459
+ }