nokogiri 1.18.0.rc1-x86_64-linux-musl

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (203) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +293 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +42 -0
  8. data/ext/nokogiri/depend +38 -0
  9. data/ext/nokogiri/extconf.rb +1173 -0
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/include/libexslt/exslt.h +108 -0
  18. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  19. data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
  20. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
  21. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  22. data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
  23. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
  24. data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
  25. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  26. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  27. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  28. data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
  29. data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
  30. data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
  31. data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
  32. data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
  33. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  34. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
  35. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
  36. data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
  37. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
  38. data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
  39. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
  40. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
  41. data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
  42. data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
  43. data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
  44. data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
  45. data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
  46. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
  47. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  48. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
  65. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
  66. data/ext/nokogiri/include/libxslt/attributes.h +39 -0
  67. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  68. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  69. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  70. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  71. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  72. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  73. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  74. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  75. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  76. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  77. data/ext/nokogiri/include/libxslt/security.h +104 -0
  78. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  79. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  80. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  81. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  82. data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
  83. data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
  84. data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
  85. data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
  86. data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
  87. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  88. data/ext/nokogiri/nokogiri.c +294 -0
  89. data/ext/nokogiri/nokogiri.h +238 -0
  90. data/ext/nokogiri/test_global_handlers.c +40 -0
  91. data/ext/nokogiri/xml_attr.c +103 -0
  92. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  93. data/ext/nokogiri/xml_cdata.c +62 -0
  94. data/ext/nokogiri/xml_comment.c +57 -0
  95. data/ext/nokogiri/xml_document.c +784 -0
  96. data/ext/nokogiri/xml_document_fragment.c +29 -0
  97. data/ext/nokogiri/xml_dtd.c +208 -0
  98. data/ext/nokogiri/xml_element_content.c +131 -0
  99. data/ext/nokogiri/xml_element_decl.c +69 -0
  100. data/ext/nokogiri/xml_encoding_handler.c +112 -0
  101. data/ext/nokogiri/xml_entity_decl.c +112 -0
  102. data/ext/nokogiri/xml_entity_reference.c +50 -0
  103. data/ext/nokogiri/xml_namespace.c +181 -0
  104. data/ext/nokogiri/xml_node.c +2459 -0
  105. data/ext/nokogiri/xml_node_set.c +518 -0
  106. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  107. data/ext/nokogiri/xml_reader.c +777 -0
  108. data/ext/nokogiri/xml_relax_ng.c +149 -0
  109. data/ext/nokogiri/xml_sax_parser.c +403 -0
  110. data/ext/nokogiri/xml_sax_parser_context.c +390 -0
  111. data/ext/nokogiri/xml_sax_push_parser.c +206 -0
  112. data/ext/nokogiri/xml_schema.c +226 -0
  113. data/ext/nokogiri/xml_syntax_error.c +93 -0
  114. data/ext/nokogiri/xml_text.c +59 -0
  115. data/ext/nokogiri/xml_xpath_context.c +502 -0
  116. data/ext/nokogiri/xslt_stylesheet.c +421 -0
  117. data/gumbo-parser/CHANGES.md +63 -0
  118. data/gumbo-parser/Makefile +129 -0
  119. data/gumbo-parser/THANKS +27 -0
  120. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  121. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  122. data/lib/nokogiri/3.3/nokogiri.so +0 -0
  123. data/lib/nokogiri/3.4/nokogiri.so +0 -0
  124. data/lib/nokogiri/class_resolver.rb +67 -0
  125. data/lib/nokogiri/css/node.rb +58 -0
  126. data/lib/nokogiri/css/parser.rb +772 -0
  127. data/lib/nokogiri/css/parser.y +277 -0
  128. data/lib/nokogiri/css/parser_extras.rb +36 -0
  129. data/lib/nokogiri/css/selector_cache.rb +38 -0
  130. data/lib/nokogiri/css/syntax_error.rb +9 -0
  131. data/lib/nokogiri/css/tokenizer.rb +155 -0
  132. data/lib/nokogiri/css/tokenizer.rex +57 -0
  133. data/lib/nokogiri/css/xpath_visitor.rb +375 -0
  134. data/lib/nokogiri/css.rb +132 -0
  135. data/lib/nokogiri/decorators/slop.rb +42 -0
  136. data/lib/nokogiri/encoding_handler.rb +57 -0
  137. data/lib/nokogiri/extension.rb +32 -0
  138. data/lib/nokogiri/gumbo.rb +15 -0
  139. data/lib/nokogiri/html.rb +48 -0
  140. data/lib/nokogiri/html4/builder.rb +37 -0
  141. data/lib/nokogiri/html4/document.rb +235 -0
  142. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  143. data/lib/nokogiri/html4/element_description.rb +25 -0
  144. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  145. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  146. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  147. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  148. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  149. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  150. data/lib/nokogiri/html4.rb +42 -0
  151. data/lib/nokogiri/html5/builder.rb +40 -0
  152. data/lib/nokogiri/html5/document.rb +199 -0
  153. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  154. data/lib/nokogiri/html5/node.rb +103 -0
  155. data/lib/nokogiri/html5.rb +368 -0
  156. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  157. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  158. data/lib/nokogiri/syntax_error.rb +6 -0
  159. data/lib/nokogiri/version/constant.rb +6 -0
  160. data/lib/nokogiri/version/info.rb +224 -0
  161. data/lib/nokogiri/version.rb +4 -0
  162. data/lib/nokogiri/xml/attr.rb +66 -0
  163. data/lib/nokogiri/xml/attribute_decl.rb +22 -0
  164. data/lib/nokogiri/xml/builder.rb +494 -0
  165. data/lib/nokogiri/xml/cdata.rb +13 -0
  166. data/lib/nokogiri/xml/character_data.rb +9 -0
  167. data/lib/nokogiri/xml/document.rb +514 -0
  168. data/lib/nokogiri/xml/document_fragment.rb +276 -0
  169. data/lib/nokogiri/xml/dtd.rb +34 -0
  170. data/lib/nokogiri/xml/element_content.rb +46 -0
  171. data/lib/nokogiri/xml/element_decl.rb +17 -0
  172. data/lib/nokogiri/xml/entity_decl.rb +23 -0
  173. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  174. data/lib/nokogiri/xml/namespace.rb +57 -0
  175. data/lib/nokogiri/xml/node/save_options.rb +76 -0
  176. data/lib/nokogiri/xml/node.rb +1650 -0
  177. data/lib/nokogiri/xml/node_set.rb +449 -0
  178. data/lib/nokogiri/xml/notation.rb +19 -0
  179. data/lib/nokogiri/xml/parse_options.rb +213 -0
  180. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  181. data/lib/nokogiri/xml/pp/node.rb +73 -0
  182. data/lib/nokogiri/xml/pp.rb +4 -0
  183. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  184. data/lib/nokogiri/xml/reader.rb +139 -0
  185. data/lib/nokogiri/xml/relax_ng.rb +75 -0
  186. data/lib/nokogiri/xml/sax/document.rb +258 -0
  187. data/lib/nokogiri/xml/sax/parser.rb +199 -0
  188. data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
  189. data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
  190. data/lib/nokogiri/xml/sax.rb +54 -0
  191. data/lib/nokogiri/xml/schema.rb +140 -0
  192. data/lib/nokogiri/xml/searchable.rb +297 -0
  193. data/lib/nokogiri/xml/syntax_error.rb +94 -0
  194. data/lib/nokogiri/xml/text.rb +11 -0
  195. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  196. data/lib/nokogiri/xml/xpath.rb +21 -0
  197. data/lib/nokogiri/xml/xpath_context.rb +49 -0
  198. data/lib/nokogiri/xml.rb +65 -0
  199. data/lib/nokogiri/xslt/stylesheet.rb +49 -0
  200. data/lib/nokogiri/xslt.rb +129 -0
  201. data/lib/nokogiri.rb +128 -0
  202. data/lib/xsd/xmlparser/nokogiri.rb +105 -0
  203. metadata +324 -0
@@ -0,0 +1,2459 @@
1
+ #include <nokogiri.h>
2
+
3
+ #include <stdbool.h>
4
+
5
+ // :stopdoc:
6
+
7
+ VALUE cNokogiriXmlNode ; /* Ruby class handle; reparent_node_with() rejects reparentees that are not kind_of this (presumably Nokogiri::XML::Node -- it is assigned outside this part of the file) */
8
+ static ID id_decorate, id_decorate_bang; /* method IDs, assigned outside this part of the file; id_decorate_bang is sent (with no arguments) to a node right after it has been reparented */
9
+
10
+ typedef xmlNodePtr(*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr); /* insertion primitive called as prf(pivot, reparentee); reparent_node_with() compares it against xmlAddChild, xmlAddPrevSibling and xmlAddNextSibling and treats a NULL result as failure */
11
+
12
+ static void
13
+ _xml_node_mark(void *ptr)
14
+ {
15
+ xmlNodePtr node = ptr;
16
+
17
+ if (!DOC_RUBY_OBJECT_TEST(node->doc)) {
18
+ return;
19
+ }
20
+
21
+ xmlDocPtr doc = node->doc;
22
+ if (doc->type == XML_DOCUMENT_NODE || doc->type == XML_HTML_DOCUMENT_NODE) {
23
+ if (DOC_RUBY_OBJECT_TEST(doc)) {
24
+ rb_gc_mark(DOC_RUBY_OBJECT(doc));
25
+ }
26
+ } else if (node->doc->_private) {
27
+ rb_gc_mark((VALUE)doc->_private);
28
+ }
29
+ }
30
+
31
+ static void
32
+ _xml_node_update_references(void *ptr)
33
+ {
34
+ xmlNodePtr node = ptr;
35
+
36
+ if (node->_private) {
37
+ node->_private = (void *)rb_gc_location((VALUE)node->_private);
38
+ }
39
+ }
40
+
41
+ static const rb_data_type_t xml_node_type = {
42
+ .wrap_struct_name = "xmlNode",
43
+ .function = {
44
+ .dmark = _xml_node_mark,
45
+ .dcompact = _xml_node_update_references,
46
+ },
47
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
48
+ };
49
+
50
+ static VALUE
51
+ _xml_node_alloc(VALUE klass)
52
+ {
53
+ return TypedData_Wrap_Struct(klass, &xml_node_type, NULL);
54
+ }
55
+
56
+ static void
57
+ _xml_node_data_ptr_set(VALUE rb_node, xmlNodePtr c_node)
58
+ {
59
+ assert(DATA_PTR(rb_node) == NULL);
60
+ assert(c_node->_private == NULL);
61
+
62
+ DATA_PTR(rb_node) = c_node;
63
+ c_node->_private = (void *)rb_node;
64
+
65
+ return;
66
+ }
67
+
68
+ static void
69
+ relink_namespace(xmlNodePtr reparented)
70
+ {
71
+ xmlNodePtr child;
72
+ xmlAttrPtr attr;
73
+
74
+ if (reparented->type != XML_ATTRIBUTE_NODE &&
75
+ reparented->type != XML_ELEMENT_NODE) { return; }
76
+
77
+ if (reparented->ns == NULL || reparented->ns->prefix == NULL) {
78
+ xmlNsPtr ns = NULL;
79
+ xmlChar *name = NULL, *prefix = NULL;
80
+
81
+ name = xmlSplitQName2(reparented->name, &prefix);
82
+
83
+ if (reparented->type == XML_ATTRIBUTE_NODE) {
84
+ if (prefix == NULL || strcmp((char *)prefix, XMLNS_PREFIX) == 0) {
85
+ xmlFree(name);
86
+ xmlFree(prefix);
87
+ return;
88
+ }
89
+ }
90
+
91
+ ns = xmlSearchNs(reparented->doc, reparented, prefix);
92
+
93
+ if (ns != NULL) {
94
+ xmlNodeSetName(reparented, name);
95
+ xmlSetNs(reparented, ns);
96
+ }
97
+
98
+ xmlFree(name);
99
+ xmlFree(prefix);
100
+ }
101
+
102
+ /* Avoid segv when relinking against unlinked nodes. */
103
+ if (reparented->type != XML_ELEMENT_NODE || !reparented->parent) { return; }
104
+
105
+ /* Make sure that our reparented node has the correct namespaces */
106
+ if (!reparented->ns &&
107
+ (reparented->doc != (xmlDocPtr)reparented->parent) &&
108
+ (rb_iv_get(DOC_RUBY_OBJECT(reparented->doc), "@namespace_inheritance") == Qtrue)) {
109
+ xmlSetNs(reparented, reparented->parent->ns);
110
+ }
111
+
112
+ /* Search our parents for an existing definition */
113
+ if (reparented->nsDef) {
114
+ xmlNsPtr curr = reparented->nsDef;
115
+ xmlNsPtr prev = NULL;
116
+
117
+ while (curr) {
118
+ xmlNsPtr ns = xmlSearchNsByHref(
119
+ reparented->doc,
120
+ reparented->parent,
121
+ curr->href
122
+ );
123
+ /* If we find the namespace is already declared, remove it from this
124
+ * definition list. */
125
+ if (ns && ns != curr && xmlStrEqual(ns->prefix, curr->prefix)) {
126
+ if (prev) {
127
+ prev->next = curr->next;
128
+ } else {
129
+ reparented->nsDef = curr->next;
130
+ }
131
+ noko_xml_document_pin_namespace(curr, reparented->doc);
132
+ } else {
133
+ prev = curr;
134
+ }
135
+ curr = curr->next;
136
+ }
137
+ }
138
+
139
+ /*
140
+ * Search our parents for an existing definition of current namespace,
141
+ * because the definition it's pointing to may have just been removed nsDef.
142
+ *
143
+ * And although that would technically probably be OK, I'd feel better if we
144
+ * referred to a namespace that's still present in a node's nsDef somewhere
145
+ * in the doc.
146
+ */
147
+ if (reparented->ns) {
148
+ xmlNsPtr ns = xmlSearchNs(reparented->doc, reparented, reparented->ns->prefix);
149
+ if (ns
150
+ && ns != reparented->ns
151
+ && xmlStrEqual(ns->prefix, reparented->ns->prefix)
152
+ && xmlStrEqual(ns->href, reparented->ns->href)
153
+ ) {
154
+ xmlSetNs(reparented, ns);
155
+ }
156
+ }
157
+
158
+ /* Only walk all children if there actually is a namespace we need to */
159
+ /* reparent. */
160
+ if (NULL == reparented->ns) { return; }
161
+
162
+ /* When a node gets reparented, walk its children to make sure that */
163
+ /* their namespaces are reparented as well. */
164
+ child = reparented->children;
165
+ while (NULL != child) {
166
+ relink_namespace(child);
167
+ child = child->next;
168
+ }
169
+
170
+ if (reparented->type == XML_ELEMENT_NODE) {
171
+ attr = reparented->properties;
172
+ while (NULL != attr) {
173
+ relink_namespace((xmlNodePtr)attr);
174
+ attr = attr->next;
175
+ }
176
+ }
177
+ }
178
+
179
+
180
+ /* internal function meant to wrap xmlReplaceNode
181
+ and fix some issues we have with libxml2 merging nodes */
182
+ static xmlNodePtr
183
+ xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node)
184
+ {
185
+ xmlNodePtr retval ;
186
+
187
+ retval = xmlReplaceNode(pivot, new_node) ;
188
+
189
+ if (retval == pivot) {
190
+ retval = new_node ; /* return semantics for reparent_node_with */
191
+ }
192
+
193
+ /* work around libxml2 issue: https://bugzilla.gnome.org/show_bug.cgi?id=615612 */
194
+ if (retval && retval->type == XML_TEXT_NODE) {
195
+ if (retval->prev && retval->prev->type == XML_TEXT_NODE) {
196
+ retval = xmlTextMerge(retval->prev, retval);
197
+ }
198
+ if (retval->next && retval->next->type == XML_TEXT_NODE) {
199
+ retval = xmlTextMerge(retval, retval->next);
200
+ }
201
+ }
202
+
203
+ return retval ;
204
+ }
205
+
206
+
207
+ static void
208
+ raise_if_ancestor_of_self(xmlNodePtr self)
209
+ {
210
+ for (xmlNodePtr ancestor = self->parent ; ancestor ; ancestor = ancestor->parent) {
211
+ if (self == ancestor) {
212
+ rb_raise(rb_eRuntimeError, "cycle detected: node '%s' is an ancestor of itself", self->name);
213
+ }
214
+ }
215
+ }
216
+
217
+
218
+ static VALUE
219
+ reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func prf)
220
+ {
221
+ VALUE reparented_obj ;
222
+ xmlNodePtr reparentee, original_reparentee, pivot, reparented, next_text, new_next_text, parent ;
223
+ int original_ns_prefix_is_default = 0 ;
224
+
225
+ if (!rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlNode)) {
226
+ rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
227
+ }
228
+ if (rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlDocument)) {
229
+ rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
230
+ }
231
+
232
+ Noko_Node_Get_Struct(reparentee_obj, xmlNode, reparentee);
233
+ Noko_Node_Get_Struct(pivot_obj, xmlNode, pivot);
234
+
235
+ /*
236
+ * Check if nodes given are appropriate to have a parent-child
237
+ * relationship, based on the DOM specification.
238
+ *
239
+ * cf. http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/core.html#ID-1590626202
240
+ */
241
+ if (prf == xmlAddChild) {
242
+ parent = pivot;
243
+ } else {
244
+ parent = pivot->parent;
245
+ }
246
+
247
+ if (parent) {
248
+ switch (parent->type) {
249
+ case XML_DOCUMENT_NODE:
250
+ case XML_HTML_DOCUMENT_NODE:
251
+ switch (reparentee->type) {
252
+ case XML_ELEMENT_NODE:
253
+ case XML_PI_NODE:
254
+ case XML_COMMENT_NODE:
255
+ case XML_DOCUMENT_TYPE_NODE:
256
+ /*
257
+ * The DOM specification says no to adding text-like nodes
258
+ * directly to a document, but we allow it for compatibility.
259
+ */
260
+ case XML_TEXT_NODE:
261
+ case XML_CDATA_SECTION_NODE:
262
+ case XML_ENTITY_REF_NODE:
263
+ goto ok;
264
+ default:
265
+ break;
266
+ }
267
+ break;
268
+ case XML_DOCUMENT_FRAG_NODE:
269
+ case XML_ENTITY_REF_NODE:
270
+ case XML_ELEMENT_NODE:
271
+ switch (reparentee->type) {
272
+ case XML_ELEMENT_NODE:
273
+ case XML_PI_NODE:
274
+ case XML_COMMENT_NODE:
275
+ case XML_TEXT_NODE:
276
+ case XML_CDATA_SECTION_NODE:
277
+ case XML_ENTITY_REF_NODE:
278
+ goto ok;
279
+ default:
280
+ break;
281
+ }
282
+ break;
283
+ case XML_ATTRIBUTE_NODE:
284
+ switch (reparentee->type) {
285
+ case XML_TEXT_NODE:
286
+ case XML_ENTITY_REF_NODE:
287
+ goto ok;
288
+ default:
289
+ break;
290
+ }
291
+ break;
292
+ case XML_TEXT_NODE:
293
+ /*
294
+ * xmlAddChild() breaks the DOM specification in that it allows
295
+ * adding a text node to another, in which case text nodes are
296
+ * coalesced, but since our JRuby version does not support such
297
+ * operation, we should inhibit it.
298
+ */
299
+ break;
300
+ default:
301
+ break;
302
+ }
303
+
304
+ rb_raise(rb_eArgError, "cannot reparent %s there", rb_obj_classname(reparentee_obj));
305
+ }
306
+
307
+ ok:
308
+ original_reparentee = reparentee;
309
+
310
+ if (reparentee->doc != pivot->doc || reparentee->type == XML_TEXT_NODE) {
311
+ /*
312
+ * if the reparentee is a text node, there's a very good chance it will be
313
+ * merged with an adjacent text node after being reparented, and in that case
314
+ * libxml will free the underlying C struct.
315
+ *
316
+ * since we clearly have a ruby object which references the underlying
317
+ * memory, we can't let the C struct get freed. let's pickle the original
318
+ * reparentee by rooting it; and then we'll reparent a duplicate of the
319
+ * node that we don't care about preserving.
320
+ *
321
+ * alternatively, if the reparentee is from a different document than the
322
+ * pivot node, libxml2 is going to get confused about which document's
323
+ * "dictionary" the node's strings belong to (this is an otherwise
324
+ * uninteresting libxml2 implementation detail). as a result, we cannot
325
+ * reparent the actual reparentee, so we reparent a duplicate.
326
+ */
327
+ if (reparentee->type == XML_TEXT_NODE && reparentee->_private) {
328
+ /*
329
+ * additionally, since we know this C struct isn't going to be related to
330
+ * a Ruby object anymore, let's break the relationship on this end as
331
+ * well.
332
+ *
333
+ * this is not absolutely necessary unless libxml-ruby is also in effect,
334
+ * in which case its global callback `rxml_node_deregisterNode` will try
335
+ * to do things to our data.
336
+ *
337
+ * for more details on this particular (and particularly nasty) edge
338
+ * case, see:
339
+ *
340
+ * https://github.com/sparklemotion/nokogiri/issues/1426
341
+ */
342
+ reparentee->_private = NULL ;
343
+ }
344
+
345
+ if (reparentee->ns != NULL && reparentee->ns->prefix == NULL) {
346
+ original_ns_prefix_is_default = 1;
347
+ }
348
+
349
+ noko_xml_document_pin_node(reparentee);
350
+
351
+ if (!(reparentee = xmlDocCopyNode(reparentee, pivot->doc, 1))) {
352
+ rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
353
+ }
354
+
355
+ if (original_ns_prefix_is_default && reparentee->ns != NULL && reparentee->ns->prefix != NULL) {
356
+ /*
357
+ * issue #391, where new node's prefix may become the string "default"
358
+ * see libxml2 tree.c xmlNewReconciliedNs which implements this behavior.
359
+ */
360
+ xmlFree(DISCARD_CONST_QUAL_XMLCHAR(reparentee->ns->prefix));
361
+ reparentee->ns->prefix = NULL;
362
+ }
363
+ }
364
+
365
+ xmlUnlinkNode(original_reparentee);
366
+
367
+ if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling && prf != xmlAddChild
368
+ && reparentee->type == XML_TEXT_NODE && pivot->next && pivot->next->type == XML_TEXT_NODE) {
369
+ /*
370
+ * libxml merges text nodes in a right-to-left fashion, meaning that if
371
+ * there are two text nodes who would be adjacent, the right (or following,
372
+ * or next) node will be merged into the left (or preceding, or previous)
373
+ * node.
374
+ *
375
+ * and by "merged" I mean the string contents will be concatenated onto the
376
+ * left node's contents, and then the node will be freed.
377
+ *
378
+ * which means that if we have a ruby object wrapped around the right node,
379
+ * its memory would be freed out from under it.
380
+ *
381
+ * so, we detect this edge case and unlink-and-root the text node before it gets
382
+ * merged. then we dup the node and insert that duplicate back into the
383
+ * document where the real node was.
384
+ *
385
+ * yes, this is totally lame.
386
+ */
387
+ next_text = pivot->next ;
388
+ new_next_text = xmlDocCopyNode(next_text, pivot->doc, 1) ;
389
+
390
+ xmlUnlinkNode(next_text);
391
+ noko_xml_document_pin_node(next_text);
392
+
393
+ xmlAddNextSibling(pivot, new_next_text);
394
+ }
395
+
396
+ if (!(reparented = (*prf)(pivot, reparentee))) {
397
+ rb_raise(rb_eRuntimeError, "Could not reparent node");
398
+ }
399
+
400
+ /*
401
+ * make sure the ruby object is pointed at the just-reparented node, which
402
+ * might be a duplicate (see above) or might be the result of merging
403
+ * adjacent text nodes.
404
+ */
405
+ DATA_PTR(reparentee_obj) = reparented ;
406
+ reparented_obj = noko_xml_node_wrap(Qnil, reparented);
407
+
408
+ rb_funcall(reparented_obj, id_decorate_bang, 0);
409
+
410
+ /* if we've created a cycle, raise an exception */
411
+ raise_if_ancestor_of_self(reparented);
412
+
413
+ relink_namespace(reparented);
414
+
415
+ return reparented_obj ;
416
+ }
417
+
418
+ // :startdoc:
419
+
420
+ /*
421
+ * :call-seq:
422
+ * add_namespace_definition(prefix, href) → Nokogiri::XML::Namespace
423
+ * add_namespace(prefix, href) → Nokogiri::XML::Namespace
424
+ *
425
+ * :category: Manipulating Document Structure
426
+ *
427
+ * Adds a namespace definition to this node with +prefix+ using +href+ value, as if this node had
428
+ * included an attribute "xmlns:prefix=href".
429
+ *
430
+ * A default namespace definition for this node can be added by passing +nil+ for +prefix+.
431
+ *
432
+ * [Parameters]
433
+ * - +prefix+ (String, +nil+) An {XML Name}[https://www.w3.org/TR/xml-names/#ns-decl]
434
+ * - +href+ (String) The {URI reference}[https://www.w3.org/TR/xml-names/#sec-namespaces]
435
+ *
436
+ * [Returns] The new Nokogiri::XML::Namespace
437
+ *
438
+ * *Example:* adding a non-default namespace definition
439
+ *
440
+ * doc = Nokogiri::XML("<store><inventory></inventory></store>")
441
+ * inventory = doc.at_css("inventory")
442
+ * inventory.add_namespace_definition("automobile", "http://alices-autos.com/")
443
+ * inventory.add_namespace_definition("bicycle", "http://bobs-bikes.com/")
444
+ * inventory.add_child("<automobile:tire>Michelin model XGV, size 75R</automobile:tire>")
445
+ * doc.to_xml
446
+ * # => "<?xml version=\"1.0\"?>\n" +
447
+ * # "<store>\n" +
448
+ * # " <inventory xmlns:automobile=\"http://alices-autos.com/\" xmlns:bicycle=\"http://bobs-bikes.com/\">\n" +
449
+ * # " <automobile:tire>Michelin model XGV, size 75R</automobile:tire>\n" +
450
+ * # " </inventory>\n" +
451
+ * # "</store>\n"
452
+ *
453
+ * *Example:* adding a default namespace definition
454
+ *
455
+ * doc = Nokogiri::XML("<store><inventory><tire>Michelin model XGV, size 75R</tire></inventory></store>")
456
+ * doc.at_css("tire").add_namespace_definition(nil, "http://bobs-bikes.com/")
457
+ * doc.to_xml
458
+ * # => "<?xml version=\"1.0\"?>\n" +
459
+ * # "<store>\n" +
460
+ * # " <inventory>\n" +
461
+ * # " <tire xmlns=\"http://bobs-bikes.com/\">Michelin model XGV, size 75R</tire>\n" +
462
+ * # " </inventory>\n" +
463
+ * # "</store>\n"
464
+ *
465
+ */
466
+ static VALUE
467
+ rb_xml_node_add_namespace_definition(VALUE rb_node, VALUE rb_prefix, VALUE rb_href)
468
+ {
469
+ xmlNodePtr c_node, element;
470
+ xmlNsPtr c_namespace;
471
+ const xmlChar *c_prefix = (const xmlChar *)(NIL_P(rb_prefix) ? NULL : StringValueCStr(rb_prefix));
472
+
473
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
474
+ element = c_node ;
475
+
476
+ c_namespace = xmlSearchNs(c_node->doc, c_node, c_prefix);
477
+
478
+ if (!c_namespace) {
479
+ if (c_node->type != XML_ELEMENT_NODE) {
480
+ element = c_node->parent;
481
+ }
482
+ c_namespace = xmlNewNs(element, (const xmlChar *)StringValueCStr(rb_href), c_prefix);
483
+ }
484
+
485
+ if (!c_namespace) {
486
+ return Qnil ;
487
+ }
488
+
489
+ if (NIL_P(rb_prefix) || c_node != element) {
490
+ xmlSetNs(c_node, c_namespace);
491
+ }
492
+
493
+ return noko_xml_namespace_wrap(c_namespace, c_node->doc);
494
+ }
495
+
496
+
497
+ /*
498
+ * :call-seq: attribute(name) → Nokogiri::XML::Attr
499
+ *
500
+ * :category: Working With Node Attributes
501
+ *
502
+ * [Returns] Attribute (Nokogiri::XML::Attr) belonging to this node with name +name+.
503
+ *
504
+ * ⚠ Note that attribute namespaces are ignored and only the simple (non-namespace-prefixed) name is
505
+ * used to find a matching attribute. In case of a simple name collision, only one of the matching
506
+ * attributes will be returned. In this case, you will need to use #attribute_with_ns.
507
+ *
508
+ * *Example:*
509
+ *
510
+ * doc = Nokogiri::XML("<root><child size='large' class='big wide tall'/></root>")
511
+ * child = doc.at_css("child")
512
+ * child.attribute("size") # => #<Nokogiri::XML::Attr:0x550 name="size" value="large">
513
+ * child.attribute("class") # => #<Nokogiri::XML::Attr:0x564 name="class" value="big wide tall">
514
+ *
515
+ * *Example* showing that attribute namespaces are ignored when matching by name:
516
+ *
517
+ * ⚠ Note that only one of the two matching attributes is returned.
518
+ *
519
+ * doc = Nokogiri::XML(<<~EOF)
520
+ * <root xmlns:width='http://example.com/widths'
521
+ * xmlns:height='http://example.com/heights'>
522
+ * <child width:size='broad' height:size='tall'/>
523
+ * </root>
524
+ * EOF
525
+ * doc.at_css("child").attribute("size")
526
+ * # => #(Attr:0x550 {
527
+ * # name = "size",
528
+ * # namespace = #(Namespace:0x564 {
529
+ * # prefix = "width",
530
+ * # href = "http://example.com/widths"
531
+ * # }),
532
+ * # value = "broad"
533
+ * # })
534
+ */
535
+ static VALUE
536
+ rb_xml_node_attribute(VALUE self, VALUE name)
537
+ {
538
+ xmlNodePtr node;
539
+ xmlAttrPtr prop;
540
+ Noko_Node_Get_Struct(self, xmlNode, node);
541
+ prop = xmlHasProp(node, (xmlChar *)StringValueCStr(name));
542
+
543
+ if (! prop) { return Qnil; }
544
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)prop);
545
+ }
546
+
547
+
548
+ /*
549
+ * :call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
550
+ *
551
+ * :category: Working With Node Attributes
552
+ *
553
+ * [Returns] Attributes (an Array of Nokogiri::XML::Attr) belonging to this node.
554
+ *
555
+ * Note that this is the preferred alternative to #attributes when the simple
556
+ * (non-namespace-prefixed) attribute names may collide.
557
+ *
558
+ * *Example:*
559
+ *
560
+ * Contrast this with the colliding-name example from #attributes.
561
+ *
562
+ * doc = Nokogiri::XML(<<~EOF)
563
+ * <root xmlns:width='http://example.com/widths'
564
+ * xmlns:height='http://example.com/heights'>
565
+ * <child width:size='broad' height:size='tall'/>
566
+ * </root>
567
+ * EOF
568
+ * doc.at_css("child").attribute_nodes
569
+ * # => [#(Attr:0x550 {
570
+ * # name = "size",
571
+ * # namespace = #(Namespace:0x564 {
572
+ * # prefix = "width",
573
+ * # href = "http://example.com/widths"
574
+ * # }),
575
+ * # value = "broad"
576
+ * # }),
577
+ * # #(Attr:0x578 {
578
+ * # name = "size",
579
+ * # namespace = #(Namespace:0x58c {
580
+ * # prefix = "height",
581
+ * # href = "http://example.com/heights"
582
+ * # }),
583
+ * # value = "tall"
584
+ * # })]
585
+ */
586
+ static VALUE
587
+ rb_xml_node_attribute_nodes(VALUE rb_node)
588
+ {
589
+ xmlNodePtr c_node;
590
+
591
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
592
+
593
+ return noko_xml_node_attrs(c_node);
594
+ }
595
+
596
+
597
+ /*
598
+ * :call-seq: attribute_with_ns(name, namespace) → Nokogiri::XML::Attr
599
+ *
600
+ * :category: Working With Node Attributes
601
+ *
602
+ * [Returns]
603
+ * Attribute (Nokogiri::XML::Attr) belonging to this node with matching +name+ and +namespace+.
604
+ *
605
+ * [Parameters]
606
+ * - +name+ (String): the simple (non-namespace-prefixed) name of the attribute
607
+ * - +namespace+ (String): the URI of the attribute's namespace
608
+ *
609
+ * See related: #attribute
610
+ *
611
+ * *Example:*
612
+ *
613
+ * doc = Nokogiri::XML(<<~EOF)
614
+ * <root xmlns:width='http://example.com/widths'
615
+ * xmlns:height='http://example.com/heights'>
616
+ * <child width:size='broad' height:size='tall'/>
617
+ * </root>
618
+ * EOF
619
+ * doc.at_css("child").attribute_with_ns("size", "http://example.com/widths")
620
+ * # => #(Attr:0x550 {
621
+ * # name = "size",
622
+ * # namespace = #(Namespace:0x564 {
623
+ * # prefix = "width",
624
+ * # href = "http://example.com/widths"
625
+ * # }),
626
+ * # value = "broad"
627
+ * # })
628
+ * doc.at_css("child").attribute_with_ns("size", "http://example.com/heights")
629
+ * # => #(Attr:0x578 {
630
+ * # name = "size",
631
+ * # namespace = #(Namespace:0x58c {
632
+ * # prefix = "height",
633
+ * # href = "http://example.com/heights"
634
+ * # }),
635
+ * # value = "tall"
636
+ * # })
637
+ */
638
+ static VALUE
639
+ rb_xml_node_attribute_with_ns(VALUE self, VALUE name, VALUE namespace)
640
+ {
641
+ xmlNodePtr node;
642
+ xmlAttrPtr prop;
643
+ Noko_Node_Get_Struct(self, xmlNode, node);
644
+ prop = xmlHasNsProp(node, (xmlChar *)StringValueCStr(name),
645
+ NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace));
646
+
647
+ if (! prop) { return Qnil; }
648
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)prop);
649
+ }
650
+
651
+
652
+
653
+ /*
654
+ * call-seq: blank? → Boolean
655
+ *
656
+ * [Returns] +true+ if the node is an empty or whitespace-only text or cdata node, else +false+.
657
+ *
658
+ * *Example:*
659
+ *
660
+ * Nokogiri("<root><child/></root>").root.child.blank? # => false
661
+ * Nokogiri("<root>\t \n</root>").root.child.blank? # => true
662
+ * Nokogiri("<root><![CDATA[\t \n]]></root>").root.child.blank? # => true
663
+ * Nokogiri("<root>not-blank</root>").root.child
664
+ * .tap { |n| n.content = "" }.blank? # => true
665
+ */
666
+ static VALUE
667
+ rb_xml_node_blank_eh(VALUE self)
668
+ {
669
+ xmlNodePtr node;
670
+ Noko_Node_Get_Struct(self, xmlNode, node);
671
+ return (1 == xmlIsBlankNode(node)) ? Qtrue : Qfalse ;
672
+ }
673
+
674
+
675
+ /*
676
+ * :call-seq: child() → Nokogiri::XML::Node
677
+ *
678
+ * :category: Traversing Document Structure
679
+ *
680
+ * [Returns] First of this node's children, or +nil+ if there are no children
681
+ *
682
+ * This is a convenience method and is equivalent to:
683
+ *
684
+ * node.children.first
685
+ *
686
+ * See related: #children
687
+ */
688
+ static VALUE
689
+ rb_xml_node_child(VALUE self)
690
+ {
691
+ xmlNodePtr node, child;
692
+ Noko_Node_Get_Struct(self, xmlNode, node);
693
+
694
+ child = node->children;
695
+ if (!child) { return Qnil; }
696
+
697
+ return noko_xml_node_wrap(Qnil, child);
698
+ }
699
+
700
+
701
+ /*
702
+ * :call-seq: children() → Nokogiri::XML::NodeSet
703
+ *
704
+ * :category: Traversing Document Structure
705
+ *
706
+ * [Returns] Nokogiri::XML::NodeSet containing this node's children.
707
+ */
708
+ static VALUE
709
+ rb_xml_node_children(VALUE self)
710
+ {
711
+ xmlNodePtr node;
712
+ xmlNodePtr child;
713
+ xmlNodeSetPtr set;
714
+ VALUE document;
715
+ VALUE node_set;
716
+
717
+ Noko_Node_Get_Struct(self, xmlNode, node);
718
+
719
+ child = node->children;
720
+ set = xmlXPathNodeSetCreate(child);
721
+
722
+ document = DOC_RUBY_OBJECT(node->doc);
723
+
724
+ if (!child) { return noko_xml_node_set_wrap(set, document); }
725
+
726
+ child = child->next;
727
+ while (NULL != child) {
728
+ xmlXPathNodeSetAddUnique(set, child);
729
+ child = child->next;
730
+ }
731
+
732
+ node_set = noko_xml_node_set_wrap(set, document);
733
+
734
+ return node_set;
735
+ }
736
+
737
+
738
+ /*
739
+ * :call-seq:
740
+ * content() → String
741
+ * inner_text() → String
742
+ * text() → String
743
+ * to_str() → String
744
+ *
745
+ * [Returns]
746
+ * Contents of all the text nodes in this node's subtree, concatenated together into a single
747
+ * String.
748
+ *
749
+ * ⚠ Note that entities will _always_ be expanded in the returned String.
750
+ *
751
+ * See related: #inner_html
752
+ *
753
+ * *Example* of how entities are handled:
754
+ *
755
+ * Note that <tt>&lt;</tt> becomes <tt><</tt> in the returned String.
756
+ *
757
+ * doc = Nokogiri::XML.fragment("<child>a &lt; b</child>")
758
+ * doc.at_css("child").content
759
+ * # => "a < b"
760
+ *
761
+ * *Example* of how a subtree is handled:
762
+ *
763
+ * Note that the <tt><span></tt> tags are omitted and only the text node contents are returned,
764
+ * concatenated into a single string.
765
+ *
766
+ * doc = Nokogiri::XML.fragment("<child><span>first</span> <span>second</span></child>")
767
+ * doc.at_css("child").content
768
+ * # => "first second"
769
+ */
770
+ static VALUE
771
+ rb_xml_node_content(VALUE self)
772
+ {
773
+ xmlNodePtr node;
774
+ xmlChar *content;
775
+
776
+ Noko_Node_Get_Struct(self, xmlNode, node);
777
+
778
+ content = xmlNodeGetContent(node);
779
+ if (content) {
780
+ VALUE rval = NOKOGIRI_STR_NEW2(content);
781
+ xmlFree(content);
782
+ return rval;
783
+ }
784
+ return Qnil;
785
+ }
786
+
787
+
788
+ /*
789
+ * :call-seq: document() → Nokogiri::XML::Document
790
+ *
791
+ * :category: Traversing Document Structure
792
+ *
793
+ * [Returns] Parent Nokogiri::XML::Document for this node
794
+ */
795
+ static VALUE
796
+ rb_xml_node_document(VALUE self)
797
+ {
798
+ xmlNodePtr node;
799
+ Noko_Node_Get_Struct(self, xmlNode, node);
800
+ return DOC_RUBY_OBJECT(node->doc);
801
+ }
802
+
803
+ /*
804
+ * :call-seq: pointer_id() → Integer
805
+ *
806
+ * [Returns]
807
+ * A unique id for this node based on the internal memory structures. This method is used by #==
808
+ * to determine node identity.
809
+ */
810
+ static VALUE
811
+ rb_xml_node_pointer_id(VALUE self)
812
+ {
813
+ xmlNodePtr node;
814
+ Noko_Node_Get_Struct(self, xmlNode, node);
815
+
816
+ return rb_uint2inum((uintptr_t)(node));
817
+ }
818
+
819
+ /*
820
+ * :call-seq: encode_special_chars(string) → String
821
+ *
822
+ * Encode any special characters in +string+
823
+ */
824
+ static VALUE
825
+ encode_special_chars(VALUE self, VALUE string)
826
+ {
827
+ xmlNodePtr node;
828
+ xmlChar *encoded;
829
+ VALUE encoded_str;
830
+
831
+ Noko_Node_Get_Struct(self, xmlNode, node);
832
+ encoded = xmlEncodeSpecialChars(
833
+ node->doc,
834
+ (const xmlChar *)StringValueCStr(string)
835
+ );
836
+
837
+ encoded_str = NOKOGIRI_STR_NEW2(encoded);
838
+ xmlFree(encoded);
839
+
840
+ return encoded_str;
841
+ }
842
+
843
+ /*
844
+ * :call-seq:
845
+ * create_internal_subset(name, external_id, system_id)
846
+ *
847
+ * Create the internal subset of a document.
848
+ *
849
+ * doc.create_internal_subset("chapter", "-//OASIS//DTD DocBook XML//EN", "chapter.dtd")
850
+ * # => <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML//EN" "chapter.dtd">
851
+ *
852
+ * doc.create_internal_subset("chapter", nil, "chapter.dtd")
853
+ * # => <!DOCTYPE chapter SYSTEM "chapter.dtd">
854
+ */
855
+ static VALUE
856
+ create_internal_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id)
857
+ {
858
+ xmlNodePtr node;
859
+ xmlDocPtr doc;
860
+ xmlDtdPtr dtd;
861
+
862
+ Noko_Node_Get_Struct(self, xmlNode, node);
863
+
864
+ doc = node->doc;
865
+
866
+ if (xmlGetIntSubset(doc)) {
867
+ rb_raise(rb_eRuntimeError, "Document already has an internal subset");
868
+ }
869
+
870
+ dtd = xmlCreateIntSubset(
871
+ doc,
872
+ NIL_P(name) ? NULL : (const xmlChar *)StringValueCStr(name),
873
+ NIL_P(external_id) ? NULL : (const xmlChar *)StringValueCStr(external_id),
874
+ NIL_P(system_id) ? NULL : (const xmlChar *)StringValueCStr(system_id)
875
+ );
876
+
877
+ if (!dtd) { return Qnil; }
878
+
879
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
880
+ }
881
+
882
+ /*
883
+ * :call-seq:
884
+ * create_external_subset(name, external_id, system_id)
885
+ *
886
+ * Create an external subset
887
+ */
888
+ static VALUE
889
+ create_external_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id)
890
+ {
891
+ xmlNodePtr node;
892
+ xmlDocPtr doc;
893
+ xmlDtdPtr dtd;
894
+
895
+ Noko_Node_Get_Struct(self, xmlNode, node);
896
+
897
+ doc = node->doc;
898
+
899
+ if (doc->extSubset) {
900
+ rb_raise(rb_eRuntimeError, "Document already has an external subset");
901
+ }
902
+
903
+ dtd = xmlNewDtd(
904
+ doc,
905
+ NIL_P(name) ? NULL : (const xmlChar *)StringValueCStr(name),
906
+ NIL_P(external_id) ? NULL : (const xmlChar *)StringValueCStr(external_id),
907
+ NIL_P(system_id) ? NULL : (const xmlChar *)StringValueCStr(system_id)
908
+ );
909
+
910
+ if (!dtd) { return Qnil; }
911
+
912
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
913
+ }
914
+
915
+ /*
916
+ * :call-seq:
917
+ * external_subset()
918
+ *
919
+ * Get the external subset
920
+ */
921
+ static VALUE
922
+ external_subset(VALUE self)
923
+ {
924
+ xmlNodePtr node;
925
+ xmlDocPtr doc;
926
+ xmlDtdPtr dtd;
927
+
928
+ Noko_Node_Get_Struct(self, xmlNode, node);
929
+
930
+ if (!node->doc) { return Qnil; }
931
+
932
+ doc = node->doc;
933
+ dtd = doc->extSubset;
934
+
935
+ if (!dtd) { return Qnil; }
936
+
937
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
938
+ }
939
+
940
+ /*
941
+ * :call-seq:
942
+ * internal_subset()
943
+ *
944
+ * Get the internal subset
945
+ */
946
+ static VALUE
947
+ internal_subset(VALUE self)
948
+ {
949
+ xmlNodePtr node;
950
+ xmlDocPtr doc;
951
+ xmlDtdPtr dtd;
952
+
953
+ Noko_Node_Get_Struct(self, xmlNode, node);
954
+
955
+ if (!node->doc) { return Qnil; }
956
+
957
+ doc = node->doc;
958
+ dtd = xmlGetIntSubset(doc);
959
+
960
+ if (!dtd) { return Qnil; }
961
+
962
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
963
+ }
964
+
965
+ /* :nodoc: */
966
+ static VALUE
967
+ rb_xml_node_initialize_copy_with_args(VALUE rb_self, VALUE rb_other, VALUE rb_level, VALUE rb_new_parent_doc)
968
+ {
969
+ xmlNodePtr c_self, c_other;
970
+ int c_level;
971
+ xmlDocPtr c_new_parent_doc;
972
+ VALUE rb_node_cache;
973
+
974
+ Noko_Node_Get_Struct(rb_other, xmlNode, c_other);
975
+ c_level = (int)NUM2INT(rb_level);
976
+ c_new_parent_doc = noko_xml_document_unwrap(rb_new_parent_doc);
977
+
978
+ c_self = xmlDocCopyNode(c_other, c_new_parent_doc, c_level);
979
+ if (c_self == NULL) { return Qnil; }
980
+
981
+ _xml_node_data_ptr_set(rb_self, c_self);
982
+ noko_xml_document_pin_node(c_self);
983
+
984
+ rb_node_cache = DOC_NODE_CACHE(c_new_parent_doc);
985
+ rb_ary_push(rb_node_cache, rb_self);
986
+ rb_funcall(rb_new_parent_doc, id_decorate, 1, rb_self);
987
+
988
+ return rb_self;
989
+ }
990
+
991
+ /*
992
+ * :call-seq:
993
+ * unlink() → self
994
+ *
995
+ * Unlink this node from its current context.
996
+ */
997
+ static VALUE
998
+ unlink_node(VALUE self)
999
+ {
1000
+ xmlNodePtr node;
1001
+ Noko_Node_Get_Struct(self, xmlNode, node);
1002
+ xmlUnlinkNode(node);
1003
+ noko_xml_document_pin_node(node);
1004
+ return self;
1005
+ }
1006
+
1007
+
1008
+ /*
1009
+ * call-seq:
1010
+ * next_sibling
1011
+ *
1012
+ * Returns the next sibling node
1013
+ */
1014
+ static VALUE
1015
+ next_sibling(VALUE self)
1016
+ {
1017
+ xmlNodePtr node, sibling;
1018
+ Noko_Node_Get_Struct(self, xmlNode, node);
1019
+
1020
+ sibling = node->next;
1021
+ if (!sibling) { return Qnil; }
1022
+
1023
+ return noko_xml_node_wrap(Qnil, sibling) ;
1024
+ }
1025
+
1026
+ /*
1027
+ * call-seq:
1028
+ * previous_sibling
1029
+ *
1030
+ * Returns the previous sibling node
1031
+ */
1032
+ static VALUE
1033
+ previous_sibling(VALUE self)
1034
+ {
1035
+ xmlNodePtr node, sibling;
1036
+ Noko_Node_Get_Struct(self, xmlNode, node);
1037
+
1038
+ sibling = node->prev;
1039
+ if (!sibling) { return Qnil; }
1040
+
1041
+ return noko_xml_node_wrap(Qnil, sibling);
1042
+ }
1043
+
1044
+ /*
1045
+ * call-seq:
1046
+ * next_element
1047
+ *
1048
+ * Returns the next Nokogiri::XML::Element type sibling node.
1049
+ */
1050
+ static VALUE
1051
+ next_element(VALUE self)
1052
+ {
1053
+ xmlNodePtr node, sibling;
1054
+ Noko_Node_Get_Struct(self, xmlNode, node);
1055
+
1056
+ sibling = xmlNextElementSibling(node);
1057
+ if (!sibling) { return Qnil; }
1058
+
1059
+ return noko_xml_node_wrap(Qnil, sibling);
1060
+ }
1061
+
1062
+ /*
1063
+ * call-seq:
1064
+ * previous_element
1065
+ *
1066
+ * Returns the previous Nokogiri::XML::Element type sibling node.
1067
+ */
1068
+ static VALUE
1069
+ previous_element(VALUE self)
1070
+ {
1071
+ xmlNodePtr node, sibling;
1072
+ Noko_Node_Get_Struct(self, xmlNode, node);
1073
+
1074
+ sibling = xmlPreviousElementSibling(node);
1075
+ if (!sibling) { return Qnil; }
1076
+
1077
+ return noko_xml_node_wrap(Qnil, sibling);
1078
+ }
1079
+
1080
+ /* :nodoc: */
1081
+ static VALUE
1082
+ replace(VALUE self, VALUE new_node)
1083
+ {
1084
+ VALUE reparent = reparent_node_with(self, new_node, xmlReplaceNodeWrapper);
1085
+
1086
+ xmlNodePtr pivot;
1087
+ Noko_Node_Get_Struct(self, xmlNode, pivot);
1088
+ noko_xml_document_pin_node(pivot);
1089
+
1090
+ return reparent;
1091
+ }
1092
+
1093
+ /*
1094
+ * :call-seq:
1095
+ * element_children() → NodeSet
1096
+ * elements() → NodeSet
1097
+ *
1098
+ * [Returns]
1099
+ * The node's child elements as a NodeSet. Only children that are elements will be returned, which
1100
+ * notably excludes Text nodes.
1101
+ *
1102
+ * *Example:*
1103
+ *
1104
+ * Note that #children returns the Text node "hello" while #element_children does not.
1105
+ *
1106
+ * div = Nokogiri::HTML5("<div>hello<span>world</span>").at_css("div")
1107
+ * div.element_children
1108
+ * # => [#<Nokogiri::XML::Element:0x50 name="span" children=[#<Nokogiri::XML::Text:0x3c "world">]>]
1109
+ * div.children
1110
+ * # => [#<Nokogiri::XML::Text:0x64 "hello">,
1111
+ * # #<Nokogiri::XML::Element:0x50 name="span" children=[#<Nokogiri::XML::Text:0x3c "world">]>]
1112
+ */
1113
+ static VALUE
1114
+ rb_xml_node_element_children(VALUE self)
1115
+ {
1116
+ xmlNodePtr node;
1117
+ xmlNodePtr child;
1118
+ xmlNodeSetPtr set;
1119
+ VALUE document;
1120
+ VALUE node_set;
1121
+
1122
+ Noko_Node_Get_Struct(self, xmlNode, node);
1123
+
1124
+ child = xmlFirstElementChild(node);
1125
+ set = xmlXPathNodeSetCreate(child);
1126
+
1127
+ document = DOC_RUBY_OBJECT(node->doc);
1128
+
1129
+ if (!child) { return noko_xml_node_set_wrap(set, document); }
1130
+
1131
+ child = xmlNextElementSibling(child);
1132
+ while (NULL != child) {
1133
+ xmlXPathNodeSetAddUnique(set, child);
1134
+ child = xmlNextElementSibling(child);
1135
+ }
1136
+
1137
+ node_set = noko_xml_node_set_wrap(set, document);
1138
+
1139
+ return node_set;
1140
+ }
1141
+
1142
+ /*
1143
+ * :call-seq:
1144
+ * first_element_child() → Node
1145
+ *
1146
+ * [Returns] The first child Node that is an element.
1147
+ *
1148
+ * *Example:*
1149
+ *
1150
+ * Note that the "hello" child, which is a Text node, is skipped and the <tt><span></tt> element is
1151
+ * returned.
1152
+ *
1153
+ * div = Nokogiri::HTML5("<div>hello<span>world</span>").at_css("div")
1154
+ * div.first_element_child
1155
+ * # => #(Element:0x3c { name = "span", children = [ #(Text "world")] })
1156
+ */
1157
+ static VALUE
1158
+ rb_xml_node_first_element_child(VALUE self)
1159
+ {
1160
+ xmlNodePtr node, child;
1161
+ Noko_Node_Get_Struct(self, xmlNode, node);
1162
+
1163
+ child = xmlFirstElementChild(node);
1164
+ if (!child) { return Qnil; }
1165
+
1166
+ return noko_xml_node_wrap(Qnil, child);
1167
+ }
1168
+
1169
+ /*
1170
+ * :call-seq:
1171
+ * last_element_child() → Node
1172
+ *
1173
+ * [Returns] The last child Node that is an element.
1174
+ *
1175
+ * *Example:*
1176
+ *
1177
+ * Note that the "hello" child, which is a Text node, is skipped and the <tt><span>yes</span></tt>
1178
+ * element is returned.
1179
+ *
1180
+ * div = Nokogiri::HTML5("<div><span>no</span><span>yes</span>skip</div>").at_css("div")
1181
+ * div.last_element_child
1182
+ * # => #(Element:0x3c { name = "span", children = [ #(Text "yes")] })
1183
+ */
1184
+ static VALUE
1185
+ rb_xml_node_last_element_child(VALUE self)
1186
+ {
1187
+ xmlNodePtr node, child;
1188
+ Noko_Node_Get_Struct(self, xmlNode, node);
1189
+
1190
+ child = xmlLastElementChild(node);
1191
+ if (!child) { return Qnil; }
1192
+
1193
+ return noko_xml_node_wrap(Qnil, child);
1194
+ }
1195
+
1196
+ /*
1197
+ * call-seq:
1198
+ * key?(attribute)
1199
+ *
1200
+ * Returns true if +attribute+ is set
1201
+ */
1202
+ static VALUE
1203
+ key_eh(VALUE self, VALUE attribute)
1204
+ {
1205
+ xmlNodePtr node;
1206
+ Noko_Node_Get_Struct(self, xmlNode, node);
1207
+ if (xmlHasProp(node, (xmlChar *)StringValueCStr(attribute))) {
1208
+ return Qtrue;
1209
+ }
1210
+ return Qfalse;
1211
+ }
1212
+
1213
+ /*
1214
+ * call-seq:
1215
+ * namespaced_key?(attribute, namespace)
1216
+ *
1217
+ * Returns true if +attribute+ is set with +namespace+
1218
+ */
1219
+ static VALUE
1220
+ namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace)
1221
+ {
1222
+ xmlNodePtr node;
1223
+ Noko_Node_Get_Struct(self, xmlNode, node);
1224
+ if (xmlHasNsProp(node, (xmlChar *)StringValueCStr(attribute),
1225
+ NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace))) {
1226
+ return Qtrue;
1227
+ }
1228
+ return Qfalse;
1229
+ }
1230
+
1231
+ /*
1232
+ * call-seq:
1233
+ * []=(property, value)
1234
+ *
1235
+ * Set the +property+ to +value+
1236
+ */
1237
+ static VALUE
1238
+ set(VALUE self, VALUE property, VALUE value)
1239
+ {
1240
+ xmlNodePtr node, cur;
1241
+ xmlAttrPtr prop;
1242
+ Noko_Node_Get_Struct(self, xmlNode, node);
1243
+
1244
+ /* If a matching attribute node already exists, then xmlSetProp will destroy
1245
+ * the existing node's children. However, if Nokogiri has a node object
1246
+ * pointing to one of those children, we are left with a broken reference.
1247
+ *
1248
+ * We can avoid this by unlinking these nodes first.
1249
+ */
1250
+ if (node->type != XML_ELEMENT_NODE) {
1251
+ return (Qnil);
1252
+ }
1253
+ prop = xmlHasProp(node, (xmlChar *)StringValueCStr(property));
1254
+ if (prop && prop->children) {
1255
+ for (cur = prop->children; cur; cur = cur->next) {
1256
+ if (cur->_private) {
1257
+ noko_xml_document_pin_node(cur);
1258
+ xmlUnlinkNode(cur);
1259
+ }
1260
+ }
1261
+ }
1262
+
1263
+ xmlSetProp(node, (xmlChar *)StringValueCStr(property),
1264
+ (xmlChar *)StringValueCStr(value));
1265
+
1266
+ return value;
1267
+ }
1268
+
1269
+ /*
1270
+ * call-seq:
1271
+ * get(attribute)
1272
+ *
1273
+ * Get the value for +attribute+
1274
+ */
1275
+ static VALUE
1276
+ get(VALUE self, VALUE rattribute)
1277
+ {
1278
+ xmlNodePtr node;
1279
+ xmlChar *value = 0;
1280
+ VALUE rvalue;
1281
+ xmlChar *colon;
1282
+ xmlChar *attribute, *attr_name, *prefix;
1283
+ xmlNsPtr ns;
1284
+
1285
+ if (NIL_P(rattribute)) { return Qnil; }
1286
+
1287
+ Noko_Node_Get_Struct(self, xmlNode, node);
1288
+ attribute = xmlCharStrdup(StringValueCStr(rattribute));
1289
+
1290
+ colon = DISCARD_CONST_QUAL_XMLCHAR(xmlStrchr(attribute, (const xmlChar)':'));
1291
+ if (colon) {
1292
+ /* split the attribute string into separate prefix and name by
1293
+ * null-terminating the prefix at the colon */
1294
+ prefix = attribute;
1295
+ attr_name = colon + 1;
1296
+ (*colon) = 0;
1297
+
1298
+ ns = xmlSearchNs(node->doc, node, prefix);
1299
+ if (ns) {
1300
+ value = xmlGetNsProp(node, attr_name, ns->href);
1301
+ } else {
1302
+ value = xmlGetProp(node, (xmlChar *)StringValueCStr(rattribute));
1303
+ }
1304
+ } else {
1305
+ value = xmlGetNoNsProp(node, attribute);
1306
+ }
1307
+
1308
+ xmlFree((void *)attribute);
1309
+ if (!value) { return Qnil; }
1310
+
1311
+ rvalue = NOKOGIRI_STR_NEW2(value);
1312
+ xmlFree((void *)value);
1313
+
1314
+ return rvalue ;
1315
+ }
1316
+
1317
+ /*
1318
+ * call-seq:
1319
+ * set_namespace(namespace)
1320
+ *
1321
+ * Set the namespace to +namespace+
1322
+ */
1323
+ static VALUE
1324
+ set_namespace(VALUE self, VALUE namespace)
1325
+ {
1326
+ xmlNodePtr node;
1327
+ xmlNsPtr ns = NULL;
1328
+
1329
+ Noko_Node_Get_Struct(self, xmlNode, node);
1330
+
1331
+ if (!NIL_P(namespace)) {
1332
+ Noko_Namespace_Get_Struct(namespace, xmlNs, ns);
1333
+ }
1334
+
1335
+ xmlSetNs(node, ns);
1336
+
1337
+ return self;
1338
+ }
1339
+
1340
+ /*
1341
+ * :call-seq:
1342
+ * namespace() → Namespace
1343
+ *
1344
+ * [Returns] The Namespace of the element or attribute node, or +nil+ if there is no namespace.
1345
+ *
1346
+ * *Example:*
1347
+ *
1348
+ * doc = Nokogiri::XML(<<~EOF)
1349
+ * <root>
1350
+ * <first/>
1351
+ * <second xmlns="http://example.com/child"/>
1352
+ * <foo:third xmlns:foo="http://example.com/foo"/>
1353
+ * </root>
1354
+ * EOF
1355
+ * doc.at_xpath("//first").namespace
1356
+ * # => nil
1357
+ * doc.at_xpath("//xmlns:second", "xmlns" => "http://example.com/child").namespace
1358
+ * # => #(Namespace:0x3c { href = "http://example.com/child" })
1359
+ * doc.at_xpath("//foo:third", "foo" => "http://example.com/foo").namespace
1360
+ * # => #(Namespace:0x50 { prefix = "foo", href = "http://example.com/foo" })
1361
+ */
1362
+ static VALUE
1363
+ rb_xml_node_namespace(VALUE rb_node)
1364
+ {
1365
+ xmlNodePtr c_node ;
1366
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1367
+
1368
+ if (c_node->ns) {
1369
+ return noko_xml_namespace_wrap(c_node->ns, c_node->doc);
1370
+ }
1371
+
1372
+ return Qnil ;
1373
+ }
1374
+
1375
+ /*
1376
+ * :call-seq:
1377
+ * namespace_definitions() → Array<Nokogiri::XML::Namespace>
1378
+ *
1379
+ * [Returns]
1380
+ * Namespaces that are defined directly on this node, as an Array of Namespace objects. The array
1381
+ * will be empty if no namespaces are defined on this node.
1382
+ *
1383
+ * *Example:*
1384
+ *
1385
+ * doc = Nokogiri::XML(<<~EOF)
1386
+ * <root xmlns="http://example.com/root">
1387
+ * <first/>
1388
+ * <second xmlns="http://example.com/child" xmlns:unused="http://example.com/unused"/>
1389
+ * <foo:third xmlns:foo="http://example.com/foo"/>
1390
+ * </root>
1391
+ * EOF
1392
+ * doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_definitions
1393
+ * # => []
1394
+ * doc.at_xpath("//xmlns:second", "xmlns" => "http://example.com/child").namespace_definitions
1395
+ * # => [#(Namespace:0x3c { href = "http://example.com/child" }),
1396
+ * # #(Namespace:0x50 {
1397
+ * # prefix = "unused",
1398
+ * # href = "http://example.com/unused"
1399
+ * # })]
1400
+ * doc.at_xpath("//foo:third", "foo" => "http://example.com/foo").namespace_definitions
1401
+ * # => [#(Namespace:0x64 { prefix = "foo", href = "http://example.com/foo" })]
1402
+ */
1403
+ static VALUE
1404
+ namespace_definitions(VALUE rb_node)
1405
+ {
1406
+ /* this code in the mode of xmlHasProp() */
1407
+ xmlNodePtr c_node ;
1408
+ xmlNsPtr c_namespace;
1409
+ VALUE definitions = rb_ary_new();
1410
+
1411
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1412
+
1413
+ c_namespace = c_node->nsDef;
1414
+ if (!c_namespace) {
1415
+ return definitions;
1416
+ }
1417
+
1418
+ while (c_namespace != NULL) {
1419
+ rb_ary_push(definitions, noko_xml_namespace_wrap(c_namespace, c_node->doc));
1420
+ c_namespace = c_namespace->next;
1421
+ }
1422
+
1423
+ return definitions;
1424
+ }
1425
+
1426
+ /*
1427
+ * :call-seq:
1428
+ * namespace_scopes() → Array<Nokogiri::XML::Namespace>
1429
+ *
1430
+ * [Returns] Array of all the Namespaces on this node and its ancestors.
1431
+ *
1432
+ * See also #namespaces
1433
+ *
1434
+ * *Example:*
1435
+ *
1436
+ * doc = Nokogiri::XML(<<~EOF)
1437
+ * <root xmlns="http://example.com/root" xmlns:bar="http://example.com/bar">
1438
+ * <first/>
1439
+ * <second xmlns="http://example.com/child"/>
1440
+ * <third xmlns:foo="http://example.com/foo"/>
1441
+ * </root>
1442
+ * EOF
1443
+ * doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_scopes
1444
+ * # => [#(Namespace:0x3c { href = "http://example.com/root" }),
1445
+ * # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })]
1446
+ * doc.at_xpath("//child:second", "child" => "http://example.com/child").namespace_scopes
1447
+ * # => [#(Namespace:0x64 { href = "http://example.com/child" }),
1448
+ * # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })]
1449
+ * doc.at_xpath("//root:third", "root" => "http://example.com/root").namespace_scopes
1450
+ * # => [#(Namespace:0x78 { prefix = "foo", href = "http://example.com/foo" }),
1451
+ * # #(Namespace:0x3c { href = "http://example.com/root" }),
1452
+ * # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })]
1453
+ */
1454
+ static VALUE
1455
+ rb_xml_node_namespace_scopes(VALUE rb_node)
1456
+ {
1457
+ xmlNodePtr c_node ;
1458
+ xmlNsPtr *namespaces;
1459
+ VALUE scopes = rb_ary_new();
1460
+ int j;
1461
+
1462
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1463
+
1464
+ namespaces = xmlGetNsList(c_node->doc, c_node);
1465
+ if (!namespaces) {
1466
+ return scopes;
1467
+ }
1468
+
1469
+ for (j = 0 ; namespaces[j] != NULL ; ++j) {
1470
+ rb_ary_push(scopes, noko_xml_namespace_wrap(namespaces[j], c_node->doc));
1471
+ }
1472
+
1473
+ xmlFree(namespaces);
1474
+ return scopes;
1475
+ }
1476
+
1477
+ /*
1478
+ * call-seq:
1479
+ * node_type
1480
+ *
1481
+ * Get the type for this Node
1482
+ */
1483
+ static VALUE
1484
+ node_type(VALUE self)
1485
+ {
1486
+ xmlNodePtr node;
1487
+ Noko_Node_Get_Struct(self, xmlNode, node);
1488
+ return INT2NUM(node->type);
1489
+ }
1490
+
1491
+ /*
1492
+ * call-seq:
1493
+ * native_content=(input)
1494
+ *
1495
+ * Set the content of this node to +input+.
1496
+ *
1497
+ * [Parameters]
1498
+ * - +input+ (String) The new content for this node.
1499
+ *
1500
+ * ⚠ This method behaves differently depending on the node type. For Text, CDATA, Comment, and
1501
+ * ProcessingInstruction nodes, it treats the input as raw content, which means that the final DOM
1502
+ * will contain the entity-escaped version of the input (see example below). For Element and Attr
1503
+ * nodes, it treats the input as parsed content and expects it to be valid markup that is already
1504
+ * entity-escaped.
1505
+ *
1506
+ * 💡 Use Node#content= for a more consistent API across node types.
1507
+ *
1508
+ * [Example]
1509
+ * Note the behavior differences of this method between Text and Element nodes:
1510
+ *
1511
+ * doc = Nokogiri::HTML::Document.parse(<<~HTML)
1512
+ * <html>
1513
+ * <body>
1514
+ * <div id="first">asdf</div>
1515
+ * <div id="second">asdf</div>
1516
+ * HTML
1517
+ *
1518
+ * text_node = doc.at_css("div#first").children.first
1519
+ * div_node = doc.at_css("div#second")
1520
+ *
1521
+ * value = "You &amp; Me"
1522
+ *
1523
+ * text_node.native_content = value
1524
+ * div_node.native_content = value
1525
+ *
1526
+ * doc.css("div").to_html
1527
+ * # => "<div id=\"first\">You &amp;amp; Me</div>
1528
+ * # <div id=\"second\">You &amp; Me</div>"
1529
+ *
1530
+ * See also: #content=
1531
+ */
1532
+ static VALUE
1533
+ set_native_content(VALUE self, VALUE content)
1534
+ {
1535
+ xmlNodePtr node, child, next ;
1536
+ Noko_Node_Get_Struct(self, xmlNode, node);
1537
+
1538
+ child = node->children;
1539
+ while (NULL != child) {
1540
+ next = child->next ;
1541
+ xmlUnlinkNode(child) ;
1542
+ noko_xml_document_pin_node(child);
1543
+ child = next ;
1544
+ }
1545
+
1546
+ xmlNodeSetContent(node, (xmlChar *)StringValueCStr(content));
1547
+ return content;
1548
+ }
1549
+
1550
+ /*
1551
+ * call-seq:
1552
+ * lang=
1553
+ *
1554
+ * Set the language of a node, i.e. the values of the xml:lang attribute.
1555
+ */
1556
+ static VALUE
1557
+ set_lang(VALUE self_rb, VALUE lang_rb)
1558
+ {
1559
+ xmlNodePtr self ;
1560
+ xmlChar *lang ;
1561
+
1562
+ Noko_Node_Get_Struct(self_rb, xmlNode, self);
1563
+ lang = (xmlChar *)StringValueCStr(lang_rb);
1564
+
1565
+ xmlNodeSetLang(self, lang);
1566
+
1567
+ return Qnil ;
1568
+ }
1569
+
1570
+ /*
1571
+ * call-seq:
1572
+ * lang
1573
+ *
1574
+ * Searches the language of a node, i.e. the values of the xml:lang attribute or
1575
+ * the one carried by the nearest ancestor.
1576
+ */
1577
+ static VALUE
1578
+ get_lang(VALUE self_rb)
1579
+ {
1580
+ xmlNodePtr self ;
1581
+ xmlChar *lang ;
1582
+ VALUE lang_rb ;
1583
+
1584
+ Noko_Node_Get_Struct(self_rb, xmlNode, self);
1585
+
1586
+ lang = xmlNodeGetLang(self);
1587
+ if (lang) {
1588
+ lang_rb = NOKOGIRI_STR_NEW2(lang);
1589
+ xmlFree(lang);
1590
+ return lang_rb ;
1591
+ }
1592
+
1593
+ return Qnil ;
1594
+ }
1595
+
1596
+ /* :nodoc: */
1597
+ static VALUE
1598
+ add_child(VALUE self, VALUE new_child)
1599
+ {
1600
+ return reparent_node_with(self, new_child, xmlAddChild);
1601
+ }
1602
+
1603
+ /*
1604
+ * call-seq:
1605
+ * parent
1606
+ *
1607
+ * Get the parent Node for this Node
1608
+ */
1609
+ static VALUE
1610
+ get_parent(VALUE self)
1611
+ {
1612
+ xmlNodePtr node, parent;
1613
+ Noko_Node_Get_Struct(self, xmlNode, node);
1614
+
1615
+ parent = node->parent;
1616
+ if (!parent) { return Qnil; }
1617
+
1618
+ return noko_xml_node_wrap(Qnil, parent) ;
1619
+ }
1620
+
1621
+ /*
1622
+ * call-seq:
1623
+ * name=(new_name)
1624
+ *
1625
+ * Set the name for this Node
1626
+ */
1627
+ static VALUE
1628
+ set_name(VALUE self, VALUE new_name)
1629
+ {
1630
+ xmlNodePtr node;
1631
+ Noko_Node_Get_Struct(self, xmlNode, node);
1632
+ xmlNodeSetName(node, (xmlChar *)StringValueCStr(new_name));
1633
+ return new_name;
1634
+ }
1635
+
1636
+ /*
1637
+ * call-seq:
1638
+ * name
1639
+ *
1640
+ * Returns the name for this Node
1641
+ */
1642
+ static VALUE
1643
+ get_name(VALUE self)
1644
+ {
1645
+ xmlNodePtr node;
1646
+ Noko_Node_Get_Struct(self, xmlNode, node);
1647
+ if (node->name) {
1648
+ return NOKOGIRI_STR_NEW2(node->name);
1649
+ }
1650
+ return Qnil;
1651
+ }
1652
+
1653
+ /*
1654
+ * call-seq:
1655
+ * path
1656
+ *
1657
+ * Returns the path associated with this Node
1658
+ */
1659
+ static VALUE
1660
+ rb_xml_node_path(VALUE rb_node)
1661
+ {
1662
+ xmlNodePtr c_node;
1663
+ xmlChar *c_path ;
1664
+ VALUE rval;
1665
+
1666
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1667
+
1668
+ c_path = xmlGetNodePath(c_node);
1669
+ if (c_path == NULL) {
1670
+ // see https://github.com/sparklemotion/nokogiri/issues/2250
1671
+ // this behavior is clearly undesirable, but is what libxml <= 2.9.10 returned, and so we
1672
+ // do this for now to preserve the behavior across libxml2 versions.
1673
+ rval = NOKOGIRI_STR_NEW2("?");
1674
+ } else {
1675
+ rval = NOKOGIRI_STR_NEW2(c_path);
1676
+ xmlFree(c_path);
1677
+ }
1678
+
1679
+ return rval ;
1680
+ }
1681
+
1682
+ /* :nodoc: */
1683
+ static VALUE
1684
+ add_next_sibling(VALUE self, VALUE new_sibling)
1685
+ {
1686
+ return reparent_node_with(self, new_sibling, xmlAddNextSibling) ;
1687
+ }
1688
+
1689
+ /* :nodoc: */
1690
+ static VALUE
1691
+ add_previous_sibling(VALUE self, VALUE new_sibling)
1692
+ {
1693
+ return reparent_node_with(self, new_sibling, xmlAddPrevSibling) ;
1694
+ }
1695
+
1696
+ /*
1697
+ * call-seq:
1698
+ * native_write_to(io, encoding, options)
1699
+ *
1700
+ * Write this Node to +io+ with +encoding+ and +options+
1701
+ */
1702
+ static VALUE
1703
+ native_write_to(
1704
+ VALUE self,
1705
+ VALUE io,
1706
+ VALUE encoding,
1707
+ VALUE indent_string,
1708
+ VALUE options
1709
+ )
1710
+ {
1711
+ xmlNodePtr node;
1712
+ const char *before_indent;
1713
+ xmlSaveCtxtPtr savectx;
1714
+
1715
+ Noko_Node_Get_Struct(self, xmlNode, node);
1716
+
1717
+ xmlIndentTreeOutput = 1;
1718
+
1719
+ before_indent = xmlTreeIndentString;
1720
+
1721
+ xmlTreeIndentString = StringValueCStr(indent_string);
1722
+
1723
+ savectx = xmlSaveToIO(
1724
+ (xmlOutputWriteCallback)noko_io_write,
1725
+ (xmlOutputCloseCallback)noko_io_close,
1726
+ (void *)io,
1727
+ RTEST(encoding) ? StringValueCStr(encoding) : NULL,
1728
+ (int)NUM2INT(options)
1729
+ );
1730
+
1731
+ xmlSaveTree(savectx, node);
1732
+ xmlSaveClose(savectx);
1733
+
1734
+ xmlTreeIndentString = before_indent;
1735
+ return io;
1736
+ }
1737
+
1738
+
1739
+ static inline void
1740
+ output_partial_string(VALUE out, char const *str, size_t length)
1741
+ {
1742
+ if (length) {
1743
+ rb_enc_str_buf_cat(out, str, (long)length, rb_utf8_encoding());
1744
+ }
1745
+ }
1746
+
1747
+ static inline void
1748
+ output_char(VALUE out, char ch)
1749
+ {
1750
+ output_partial_string(out, &ch, 1);
1751
+ }
1752
+
1753
+ static inline void
1754
+ output_string(VALUE out, char const *str)
1755
+ {
1756
+ output_partial_string(out, str, strlen(str));
1757
+ }
1758
+
1759
+ static inline void
1760
+ output_tagname(VALUE out, xmlNodePtr elem)
1761
+ {
1762
+ // Elements in the HTML, MathML, and SVG namespaces do not use a namespace
1763
+ // prefix in the HTML syntax.
1764
+ char const *name = (char const *)elem->name;
1765
+ xmlNsPtr ns = elem->ns;
1766
+ if (ns && ns->href && ns->prefix
1767
+ && strcmp((char const *)ns->href, "http://www.w3.org/1999/xhtml")
1768
+ && strcmp((char const *)ns->href, "http://www.w3.org/1998/Math/MathML")
1769
+ && strcmp((char const *)ns->href, "http://www.w3.org/2000/svg")) {
1770
+ output_string(out, (char const *)elem->ns->prefix);
1771
+ output_char(out, ':');
1772
+ char const *colon = strchr(name, ':');
1773
+ if (colon) {
1774
+ name = colon + 1;
1775
+ }
1776
+ }
1777
+ output_string(out, name);
1778
+ }
1779
+
1780
+ static inline void
1781
+ output_attr_name(VALUE out, xmlAttrPtr attr)
1782
+ {
1783
+ xmlNsPtr ns = attr->ns;
1784
+ char const *name = (char const *)attr->name;
1785
+ if (ns && ns->href) {
1786
+ char const *uri = (char const *)ns->href;
1787
+ char const *localname = strchr(name, ':');
1788
+ if (localname) {
1789
+ ++localname;
1790
+ } else {
1791
+ localname = name;
1792
+ }
1793
+
1794
+ if (!strcmp(uri, "http://www.w3.org/XML/1998/namespace")) {
1795
+ output_string(out, "xml:");
1796
+ name = localname;
1797
+ } else if (!strcmp(uri, "http://www.w3.org/2000/xmlns/")) {
1798
+ // xmlns:xmlns -> xmlns
1799
+ // xmlns:foo -> xmlns:foo
1800
+ if (strcmp(localname, "xmlns")) {
1801
+ output_string(out, "xmlns:");
1802
+ }
1803
+ name = localname;
1804
+ } else if (!strcmp(uri, "http://www.w3.org/1999/xlink")) {
1805
+ output_string(out, "xlink:");
1806
+ name = localname;
1807
+ } else if (ns->prefix) {
1808
+ output_string(out, (char const *)ns->prefix);
1809
+ output_char(out, ':');
1810
+ name = localname;
1811
+ }
1812
+ }
1813
+ output_string(out, name);
1814
+ }
1815
+
1816
+ static void
1817
+ output_escaped_string(VALUE out, xmlChar const *start, bool attr)
1818
+ {
1819
+ xmlChar const *next = start;
1820
+ int ch;
1821
+
1822
+ while ((ch = *next) != 0) {
1823
+ char const *replacement = NULL;
1824
+ size_t replaced_bytes = 1;
1825
+ if (ch == '&') {
1826
+ replacement = "&amp;";
1827
+ } else if (ch == 0xC2 && next[1] == 0xA0) {
1828
+ // U+00A0 NO-BREAK SPACE has the UTF-8 encoding C2 A0.
1829
+ replacement = "&nbsp;";
1830
+ replaced_bytes = 2;
1831
+ } else if (attr && ch == '"') {
1832
+ replacement = "&quot;";
1833
+ } else if (!attr && ch == '<') {
1834
+ replacement = "&lt;";
1835
+ } else if (!attr && ch == '>') {
1836
+ replacement = "&gt;";
1837
+ } else {
1838
+ ++next;
1839
+ continue;
1840
+ }
1841
+ output_partial_string(out, (char const *)start, (size_t)(next - start));
1842
+ output_string(out, replacement);
1843
+ next += replaced_bytes;
1844
+ start = next;
1845
+ }
1846
+ output_partial_string(out, (char const *)start, (size_t)(next - start));
1847
+ }
1848
+
1849
+ static bool
1850
+ should_prepend_newline(xmlNodePtr node)
1851
+ {
1852
+ char const *name = (char const *)node->name;
1853
+ xmlNodePtr child = node->children;
1854
+
1855
+ if (!name || !child || (strcmp(name, "pre") && strcmp(name, "textarea") && strcmp(name, "listing"))) {
1856
+ return false;
1857
+ }
1858
+
1859
+ return child->type == XML_TEXT_NODE && child->content && child->content[0] == '\n';
1860
+ }
1861
+
1862
+ static VALUE
1863
+ rb_prepend_newline(VALUE self)
1864
+ {
1865
+ xmlNodePtr node;
1866
+ Noko_Node_Get_Struct(self, xmlNode, node);
1867
+ return should_prepend_newline(node) ? Qtrue : Qfalse;
1868
+ }
1869
+
1870
+ static bool
1871
+ is_one_of(xmlNodePtr node, char const *const *tagnames, size_t num_tagnames)
1872
+ {
1873
+ char const *name = (char const *)node->name;
1874
+ if (name == NULL) { // fragments don't have a name
1875
+ return false;
1876
+ }
1877
+
1878
+ if (node->ns != NULL) {
1879
+ // if the node has a namespace, it's in a foreign context and is not one of the HTML tags we're
1880
+ // matching against.
1881
+ return false;
1882
+ }
1883
+
1884
+ for (size_t idx = 0; idx < num_tagnames; ++idx) {
1885
+ if (!strcmp(name, tagnames[idx])) {
1886
+ return true;
1887
+ }
1888
+ }
1889
+ return false;
1890
+ }
1891
+
1892
+ static void
1893
+ output_node(
1894
+ VALUE out,
1895
+ xmlNodePtr node,
1896
+ bool preserve_newline
1897
+ )
1898
+ {
1899
+ static char const *const VOID_ELEMENTS[] = {
1900
+ "area", "base", "basefont", "bgsound", "br", "col", "embed", "frame", "hr",
1901
+ "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr",
1902
+ };
1903
+
1904
+ static char const *const UNESCAPED_TEXT_ELEMENTS[] = {
1905
+ "style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript",
1906
+ };
1907
+
1908
+ switch (node->type) {
1909
+ case XML_ELEMENT_NODE:
1910
+ // Serialize the start tag.
1911
+ output_char(out, '<');
1912
+ output_tagname(out, node);
1913
+
1914
+ // Add attributes.
1915
+ for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
1916
+ output_char(out, ' ');
1917
+ output_node(out, (xmlNodePtr)attr, preserve_newline);
1918
+ }
1919
+ output_char(out, '>');
1920
+
1921
+ // Add children and end tag if element is not void.
1922
+ if (!is_one_of(node, VOID_ELEMENTS, sizeof VOID_ELEMENTS / sizeof VOID_ELEMENTS[0])) {
1923
+ if (preserve_newline && should_prepend_newline(node)) {
1924
+ output_char(out, '\n');
1925
+ }
1926
+ for (xmlNodePtr child = node->children; child; child = child->next) {
1927
+ output_node(out, child, preserve_newline);
1928
+ }
1929
+ output_string(out, "</");
1930
+ output_tagname(out, node);
1931
+ output_char(out, '>');
1932
+ }
1933
+ break;
1934
+
1935
+ case XML_ATTRIBUTE_NODE: {
1936
+ xmlAttrPtr attr = (xmlAttrPtr)node;
1937
+ output_attr_name(out, attr);
1938
+ if (attr->children) {
1939
+ output_string(out, "=\"");
1940
+ xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
1941
+ output_escaped_string(out, value, true);
1942
+ xmlFree(value);
1943
+ output_char(out, '"');
1944
+ } else {
1945
+ // Output name=""
1946
+ output_string(out, "=\"\"");
1947
+ }
1948
+ }
1949
+ break;
1950
+
1951
+ case XML_TEXT_NODE:
1952
+ if (node->parent
1953
+ && is_one_of(node->parent, UNESCAPED_TEXT_ELEMENTS,
1954
+ sizeof UNESCAPED_TEXT_ELEMENTS / sizeof UNESCAPED_TEXT_ELEMENTS[0])) {
1955
+ output_string(out, (char const *)node->content);
1956
+ } else {
1957
+ output_escaped_string(out, node->content, false);
1958
+ }
1959
+ break;
1960
+
1961
+ case XML_CDATA_SECTION_NODE:
1962
+ output_string(out, "<![CDATA[");
1963
+ output_string(out, (char const *)node->content);
1964
+ output_string(out, "]]>");
1965
+ break;
1966
+
1967
+ case XML_COMMENT_NODE:
1968
+ output_string(out, "<!--");
1969
+ output_string(out, (char const *)node->content);
1970
+ output_string(out, "-->");
1971
+ break;
1972
+
1973
+ case XML_PI_NODE:
1974
+ output_string(out, "<?");
1975
+ output_string(out, (char const *)node->content);
1976
+ output_char(out, '>');
1977
+ break;
1978
+
1979
+ case XML_DOCUMENT_TYPE_NODE:
1980
+ case XML_DTD_NODE:
1981
+ output_string(out, "<!DOCTYPE ");
1982
+ output_string(out, (char const *)node->name);
1983
+ output_string(out, ">");
1984
+ break;
1985
+
1986
+ case XML_DOCUMENT_NODE:
1987
+ case XML_DOCUMENT_FRAG_NODE:
1988
+ case XML_HTML_DOCUMENT_NODE:
1989
+ for (xmlNodePtr child = node->children; child; child = child->next) {
1990
+ output_node(out, child, preserve_newline);
1991
+ }
1992
+ break;
1993
+
1994
+ default:
1995
+ rb_raise(rb_eRuntimeError, "Unsupported document node (%d); this is a bug in Nokogiri", node->type);
1996
+ break;
1997
+ }
1998
+ }
1999
+
2000
+ static VALUE
2001
+ html_standard_serialize(
2002
+ VALUE self,
2003
+ VALUE preserve_newline
2004
+ )
2005
+ {
2006
+ xmlNodePtr node;
2007
+ Noko_Node_Get_Struct(self, xmlNode, node);
2008
+ VALUE output = rb_str_buf_new(4096);
2009
+ output_node(output, node, RTEST(preserve_newline));
2010
+ return output;
2011
+ }
2012
+
2013
+ /*
2014
+ * :call-seq:
2015
+ * line() → Integer
2016
+ *
2017
+ * [Returns] The line number of this Node.
2018
+ *
2019
+ * ---
2020
+ *
2021
+ * <b> ⚠ The CRuby and JRuby implementations differ in important ways! </b>
2022
+ *
2023
+ * Semantic differences:
2024
+ * - The CRuby method reflects the node's line number <i>in the parsed string</i>
2025
+ * - The JRuby method reflects the node's line number <i>in the final DOM structure</i> after
2026
+ * corrections have been applied
2027
+ *
2028
+ * Performance differences:
2029
+ * - The CRuby method is {O(1)}[https://en.wikipedia.org/wiki/Time_complexity#Constant_time]
2030
+ * (constant time)
2031
+ * - The JRuby method is {O(n)}[https://en.wikipedia.org/wiki/Time_complexity#Linear_time] (linear
2032
+ * time, where n is the number of nodes before/above the element in the DOM)
2033
+ *
2034
+ * If you'd like to help improve the JRuby implementation, please review these issues and reach out
2035
+ * to the maintainers:
2036
+ * - https://github.com/sparklemotion/nokogiri/issues/1223
2037
+ * - https://github.com/sparklemotion/nokogiri/pull/2177
2038
+ * - https://github.com/sparklemotion/nokogiri/issues/2380
2039
+ */
2040
+ static VALUE
2041
+ rb_xml_node_line(VALUE rb_node)
2042
+ {
2043
+ xmlNodePtr c_node;
2044
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
2045
+
2046
+ return LONG2NUM(xmlGetLineNo(c_node));
2047
+ }
2048
+
2049
+ /*
2050
+ * call-seq:
2051
+ * line=(num)
2052
+ *
2053
+ * Sets the line for this Node. num must be less than 65535.
2054
+ */
2055
+ static VALUE
2056
+ rb_xml_node_line_set(VALUE rb_node, VALUE rb_line_number)
2057
+ {
2058
+ xmlNodePtr c_node;
2059
+ int line_number = NUM2INT(rb_line_number);
2060
+
2061
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
2062
+
2063
+ // libxml2 optionally uses xmlNode.psvi to store longer line numbers, but only for text nodes.
2064
+ // search for "psvi" in SAX2.c and tree.c to learn more.
2065
+ if (line_number < 65535) {
2066
+ c_node->line = (short unsigned)line_number;
2067
+ } else {
2068
+ c_node->line = 65535;
2069
+ if (c_node->type == XML_TEXT_NODE) {
2070
+ c_node->psvi = (void *)(ptrdiff_t)line_number;
2071
+ }
2072
+ }
2073
+
2074
+ return rb_line_number;
2075
+ }
2076
+
2077
+ /* :nodoc: documented in lib/nokogiri/xml/node.rb */
2078
+ static VALUE
2079
+ rb_xml_node_new(int argc, VALUE *argv, VALUE klass)
2080
+ {
2081
+ xmlNodePtr c_document_node;
2082
+ xmlNodePtr c_node;
2083
+ VALUE rb_name;
2084
+ VALUE rb_document_node;
2085
+ VALUE rest;
2086
+ VALUE rb_node;
2087
+
2088
+ rb_scan_args(argc, argv, "2*", &rb_name, &rb_document_node, &rest);
2089
+
2090
+ if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlNode)) {
2091
+ rb_raise(rb_eArgError, "document must be a Nokogiri::XML::Node");
2092
+ }
2093
+ if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlDocument)) {
2094
+ NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.13.0, remove in v1.17.0
2095
+ }
2096
+ Noko_Node_Get_Struct(rb_document_node, xmlNode, c_document_node);
2097
+
2098
+ c_node = xmlNewNode(NULL, (xmlChar *)StringValueCStr(rb_name));
2099
+ c_node->doc = c_document_node->doc;
2100
+ noko_xml_document_pin_node(c_node);
2101
+
2102
+ rb_node = noko_xml_node_wrap(
2103
+ klass == cNokogiriXmlNode ? (VALUE)NULL : klass,
2104
+ c_node
2105
+ );
2106
+ rb_obj_call_init(rb_node, argc, argv);
2107
+
2108
+ if (rb_block_given_p()) { rb_yield(rb_node); }
2109
+
2110
+ return rb_node;
2111
+ }
2112
+
2113
+ /*
2114
+ * call-seq:
2115
+ * dump_html
2116
+ *
2117
+ * Returns the Node as html.
2118
+ */
2119
+ static VALUE
2120
+ dump_html(VALUE self)
2121
+ {
2122
+ xmlBufferPtr buf ;
2123
+ xmlNodePtr node ;
2124
+ VALUE html;
2125
+
2126
+ Noko_Node_Get_Struct(self, xmlNode, node);
2127
+
2128
+ buf = xmlBufferCreate() ;
2129
+ htmlNodeDump(buf, node->doc, node);
2130
+ html = NOKOGIRI_STR_NEW2(xmlBufferContent(buf));
2131
+ xmlBufferFree(buf);
2132
+ return html ;
2133
+ }
2134
+
2135
+ /*
2136
+ * call-seq:
2137
+ * compare(other)
2138
+ *
2139
+ * Compare this Node to +other+ with respect to their Document
2140
+ */
2141
+ static VALUE
2142
+ compare(VALUE self, VALUE _other)
2143
+ {
2144
+ xmlNodePtr node, other;
2145
+ Noko_Node_Get_Struct(self, xmlNode, node);
2146
+ Noko_Node_Get_Struct(_other, xmlNode, other);
2147
+
2148
+ return INT2NUM(xmlXPathCmpNodes(other, node));
2149
+ }
2150
+
2151
+
2152
+ /*
2153
+ * call-seq:
2154
+ * process_xincludes(flags)
2155
+ *
2156
+ * Loads and substitutes all xinclude elements below the node. The
2157
+ * parser context will be initialized with +flags+.
2158
+ */
2159
+ static VALUE
2160
+ noko_xml_node__process_xincludes(VALUE rb_node, VALUE rb_flags)
2161
+ {
2162
+ int status ;
2163
+ xmlNodePtr c_node;
2164
+ VALUE rb_errors = rb_ary_new();
2165
+ libxmlStructuredErrorHandlerState handler_state;
2166
+
2167
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
2168
+
2169
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
2170
+
2171
+ status = xmlXIncludeProcessTreeFlags(c_node, (int)NUM2INT(rb_flags));
2172
+
2173
+ noko__structured_error_func_restore(&handler_state);
2174
+
2175
+ if (status < 0) {
2176
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
2177
+
2178
+ if (RB_TEST(exception)) {
2179
+ rb_exc_raise(exception);
2180
+ } else {
2181
+ rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
2182
+ }
2183
+ }
2184
+
2185
+ return rb_node;
2186
+ }
2187
+
2188
+
2189
+ /* TODO: DOCUMENT ME */
2190
+ static VALUE
2191
+ in_context(VALUE self, VALUE _str, VALUE _options)
2192
+ {
2193
+ xmlNodePtr node, list = 0, tmp, child_iter, node_children, doc_children;
2194
+ xmlNodeSetPtr set;
2195
+ xmlParserErrors error;
2196
+ VALUE doc, err;
2197
+ int doc_is_empty;
2198
+
2199
+ Noko_Node_Get_Struct(self, xmlNode, node);
2200
+
2201
+ doc = DOC_RUBY_OBJECT(node->doc);
2202
+ err = rb_iv_get(doc, "@errors");
2203
+ doc_is_empty = (node->doc->children == NULL) ? 1 : 0;
2204
+ node_children = node->children;
2205
+ doc_children = node->doc->children;
2206
+
2207
+ xmlSetStructuredErrorFunc((void *)err, noko__error_array_pusher);
2208
+
2209
+ /* This function adds a fake node to the child of +node+. If the parser
2210
+ * does not exit cleanly with XML_ERR_OK, the list is freed. This can
2211
+ * leave the child pointers in a bad state if they were originally empty.
2212
+ *
2213
+ * http://git.gnome.org/browse/libxml2/tree/parser.c#n13177
2214
+ * */
2215
+ error = xmlParseInNodeContext(node, StringValuePtr(_str),
2216
+ (int)RSTRING_LEN(_str),
2217
+ (int)NUM2INT(_options), &list);
2218
+
2219
+ /* xmlParseInNodeContext should not mutate the original document or node,
2220
+ * so reassigning these pointers should be OK. The reason we're reassigning
2221
+ * is because if there were errors, it's possible for the child pointers
2222
+ * to be manipulated. */
2223
+ if (error != XML_ERR_OK) {
2224
+ node->doc->children = doc_children;
2225
+ node->children = node_children;
2226
+ }
2227
+
2228
+ /* make sure parent/child pointers are coherent so an unlink will work
2229
+ * properly (#331)
2230
+ */
2231
+ child_iter = node->doc->children ;
2232
+ while (child_iter) {
2233
+ child_iter->parent = (xmlNodePtr)node->doc;
2234
+ child_iter = child_iter->next;
2235
+ }
2236
+
2237
+ xmlSetStructuredErrorFunc(NULL, NULL);
2238
+
2239
+ /*
2240
+ * Workaround for a libxml2 bug where a parsing error may leave a broken
2241
+ * node reference in node->doc->children.
2242
+ *
2243
+ * https://bugzilla.gnome.org/show_bug.cgi?id=668155
2244
+ *
2245
+ * This workaround is limited to when a parse error occurs, the document
2246
+ * went from having no children to having children, and the context node is
2247
+ * part of a document fragment.
2248
+ *
2249
+ * TODO: This was fixed in libxml 2.8.0 by 71a243d
2250
+ */
2251
+ if (error != XML_ERR_OK && doc_is_empty && node->doc->children != NULL) {
2252
+ child_iter = node;
2253
+ while (child_iter->parent) {
2254
+ child_iter = child_iter->parent;
2255
+ }
2256
+
2257
+ if (child_iter->type == XML_DOCUMENT_FRAG_NODE) {
2258
+ node->doc->children = NULL;
2259
+ }
2260
+ }
2261
+
2262
+ /* FIXME: This probably needs to handle more constants... */
2263
+ switch (error) {
2264
+ case XML_ERR_INTERNAL_ERROR:
2265
+ case XML_ERR_NO_MEMORY:
2266
+ rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error);
2267
+ break;
2268
+ default:
2269
+ break;
2270
+ }
2271
+
2272
+ set = xmlXPathNodeSetCreate(NULL);
2273
+
2274
+ while (list) {
2275
+ tmp = list->next;
2276
+ list->next = NULL;
2277
+ xmlXPathNodeSetAddUnique(set, list);
2278
+ noko_xml_document_pin_node(list);
2279
+ list = tmp;
2280
+ }
2281
+
2282
+ return noko_xml_node_set_wrap(set, doc);
2283
+ }
2284
+
2285
+ /* :nodoc: */
2286
+ VALUE
2287
+ rb_xml_node_data_ptr_eh(VALUE self)
2288
+ {
2289
+ xmlNodePtr c_node;
2290
+ Noko_Node_Get_Struct(self, xmlNode, c_node);
2291
+ return c_node ? Qtrue : Qfalse;
2292
+ }
2293
+
2294
+ VALUE
2295
+ noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
2296
+ {
2297
+ VALUE rb_document, rb_node_cache, rb_node;
2298
+ nokogiriTuplePtr node_has_a_document;
2299
+ xmlDocPtr c_doc;
2300
+
2301
+ assert(c_node);
2302
+
2303
+ if (c_node->type == XML_DOCUMENT_NODE || c_node->type == XML_HTML_DOCUMENT_NODE) {
2304
+ return DOC_RUBY_OBJECT(c_node->doc);
2305
+ }
2306
+
2307
+ c_doc = c_node->doc;
2308
+
2309
+ // Nodes yielded from XML::Reader don't have a fully-realized Document
2310
+ node_has_a_document = DOC_RUBY_OBJECT_TEST(c_doc);
2311
+
2312
+ if (c_node->_private && node_has_a_document) {
2313
+ return (VALUE)c_node->_private;
2314
+ }
2315
+
2316
+ if (!RTEST(rb_class)) {
2317
+ switch (c_node->type) {
2318
+ case XML_ELEMENT_NODE:
2319
+ rb_class = cNokogiriXmlElement;
2320
+ break;
2321
+ case XML_TEXT_NODE:
2322
+ rb_class = cNokogiriXmlText;
2323
+ break;
2324
+ case XML_ATTRIBUTE_NODE:
2325
+ rb_class = cNokogiriXmlAttr;
2326
+ break;
2327
+ case XML_ENTITY_REF_NODE:
2328
+ rb_class = cNokogiriXmlEntityReference;
2329
+ break;
2330
+ case XML_COMMENT_NODE:
2331
+ rb_class = cNokogiriXmlComment;
2332
+ break;
2333
+ case XML_DOCUMENT_FRAG_NODE:
2334
+ rb_class = cNokogiriXmlDocumentFragment;
2335
+ break;
2336
+ case XML_PI_NODE:
2337
+ rb_class = cNokogiriXmlProcessingInstruction;
2338
+ break;
2339
+ case XML_ENTITY_DECL:
2340
+ rb_class = cNokogiriXmlEntityDecl;
2341
+ break;
2342
+ case XML_CDATA_SECTION_NODE:
2343
+ rb_class = cNokogiriXmlCData;
2344
+ break;
2345
+ case XML_DTD_NODE:
2346
+ rb_class = cNokogiriXmlDtd;
2347
+ break;
2348
+ case XML_ATTRIBUTE_DECL:
2349
+ rb_class = cNokogiriXmlAttributeDecl;
2350
+ break;
2351
+ case XML_ELEMENT_DECL:
2352
+ rb_class = cNokogiriXmlElementDecl;
2353
+ break;
2354
+ default:
2355
+ rb_class = cNokogiriXmlNode;
2356
+ }
2357
+ }
2358
+
2359
+ rb_node = _xml_node_alloc(rb_class);
2360
+ _xml_node_data_ptr_set(rb_node, c_node);
2361
+
2362
+ if (node_has_a_document) {
2363
+ rb_document = DOC_RUBY_OBJECT(c_doc);
2364
+ rb_node_cache = DOC_NODE_CACHE(c_doc);
2365
+ rb_ary_push(rb_node_cache, rb_node);
2366
+ rb_funcall(rb_document, id_decorate, 1, rb_node);
2367
+ }
2368
+
2369
+ return rb_node ;
2370
+ }
2371
+
2372
+
2373
+ /*
2374
+ * return Array<Nokogiri::XML::Attr> containing the node's attributes
2375
+ */
2376
+ VALUE
2377
+ noko_xml_node_attrs(xmlNodePtr c_node)
2378
+ {
2379
+ VALUE rb_properties = rb_ary_new();
2380
+ xmlAttrPtr c_property;
2381
+
2382
+ c_property = c_node->properties ;
2383
+ while (c_property != NULL) {
2384
+ rb_ary_push(rb_properties, noko_xml_node_wrap(Qnil, (xmlNodePtr)c_property));
2385
+ c_property = c_property->next ;
2386
+ }
2387
+
2388
+ return rb_properties;
2389
+ }
2390
+
2391
+ void
2392
+ noko_init_xml_node(void)
2393
+ {
2394
+ cNokogiriXmlNode = rb_define_class_under(mNokogiriXml, "Node", rb_cObject);
2395
+
2396
+ rb_define_alloc_func(cNokogiriXmlNode, _xml_node_alloc);
2397
+
2398
+ rb_define_singleton_method(cNokogiriXmlNode, "new", rb_xml_node_new, -1);
2399
+
2400
+ rb_define_method(cNokogiriXmlNode, "add_namespace_definition", rb_xml_node_add_namespace_definition, 2);
2401
+ rb_define_method(cNokogiriXmlNode, "attribute", rb_xml_node_attribute, 1);
2402
+ rb_define_method(cNokogiriXmlNode, "attribute_nodes", rb_xml_node_attribute_nodes, 0);
2403
+ rb_define_method(cNokogiriXmlNode, "attribute_with_ns", rb_xml_node_attribute_with_ns, 2);
2404
+ rb_define_method(cNokogiriXmlNode, "blank?", rb_xml_node_blank_eh, 0);
2405
+ rb_define_method(cNokogiriXmlNode, "child", rb_xml_node_child, 0);
2406
+ rb_define_method(cNokogiriXmlNode, "children", rb_xml_node_children, 0);
2407
+ rb_define_method(cNokogiriXmlNode, "content", rb_xml_node_content, 0);
2408
+ rb_define_method(cNokogiriXmlNode, "create_external_subset", create_external_subset, 3);
2409
+ rb_define_method(cNokogiriXmlNode, "create_internal_subset", create_internal_subset, 3);
2410
+ rb_define_method(cNokogiriXmlNode, "data_ptr?", rb_xml_node_data_ptr_eh, 0);
2411
+ rb_define_method(cNokogiriXmlNode, "document", rb_xml_node_document, 0);
2412
+ rb_define_method(cNokogiriXmlNode, "element_children", rb_xml_node_element_children, 0);
2413
+ rb_define_method(cNokogiriXmlNode, "encode_special_chars", encode_special_chars, 1);
2414
+ rb_define_method(cNokogiriXmlNode, "external_subset", external_subset, 0);
2415
+ rb_define_method(cNokogiriXmlNode, "first_element_child", rb_xml_node_first_element_child, 0);
2416
+ rb_define_method(cNokogiriXmlNode, "internal_subset", internal_subset, 0);
2417
+ rb_define_method(cNokogiriXmlNode, "key?", key_eh, 1);
2418
+ rb_define_method(cNokogiriXmlNode, "lang", get_lang, 0);
2419
+ rb_define_method(cNokogiriXmlNode, "lang=", set_lang, 1);
2420
+ rb_define_method(cNokogiriXmlNode, "last_element_child", rb_xml_node_last_element_child, 0);
2421
+ rb_define_method(cNokogiriXmlNode, "line", rb_xml_node_line, 0);
2422
+ rb_define_method(cNokogiriXmlNode, "line=", rb_xml_node_line_set, 1);
2423
+ rb_define_method(cNokogiriXmlNode, "namespace", rb_xml_node_namespace, 0);
2424
+ rb_define_method(cNokogiriXmlNode, "namespace_definitions", namespace_definitions, 0);
2425
+ rb_define_method(cNokogiriXmlNode, "namespace_scopes", rb_xml_node_namespace_scopes, 0);
2426
+ rb_define_method(cNokogiriXmlNode, "namespaced_key?", namespaced_key_eh, 2);
2427
+ rb_define_method(cNokogiriXmlNode, "native_content=", set_native_content, 1);
2428
+ rb_define_method(cNokogiriXmlNode, "next_element", next_element, 0);
2429
+ rb_define_method(cNokogiriXmlNode, "next_sibling", next_sibling, 0);
2430
+ rb_define_method(cNokogiriXmlNode, "node_name", get_name, 0);
2431
+ rb_define_method(cNokogiriXmlNode, "node_name=", set_name, 1);
2432
+ rb_define_method(cNokogiriXmlNode, "node_type", node_type, 0);
2433
+ rb_define_method(cNokogiriXmlNode, "parent", get_parent, 0);
2434
+ rb_define_method(cNokogiriXmlNode, "path", rb_xml_node_path, 0);
2435
+ rb_define_method(cNokogiriXmlNode, "pointer_id", rb_xml_node_pointer_id, 0);
2436
+ rb_define_method(cNokogiriXmlNode, "previous_element", previous_element, 0);
2437
+ rb_define_method(cNokogiriXmlNode, "previous_sibling", previous_sibling, 0);
2438
+ rb_define_method(cNokogiriXmlNode, "unlink", unlink_node, 0);
2439
+
2440
+ rb_define_protected_method(cNokogiriXmlNode, "initialize_copy_with_args", rb_xml_node_initialize_copy_with_args, 3);
2441
+
2442
+ rb_define_private_method(cNokogiriXmlNode, "add_child_node", add_child, 1);
2443
+ rb_define_private_method(cNokogiriXmlNode, "add_next_sibling_node", add_next_sibling, 1);
2444
+ rb_define_private_method(cNokogiriXmlNode, "add_previous_sibling_node", add_previous_sibling, 1);
2445
+ rb_define_private_method(cNokogiriXmlNode, "compare", compare, 1);
2446
+ rb_define_private_method(cNokogiriXmlNode, "dump_html", dump_html, 0);
2447
+ rb_define_private_method(cNokogiriXmlNode, "get", get, 1);
2448
+ rb_define_private_method(cNokogiriXmlNode, "in_context", in_context, 2);
2449
+ rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4);
2450
+ rb_define_private_method(cNokogiriXmlNode, "prepend_newline?", rb_prepend_newline, 0);
2451
+ rb_define_private_method(cNokogiriXmlNode, "html_standard_serialize", html_standard_serialize, 1);
2452
+ rb_define_private_method(cNokogiriXmlNode, "process_xincludes", noko_xml_node__process_xincludes, 1);
2453
+ rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1);
2454
+ rb_define_private_method(cNokogiriXmlNode, "set", set, 2);
2455
+ rb_define_private_method(cNokogiriXmlNode, "set_namespace", set_namespace, 1);
2456
+
2457
+ id_decorate = rb_intern("decorate");
2458
+ id_decorate_bang = rb_intern("decorate!");
2459
+ }