nokogiri 1.10.7 → 1.16.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (224) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +42 -0
  3. data/LICENSE-DEPENDENCIES.md +1632 -1022
  4. data/LICENSE.md +1 -1
  5. data/README.md +188 -96
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +34 -66
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +862 -421
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +222 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +17 -17
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +39 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +408 -243
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +131 -61
  33. data/ext/nokogiri/xml_node.c +1343 -674
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +305 -213
  37. data/ext/nokogiri/xml_relax_ng.c +87 -78
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +149 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +65 -37
  41. data/ext/nokogiri/xml_schema.c +138 -82
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +35 -26
  44. data/ext/nokogiri/xml_xpath_context.c +363 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +126 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3464 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +5 -3
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +205 -96
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +326 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +224 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +75 -34
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -127
  133. data/lib/nokogiri/xml/document_fragment.rb +93 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +44 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1096 -419
  142. data/lib/nokogiri/xml/node_set.rb +137 -61
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +7 -5
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +39 -38
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  169. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  170. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  171. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  172. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  173. data/ports/archives/libxml2-2.12.3.tar.xz +0 -0
  174. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  175. metadata +121 -291
  176. data/ext/nokogiri/html_document.c +0 -170
  177. data/ext/nokogiri/html_document.h +0 -10
  178. data/ext/nokogiri/html_element_description.c +0 -279
  179. data/ext/nokogiri/html_element_description.h +0 -10
  180. data/ext/nokogiri/html_entity_lookup.c +0 -32
  181. data/ext/nokogiri/html_entity_lookup.h +0 -8
  182. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  183. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  184. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  185. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  186. data/ext/nokogiri/xml_attr.h +0 -9
  187. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  188. data/ext/nokogiri/xml_cdata.h +0 -9
  189. data/ext/nokogiri/xml_comment.h +0 -9
  190. data/ext/nokogiri/xml_document.h +0 -23
  191. data/ext/nokogiri/xml_document_fragment.h +0 -10
  192. data/ext/nokogiri/xml_dtd.h +0 -10
  193. data/ext/nokogiri/xml_element_content.h +0 -10
  194. data/ext/nokogiri/xml_element_decl.h +0 -9
  195. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  196. data/ext/nokogiri/xml_entity_decl.h +0 -10
  197. data/ext/nokogiri/xml_entity_reference.h +0 -9
  198. data/ext/nokogiri/xml_io.c +0 -61
  199. data/ext/nokogiri/xml_io.h +0 -11
  200. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  201. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  202. data/ext/nokogiri/xml_namespace.h +0 -14
  203. data/ext/nokogiri/xml_node.h +0 -13
  204. data/ext/nokogiri/xml_node_set.h +0 -12
  205. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  206. data/ext/nokogiri/xml_reader.h +0 -10
  207. data/ext/nokogiri/xml_relax_ng.h +0 -9
  208. data/ext/nokogiri/xml_sax_parser.h +0 -39
  209. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  210. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  211. data/ext/nokogiri/xml_schema.h +0 -9
  212. data/ext/nokogiri/xml_syntax_error.h +0 -13
  213. data/ext/nokogiri/xml_text.h +0 -9
  214. data/ext/nokogiri/xml_xpath_context.h +0 -10
  215. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  216. data/lib/nokogiri/html/document.rb +0 -335
  217. data/lib/nokogiri/html/document_fragment.rb +0 -49
  218. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  219. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  220. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  221. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  222. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  223. /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  224. /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,36 +1,57 @@
1
- #include <xml_node.h>
1
+ #include <nokogiri.h>
2
2
 
3
- static ID decorate, decorate_bang;
3
+ #include <stdbool.h>
4
4
 
5
- #ifdef DEBUG
6
- static void debug_node_dealloc(xmlNodePtr x)
7
- {
8
- NOKOGIRI_DEBUG_START(x)
9
- NOKOGIRI_DEBUG_END(x)
10
- }
11
- #else
12
- # define debug_node_dealloc 0
13
- #endif
5
+ // :stopdoc:
6
+
7
+ VALUE cNokogiriXmlNode ;
8
+ static ID id_decorate, id_decorate_bang;
14
9
 
15
- static void mark(xmlNodePtr node)
10
+ typedef xmlNodePtr(*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr);
11
+
12
+ static void
13
+ _xml_node_mark(void *ptr)
16
14
  {
15
+ xmlNodePtr node = ptr;
16
+
17
+ if (!DOC_RUBY_OBJECT_TEST(node->doc)) {
18
+ return;
19
+ }
20
+
17
21
  xmlDocPtr doc = node->doc;
18
- if(doc->type == XML_DOCUMENT_NODE || doc->type == XML_HTML_DOCUMENT_NODE) {
19
- if(DOC_RUBY_OBJECT_TEST(doc)) {
22
+ if (doc->type == XML_DOCUMENT_NODE || doc->type == XML_HTML_DOCUMENT_NODE) {
23
+ if (DOC_RUBY_OBJECT_TEST(doc)) {
20
24
  rb_gc_mark(DOC_RUBY_OBJECT(doc));
21
25
  }
22
- } else if(node->doc->_private) {
26
+ } else if (node->doc->_private) {
23
27
  rb_gc_mark((VALUE)doc->_private);
24
28
  }
25
29
  }
26
30
 
27
- /* :nodoc: */
28
- typedef xmlNodePtr (*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr);
31
+ static void
32
+ _xml_node_update_references(void *ptr)
33
+ {
34
+ xmlNodePtr node = ptr;
29
35
 
30
- /* :nodoc: */
31
- static void relink_namespace(xmlNodePtr reparented)
36
+ if (node->_private) {
37
+ node->_private = (void *)rb_gc_location((VALUE)node->_private);
38
+ }
39
+ }
40
+
41
+ static const rb_data_type_t nokogiri_node_type = {
42
+ .wrap_struct_name = "Nokogiri::XML::Node",
43
+ .function = {
44
+ .dmark = _xml_node_mark,
45
+ .dcompact = _xml_node_update_references,
46
+ },
47
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
48
+ };
49
+
50
+ static void
51
+ relink_namespace(xmlNodePtr reparented)
32
52
  {
33
53
  xmlNodePtr child;
54
+ xmlAttrPtr attr;
34
55
 
35
56
  if (reparented->type != XML_ATTRIBUTE_NODE &&
36
57
  reparented->type != XML_ELEMENT_NODE) { return; }
@@ -42,7 +63,7 @@ static void relink_namespace(xmlNodePtr reparented)
42
63
  name = xmlSplitQName2(reparented->name, &prefix);
43
64
 
44
65
  if (reparented->type == XML_ATTRIBUTE_NODE) {
45
- if (prefix == NULL || strcmp((char*)prefix, XMLNS_PREFIX) == 0) {
66
+ if (prefix == NULL || strcmp((char *)prefix, XMLNS_PREFIX) == 0) {
46
67
  xmlFree(name);
47
68
  xmlFree(prefix);
48
69
  return;
@@ -64,7 +85,9 @@ static void relink_namespace(xmlNodePtr reparented)
64
85
  if (reparented->type != XML_ELEMENT_NODE || !reparented->parent) { return; }
65
86
 
66
87
  /* Make sure that our reparented node has the correct namespaces */
67
- if (!reparented->ns && reparented->doc != (xmlDocPtr)reparented->parent) {
88
+ if (!reparented->ns &&
89
+ (reparented->doc != (xmlDocPtr)reparented->parent) &&
90
+ (rb_iv_get(DOC_RUBY_OBJECT(reparented->doc), "@namespace_inheritance") == Qtrue)) {
68
91
  xmlSetNs(reparented, reparented->parent->ns);
69
92
  }
70
93
 
@@ -87,7 +110,7 @@ static void relink_namespace(xmlNodePtr reparented)
87
110
  } else {
88
111
  reparented->nsDef = curr->next;
89
112
  }
90
- nokogiri_root_nsdef(curr, reparented->doc);
113
+ noko_xml_document_pin_namespace(curr, reparented->doc);
91
114
  } else {
92
115
  prev = curr;
93
116
  }
@@ -127,16 +150,19 @@ static void relink_namespace(xmlNodePtr reparented)
127
150
  }
128
151
 
129
152
  if (reparented->type == XML_ELEMENT_NODE) {
130
- child = (xmlNodePtr)((xmlElementPtr)reparented)->attributes;
131
- while(NULL != child) {
132
- relink_namespace(child);
133
- child = child->next;
153
+ attr = reparented->properties;
154
+ while (NULL != attr) {
155
+ relink_namespace((xmlNodePtr)attr);
156
+ attr = attr->next;
134
157
  }
135
158
  }
136
159
  }
137
160
 
138
- /* :nodoc: */
139
- static xmlNodePtr xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node)
161
+
162
+ /* internal function meant to wrap xmlReplaceNode
163
+ and fix some issues we have with libxml2 merging nodes */
164
+ static xmlNodePtr
165
+ xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node)
140
166
  {
141
167
  xmlNodePtr retval ;
142
168
 
@@ -159,22 +185,34 @@ static xmlNodePtr xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node)
159
185
  return retval ;
160
186
  }
161
187
 
162
- /* :nodoc: */
163
- static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func prf)
188
+
189
+ static void
190
+ raise_if_ancestor_of_self(xmlNodePtr self)
191
+ {
192
+ for (xmlNodePtr ancestor = self->parent ; ancestor ; ancestor = ancestor->parent) {
193
+ if (self == ancestor) {
194
+ rb_raise(rb_eRuntimeError, "cycle detected: node '%s' is an ancestor of itself", self->name);
195
+ }
196
+ }
197
+ }
198
+
199
+
200
+ static VALUE
201
+ reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func prf)
164
202
  {
165
203
  VALUE reparented_obj ;
166
- xmlNodePtr reparentee, pivot, reparented, next_text, new_next_text, parent ;
204
+ xmlNodePtr reparentee, original_reparentee, pivot, reparented, next_text, new_next_text, parent ;
167
205
  int original_ns_prefix_is_default = 0 ;
168
206
 
169
- if(!rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlNode)) {
207
+ if (!rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlNode)) {
170
208
  rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
171
209
  }
172
- if(rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlDocument)) {
210
+ if (rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlDocument)) {
173
211
  rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
174
212
  }
175
213
 
176
- Data_Get_Struct(reparentee_obj, xmlNode, reparentee);
177
- Data_Get_Struct(pivot_obj, xmlNode, pivot);
214
+ Noko_Node_Get_Struct(reparentee_obj, xmlNode, reparentee);
215
+ Noko_Node_Get_Struct(pivot_obj, xmlNode, pivot);
178
216
 
179
217
  /*
180
218
  * Check if nodes given are appropriate to have a parent-child
@@ -190,66 +228,66 @@ static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_rep
190
228
 
191
229
  if (parent) {
192
230
  switch (parent->type) {
193
- case XML_DOCUMENT_NODE:
194
- case XML_HTML_DOCUMENT_NODE:
195
- switch (reparentee->type) {
196
- case XML_ELEMENT_NODE:
197
- case XML_PI_NODE:
198
- case XML_COMMENT_NODE:
199
- case XML_DOCUMENT_TYPE_NODE:
200
- /*
201
- * The DOM specification says no to adding text-like nodes
202
- * directly to a document, but we allow it for compatibility.
203
- */
204
- case XML_TEXT_NODE:
205
- case XML_CDATA_SECTION_NODE:
206
- case XML_ENTITY_REF_NODE:
207
- goto ok;
208
- default:
231
+ case XML_DOCUMENT_NODE:
232
+ case XML_HTML_DOCUMENT_NODE:
233
+ switch (reparentee->type) {
234
+ case XML_ELEMENT_NODE:
235
+ case XML_PI_NODE:
236
+ case XML_COMMENT_NODE:
237
+ case XML_DOCUMENT_TYPE_NODE:
238
+ /*
239
+ * The DOM specification says no to adding text-like nodes
240
+ * directly to a document, but we allow it for compatibility.
241
+ */
242
+ case XML_TEXT_NODE:
243
+ case XML_CDATA_SECTION_NODE:
244
+ case XML_ENTITY_REF_NODE:
245
+ goto ok;
246
+ default:
247
+ break;
248
+ }
209
249
  break;
210
- }
211
- break;
212
- case XML_DOCUMENT_FRAG_NODE:
213
- case XML_ENTITY_REF_NODE:
214
- case XML_ELEMENT_NODE:
215
- switch (reparentee->type) {
216
- case XML_ELEMENT_NODE:
217
- case XML_PI_NODE:
218
- case XML_COMMENT_NODE:
219
- case XML_TEXT_NODE:
220
- case XML_CDATA_SECTION_NODE:
250
+ case XML_DOCUMENT_FRAG_NODE:
221
251
  case XML_ENTITY_REF_NODE:
222
- goto ok;
223
- default:
252
+ case XML_ELEMENT_NODE:
253
+ switch (reparentee->type) {
254
+ case XML_ELEMENT_NODE:
255
+ case XML_PI_NODE:
256
+ case XML_COMMENT_NODE:
257
+ case XML_TEXT_NODE:
258
+ case XML_CDATA_SECTION_NODE:
259
+ case XML_ENTITY_REF_NODE:
260
+ goto ok;
261
+ default:
262
+ break;
263
+ }
264
+ break;
265
+ case XML_ATTRIBUTE_NODE:
266
+ switch (reparentee->type) {
267
+ case XML_TEXT_NODE:
268
+ case XML_ENTITY_REF_NODE:
269
+ goto ok;
270
+ default:
271
+ break;
272
+ }
224
273
  break;
225
- }
226
- break;
227
- case XML_ATTRIBUTE_NODE:
228
- switch (reparentee->type) {
229
274
  case XML_TEXT_NODE:
230
- case XML_ENTITY_REF_NODE:
231
- goto ok;
275
+ /*
276
+ * xmlAddChild() breaks the DOM specification in that it allows
277
+ * adding a text node to another, in which case text nodes are
278
+ * coalesced, but since our JRuby version does not support such
279
+ * operation, we should inhibit it.
280
+ */
281
+ break;
232
282
  default:
233
283
  break;
234
- }
235
- break;
236
- case XML_TEXT_NODE:
237
- /*
238
- * xmlAddChild() breaks the DOM specification in that it allows
239
- * adding a text node to another, in which case text nodes are
240
- * coalesced, but since our JRuby version does not support such
241
- * operation, we should inhibit it.
242
- */
243
- break;
244
- default:
245
- break;
246
284
  }
247
285
 
248
286
  rb_raise(rb_eArgError, "cannot reparent %s there", rb_obj_classname(reparentee_obj));
249
287
  }
250
288
 
251
289
  ok:
252
- xmlUnlinkNode(reparentee);
290
+ original_reparentee = reparentee;
253
291
 
254
292
  if (reparentee->doc != pivot->doc || reparentee->type == XML_TEXT_NODE) {
255
293
  /*
@@ -290,7 +328,7 @@ ok:
290
328
  original_ns_prefix_is_default = 1;
291
329
  }
292
330
 
293
- nokogiri_root_node(reparentee);
331
+ noko_xml_document_pin_node(reparentee);
294
332
 
295
333
  if (!(reparentee = xmlDocCopyNode(reparentee, pivot->doc, 1))) {
296
334
  rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
@@ -301,12 +339,14 @@ ok:
301
339
  * issue #391, where new node's prefix may become the string "default"
302
340
  * see libxml2 tree.c xmlNewReconciliedNs which implements this behavior.
303
341
  */
304
- xmlFree(reparentee->ns->prefix);
342
+ xmlFree(DISCARD_CONST_QUAL_XMLCHAR(reparentee->ns->prefix));
305
343
  reparentee->ns->prefix = NULL;
306
344
  }
307
345
  }
308
346
 
309
- if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling
347
+ xmlUnlinkNode(original_reparentee);
348
+
349
+ if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling && prf != xmlAddChild
310
350
  && reparentee->type == XML_TEXT_NODE && pivot->next && pivot->next->type == XML_TEXT_NODE) {
311
351
  /*
312
352
  * libxml merges text nodes in a right-to-left fashion, meaning that if
@@ -330,12 +370,12 @@ ok:
330
370
  new_next_text = xmlDocCopyNode(next_text, pivot->doc, 1) ;
331
371
 
332
372
  xmlUnlinkNode(next_text);
333
- nokogiri_root_node(next_text);
373
+ noko_xml_document_pin_node(next_text);
334
374
 
335
375
  xmlAddNextSibling(pivot, new_next_text);
336
376
  }
337
377
 
338
- if(!(reparented = (*prf)(pivot, reparentee))) {
378
+ if (!(reparented = (*prf)(pivot, reparentee))) {
339
379
  rb_raise(rb_eRuntimeError, "Could not reparent node");
340
380
  }
341
381
 
@@ -345,57 +385,432 @@ ok:
345
385
  * adjacent text nodes.
346
386
  */
347
387
  DATA_PTR(reparentee_obj) = reparented ;
388
+ reparented_obj = noko_xml_node_wrap(Qnil, reparented);
348
389
 
349
- relink_namespace(reparented);
390
+ rb_funcall(reparented_obj, id_decorate_bang, 0);
350
391
 
351
- reparented_obj = Nokogiri_wrap_xml_node(Qnil, reparented);
392
+ /* if we've created a cycle, raise an exception */
393
+ raise_if_ancestor_of_self(reparented);
352
394
 
353
- rb_funcall(reparented_obj, decorate_bang, 0);
395
+ relink_namespace(reparented);
354
396
 
355
397
  return reparented_obj ;
356
398
  }
357
399
 
400
+ // :startdoc:
358
401
 
359
402
  /*
360
- * call-seq:
361
- * document
403
+ * :call-seq:
404
+ * add_namespace_definition(prefix, href) → Nokogiri::XML::Namespace
405
+ * add_namespace(prefix, href) → Nokogiri::XML::Namespace
406
+ *
407
+ * :category: Manipulating Document Structure
408
+ *
409
+ * Adds a namespace definition to this node with +prefix+ using +href+ value, as if this node had
410
+ * included an attribute "xmlns:prefix=href".
411
+ *
412
+ * A default namespace definition for this node can be added by passing +nil+ for +prefix+.
413
+ *
414
+ * [Parameters]
415
+ * - +prefix+ (String, +nil+) An {XML Name}[https://www.w3.org/TR/xml-names/#ns-decl]
416
+ * - +href+ (String) The {URI reference}[https://www.w3.org/TR/xml-names/#sec-namespaces]
417
+ *
418
+ * [Returns] The new Nokogiri::XML::Namespace
419
+ *
420
+ * *Example:* adding a non-default namespace definition
421
+ *
422
+ * doc = Nokogiri::XML("<store><inventory></inventory></store>")
423
+ * inventory = doc.at_css("inventory")
424
+ * inventory.add_namespace_definition("automobile", "http://alices-autos.com/")
425
+ * inventory.add_namespace_definition("bicycle", "http://bobs-bikes.com/")
426
+ * inventory.add_child("<automobile:tire>Michelin model XGV, size 75R</automobile:tire>")
427
+ * doc.to_xml
428
+ * # => "<?xml version=\"1.0\"?>\n" +
429
+ * # "<store>\n" +
430
+ * # " <inventory xmlns:automobile=\"http://alices-autos.com/\" xmlns:bicycle=\"http://bobs-bikes.com/\">\n" +
431
+ * # " <automobile:tire>Michelin model XGV, size 75R</automobile:tire>\n" +
432
+ * # " </inventory>\n" +
433
+ * # "</store>\n"
434
+ *
435
+ * *Example:* adding a default namespace definition
436
+ *
437
+ * doc = Nokogiri::XML("<store><inventory><tire>Michelin model XGV, size 75R</tire></inventory></store>")
438
+ * doc.at_css("tire").add_namespace_definition(nil, "http://bobs-bikes.com/")
439
+ * doc.to_xml
440
+ * # => "<?xml version=\"1.0\"?>\n" +
441
+ * # "<store>\n" +
442
+ * # " <inventory>\n" +
443
+ * # " <tire xmlns=\"http://bobs-bikes.com/\">Michelin model XGV, size 75R</tire>\n" +
444
+ * # " </inventory>\n" +
445
+ * # "</store>\n"
446
+ *
447
+ */
448
+ static VALUE
449
+ rb_xml_node_add_namespace_definition(VALUE rb_node, VALUE rb_prefix, VALUE rb_href)
450
+ {
451
+ xmlNodePtr c_node, element;
452
+ xmlNsPtr c_namespace;
453
+ const xmlChar *c_prefix = (const xmlChar *)(NIL_P(rb_prefix) ? NULL : StringValueCStr(rb_prefix));
454
+
455
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
456
+ element = c_node ;
457
+
458
+ c_namespace = xmlSearchNs(c_node->doc, c_node, c_prefix);
459
+
460
+ if (!c_namespace) {
461
+ if (c_node->type != XML_ELEMENT_NODE) {
462
+ element = c_node->parent;
463
+ }
464
+ c_namespace = xmlNewNs(element, (const xmlChar *)StringValueCStr(rb_href), c_prefix);
465
+ }
466
+
467
+ if (!c_namespace) {
468
+ return Qnil ;
469
+ }
470
+
471
+ if (NIL_P(rb_prefix) || c_node != element) {
472
+ xmlSetNs(c_node, c_namespace);
473
+ }
474
+
475
+ return noko_xml_namespace_wrap(c_namespace, c_node->doc);
476
+ }
477
+
478
+
479
+ /*
480
+ * :call-seq: attribute(name) → Nokogiri::XML::Attr
481
+ *
482
+ * :category: Working With Node Attributes
483
+ *
484
+ * [Returns] Attribute (Nokogiri::XML::Attr) belonging to this node with name +name+.
485
+ *
486
+ * ⚠ Note that attribute namespaces are ignored and only the simple (non-namespace-prefixed) name is
487
+ * used to find a matching attribute. In case of a simple name collision, only one of the matching
488
+ * attributes will be returned. In this case, you will need to use #attribute_with_ns.
489
+ *
490
+ * *Example:*
491
+ *
492
+ * doc = Nokogiri::XML("<root><child size='large' class='big wide tall'/></root>")
493
+ * child = doc.at_css("child")
494
+ * child.attribute("size") # => #<Nokogiri::XML::Attr:0x550 name="size" value="large">
495
+ * child.attribute("class") # => #<Nokogiri::XML::Attr:0x564 name="class" value="big wide tall">
496
+ *
497
+ * *Example* showing that namespaced attributes will not be returned:
498
+ *
499
+ * ⚠ Note that only one of the two matching attributes is returned.
500
+ *
501
+ * doc = Nokogiri::XML(<<~EOF)
502
+ * <root xmlns:width='http://example.com/widths'
503
+ * xmlns:height='http://example.com/heights'>
504
+ * <child width:size='broad' height:size='tall'/>
505
+ * </root>
506
+ * EOF
507
+ * doc.at_css("child").attribute("size")
508
+ * # => #(Attr:0x550 {
509
+ * # name = "size",
510
+ * # namespace = #(Namespace:0x564 {
511
+ * # prefix = "width",
512
+ * # href = "http://example.com/widths"
513
+ * # }),
514
+ * # value = "broad"
515
+ * # })
516
+ */
517
+ static VALUE
518
+ rb_xml_node_attribute(VALUE self, VALUE name)
519
+ {
520
+ xmlNodePtr node;
521
+ xmlAttrPtr prop;
522
+ Noko_Node_Get_Struct(self, xmlNode, node);
523
+ prop = xmlHasProp(node, (xmlChar *)StringValueCStr(name));
524
+
525
+ if (! prop) { return Qnil; }
526
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)prop);
527
+ }
528
+
529
+
530
+ /*
531
+ * :call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
532
+ *
533
+ * :category: Working With Node Attributes
534
+ *
535
+ * [Returns] Attributes (an Array of Nokogiri::XML::Attr) belonging to this node.
536
+ *
537
+ * Note that this is the preferred alternative to #attributes when the simple
538
+ * (non-namespace-prefixed) attribute names may collide.
539
+ *
540
+ * *Example:*
541
+ *
542
+ * Contrast this with the colliding-name example from #attributes.
543
+ *
544
+ * doc = Nokogiri::XML(<<~EOF)
545
+ * <root xmlns:width='http://example.com/widths'
546
+ * xmlns:height='http://example.com/heights'>
547
+ * <child width:size='broad' height:size='tall'/>
548
+ * </root>
549
+ * EOF
550
+ * doc.at_css("child").attribute_nodes
551
+ * # => [#(Attr:0x550 {
552
+ * # name = "size",
553
+ * # namespace = #(Namespace:0x564 {
554
+ * # prefix = "width",
555
+ * # href = "http://example.com/widths"
556
+ * # }),
557
+ * # value = "broad"
558
+ * # }),
559
+ * # #(Attr:0x578 {
560
+ * # name = "size",
561
+ * # namespace = #(Namespace:0x58c {
562
+ * # prefix = "height",
563
+ * # href = "http://example.com/heights"
564
+ * # }),
565
+ * # value = "tall"
566
+ * # })]
567
+ */
568
+ static VALUE
569
+ rb_xml_node_attribute_nodes(VALUE rb_node)
570
+ {
571
+ xmlNodePtr c_node;
572
+
573
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
574
+
575
+ return noko_xml_node_attrs(c_node);
576
+ }
577
+
578
+
579
+ /*
580
+ * :call-seq: attribute_with_ns(name, namespace) → Nokogiri::XML::Attr
581
+ *
582
+ * :category: Working With Node Attributes
583
+ *
584
+ * [Returns]
585
+ * Attribute (Nokogiri::XML::Attr) belonging to this node with matching +name+ and +namespace+.
586
+ *
587
+ * [Parameters]
588
+ * - +name+ (String): the simple (non-namespace-prefixed) name of the attribute
589
+ * - +namespace+ (String): the URI of the attribute's namespace
590
+ *
591
+ * See related: #attribute
592
+ *
593
+ * *Example:*
594
+ *
595
+ * doc = Nokogiri::XML(<<~EOF)
596
+ * <root xmlns:width='http://example.com/widths'
597
+ * xmlns:height='http://example.com/heights'>
598
+ * <child width:size='broad' height:size='tall'/>
599
+ * </root>
600
+ * EOF
601
+ * doc.at_css("child").attribute_with_ns("size", "http://example.com/widths")
602
+ * # => #(Attr:0x550 {
603
+ * # name = "size",
604
+ * # namespace = #(Namespace:0x564 {
605
+ * # prefix = "width",
606
+ * # href = "http://example.com/widths"
607
+ * # }),
608
+ * # value = "broad"
609
+ * # })
610
+ * doc.at_css("child").attribute_with_ns("size", "http://example.com/heights")
611
+ * # => #(Attr:0x578 {
612
+ * # name = "size",
613
+ * # namespace = #(Namespace:0x58c {
614
+ * # prefix = "height",
615
+ * # href = "http://example.com/heights"
616
+ * # }),
617
+ * # value = "tall"
618
+ * # })
619
+ */
620
+ static VALUE
621
+ rb_xml_node_attribute_with_ns(VALUE self, VALUE name, VALUE namespace)
622
+ {
623
+ xmlNodePtr node;
624
+ xmlAttrPtr prop;
625
+ Noko_Node_Get_Struct(self, xmlNode, node);
626
+ prop = xmlHasNsProp(node, (xmlChar *)StringValueCStr(name),
627
+ NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace));
628
+
629
+ if (! prop) { return Qnil; }
630
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)prop);
631
+ }
632
+
633
+
634
+
635
+ /*
636
+ * call-seq: blank? → Boolean
637
+ *
638
+ * [Returns] +true+ if the node is an empty or whitespace-only text or cdata node, else +false+.
639
+ *
640
+ * *Example:*
641
+ *
642
+ * Nokogiri("<root><child/></root>").root.child.blank? # => false
643
+ * Nokogiri("<root>\t \n</root>").root.child.blank? # => true
644
+ * Nokogiri("<root><![CDATA[\t \n]]></root>").root.child.blank? # => true
645
+ * Nokogiri("<root>not-blank</root>").root.child
646
+ * .tap { |n| n.content = "" }.blank # => true
647
+ */
648
+ static VALUE
649
+ rb_xml_node_blank_eh(VALUE self)
650
+ {
651
+ xmlNodePtr node;
652
+ Noko_Node_Get_Struct(self, xmlNode, node);
653
+ return (1 == xmlIsBlankNode(node)) ? Qtrue : Qfalse ;
654
+ }
655
+
656
+
657
+ /*
658
+ * :call-seq: child() → Nokogiri::XML::Node
659
+ *
660
+ * :category: Traversing Document Structure
661
+ *
662
+ * [Returns] First of this node's children, or +nil+ if there are no children
663
+ *
664
+ * This is a convenience method and is equivalent to:
665
+ *
666
+ * node.children.first
667
+ *
668
+ * See related: #children
669
+ */
670
+ static VALUE
671
+ rb_xml_node_child(VALUE self)
672
+ {
673
+ xmlNodePtr node, child;
674
+ Noko_Node_Get_Struct(self, xmlNode, node);
675
+
676
+ child = node->children;
677
+ if (!child) { return Qnil; }
678
+
679
+ return noko_xml_node_wrap(Qnil, child);
680
+ }
681
+
682
+
683
+ /*
684
+ * :call-seq: children() → Nokogiri::XML::NodeSet
685
+ *
686
+ * :category: Traversing Document Structure
362
687
  *
363
- * Get the document for this Node
688
+ * [Returns] Nokogiri::XML::NodeSet containing this node's children.
364
689
  */
365
- static VALUE document(VALUE self)
690
+ static VALUE
691
+ rb_xml_node_children(VALUE self)
366
692
  {
367
693
  xmlNodePtr node;
368
- Data_Get_Struct(self, xmlNode, node);
694
+ xmlNodePtr child;
695
+ xmlNodeSetPtr set;
696
+ VALUE document;
697
+ VALUE node_set;
698
+
699
+ Noko_Node_Get_Struct(self, xmlNode, node);
700
+
701
+ child = node->children;
702
+ set = xmlXPathNodeSetCreate(child);
703
+
704
+ document = DOC_RUBY_OBJECT(node->doc);
705
+
706
+ if (!child) { return noko_xml_node_set_wrap(set, document); }
707
+
708
+ child = child->next;
709
+ while (NULL != child) {
710
+ xmlXPathNodeSetAddUnique(set, child);
711
+ child = child->next;
712
+ }
713
+
714
+ node_set = noko_xml_node_set_wrap(set, document);
715
+
716
+ return node_set;
717
+ }
718
+
719
+
720
+ /*
721
+ * :call-seq:
722
+ * content() → String
723
+ * inner_text() → String
724
+ * text() → String
725
+ * to_str() → String
726
+ *
727
+ * [Returns]
728
+ * Contents of all the text nodes in this node's subtree, concatenated together into a single
729
+ * String.
730
+ *
731
+ * ⚠ Note that entities will _always_ be expanded in the returned String.
732
+ *
733
+ * See related: #inner_html
734
+ *
735
+ * *Example* of how entities are handled:
736
+ *
737
+ * Note that <tt>&lt;</tt> becomes <tt><</tt> in the returned String.
738
+ *
739
+ * doc = Nokogiri::XML.fragment("<child>a &lt; b</child>")
740
+ * doc.at_css("child").content
741
+ * # => "a < b"
742
+ *
743
+ * *Example* of how a subtree is handled:
744
+ *
745
+ * Note that the <tt><span></tt> tags are omitted and only the text node contents are returned,
746
+ * concatenated into a single string.
747
+ *
748
+ * doc = Nokogiri::XML.fragment("<child><span>first</span> <span>second</span></child>")
749
+ * doc.at_css("child").content
750
+ * # => "first second"
751
+ */
752
+ static VALUE
753
+ rb_xml_node_content(VALUE self)
754
+ {
755
+ xmlNodePtr node;
756
+ xmlChar *content;
757
+
758
+ Noko_Node_Get_Struct(self, xmlNode, node);
759
+
760
+ content = xmlNodeGetContent(node);
761
+ if (content) {
762
+ VALUE rval = NOKOGIRI_STR_NEW2(content);
763
+ xmlFree(content);
764
+ return rval;
765
+ }
766
+ return Qnil;
767
+ }
768
+
769
+
770
+ /*
771
+ * :call-seq: document() → Nokogiri::XML::Document
772
+ *
773
+ * :category: Traversing Document Structure
774
+ *
775
+ * [Returns] Parent Nokogiri::XML::Document for this node
776
+ */
777
+ static VALUE
778
+ rb_xml_node_document(VALUE self)
779
+ {
780
+ xmlNodePtr node;
781
+ Noko_Node_Get_Struct(self, xmlNode, node);
369
782
  return DOC_RUBY_OBJECT(node->doc);
370
783
  }
371
784
 
372
785
  /*
373
- * call-seq:
374
- * pointer_id
786
+ * :call-seq: pointer_id() → Integer
375
787
  *
376
- * Get the internal pointer number
788
+ * [Returns]
789
+ * A unique id for this node based on the internal memory structures. This method is used by #==
790
+ * to determine node identity.
377
791
  */
378
- static VALUE pointer_id(VALUE self)
792
+ static VALUE
793
+ rb_xml_node_pointer_id(VALUE self)
379
794
  {
380
795
  xmlNodePtr node;
381
- Data_Get_Struct(self, xmlNode, node);
796
+ Noko_Node_Get_Struct(self, xmlNode, node);
382
797
 
383
- return INT2NUM((long)(node));
798
+ return rb_uint2inum((uintptr_t)(node));
384
799
  }
385
800
 
386
801
  /*
387
- * call-seq:
388
- * encode_special_chars(string)
802
+ * :call-seq: encode_special_chars(string) → String
389
803
  *
390
804
  * Encode any special characters in +string+
391
805
  */
392
- static VALUE encode_special_chars(VALUE self, VALUE string)
806
+ static VALUE
807
+ encode_special_chars(VALUE self, VALUE string)
393
808
  {
394
809
  xmlNodePtr node;
395
810
  xmlChar *encoded;
396
811
  VALUE encoded_str;
397
812
 
398
- Data_Get_Struct(self, xmlNode, node);
813
+ Noko_Node_Get_Struct(self, xmlNode, node);
399
814
  encoded = xmlEncodeSpecialChars(
400
815
  node->doc,
401
816
  (const xmlChar *)StringValueCStr(string)
@@ -408,8 +823,8 @@ static VALUE encode_special_chars(VALUE self, VALUE string)
408
823
  }
409
824
 
410
825
  /*
411
- * call-seq:
412
- * create_internal_subset(name, external_id, system_id)
826
+ * :call-seq:
827
+ * create_internal_subset(name, external_id, system_id)
413
828
  *
414
829
  * Create the internal subset of a document.
415
830
  *
@@ -419,17 +834,18 @@ static VALUE encode_special_chars(VALUE self, VALUE string)
419
834
  * doc.create_internal_subset("chapter", nil, "chapter.dtd")
420
835
  * # => <!DOCTYPE chapter SYSTEM "chapter.dtd">
421
836
  */
422
- static VALUE create_internal_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id)
837
+ static VALUE
838
+ create_internal_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id)
423
839
  {
424
840
  xmlNodePtr node;
425
841
  xmlDocPtr doc;
426
842
  xmlDtdPtr dtd;
427
843
 
428
- Data_Get_Struct(self, xmlNode, node);
844
+ Noko_Node_Get_Struct(self, xmlNode, node);
429
845
 
430
846
  doc = node->doc;
431
847
 
432
- if(xmlGetIntSubset(doc)) {
848
+ if (xmlGetIntSubset(doc)) {
433
849
  rb_raise(rb_eRuntimeError, "Document already has an internal subset");
434
850
  }
435
851
 
@@ -440,28 +856,29 @@ static VALUE create_internal_subset(VALUE self, VALUE name, VALUE external_id, V
440
856
  NIL_P(system_id) ? NULL : (const xmlChar *)StringValueCStr(system_id)
441
857
  );
442
858
 
443
- if(!dtd) { return Qnil; }
859
+ if (!dtd) { return Qnil; }
444
860
 
445
- return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)dtd);
861
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
446
862
  }
447
863
 
448
864
  /*
449
- * call-seq:
450
- * create_external_subset(name, external_id, system_id)
865
+ * :call-seq:
866
+ * create_external_subset(name, external_id, system_id)
451
867
  *
452
868
  * Create an external subset
453
869
  */
454
- static VALUE create_external_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id)
870
+ static VALUE
871
+ create_external_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id)
455
872
  {
456
873
  xmlNodePtr node;
457
874
  xmlDocPtr doc;
458
875
  xmlDtdPtr dtd;
459
876
 
460
- Data_Get_Struct(self, xmlNode, node);
877
+ Noko_Node_Get_Struct(self, xmlNode, node);
461
878
 
462
879
  doc = node->doc;
463
880
 
464
- if(doc->extSubset) {
881
+ if (doc->extSubset) {
465
882
  rb_raise(rb_eRuntimeError, "Document already has an external subset");
466
883
  }
467
884
 
@@ -472,72 +889,78 @@ static VALUE create_external_subset(VALUE self, VALUE name, VALUE external_id, V
472
889
  NIL_P(system_id) ? NULL : (const xmlChar *)StringValueCStr(system_id)
473
890
  );
474
891
 
475
- if(!dtd) { return Qnil; }
892
+ if (!dtd) { return Qnil; }
476
893
 
477
- return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)dtd);
894
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
478
895
  }
479
896
 
480
897
  /*
481
- * call-seq:
482
- * external_subset
898
+ * :call-seq:
899
+ * external_subset()
483
900
  *
484
901
  * Get the external subset
485
902
  */
486
- static VALUE external_subset(VALUE self)
903
+ static VALUE
904
+ external_subset(VALUE self)
487
905
  {
488
906
  xmlNodePtr node;
489
907
  xmlDocPtr doc;
490
908
  xmlDtdPtr dtd;
491
909
 
492
- Data_Get_Struct(self, xmlNode, node);
910
+ Noko_Node_Get_Struct(self, xmlNode, node);
493
911
 
494
- if(!node->doc) { return Qnil; }
912
+ if (!node->doc) { return Qnil; }
495
913
 
496
914
  doc = node->doc;
497
915
  dtd = doc->extSubset;
498
916
 
499
- if(!dtd) { return Qnil; }
917
+ if (!dtd) { return Qnil; }
500
918
 
501
- return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)dtd);
919
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
502
920
  }
503
921
 
504
922
  /*
505
- * call-seq:
506
- * internal_subset
923
+ * :call-seq:
924
+ * internal_subset()
507
925
  *
508
926
  * Get the internal subset
509
927
  */
510
- static VALUE internal_subset(VALUE self)
928
+ static VALUE
929
+ internal_subset(VALUE self)
511
930
  {
512
931
  xmlNodePtr node;
513
932
  xmlDocPtr doc;
514
933
  xmlDtdPtr dtd;
515
934
 
516
- Data_Get_Struct(self, xmlNode, node);
935
+ Noko_Node_Get_Struct(self, xmlNode, node);
517
936
 
518
- if(!node->doc) { return Qnil; }
937
+ if (!node->doc) { return Qnil; }
519
938
 
520
939
  doc = node->doc;
521
940
  dtd = xmlGetIntSubset(doc);
522
941
 
523
- if(!dtd) { return Qnil; }
942
+ if (!dtd) { return Qnil; }
524
943
 
525
- return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)dtd);
944
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
526
945
  }
527
946
 
528
947
  /*
529
- * call-seq:
530
- * dup
531
- * dup(depth)
532
- * dup(depth, new_parent_doc)
948
+ * :call-seq:
949
+ * dup → Nokogiri::XML::Node
950
+ * dup(depth) → Nokogiri::XML::Node
951
+ * dup(depth, new_parent_doc) → Nokogiri::XML::Node
533
952
  *
534
953
  * Copy this node.
535
- * An optional depth may be passed in. 0 is a shallow copy, 1 (the default) is a deep copy.
536
- * An optional new_parent_doc may also be passed in, which will be the new
537
- * node's parent document. Defaults to the current node's document.
538
- * current document.
954
+ *
955
+ * [Parameters]
956
+ * - +depth+ 0 is a shallow copy, 1 (the default) is a deep copy.
957
+ * - +new_parent_doc+
958
+ * The new node's parent Document. Defaults to this node's document.
959
+ *
960
+ * [Returns] The new Nokogiri::XML::Node
539
961
  */
540
- static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
962
+ static VALUE
963
+ duplicate_node(int argc, VALUE *argv, VALUE self)
541
964
  {
542
965
  VALUE r_level, r_new_parent_doc;
543
966
  int level;
@@ -545,7 +968,7 @@ static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
545
968
  xmlDocPtr new_parent_doc;
546
969
  xmlNodePtr node, dup;
547
970
 
548
- Data_Get_Struct(self, xmlNode, node);
971
+ Noko_Node_Get_Struct(self, xmlNode, node);
549
972
 
550
973
  n_args = rb_scan_args(argc, argv, "02", &r_level, &r_new_parent_doc);
551
974
 
@@ -557,44 +980,33 @@ static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
557
980
  if (n_args < 2) {
558
981
  new_parent_doc = node->doc;
559
982
  } else {
560
- Data_Get_Struct(r_new_parent_doc, xmlDoc, new_parent_doc);
983
+ new_parent_doc = noko_xml_document_unwrap(r_new_parent_doc);
561
984
  }
562
985
 
563
986
  dup = xmlDocCopyNode(node, new_parent_doc, level);
564
- if(dup == NULL) { return Qnil; }
987
+ if (dup == NULL) { return Qnil; }
565
988
 
566
- nokogiri_root_node(dup);
989
+ noko_xml_document_pin_node(dup);
567
990
 
568
- return Nokogiri_wrap_xml_node(rb_obj_class(self), dup);
991
+ return noko_xml_node_wrap(rb_obj_class(self), dup);
569
992
  }
570
993
 
571
994
  /*
572
- * call-seq:
573
- * unlink
995
+ * :call-seq:
996
+ * unlink() → self
574
997
  *
575
998
  * Unlink this node from its current context.
576
999
  */
577
- static VALUE unlink_node(VALUE self)
1000
+ static VALUE
1001
+ unlink_node(VALUE self)
578
1002
  {
579
1003
  xmlNodePtr node;
580
- Data_Get_Struct(self, xmlNode, node);
1004
+ Noko_Node_Get_Struct(self, xmlNode, node);
581
1005
  xmlUnlinkNode(node);
582
- nokogiri_root_node(node);
1006
+ noko_xml_document_pin_node(node);
583
1007
  return self;
584
1008
  }
585
1009
 
586
- /*
587
- * call-seq:
588
- * blank?
589
- *
590
- * Is this node blank?
591
- */
592
- static VALUE blank_eh(VALUE self)
593
- {
594
- xmlNodePtr node;
595
- Data_Get_Struct(self, xmlNode, node);
596
- return (1 == xmlIsBlankNode(node)) ? Qtrue : Qfalse ;
597
- }
598
1010
 
599
1011
  /*
600
1012
  * call-seq:
@@ -602,15 +1014,16 @@ static VALUE blank_eh(VALUE self)
602
1014
  *
603
1015
  * Returns the next sibling node
604
1016
  */
605
- static VALUE next_sibling(VALUE self)
1017
+ static VALUE
1018
+ next_sibling(VALUE self)
606
1019
  {
607
1020
  xmlNodePtr node, sibling;
608
- Data_Get_Struct(self, xmlNode, node);
1021
+ Noko_Node_Get_Struct(self, xmlNode, node);
609
1022
 
610
1023
  sibling = node->next;
611
- if(!sibling) { return Qnil; }
1024
+ if (!sibling) { return Qnil; }
612
1025
 
613
- return Nokogiri_wrap_xml_node(Qnil, sibling) ;
1026
+ return noko_xml_node_wrap(Qnil, sibling) ;
614
1027
  }
615
1028
 
616
1029
  /*
@@ -619,15 +1032,16 @@ static VALUE next_sibling(VALUE self)
619
1032
  *
620
1033
  * Returns the previous sibling node
621
1034
  */
622
- static VALUE previous_sibling(VALUE self)
1035
+ static VALUE
1036
+ previous_sibling(VALUE self)
623
1037
  {
624
1038
  xmlNodePtr node, sibling;
625
- Data_Get_Struct(self, xmlNode, node);
1039
+ Noko_Node_Get_Struct(self, xmlNode, node);
626
1040
 
627
1041
  sibling = node->prev;
628
- if(!sibling) { return Qnil; }
1042
+ if (!sibling) { return Qnil; }
629
1043
 
630
- return Nokogiri_wrap_xml_node(Qnil, sibling);
1044
+ return noko_xml_node_wrap(Qnil, sibling);
631
1045
  }
632
1046
 
633
1047
  /*
@@ -636,15 +1050,16 @@ static VALUE previous_sibling(VALUE self)
636
1050
  *
637
1051
  * Returns the next Nokogiri::XML::Element type sibling node.
638
1052
  */
639
- static VALUE next_element(VALUE self)
1053
+ static VALUE
1054
+ next_element(VALUE self)
640
1055
  {
641
1056
  xmlNodePtr node, sibling;
642
- Data_Get_Struct(self, xmlNode, node);
1057
+ Noko_Node_Get_Struct(self, xmlNode, node);
643
1058
 
644
1059
  sibling = xmlNextElementSibling(node);
645
- if(!sibling) { return Qnil; }
1060
+ if (!sibling) { return Qnil; }
646
1061
 
647
- return Nokogiri_wrap_xml_node(Qnil, sibling);
1062
+ return noko_xml_node_wrap(Qnil, sibling);
648
1063
  }
649
1064
 
650
1065
  /*
@@ -653,82 +1068,60 @@ static VALUE next_element(VALUE self)
653
1068
  *
654
1069
  * Returns the previous Nokogiri::XML::Element type sibling node.
655
1070
  */
656
- static VALUE previous_element(VALUE self)
1071
+ static VALUE
1072
+ previous_element(VALUE self)
657
1073
  {
658
1074
  xmlNodePtr node, sibling;
659
- Data_Get_Struct(self, xmlNode, node);
1075
+ Noko_Node_Get_Struct(self, xmlNode, node);
660
1076
 
661
1077
  /*
662
1078
  * note that we don't use xmlPreviousElementSibling here because it's buggy pre-2.7.7.
663
1079
  */
664
1080
  sibling = node->prev;
665
- if(!sibling) { return Qnil; }
1081
+ if (!sibling) { return Qnil; }
666
1082
 
667
- while(sibling && sibling->type != XML_ELEMENT_NODE) {
1083
+ while (sibling && sibling->type != XML_ELEMENT_NODE) {
668
1084
  sibling = sibling->prev;
669
1085
  }
670
1086
 
671
- return sibling ? Nokogiri_wrap_xml_node(Qnil, sibling) : Qnil ;
1087
+ return sibling ? noko_xml_node_wrap(Qnil, sibling) : Qnil ;
672
1088
  }
673
1089
 
674
1090
  /* :nodoc: */
675
- static VALUE replace(VALUE self, VALUE new_node)
1091
+ static VALUE
1092
+ replace(VALUE self, VALUE new_node)
676
1093
  {
677
1094
  VALUE reparent = reparent_node_with(self, new_node, xmlReplaceNodeWrapper);
678
1095
 
679
1096
  xmlNodePtr pivot;
680
- Data_Get_Struct(self, xmlNode, pivot);
681
- nokogiri_root_node(pivot);
1097
+ Noko_Node_Get_Struct(self, xmlNode, pivot);
1098
+ noko_xml_document_pin_node(pivot);
682
1099
 
683
1100
  return reparent;
684
1101
  }
685
1102
 
686
1103
  /*
687
- * call-seq:
688
- * children
1104
+ * :call-seq:
1105
+ * element_children() → NodeSet
1106
+ * elements() → NodeSet
689
1107
  *
690
- * Get the list of children for this node as a NodeSet
691
- */
692
- static VALUE children(VALUE self)
693
- {
694
- xmlNodePtr node;
695
- xmlNodePtr child;
696
- xmlNodeSetPtr set;
697
- VALUE document;
698
- VALUE node_set;
699
-
700
- Data_Get_Struct(self, xmlNode, node);
701
-
702
- child = node->children;
703
- set = xmlXPathNodeSetCreate(child);
704
-
705
- document = DOC_RUBY_OBJECT(node->doc);
706
-
707
- if(!child) { return Nokogiri_wrap_xml_node_set(set, document); }
708
-
709
- child = child->next;
710
- while(NULL != child) {
711
- xmlXPathNodeSetAddUnique(set, child);
712
- child = child->next;
713
- }
714
-
715
- node_set = Nokogiri_wrap_xml_node_set(set, document);
716
-
717
- return node_set;
718
- }
719
-
720
- /*
721
- * call-seq:
722
- * element_children
1108
+ * [Returns]
1109
+ * The node's child elements as a NodeSet. Only children that are elements will be returned, which
1110
+ * notably excludes Text nodes.
723
1111
  *
724
- * Get the list of children for this node as a NodeSet. All nodes will be
725
- * element nodes.
1112
+ * *Example:*
726
1113
  *
727
- * Example:
1114
+ * Note that #children returns the Text node "hello" while #element_children does not.
728
1115
  *
729
- * @doc.root.element_children.all? { |x| x.element? } # => true
1116
+ * div = Nokogiri::HTML5("<div>hello<span>world</span>").at_css("div")
1117
+ * div.element_children
1118
+ * # => [#<Nokogiri::XML::Element:0x50 name="span" children=[#<Nokogiri::XML::Text:0x3c "world">]>]
1119
+ * div.children
1120
+ * # => [#<Nokogiri::XML::Text:0x64 "hello">,
1121
+ * # #<Nokogiri::XML::Element:0x50 name="span" children=[#<Nokogiri::XML::Text:0x3c "world">]>]
730
1122
  */
731
- static VALUE element_children(VALUE self)
1123
+ static VALUE
1124
+ rb_xml_node_element_children(VALUE self)
732
1125
  {
733
1126
  xmlNodePtr node;
734
1127
  xmlNodePtr child;
@@ -736,83 +1129,78 @@ static VALUE element_children(VALUE self)
736
1129
  VALUE document;
737
1130
  VALUE node_set;
738
1131
 
739
- Data_Get_Struct(self, xmlNode, node);
1132
+ Noko_Node_Get_Struct(self, xmlNode, node);
740
1133
 
741
1134
  child = xmlFirstElementChild(node);
742
1135
  set = xmlXPathNodeSetCreate(child);
743
1136
 
744
1137
  document = DOC_RUBY_OBJECT(node->doc);
745
1138
 
746
- if(!child) { return Nokogiri_wrap_xml_node_set(set, document); }
1139
+ if (!child) { return noko_xml_node_set_wrap(set, document); }
747
1140
 
748
1141
  child = xmlNextElementSibling(child);
749
- while(NULL != child) {
750
- xmlXPathNodeSetAddUnique(set, child);
751
- child = xmlNextElementSibling(child);
752
- }
753
-
754
- node_set = Nokogiri_wrap_xml_node_set(set, document);
755
-
756
- return node_set;
757
- }
758
-
759
- /*
760
- * call-seq:
761
- * child
762
- *
763
- * Returns the child node
764
- */
765
- static VALUE child(VALUE self)
766
- {
767
- xmlNodePtr node, child;
768
- Data_Get_Struct(self, xmlNode, node);
1142
+ while (NULL != child) {
1143
+ xmlXPathNodeSetAddUnique(set, child);
1144
+ child = xmlNextElementSibling(child);
1145
+ }
769
1146
 
770
- child = node->children;
771
- if(!child) { return Qnil; }
1147
+ node_set = noko_xml_node_set_wrap(set, document);
772
1148
 
773
- return Nokogiri_wrap_xml_node(Qnil, child);
1149
+ return node_set;
774
1150
  }
775
1151
 
776
1152
  /*
777
- * call-seq:
778
- * first_element_child
1153
+ * :call-seq:
1154
+ * first_element_child() → Node
1155
+ *
1156
+ * [Returns] The first child Node that is an element.
779
1157
  *
780
- * Returns the first child node of this node that is an element.
1158
+ * *Example:*
781
1159
  *
782
- * Example:
1160
+ * Note that the "hello" child, which is a Text node, is skipped and the <tt><span></tt> element is
1161
+ * returned.
783
1162
  *
784
- * @doc.root.first_element_child.element? # => true
1163
+ * div = Nokogiri::HTML5("<div>hello<span>world</span>").at_css("div")
1164
+ * div.first_element_child
1165
+ * # => #(Element:0x3c { name = "span", children = [ #(Text "world")] })
785
1166
  */
786
- static VALUE first_element_child(VALUE self)
1167
+ static VALUE
1168
+ rb_xml_node_first_element_child(VALUE self)
787
1169
  {
788
1170
  xmlNodePtr node, child;
789
- Data_Get_Struct(self, xmlNode, node);
1171
+ Noko_Node_Get_Struct(self, xmlNode, node);
790
1172
 
791
1173
  child = xmlFirstElementChild(node);
792
- if(!child) { return Qnil; }
1174
+ if (!child) { return Qnil; }
793
1175
 
794
- return Nokogiri_wrap_xml_node(Qnil, child);
1176
+ return noko_xml_node_wrap(Qnil, child);
795
1177
  }
796
1178
 
797
1179
  /*
798
- * call-seq:
799
- * last_element_child
1180
+ * :call-seq:
1181
+ * last_element_child() → Node
1182
+ *
1183
+ * [Returns] The last child Node that is an element.
800
1184
  *
801
- * Returns the last child node of this node that is an element.
1185
+ * *Example:*
802
1186
  *
803
- * Example:
1187
+ * Note that the "skip" child, which is a Text node, is skipped and the <tt><span>yes</span></tt>
1188
+ * element is returned.
804
1189
  *
805
- * @doc.root.last_element_child.element? # => true
1190
+ * div = Nokogiri::HTML5("<div><span>no</span><span>yes</span>skip</div>").at_css("div")
1191
+ * div.last_element_child
1192
+ * # => #(Element:0x3c { name = "span", children = [ #(Text "yes")] })
806
1193
  */
807
- static VALUE last_element_child(VALUE self)
1194
+ static VALUE
1195
+ rb_xml_node_last_element_child(VALUE self)
808
1196
  {
809
1197
  xmlNodePtr node, child;
810
- Data_Get_Struct(self, xmlNode, node);
1198
+ Noko_Node_Get_Struct(self, xmlNode, node);
811
1199
 
812
1200
  child = xmlLastElementChild(node);
813
- if(!child) { return Qnil; }
1201
+ if (!child) { return Qnil; }
814
1202
 
815
- return Nokogiri_wrap_xml_node(Qnil, child);
1203
+ return noko_xml_node_wrap(Qnil, child);
816
1204
  }
817
1205
 
818
1206
  /*
@@ -821,11 +1209,12 @@ static VALUE last_element_child(VALUE self)
821
1209
  *
822
1210
  * Returns true if +attribute+ is set
823
1211
  */
824
- static VALUE key_eh(VALUE self, VALUE attribute)
1212
+ static VALUE
1213
+ key_eh(VALUE self, VALUE attribute)
825
1214
  {
826
1215
  xmlNodePtr node;
827
- Data_Get_Struct(self, xmlNode, node);
828
- if(xmlHasProp(node, (xmlChar *)StringValueCStr(attribute))) {
1216
+ Noko_Node_Get_Struct(self, xmlNode, node);
1217
+ if (xmlHasProp(node, (xmlChar *)StringValueCStr(attribute))) {
829
1218
  return Qtrue;
830
1219
  }
831
1220
  return Qfalse;
@@ -837,12 +1226,13 @@ static VALUE key_eh(VALUE self, VALUE attribute)
837
1226
  *
838
1227
  * Returns true if +attribute+ is set with +namespace+
839
1228
  */
840
- static VALUE namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace)
1229
+ static VALUE
1230
+ namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace)
841
1231
  {
842
1232
  xmlNodePtr node;
843
- Data_Get_Struct(self, xmlNode, node);
844
- if(xmlHasNsProp(node, (xmlChar *)StringValueCStr(attribute),
845
- NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace))) {
1233
+ Noko_Node_Get_Struct(self, xmlNode, node);
1234
+ if (xmlHasNsProp(node, (xmlChar *)StringValueCStr(attribute),
1235
+ NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace))) {
846
1236
  return Qtrue;
847
1237
  }
848
1238
  return Qfalse;
@@ -854,11 +1244,12 @@ static VALUE namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace)
854
1244
  *
855
1245
  * Set the +property+ to +value+
856
1246
  */
857
- static VALUE set(VALUE self, VALUE property, VALUE value)
1247
+ static VALUE
1248
+ set(VALUE self, VALUE property, VALUE value)
858
1249
  {
859
1250
  xmlNodePtr node, cur;
860
1251
  xmlAttrPtr prop;
861
- Data_Get_Struct(self, xmlNode, node);
1252
+ Noko_Node_Get_Struct(self, xmlNode, node);
862
1253
 
863
1254
  /* If a matching attribute node already exists, then xmlSetProp will destroy
864
1255
  * the existing node's children. However, if Nokogiri has a node object
@@ -867,13 +1258,13 @@ static VALUE set(VALUE self, VALUE property, VALUE value)
867
1258
  * We can avoid this by unlinking these nodes first.
868
1259
  */
869
1260
  if (node->type != XML_ELEMENT_NODE) {
870
- return(Qnil);
1261
+ return (Qnil);
871
1262
  }
872
1263
  prop = xmlHasProp(node, (xmlChar *)StringValueCStr(property));
873
1264
  if (prop && prop->children) {
874
1265
  for (cur = prop->children; cur; cur = cur->next) {
875
1266
  if (cur->_private) {
876
- nokogiri_root_node(cur);
1267
+ noko_xml_document_pin_node(cur);
877
1268
  xmlUnlinkNode(cur);
878
1269
  }
879
1270
  }
@@ -891,7 +1282,8 @@ static VALUE set(VALUE self, VALUE property, VALUE value)
891
1282
  *
892
1283
  * Get the value for +attribute+
893
1284
  */
894
- static VALUE get(VALUE self, VALUE rattribute)
1285
+ static VALUE
1286
+ get(VALUE self, VALUE rattribute)
895
1287
  {
896
1288
  xmlNodePtr node;
897
1289
  xmlChar *value = 0;
@@ -902,10 +1294,10 @@ static VALUE get(VALUE self, VALUE rattribute)
902
1294
 
903
1295
  if (NIL_P(rattribute)) { return Qnil; }
904
1296
 
905
- Data_Get_Struct(self, xmlNode, node);
1297
+ Noko_Node_Get_Struct(self, xmlNode, node);
906
1298
  attribute = xmlCharStrdup(StringValueCStr(rattribute));
907
1299
 
908
- colon = (xmlChar *)(uintptr_t)xmlStrchr(attribute, (const xmlChar)':');
1300
+ colon = DISCARD_CONST_QUAL_XMLCHAR(xmlStrchr(attribute, (const xmlChar)':'));
909
1301
  if (colon) {
910
1302
  /* split the attribute string into separate prefix and name by
911
1303
  * null-terminating the prefix at the colon */
@@ -917,7 +1309,7 @@ static VALUE get(VALUE self, VALUE rattribute)
917
1309
  if (ns) {
918
1310
  value = xmlGetNsProp(node, attr_name, ns->href);
919
1311
  } else {
920
- value = xmlGetProp(node, (xmlChar*)StringValueCStr(rattribute));
1312
+ value = xmlGetProp(node, (xmlChar *)StringValueCStr(rattribute));
921
1313
  }
922
1314
  } else {
923
1315
  value = xmlGetNoNsProp(node, attribute);
@@ -938,15 +1330,16 @@ static VALUE get(VALUE self, VALUE rattribute)
938
1330
  *
939
1331
  * Set the namespace to +namespace+
940
1332
  */
941
- static VALUE set_namespace(VALUE self, VALUE namespace)
1333
+ static VALUE
1334
+ set_namespace(VALUE self, VALUE namespace)
942
1335
  {
943
1336
  xmlNodePtr node;
944
1337
  xmlNsPtr ns = NULL;
945
1338
 
946
- Data_Get_Struct(self, xmlNode, node);
1339
+ Noko_Node_Get_Struct(self, xmlNode, node);
947
1340
 
948
- if(!NIL_P(namespace)) {
949
- Data_Get_Struct(namespace, xmlNs, ns);
1341
+ if (!NIL_P(namespace)) {
1342
+ Noko_Namespace_Get_Struct(namespace, xmlNs, ns);
950
1343
  }
951
1344
 
952
1345
  xmlSetNs(node, ns);
@@ -955,138 +1348,140 @@ static VALUE set_namespace(VALUE self, VALUE namespace)
955
1348
  }
956
1349
 
957
1350
  /*
958
- * call-seq:
959
- * attribute(name)
1351
+ * :call-seq:
1352
+ * namespace() → Namespace
960
1353
  *
961
- * Get the attribute node with +name+
962
- */
963
- static VALUE attr(VALUE self, VALUE name)
964
- {
965
- xmlNodePtr node;
966
- xmlAttrPtr prop;
967
- Data_Get_Struct(self, xmlNode, node);
968
- prop = xmlHasProp(node, (xmlChar *)StringValueCStr(name));
969
-
970
- if(! prop) { return Qnil; }
971
- return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)prop);
972
- }
973
-
974
- /*
975
- * call-seq:
976
- * attribute_with_ns(name, namespace)
1354
+ * [Returns] The Namespace of the element or attribute node, or +nil+ if there is no namespace.
977
1355
  *
978
- * Get the attribute node with +name+ and +namespace+
979
- */
980
- static VALUE attribute_with_ns(VALUE self, VALUE name, VALUE namespace)
981
- {
982
- xmlNodePtr node;
983
- xmlAttrPtr prop;
984
- Data_Get_Struct(self, xmlNode, node);
985
- prop = xmlHasNsProp(node, (xmlChar *)StringValueCStr(name),
986
- NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace));
987
-
988
- if(! prop) { return Qnil; }
989
- return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)prop);
990
- }
991
-
992
- /*
993
- * call-seq:
994
- * attribute_nodes()
1356
+ * *Example:*
995
1357
  *
996
- * returns a list containing the Node attributes.
1358
+ * doc = Nokogiri::XML(<<~EOF)
1359
+ * <root>
1360
+ * <first/>
1361
+ * <second xmlns="http://example.com/child"/>
1362
+ * <foo:third xmlns:foo="http://example.com/foo"/>
1363
+ * </root>
1364
+ * EOF
1365
+ * doc.at_xpath("//first").namespace
1366
+ * # => nil
1367
+ * doc.at_xpath("//xmlns:second", "xmlns" => "http://example.com/child").namespace
1368
+ * # => #(Namespace:0x3c { href = "http://example.com/child" })
1369
+ * doc.at_xpath("//foo:third", "foo" => "http://example.com/foo").namespace
1370
+ * # => #(Namespace:0x50 { prefix = "foo", href = "http://example.com/foo" })
997
1371
  */
998
- static VALUE attribute_nodes(VALUE self)
1372
+ static VALUE
1373
+ rb_xml_node_namespace(VALUE rb_node)
999
1374
  {
1000
- /* this code in the mode of xmlHasProp() */
1001
- xmlNodePtr node;
1002
- VALUE attr;
1003
-
1004
- Data_Get_Struct(self, xmlNode, node);
1375
+ xmlNodePtr c_node ;
1376
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1005
1377
 
1006
- attr = rb_ary_new();
1007
- Nokogiri_xml_node_properties(node, attr);
1378
+ if (c_node->ns) {
1379
+ return noko_xml_namespace_wrap(c_node->ns, c_node->doc);
1380
+ }
1008
1381
 
1009
- return attr ;
1382
+ return Qnil ;
1010
1383
  }
1011
1384
 
1012
-
1013
1385
  /*
1014
- * call-seq:
1015
- * namespace()
1386
+ * :call-seq:
1387
+ * namespace_definitions() → Array<Nokogiri::XML::Namespace>
1016
1388
  *
1017
- * returns the namespace of the element or attribute node as a Namespace
1018
- * object, or nil if there is no namespace for the element or attribute.
1019
- */
1020
- static VALUE namespace(VALUE self)
1021
- {
1022
- xmlNodePtr node ;
1023
- Data_Get_Struct(self, xmlNode, node);
1024
-
1025
- if (node->ns) {
1026
- return Nokogiri_wrap_xml_namespace(node->doc, node->ns);
1027
- }
1028
-
1029
- return Qnil ;
1030
- }
1031
-
1032
- /*
1033
- * call-seq:
1034
- * namespace_definitions()
1389
+ * [Returns]
1390
+ * Namespaces that are defined directly on this node, as an Array of Namespace objects. The array
1391
+ * will be empty if no namespaces are defined on this node.
1392
+ *
1393
+ * *Example:*
1035
1394
  *
1036
- * returns namespaces defined on self element directly, as an array of Namespace objects. Includes both a default namespace (as in"xmlns="), and prefixed namespaces (as in "xmlns:prefix=").
1395
+ * doc = Nokogiri::XML(<<~EOF)
1396
+ * <root xmlns="http://example.com/root">
1397
+ * <first/>
1398
+ * <second xmlns="http://example.com/child" xmlns:unused="http://example.com/unused"/>
1399
+ * <foo:third xmlns:foo="http://example.com/foo"/>
1400
+ * </root>
1401
+ * EOF
1402
+ * doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_definitions
1403
+ * # => []
1404
+ * doc.at_xpath("//xmlns:second", "xmlns" => "http://example.com/child").namespace_definitions
1405
+ * # => [#(Namespace:0x3c { href = "http://example.com/child" }),
1406
+ * # #(Namespace:0x50 {
1407
+ * # prefix = "unused",
1408
+ * # href = "http://example.com/unused"
1409
+ * # })]
1410
+ * doc.at_xpath("//foo:third", "foo" => "http://example.com/foo").namespace_definitions
1411
+ * # => [#(Namespace:0x64 { prefix = "foo", href = "http://example.com/foo" })]
1037
1412
  */
1038
- static VALUE namespace_definitions(VALUE self)
1413
+ static VALUE
1414
+ namespace_definitions(VALUE rb_node)
1039
1415
  {
1040
1416
  /* this code in the mode of xmlHasProp() */
1041
- xmlNodePtr node ;
1042
- VALUE list;
1043
- xmlNsPtr ns;
1417
+ xmlNodePtr c_node ;
1418
+ xmlNsPtr c_namespace;
1419
+ VALUE definitions = rb_ary_new();
1044
1420
 
1045
- Data_Get_Struct(self, xmlNode, node);
1421
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1046
1422
 
1047
- list = rb_ary_new();
1048
-
1049
- ns = node->nsDef;
1050
-
1051
- if(!ns) { return list; }
1423
+ c_namespace = c_node->nsDef;
1424
+ if (!c_namespace) {
1425
+ return definitions;
1426
+ }
1052
1427
 
1053
- while(NULL != ns) {
1054
- rb_ary_push(list, Nokogiri_wrap_xml_namespace(node->doc, ns));
1055
- ns = ns->next;
1428
+ while (c_namespace != NULL) {
1429
+ rb_ary_push(definitions, noko_xml_namespace_wrap(c_namespace, c_node->doc));
1430
+ c_namespace = c_namespace->next;
1056
1431
  }
1057
1432
 
1058
- return list;
1433
+ return definitions;
1059
1434
  }
1060
1435
 
1061
1436
  /*
1062
- * call-seq:
1063
- * namespace_scopes()
1437
+ * :call-seq:
1438
+ * namespace_scopes() → Array<Nokogiri::XML::Namespace>
1439
+ *
1440
+ * [Returns] Array of all the Namespaces on this node and its ancestors.
1064
1441
  *
1065
- * returns namespaces in scope for self -- those defined on self element
1066
- * directly or any ancestor node -- as an array of Namespace objects. Default
1067
- * namespaces ("xmlns=" style) for self are included in this array; Default
1068
- * namespaces for ancestors, however, are not. See also #namespaces
1442
+ * See also #namespaces
1443
+ *
1444
+ * *Example:*
1445
+ *
1446
+ * doc = Nokogiri::XML(<<~EOF)
1447
+ * <root xmlns="http://example.com/root" xmlns:bar="http://example.com/bar">
1448
+ * <first/>
1449
+ * <second xmlns="http://example.com/child"/>
1450
+ * <third xmlns:foo="http://example.com/foo"/>
1451
+ * </root>
1452
+ * EOF
1453
+ * doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_scopes
1454
+ * # => [#(Namespace:0x3c { href = "http://example.com/root" }),
1455
+ * # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })]
1456
+ * doc.at_xpath("//child:second", "child" => "http://example.com/child").namespace_scopes
1457
+ * # => [#(Namespace:0x64 { href = "http://example.com/child" }),
1458
+ * # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })]
1459
+ * doc.at_xpath("//root:third", "root" => "http://example.com/root").namespace_scopes
1460
+ * # => [#(Namespace:0x78 { prefix = "foo", href = "http://example.com/foo" }),
1461
+ * # #(Namespace:0x3c { href = "http://example.com/root" }),
1462
+ * # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })]
1069
1463
  */
1070
- static VALUE namespace_scopes(VALUE self)
1464
+ static VALUE
1465
+ rb_xml_node_namespace_scopes(VALUE rb_node)
1071
1466
  {
1072
- xmlNodePtr node ;
1073
- VALUE list;
1074
- xmlNsPtr *ns_list;
1467
+ xmlNodePtr c_node ;
1468
+ xmlNsPtr *namespaces;
1469
+ VALUE scopes = rb_ary_new();
1075
1470
  int j;
1076
1471
 
1077
- Data_Get_Struct(self, xmlNode, node);
1078
-
1079
- list = rb_ary_new();
1080
- ns_list = xmlGetNsList(node->doc, node);
1472
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1081
1473
 
1082
- if(!ns_list) { return list; }
1474
+ namespaces = xmlGetNsList(c_node->doc, c_node);
1475
+ if (!namespaces) {
1476
+ return scopes;
1477
+ }
1083
1478
 
1084
- for (j = 0 ; ns_list[j] != NULL ; ++j) {
1085
- rb_ary_push(list, Nokogiri_wrap_xml_namespace(node->doc, ns_list[j]));
1479
+ for (j = 0 ; namespaces[j] != NULL ; ++j) {
1480
+ rb_ary_push(scopes, noko_xml_namespace_wrap(namespaces[j], c_node->doc));
1086
1481
  }
1087
1482
 
1088
- xmlFree(ns_list);
1089
- return list;
1483
+ xmlFree(namespaces);
1484
+ return scopes;
1090
1485
  }
1091
1486
 
1092
1487
  /*
@@ -1095,11 +1490,12 @@ static VALUE namespace_scopes(VALUE self)
1095
1490
  *
1096
1491
  * Get the type for this Node
1097
1492
  */
1098
- static VALUE node_type(VALUE self)
1493
+ static VALUE
1494
+ node_type(VALUE self)
1099
1495
  {
1100
1496
  xmlNodePtr node;
1101
- Data_Get_Struct(self, xmlNode, node);
1102
- return INT2NUM((long)node->type);
1497
+ Noko_Node_Get_Struct(self, xmlNode, node);
1498
+ return INT2NUM(node->type);
1103
1499
  }
1104
1500
 
1105
1501
  /*
@@ -1108,16 +1504,17 @@ static VALUE node_type(VALUE self)
1108
1504
  *
1109
1505
  * Set the content for this Node
1110
1506
  */
1111
- static VALUE set_native_content(VALUE self, VALUE content)
1507
+ static VALUE
1508
+ set_native_content(VALUE self, VALUE content)
1112
1509
  {
1113
1510
  xmlNodePtr node, child, next ;
1114
- Data_Get_Struct(self, xmlNode, node);
1511
+ Noko_Node_Get_Struct(self, xmlNode, node);
1115
1512
 
1116
1513
  child = node->children;
1117
1514
  while (NULL != child) {
1118
1515
  next = child->next ;
1119
1516
  xmlUnlinkNode(child) ;
1120
- nokogiri_root_node(child);
1517
+ noko_xml_document_pin_node(child);
1121
1518
  child = next ;
1122
1519
  }
1123
1520
 
@@ -1125,42 +1522,20 @@ static VALUE set_native_content(VALUE self, VALUE content)
1125
1522
  return content;
1126
1523
  }
1127
1524
 
1128
- /*
1129
- * call-seq:
1130
- * content
1131
- *
1132
- * Returns the plaintext content for this Node. Note that entities will always
1133
- * be expanded in the returned string.
1134
- */
1135
- static VALUE get_native_content(VALUE self)
1136
- {
1137
- xmlNodePtr node;
1138
- xmlChar * content;
1139
-
1140
- Data_Get_Struct(self, xmlNode, node);
1141
-
1142
- content = xmlNodeGetContent(node);
1143
- if(content) {
1144
- VALUE rval = NOKOGIRI_STR_NEW2(content);
1145
- xmlFree(content);
1146
- return rval;
1147
- }
1148
- return Qnil;
1149
- }
1150
-
1151
1525
  /*
1152
1526
  * call-seq:
1153
1527
  * lang=
1154
1528
  *
1155
1529
  * Set the language of a node, i.e. the values of the xml:lang attribute.
1156
1530
  */
1157
- static VALUE set_lang(VALUE self_rb, VALUE lang_rb)
1531
+ static VALUE
1532
+ set_lang(VALUE self_rb, VALUE lang_rb)
1158
1533
  {
1159
1534
  xmlNodePtr self ;
1160
- xmlChar* lang ;
1535
+ xmlChar *lang ;
1161
1536
 
1162
- Data_Get_Struct(self_rb, xmlNode, self);
1163
- lang = (xmlChar*)StringValueCStr(lang_rb);
1537
+ Noko_Node_Get_Struct(self_rb, xmlNode, self);
1538
+ lang = (xmlChar *)StringValueCStr(lang_rb);
1164
1539
 
1165
1540
  xmlNodeSetLang(self, lang);
1166
1541
 
@@ -1174,13 +1549,14 @@ static VALUE set_lang(VALUE self_rb, VALUE lang_rb)
1174
1549
  * Searches the language of a node, i.e. the values of the xml:lang attribute or
1175
1550
  * the one carried by the nearest ancestor.
1176
1551
  */
1177
- static VALUE get_lang(VALUE self_rb)
1552
+ static VALUE
1553
+ get_lang(VALUE self_rb)
1178
1554
  {
1179
1555
  xmlNodePtr self ;
1180
- xmlChar* lang ;
1556
+ xmlChar *lang ;
1181
1557
  VALUE lang_rb ;
1182
1558
 
1183
- Data_Get_Struct(self_rb, xmlNode, self);
1559
+ Noko_Node_Get_Struct(self_rb, xmlNode, self);
1184
1560
 
1185
1561
  lang = xmlNodeGetLang(self);
1186
1562
  if (lang) {
@@ -1193,7 +1569,8 @@ static VALUE get_lang(VALUE self_rb)
1193
1569
  }
1194
1570
 
1195
1571
  /* :nodoc: */
1196
- static VALUE add_child(VALUE self, VALUE new_child)
1572
+ static VALUE
1573
+ add_child(VALUE self, VALUE new_child)
1197
1574
  {
1198
1575
  return reparent_node_with(self, new_child, xmlAddChild);
1199
1576
  }
@@ -1204,15 +1581,16 @@ static VALUE add_child(VALUE self, VALUE new_child)
1204
1581
  *
1205
1582
  * Get the parent Node for this Node
1206
1583
  */
1207
- static VALUE get_parent(VALUE self)
1584
+ static VALUE
1585
+ get_parent(VALUE self)
1208
1586
  {
1209
1587
  xmlNodePtr node, parent;
1210
- Data_Get_Struct(self, xmlNode, node);
1588
+ Noko_Node_Get_Struct(self, xmlNode, node);
1211
1589
 
1212
1590
  parent = node->parent;
1213
- if(!parent) { return Qnil; }
1591
+ if (!parent) { return Qnil; }
1214
1592
 
1215
- return Nokogiri_wrap_xml_node(Qnil, parent) ;
1593
+ return noko_xml_node_wrap(Qnil, parent) ;
1216
1594
  }
1217
1595
 
1218
1596
  /*
@@ -1221,11 +1599,12 @@ static VALUE get_parent(VALUE self)
1221
1599
  *
1222
1600
  * Set the name for this Node
1223
1601
  */
1224
- static VALUE set_name(VALUE self, VALUE new_name)
1602
+ static VALUE
1603
+ set_name(VALUE self, VALUE new_name)
1225
1604
  {
1226
1605
  xmlNodePtr node;
1227
- Data_Get_Struct(self, xmlNode, node);
1228
- xmlNodeSetName(node, (xmlChar*)StringValueCStr(new_name));
1606
+ Noko_Node_Get_Struct(self, xmlNode, node);
1607
+ xmlNodeSetName(node, (xmlChar *)StringValueCStr(new_name));
1229
1608
  return new_name;
1230
1609
  }
1231
1610
 
@@ -1235,11 +1614,12 @@ static VALUE set_name(VALUE self, VALUE new_name)
1235
1614
  *
1236
1615
  * Returns the name for this Node
1237
1616
  */
1238
- static VALUE get_name(VALUE self)
1617
+ static VALUE
1618
+ get_name(VALUE self)
1239
1619
  {
1240
1620
  xmlNodePtr node;
1241
- Data_Get_Struct(self, xmlNode, node);
1242
- if(node->name) {
1621
+ Noko_Node_Get_Struct(self, xmlNode, node);
1622
+ if (node->name) {
1243
1623
  return NOKOGIRI_STR_NEW2(node->name);
1244
1624
  }
1245
1625
  return Qnil;
@@ -1251,28 +1631,39 @@ static VALUE get_name(VALUE self)
1251
1631
  *
1252
1632
  * Returns the path associated with this Node
1253
1633
  */
1254
- static VALUE path(VALUE self)
1634
+ static VALUE
1635
+ rb_xml_node_path(VALUE rb_node)
1255
1636
  {
1256
- xmlNodePtr node;
1257
- xmlChar *path ;
1637
+ xmlNodePtr c_node;
1638
+ xmlChar *c_path ;
1258
1639
  VALUE rval;
1259
1640
 
1260
- Data_Get_Struct(self, xmlNode, node);
1641
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1642
+
1643
+ c_path = xmlGetNodePath(c_node);
1644
+ if (c_path == NULL) {
1645
+ // see https://github.com/sparklemotion/nokogiri/issues/2250
1646
+ // this behavior is clearly undesirable, but is what libxml <= 2.9.10 returned, and so we
1647
+ // do this for now to preserve the behavior across libxml2 versions.
1648
+ rval = NOKOGIRI_STR_NEW2("?");
1649
+ } else {
1650
+ rval = NOKOGIRI_STR_NEW2(c_path);
1651
+ xmlFree(c_path);
1652
+ }
1261
1653
 
1262
- path = xmlGetNodePath(node);
1263
- rval = NOKOGIRI_STR_NEW2(path);
1264
- xmlFree(path);
1265
1654
  return rval ;
1266
1655
  }
1267
1656
 
1268
1657
  /* :nodoc: */
1269
- static VALUE add_next_sibling(VALUE self, VALUE new_sibling)
1658
+ static VALUE
1659
+ add_next_sibling(VALUE self, VALUE new_sibling)
1270
1660
  {
1271
1661
  return reparent_node_with(self, new_sibling, xmlAddNextSibling) ;
1272
1662
  }
1273
1663
 
1274
1664
  /* :nodoc: */
1275
- static VALUE add_previous_sibling(VALUE self, VALUE new_sibling)
1665
+ static VALUE
1666
+ add_previous_sibling(VALUE self, VALUE new_sibling)
1276
1667
  {
1277
1668
  return reparent_node_with(self, new_sibling, xmlAddPrevSibling) ;
1278
1669
  }
@@ -1283,7 +1674,8 @@ static VALUE add_previous_sibling(VALUE self, VALUE new_sibling)
1283
1674
  *
1284
1675
  * Write this Node to +io+ with +encoding+ and +options+
1285
1676
  */
1286
- static VALUE native_write_to(
1677
+ static VALUE
1678
+ native_write_to(
1287
1679
  VALUE self,
1288
1680
  VALUE io,
1289
1681
  VALUE encoding,
@@ -1292,10 +1684,10 @@ static VALUE native_write_to(
1292
1684
  )
1293
1685
  {
1294
1686
  xmlNodePtr node;
1295
- const char * before_indent;
1687
+ const char *before_indent;
1296
1688
  xmlSaveCtxtPtr savectx;
1297
1689
 
1298
- Data_Get_Struct(self, xmlNode, node);
1690
+ Noko_Node_Get_Struct(self, xmlNode, node);
1299
1691
 
1300
1692
  xmlIndentTreeOutput = 1;
1301
1693
 
@@ -1304,8 +1696,8 @@ static VALUE native_write_to(
1304
1696
  xmlTreeIndentString = StringValueCStr(indent_string);
1305
1697
 
1306
1698
  savectx = xmlSaveToIO(
1307
- (xmlOutputWriteCallback)io_write_callback,
1308
- (xmlOutputCloseCallback)io_close_callback,
1699
+ (xmlOutputWriteCallback)noko_io_write,
1700
+ (xmlOutputCloseCallback)noko_io_close,
1309
1701
  (void *)io,
1310
1702
  RTEST(encoding) ? StringValueCStr(encoding) : NULL,
1311
1703
  (int)NUM2INT(options)
@@ -1318,93 +1710,365 @@ static VALUE native_write_to(
1318
1710
  return io;
1319
1711
  }
1320
1712
 
1321
- /*
1322
- * call-seq:
1323
- * line
1324
- *
1325
- * Returns the line for this Node
1326
- */
1327
- static VALUE line(VALUE self)
1713
+
1714
+ static inline void
1715
+ output_partial_string(VALUE out, char const *str, size_t length)
1328
1716
  {
1329
- xmlNodePtr node;
1330
- Data_Get_Struct(self, xmlNode, node);
1717
+ if (length) {
1718
+ rb_enc_str_buf_cat(out, str, (long)length, rb_utf8_encoding());
1719
+ }
1720
+ }
1331
1721
 
1332
- return INT2NUM(xmlGetLineNo(node));
1722
+ static inline void
1723
+ output_char(VALUE out, char ch)
1724
+ {
1725
+ output_partial_string(out, &ch, 1);
1333
1726
  }
1334
1727
 
1335
- /*
1336
- * call-seq:
1337
- * add_namespace_definition(prefix, href)
1338
- *
1339
- * Adds a namespace definition with +prefix+ using +href+ value. The result is
1340
- * as if parsed XML for this node had included an attribute
1341
- * 'xmlns:prefix=value'. A default namespace for this node ("xmlns=") can be
1342
- * added by passing 'nil' for prefix. Namespaces added this way will not
1343
- * show up in #attributes, but they will be included as an xmlns attribute
1344
- * when the node is serialized to XML.
1345
- */
1346
- static VALUE add_namespace_definition(VALUE self, VALUE prefix, VALUE href)
1728
+ static inline void
1729
+ output_string(VALUE out, char const *str)
1347
1730
  {
1348
- xmlNodePtr node, namespace;
1349
- xmlNsPtr ns;
1731
+ output_partial_string(out, str, strlen(str));
1732
+ }
1733
+
1734
+ static inline void
1735
+ output_tagname(VALUE out, xmlNodePtr elem)
1736
+ {
1737
+ // Elements in the HTML, MathML, and SVG namespaces do not use a namespace
1738
+ // prefix in the HTML syntax.
1739
+ char const *name = (char const *)elem->name;
1740
+ xmlNsPtr ns = elem->ns;
1741
+ if (ns && ns->href && ns->prefix
1742
+ && strcmp((char const *)ns->href, "http://www.w3.org/1999/xhtml")
1743
+ && strcmp((char const *)ns->href, "http://www.w3.org/1998/Math/MathML")
1744
+ && strcmp((char const *)ns->href, "http://www.w3.org/2000/svg")) {
1745
+ output_string(out, (char const *)elem->ns->prefix);
1746
+ output_char(out, ':');
1747
+ char const *colon = strchr(name, ':');
1748
+ if (colon) {
1749
+ name = colon + 1;
1750
+ }
1751
+ }
1752
+ output_string(out, name);
1753
+ }
1350
1754
 
1351
- Data_Get_Struct(self, xmlNode, node);
1352
- namespace = node ;
1755
+ static inline void
1756
+ output_attr_name(VALUE out, xmlAttrPtr attr)
1757
+ {
1758
+ xmlNsPtr ns = attr->ns;
1759
+ char const *name = (char const *)attr->name;
1760
+ if (ns && ns->href) {
1761
+ char const *uri = (char const *)ns->href;
1762
+ char const *localname = strchr(name, ':');
1763
+ if (localname) {
1764
+ ++localname;
1765
+ } else {
1766
+ localname = name;
1767
+ }
1768
+
1769
+ if (!strcmp(uri, "http://www.w3.org/XML/1998/namespace")) {
1770
+ output_string(out, "xml:");
1771
+ name = localname;
1772
+ } else if (!strcmp(uri, "http://www.w3.org/2000/xmlns/")) {
1773
+ // xmlns:xmlns -> xmlns
1774
+ // xmlns:foo -> xmlns:foo
1775
+ if (strcmp(localname, "xmlns")) {
1776
+ output_string(out, "xmlns:");
1777
+ }
1778
+ name = localname;
1779
+ } else if (!strcmp(uri, "http://www.w3.org/1999/xlink")) {
1780
+ output_string(out, "xlink:");
1781
+ name = localname;
1782
+ } else if (ns->prefix) {
1783
+ output_string(out, (char const *)ns->prefix);
1784
+ output_char(out, ':');
1785
+ name = localname;
1786
+ }
1787
+ }
1788
+ output_string(out, name);
1789
+ }
1790
+
1791
+ static void
1792
+ output_escaped_string(VALUE out, xmlChar const *start, bool attr)
1793
+ {
1794
+ xmlChar const *next = start;
1795
+ int ch;
1796
+
1797
+ while ((ch = *next) != 0) {
1798
+ char const *replacement = NULL;
1799
+ size_t replaced_bytes = 1;
1800
+ if (ch == '&') {
1801
+ replacement = "&amp;";
1802
+ } else if (ch == 0xC2 && next[1] == 0xA0) {
1803
+ // U+00A0 NO-BREAK SPACE has the UTF-8 encoding C2 A0.
1804
+ replacement = "&nbsp;";
1805
+ replaced_bytes = 2;
1806
+ } else if (attr && ch == '"') {
1807
+ replacement = "&quot;";
1808
+ } else if (!attr && ch == '<') {
1809
+ replacement = "&lt;";
1810
+ } else if (!attr && ch == '>') {
1811
+ replacement = "&gt;";
1812
+ } else {
1813
+ ++next;
1814
+ continue;
1815
+ }
1816
+ output_partial_string(out, (char const *)start, next - start);
1817
+ output_string(out, replacement);
1818
+ next += replaced_bytes;
1819
+ start = next;
1820
+ }
1821
+ output_partial_string(out, (char const *)start, next - start);
1822
+ }
1823
+
1824
+ static bool
1825
+ should_prepend_newline(xmlNodePtr node)
1826
+ {
1827
+ char const *name = (char const *)node->name;
1828
+ xmlNodePtr child = node->children;
1353
1829
 
1354
- ns = xmlSearchNs(
1355
- node->doc,
1356
- node,
1357
- (const xmlChar *)(NIL_P(prefix) ? NULL : StringValueCStr(prefix))
1358
- );
1830
+ if (!name || !child || (strcmp(name, "pre") && strcmp(name, "textarea") && strcmp(name, "listing"))) {
1831
+ return false;
1832
+ }
1833
+
1834
+ return child->type == XML_TEXT_NODE && child->content && child->content[0] == '\n';
1835
+ }
1836
+
1837
+ static VALUE
1838
+ rb_prepend_newline(VALUE self)
1839
+ {
1840
+ xmlNodePtr node;
1841
+ Noko_Node_Get_Struct(self, xmlNode, node);
1842
+ return should_prepend_newline(node) ? Qtrue : Qfalse;
1843
+ }
1359
1844
 
1360
- if(!ns) {
1361
- if (node->type != XML_ELEMENT_NODE) {
1362
- namespace = node->parent;
1845
+ static bool
1846
+ is_one_of(xmlNodePtr node, char const *const *tagnames, size_t num_tagnames)
1847
+ {
1848
+ char const *name = (char const *)node->name;
1849
+ if (name == NULL) { // fragments don't have a name
1850
+ return false;
1851
+ }
1852
+ for (size_t idx = 0; idx < num_tagnames; ++idx) {
1853
+ if (!strcmp(name, tagnames[idx])) {
1854
+ return true;
1363
1855
  }
1364
- ns = xmlNewNs(
1365
- namespace,
1366
- (const xmlChar *)StringValueCStr(href),
1367
- (const xmlChar *)(NIL_P(prefix) ? NULL : StringValueCStr(prefix))
1368
- );
1369
1856
  }
1857
+ return false;
1858
+
1859
+ }
1860
+
1861
+ static void
1862
+ output_node(
1863
+ VALUE out,
1864
+ xmlNodePtr node,
1865
+ bool preserve_newline
1866
+ )
1867
+ {
1868
+ static char const *const VOID_ELEMENTS[] = {
1869
+ "area", "base", "basefont", "bgsound", "br", "col", "embed", "frame", "hr",
1870
+ "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr",
1871
+ };
1872
+
1873
+ static char const *const UNESCAPED_TEXT_ELEMENTS[] = {
1874
+ "style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript",
1875
+ };
1876
+
1877
+ switch (node->type) {
1878
+ case XML_ELEMENT_NODE:
1879
+ // Serialize the start tag.
1880
+ output_char(out, '<');
1881
+ output_tagname(out, node);
1882
+
1883
+ // Add attributes.
1884
+ for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
1885
+ output_char(out, ' ');
1886
+ output_attr_name(out, attr);
1887
+ if (attr->children) {
1888
+ output_string(out, "=\"");
1889
+ xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
1890
+ output_escaped_string(out, value, true);
1891
+ xmlFree(value);
1892
+ output_char(out, '"');
1893
+ } else {
1894
+ // Output name=""
1895
+ output_string(out, "=\"\"");
1896
+ }
1897
+ }
1898
+ output_char(out, '>');
1899
+
1900
+ // Add children and end tag if element is not void.
1901
+ if (!is_one_of(node, VOID_ELEMENTS, sizeof VOID_ELEMENTS / sizeof VOID_ELEMENTS[0])) {
1902
+ if (preserve_newline && should_prepend_newline(node)) {
1903
+ output_char(out, '\n');
1904
+ }
1905
+ for (xmlNodePtr child = node->children; child; child = child->next) {
1906
+ output_node(out, child, preserve_newline);
1907
+ }
1908
+ output_string(out, "</");
1909
+ output_tagname(out, node);
1910
+ output_char(out, '>');
1911
+ }
1912
+ break;
1913
+
1914
+ case XML_TEXT_NODE:
1915
+ if (node->parent
1916
+ && is_one_of(node->parent, UNESCAPED_TEXT_ELEMENTS,
1917
+ sizeof UNESCAPED_TEXT_ELEMENTS / sizeof UNESCAPED_TEXT_ELEMENTS[0])) {
1918
+ output_string(out, (char const *)node->content);
1919
+ } else {
1920
+ output_escaped_string(out, node->content, false);
1921
+ }
1922
+ break;
1923
+
1924
+ case XML_CDATA_SECTION_NODE:
1925
+ output_string(out, "<![CDATA[");
1926
+ output_string(out, (char const *)node->content);
1927
+ output_string(out, "]]>");
1928
+ break;
1929
+
1930
+ case XML_COMMENT_NODE:
1931
+ output_string(out, "<!--");
1932
+ output_string(out, (char const *)node->content);
1933
+ output_string(out, "-->");
1934
+ break;
1935
+
1936
+ case XML_PI_NODE:
1937
+ output_string(out, "<?");
1938
+ output_string(out, (char const *)node->content);
1939
+ output_char(out, '>');
1940
+ break;
1941
+
1942
+ case XML_DOCUMENT_TYPE_NODE:
1943
+ case XML_DTD_NODE:
1944
+ output_string(out, "<!DOCTYPE ");
1945
+ output_string(out, (char const *)node->name);
1946
+ output_string(out, ">");
1947
+ break;
1948
+
1949
+ case XML_DOCUMENT_NODE:
1950
+ case XML_DOCUMENT_FRAG_NODE:
1951
+ case XML_HTML_DOCUMENT_NODE:
1952
+ for (xmlNodePtr child = node->children; child; child = child->next) {
1953
+ output_node(out, child, preserve_newline);
1954
+ }
1955
+ break;
1956
+
1957
+ default:
1958
+ rb_raise(rb_eRuntimeError, "Unsupported document node (%d); this is a bug in Nokogiri", node->type);
1959
+ break;
1960
+ }
1961
+ }
1370
1962
 
1371
- if (!ns) { return Qnil ; }
1963
+ static VALUE
1964
+ html_standard_serialize(
1965
+ VALUE self,
1966
+ VALUE preserve_newline
1967
+ )
1968
+ {
1969
+ xmlNodePtr node;
1970
+ Noko_Node_Get_Struct(self, xmlNode, node);
1971
+ VALUE output = rb_str_buf_new(4096);
1972
+ output_node(output, node, RTEST(preserve_newline));
1973
+ return output;
1974
+ }
1372
1975
 
1373
- if(NIL_P(prefix) || node != namespace) { xmlSetNs(node, ns); }
1976
+ /*
1977
+ * :call-seq:
1978
+ * line() → Integer
1979
+ *
1980
+ * [Returns] The line number of this Node.
1981
+ *
1982
+ * ---
1983
+ *
1984
+ * <b> ⚠ The CRuby and JRuby implementations differ in important ways! </b>
1985
+ *
1986
+ * Semantic differences:
1987
+ * - The CRuby method reflects the node's line number <i>in the parsed string</i>
1988
+ * - The JRuby method reflects the node's line number <i>in the final DOM structure</i> after
1989
+ * corrections have been applied
1990
+ *
1991
+ * Performance differences:
1992
+ * - The CRuby method is {O(1)}[https://en.wikipedia.org/wiki/Time_complexity#Constant_time]
1993
+ * (constant time)
1994
+ * - The JRuby method is {O(n)}[https://en.wikipedia.org/wiki/Time_complexity#Linear_time] (linear
1995
+ * time, where n is the number of nodes before/above the element in the DOM)
1996
+ *
1997
+ * If you'd like to help improve the JRuby implementation, please review these issues and reach out
1998
+ * to the maintainers:
1999
+ * - https://github.com/sparklemotion/nokogiri/issues/1223
2000
+ * - https://github.com/sparklemotion/nokogiri/pull/2177
2001
+ * - https://github.com/sparklemotion/nokogiri/issues/2380
2002
+ */
2003
+ static VALUE
2004
+ rb_xml_node_line(VALUE rb_node)
2005
+ {
2006
+ xmlNodePtr c_node;
2007
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1374
2008
 
1375
- return Nokogiri_wrap_xml_namespace(node->doc, ns);
2009
+ return LONG2NUM(xmlGetLineNo(c_node));
1376
2010
  }
1377
2011
 
1378
2012
  /*
1379
2013
  * call-seq:
1380
- * new(name, document)
2014
+ * line=(num)
1381
2015
  *
1382
- * Create a new node with +name+ sharing GC lifecycle with +document+
2016
+ * Sets the line for this Node. num must be less than 65535.
1383
2017
  */
1384
- static VALUE new(int argc, VALUE *argv, VALUE klass)
2018
+ static VALUE
2019
+ rb_xml_node_line_set(VALUE rb_node, VALUE rb_line_number)
1385
2020
  {
1386
- xmlDocPtr doc;
1387
- xmlNodePtr node;
1388
- VALUE name;
1389
- VALUE document;
2021
+ xmlNodePtr c_node;
2022
+ int line_number = NUM2INT(rb_line_number);
2023
+
2024
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
2025
+
2026
+ // libxml2 optionally uses xmlNode.psvi to store longer line numbers, but only for text nodes.
2027
+ // search for "psvi" in SAX2.c and tree.c to learn more.
2028
+ if (line_number < 65535) {
2029
+ c_node->line = (short) line_number;
2030
+ } else {
2031
+ c_node->line = 65535;
2032
+ if (c_node->type == XML_TEXT_NODE) {
2033
+ c_node->psvi = (void *)(ptrdiff_t) line_number;
2034
+ }
2035
+ }
2036
+
2037
+ return rb_line_number;
2038
+ }
2039
+
2040
+ /* :nodoc: documented in lib/nokogiri/xml/node.rb */
2041
+ static VALUE
2042
+ rb_xml_node_new(int argc, VALUE *argv, VALUE klass)
2043
+ {
2044
+ xmlNodePtr c_document_node;
2045
+ xmlNodePtr c_node;
2046
+ VALUE rb_name;
2047
+ VALUE rb_document_node;
1390
2048
  VALUE rest;
1391
2049
  VALUE rb_node;
1392
2050
 
1393
- rb_scan_args(argc, argv, "2*", &name, &document, &rest);
2051
+ rb_scan_args(argc, argv, "2*", &rb_name, &rb_document_node, &rest);
1394
2052
 
1395
- Data_Get_Struct(document, xmlDoc, doc);
2053
+ if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlNode)) {
2054
+ rb_raise(rb_eArgError, "document must be a Nokogiri::XML::Node");
2055
+ }
2056
+ if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlDocument)) {
2057
+ NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.13.0, remove in v1.17.0
2058
+ }
2059
+ Noko_Node_Get_Struct(rb_document_node, xmlNode, c_document_node);
1396
2060
 
1397
- node = xmlNewNode(NULL, (xmlChar *)StringValueCStr(name));
1398
- node->doc = doc->doc;
1399
- nokogiri_root_node(node);
2061
+ c_node = xmlNewNode(NULL, (xmlChar *)StringValueCStr(rb_name));
2062
+ c_node->doc = c_document_node->doc;
2063
+ noko_xml_document_pin_node(c_node);
1400
2064
 
1401
- rb_node = Nokogiri_wrap_xml_node(
2065
+ rb_node = noko_xml_node_wrap(
1402
2066
  klass == cNokogiriXmlNode ? (VALUE)NULL : klass,
1403
- node
2067
+ c_node
1404
2068
  );
1405
2069
  rb_obj_call_init(rb_node, argc, argv);
1406
2070
 
1407
- if(rb_block_given_p()) { rb_yield(rb_node); }
2071
+ if (rb_block_given_p()) { rb_yield(rb_node); }
1408
2072
 
1409
2073
  return rb_node;
1410
2074
  }
@@ -1415,13 +2079,14 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
1415
2079
  *
1416
2080
  * Returns the Node as html.
1417
2081
  */
1418
- static VALUE dump_html(VALUE self)
2082
+ static VALUE
2083
+ dump_html(VALUE self)
1419
2084
  {
1420
2085
  xmlBufferPtr buf ;
1421
2086
  xmlNodePtr node ;
1422
2087
  VALUE html;
1423
2088
 
1424
- Data_Get_Struct(self, xmlNode, node);
2089
+ Noko_Node_Get_Struct(self, xmlNode, node);
1425
2090
 
1426
2091
  buf = xmlBufferCreate() ;
1427
2092
  htmlNodeDump(buf, node->doc, node);
@@ -1436,13 +2101,14 @@ static VALUE dump_html(VALUE self)
1436
2101
  *
1437
2102
  * Compare this Node to +other+ with respect to their Document
1438
2103
  */
1439
- static VALUE compare(VALUE self, VALUE _other)
2104
+ static VALUE
2105
+ compare(VALUE self, VALUE _other)
1440
2106
  {
1441
2107
  xmlNodePtr node, other;
1442
- Data_Get_Struct(self, xmlNode, node);
1443
- Data_Get_Struct(_other, xmlNode, other);
2108
+ Noko_Node_Get_Struct(self, xmlNode, node);
2109
+ Noko_Node_Get_Struct(_other, xmlNode, other);
1444
2110
 
1445
- return INT2NUM((long)xmlXPathCmpNodes(other, node));
2111
+ return INT2NUM(xmlXPathCmpNodes(other, node));
1446
2112
  }
1447
2113
 
1448
2114
 
@@ -1453,23 +2119,24 @@ static VALUE compare(VALUE self, VALUE _other)
1453
2119
  * Loads and substitutes all xinclude elements below the node. The
1454
2120
  * parser context will be initialized with +options+.
1455
2121
  */
1456
- static VALUE process_xincludes(VALUE self, VALUE options)
2122
+ static VALUE
2123
+ process_xincludes(VALUE self, VALUE options)
1457
2124
  {
1458
2125
  int rcode ;
1459
2126
  xmlNodePtr node;
1460
2127
  VALUE error_list = rb_ary_new();
1461
2128
 
1462
- Data_Get_Struct(self, xmlNode, node);
2129
+ Noko_Node_Get_Struct(self, xmlNode, node);
1463
2130
 
1464
2131
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
1465
2132
  rcode = xmlXIncludeProcessTreeFlags(node, (int)NUM2INT(options));
1466
2133
  xmlSetStructuredErrorFunc(NULL, NULL);
1467
2134
 
1468
2135
  if (rcode < 0) {
1469
- xmlErrorPtr error;
2136
+ xmlErrorConstPtr error;
1470
2137
 
1471
2138
  error = xmlGetLastError();
1472
- if(error) {
2139
+ if (error) {
1473
2140
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
1474
2141
  } else {
1475
2142
  rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
@@ -1481,7 +2148,8 @@ static VALUE process_xincludes(VALUE self, VALUE options)
1481
2148
 
1482
2149
 
1483
2150
  /* TODO: DOCUMENT ME */
1484
- static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
2151
+ static VALUE
2152
+ in_context(VALUE self, VALUE _str, VALUE _options)
1485
2153
  {
1486
2154
  xmlNodePtr node, list = 0, tmp, child_iter, node_children, doc_children;
1487
2155
  xmlNodeSetPtr set;
@@ -1489,7 +2157,7 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
1489
2157
  VALUE doc, err;
1490
2158
  int doc_is_empty;
1491
2159
 
1492
- Data_Get_Struct(self, xmlNode, node);
2160
+ Noko_Node_Get_Struct(self, xmlNode, node);
1493
2161
 
1494
2162
  doc = DOC_RUBY_OBJECT(node->doc);
1495
2163
  err = rb_iv_get(doc, "@errors");
@@ -1501,6 +2169,8 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
1501
2169
 
1502
2170
  /* Twiddle global variable because of a bug in libxml2.
1503
2171
  * http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
2172
+ *
2173
+ * TODO: this is fixed, and HTML_PARSE_NOIMPLIED is defined, in libxml2 2.7.7
1504
2174
  */
1505
2175
  #ifndef HTML_PARSE_NOIMPLIED
1506
2176
  htmlHandleOmittedElem(0);
@@ -1530,9 +2200,7 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
1530
2200
  */
1531
2201
  child_iter = node->doc->children ;
1532
2202
  while (child_iter) {
1533
- if (child_iter->parent != (xmlNodePtr)node->doc) {
1534
- child_iter->parent = (xmlNodePtr)node->doc;
1535
- }
2203
+ child_iter->parent = (xmlNodePtr)node->doc;
1536
2204
  child_iter = child_iter->next;
1537
2205
  }
1538
2206
 
@@ -1542,12 +2210,17 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
1542
2210
 
1543
2211
  xmlSetStructuredErrorFunc(NULL, NULL);
1544
2212
 
1545
- /* Workaround for a libxml2 bug where a parsing error may leave a broken
2213
+ /*
2214
+ * Workaround for a libxml2 bug where a parsing error may leave a broken
1546
2215
  * node reference in node->doc->children.
2216
+ *
2217
+ * https://bugzilla.gnome.org/show_bug.cgi?id=668155
2218
+ *
1547
2219
  * This workaround is limited to when a parse error occurs, the document
1548
2220
  * went from having no children to having children, and the context node is
1549
2221
  * part of a document fragment.
1550
- * https://bugzilla.gnome.org/show_bug.cgi?id=668155
2222
+ *
2223
+ * TODO: This was fixed in libxml 2.8.0 by 71a243d
1551
2224
  */
1552
2225
  if (error != XML_ERR_OK && doc_is_empty && node->doc->children != NULL) {
1553
2226
  child_iter = node;
@@ -1562,12 +2235,12 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
1562
2235
 
1563
2236
  /* FIXME: This probably needs to handle more constants... */
1564
2237
  switch (error) {
1565
- case XML_ERR_INTERNAL_ERROR:
1566
- case XML_ERR_NO_MEMORY:
1567
- rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error);
1568
- break;
1569
- default:
1570
- break;
2238
+ case XML_ERR_INTERNAL_ERROR:
2239
+ case XML_ERR_NO_MEMORY:
2240
+ rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error);
2241
+ break;
2242
+ default:
2243
+ break;
1571
2244
  }
1572
2245
 
1573
2246
  set = xmlXPathNodeSetCreate(NULL);
@@ -1576,178 +2249,174 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
1576
2249
  tmp = list->next;
1577
2250
  list->next = NULL;
1578
2251
  xmlXPathNodeSetAddUnique(set, list);
1579
- nokogiri_root_node(list);
2252
+ noko_xml_document_pin_node(list);
1580
2253
  list = tmp;
1581
2254
  }
1582
2255
 
1583
- return Nokogiri_wrap_xml_node_set(set, doc);
2256
+ return noko_xml_node_set_wrap(set, doc);
1584
2257
  }
1585
2258
 
1586
-
1587
- VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
2259
+ VALUE
2260
+ noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
1588
2261
  {
1589
- VALUE document = Qnil ;
1590
- VALUE node_cache = Qnil ;
1591
- VALUE rb_node = Qnil ;
2262
+ VALUE rb_document, rb_node_cache, rb_node;
1592
2263
  nokogiriTuplePtr node_has_a_document;
1593
- xmlDocPtr doc;
1594
- void (*mark_method)(xmlNodePtr) = NULL ;
2264
+ xmlDocPtr c_doc;
1595
2265
 
1596
- assert(node);
2266
+ assert(c_node);
1597
2267
 
1598
- if(node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE) {
1599
- return DOC_RUBY_OBJECT(node->doc);
2268
+ if (c_node->type == XML_DOCUMENT_NODE || c_node->type == XML_HTML_DOCUMENT_NODE) {
2269
+ return DOC_RUBY_OBJECT(c_node->doc);
1600
2270
  }
1601
2271
 
1602
- /* It's OK if the node doesn't have a fully-realized document (as in XML::Reader). */
1603
- /* see https://github.com/sparklemotion/nokogiri/issues/95 */
1604
- /* and https://github.com/sparklemotion/nokogiri/issues/439 */
1605
- doc = node->doc;
1606
- if (doc->type == XML_DOCUMENT_FRAG_NODE) { doc = doc->doc; }
1607
- node_has_a_document = DOC_RUBY_OBJECT_TEST(doc);
2272
+ c_doc = c_node->doc;
2273
+
2274
+ // Nodes yielded from XML::Reader don't have a fully-realized Document
2275
+ node_has_a_document = DOC_RUBY_OBJECT_TEST(c_doc);
1608
2276
 
1609
- if(node->_private && node_has_a_document) {
1610
- return (VALUE)node->_private;
2277
+ if (c_node->_private && node_has_a_document) {
2278
+ return (VALUE)c_node->_private;
1611
2279
  }
1612
2280
 
1613
- if(!RTEST(klass)) {
1614
- switch(node->type) {
1615
- case XML_ELEMENT_NODE:
1616
- klass = cNokogiriXmlElement;
1617
- break;
1618
- case XML_TEXT_NODE:
1619
- klass = cNokogiriXmlText;
1620
- break;
1621
- case XML_ATTRIBUTE_NODE:
1622
- klass = cNokogiriXmlAttr;
1623
- break;
1624
- case XML_ENTITY_REF_NODE:
1625
- klass = cNokogiriXmlEntityReference;
1626
- break;
1627
- case XML_COMMENT_NODE:
1628
- klass = cNokogiriXmlComment;
1629
- break;
1630
- case XML_DOCUMENT_FRAG_NODE:
1631
- klass = cNokogiriXmlDocumentFragment;
1632
- break;
1633
- case XML_PI_NODE:
1634
- klass = cNokogiriXmlProcessingInstruction;
1635
- break;
1636
- case XML_ENTITY_DECL:
1637
- klass = cNokogiriXmlEntityDecl;
1638
- break;
1639
- case XML_CDATA_SECTION_NODE:
1640
- klass = cNokogiriXmlCData;
1641
- break;
1642
- case XML_DTD_NODE:
1643
- klass = cNokogiriXmlDtd;
1644
- break;
1645
- case XML_ATTRIBUTE_DECL:
1646
- klass = cNokogiriXmlAttributeDecl;
1647
- break;
1648
- case XML_ELEMENT_DECL:
1649
- klass = cNokogiriXmlElementDecl;
1650
- break;
1651
- default:
1652
- klass = cNokogiriXmlNode;
2281
+ if (!RTEST(rb_class)) {
2282
+ switch (c_node->type) {
2283
+ case XML_ELEMENT_NODE:
2284
+ rb_class = cNokogiriXmlElement;
2285
+ break;
2286
+ case XML_TEXT_NODE:
2287
+ rb_class = cNokogiriXmlText;
2288
+ break;
2289
+ case XML_ATTRIBUTE_NODE:
2290
+ rb_class = cNokogiriXmlAttr;
2291
+ break;
2292
+ case XML_ENTITY_REF_NODE:
2293
+ rb_class = cNokogiriXmlEntityReference;
2294
+ break;
2295
+ case XML_COMMENT_NODE:
2296
+ rb_class = cNokogiriXmlComment;
2297
+ break;
2298
+ case XML_DOCUMENT_FRAG_NODE:
2299
+ rb_class = cNokogiriXmlDocumentFragment;
2300
+ break;
2301
+ case XML_PI_NODE:
2302
+ rb_class = cNokogiriXmlProcessingInstruction;
2303
+ break;
2304
+ case XML_ENTITY_DECL:
2305
+ rb_class = cNokogiriXmlEntityDecl;
2306
+ break;
2307
+ case XML_CDATA_SECTION_NODE:
2308
+ rb_class = cNokogiriXmlCData;
2309
+ break;
2310
+ case XML_DTD_NODE:
2311
+ rb_class = cNokogiriXmlDtd;
2312
+ break;
2313
+ case XML_ATTRIBUTE_DECL:
2314
+ rb_class = cNokogiriXmlAttributeDecl;
2315
+ break;
2316
+ case XML_ELEMENT_DECL:
2317
+ rb_class = cNokogiriXmlElementDecl;
2318
+ break;
2319
+ default:
2320
+ rb_class = cNokogiriXmlNode;
1653
2321
  }
1654
2322
  }
1655
2323
 
1656
- mark_method = node_has_a_document ? mark : NULL ;
1657
-
1658
- rb_node = Data_Wrap_Struct(klass, mark_method, debug_node_dealloc, node) ;
1659
- node->_private = (void *)rb_node;
2324
+ rb_node = TypedData_Wrap_Struct(rb_class, &nokogiri_node_type, c_node) ;
2325
+ c_node->_private = (void *)rb_node;
1660
2326
 
1661
2327
  if (node_has_a_document) {
1662
- document = DOC_RUBY_OBJECT(doc);
1663
- node_cache = DOC_NODE_CACHE(doc);
1664
- rb_ary_push(node_cache, rb_node);
1665
- rb_funcall(document, decorate, 1, rb_node);
2328
+ rb_document = DOC_RUBY_OBJECT(c_doc);
2329
+ rb_node_cache = DOC_NODE_CACHE(c_doc);
2330
+ rb_ary_push(rb_node_cache, rb_node);
2331
+ rb_funcall(rb_document, id_decorate, 1, rb_node);
1666
2332
  }
1667
2333
 
1668
2334
  return rb_node ;
1669
2335
  }
1670
2336
 
1671
2337
 
1672
- void Nokogiri_xml_node_properties(xmlNodePtr node, VALUE attr_list)
2338
+ /*
2339
+ * return Array<Nokogiri::XML::Attr> containing the node's attributes
2340
+ */
2341
+ VALUE
2342
+ noko_xml_node_attrs(xmlNodePtr c_node)
1673
2343
  {
1674
- xmlAttrPtr prop;
1675
- prop = node->properties ;
1676
- while (prop != NULL) {
1677
- rb_ary_push(attr_list, Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)prop));
1678
- prop = prop->next ;
2344
+ VALUE rb_properties = rb_ary_new();
2345
+ xmlAttrPtr c_property;
2346
+
2347
+ c_property = c_node->properties ;
2348
+ while (c_property != NULL) {
2349
+ rb_ary_push(rb_properties, noko_xml_node_wrap(Qnil, (xmlNodePtr)c_property));
2350
+ c_property = c_property->next ;
1679
2351
  }
2352
+
2353
+ return rb_properties;
1680
2354
  }
1681
2355
 
1682
- VALUE cNokogiriXmlNode ;
1683
- VALUE cNokogiriXmlElement ;
1684
-
1685
- void init_xml_node()
1686
- {
1687
- VALUE nokogiri = rb_define_module("Nokogiri");
1688
- VALUE xml = rb_define_module_under(nokogiri, "XML");
1689
- VALUE klass = rb_define_class_under(xml, "Node", rb_cObject);
1690
-
1691
- cNokogiriXmlNode = klass;
1692
-
1693
- cNokogiriXmlElement = rb_define_class_under(xml, "Element", klass);
1694
-
1695
- rb_define_singleton_method(klass, "new", new, -1);
1696
-
1697
- rb_define_method(klass, "add_namespace_definition", add_namespace_definition, 2);
1698
- rb_define_method(klass, "node_name", get_name, 0);
1699
- rb_define_method(klass, "document", document, 0);
1700
- rb_define_method(klass, "node_name=", set_name, 1);
1701
- rb_define_method(klass, "parent", get_parent, 0);
1702
- rb_define_method(klass, "child", child, 0);
1703
- rb_define_method(klass, "first_element_child", first_element_child, 0);
1704
- rb_define_method(klass, "last_element_child", last_element_child, 0);
1705
- rb_define_method(klass, "children", children, 0);
1706
- rb_define_method(klass, "element_children", element_children, 0);
1707
- rb_define_method(klass, "next_sibling", next_sibling, 0);
1708
- rb_define_method(klass, "previous_sibling", previous_sibling, 0);
1709
- rb_define_method(klass, "next_element", next_element, 0);
1710
- rb_define_method(klass, "previous_element", previous_element, 0);
1711
- rb_define_method(klass, "node_type", node_type, 0);
1712
- rb_define_method(klass, "path", path, 0);
1713
- rb_define_method(klass, "key?", key_eh, 1);
1714
- rb_define_method(klass, "namespaced_key?", namespaced_key_eh, 2);
1715
- rb_define_method(klass, "blank?", blank_eh, 0);
1716
- rb_define_method(klass, "attribute_nodes", attribute_nodes, 0);
1717
- rb_define_method(klass, "attribute", attr, 1);
1718
- rb_define_method(klass, "attribute_with_ns", attribute_with_ns, 2);
1719
- rb_define_method(klass, "namespace", namespace, 0);
1720
- rb_define_method(klass, "namespace_definitions", namespace_definitions, 0);
1721
- rb_define_method(klass, "namespace_scopes", namespace_scopes, 0);
1722
- rb_define_method(klass, "encode_special_chars", encode_special_chars, 1);
1723
- rb_define_method(klass, "dup", duplicate_node, -1);
1724
- rb_define_method(klass, "unlink", unlink_node, 0);
1725
- rb_define_method(klass, "internal_subset", internal_subset, 0);
1726
- rb_define_method(klass, "external_subset", external_subset, 0);
1727
- rb_define_method(klass, "create_internal_subset", create_internal_subset, 3);
1728
- rb_define_method(klass, "create_external_subset", create_external_subset, 3);
1729
- rb_define_method(klass, "pointer_id", pointer_id, 0);
1730
- rb_define_method(klass, "line", line, 0);
1731
- rb_define_method(klass, "content", get_native_content, 0);
1732
- rb_define_method(klass, "native_content=", set_native_content, 1);
1733
- rb_define_method(klass, "lang", get_lang, 0);
1734
- rb_define_method(klass, "lang=", set_lang, 1);
1735
-
1736
- rb_define_private_method(klass, "process_xincludes", process_xincludes, 1);
1737
- rb_define_private_method(klass, "in_context", in_context, 2);
1738
- rb_define_private_method(klass, "add_child_node", add_child, 1);
1739
- rb_define_private_method(klass, "add_previous_sibling_node", add_previous_sibling, 1);
1740
- rb_define_private_method(klass, "add_next_sibling_node", add_next_sibling, 1);
1741
- rb_define_private_method(klass, "replace_node", replace, 1);
1742
- rb_define_private_method(klass, "dump_html", dump_html, 0);
1743
- rb_define_private_method(klass, "native_write_to", native_write_to, 4);
1744
- rb_define_private_method(klass, "get", get, 1);
1745
- rb_define_private_method(klass, "set", set, 2);
1746
- rb_define_private_method(klass, "set_namespace", set_namespace, 1);
1747
- rb_define_private_method(klass, "compare", compare, 1);
1748
-
1749
- decorate = rb_intern("decorate");
1750
- decorate_bang = rb_intern("decorate!");
1751
- }
1752
-
1753
- /* vim: set noet sw=4 sws=4 */
2356
+ void
2357
+ noko_init_xml_node(void)
2358
+ {
2359
+ cNokogiriXmlNode = rb_define_class_under(mNokogiriXml, "Node", rb_cObject);
2360
+
2361
+ rb_undef_alloc_func(cNokogiriXmlNode);
2362
+
2363
+ rb_define_singleton_method(cNokogiriXmlNode, "new", rb_xml_node_new, -1);
2364
+
2365
+ rb_define_method(cNokogiriXmlNode, "add_namespace_definition", rb_xml_node_add_namespace_definition, 2);
2366
+ rb_define_method(cNokogiriXmlNode, "attribute", rb_xml_node_attribute, 1);
2367
+ rb_define_method(cNokogiriXmlNode, "attribute_nodes", rb_xml_node_attribute_nodes, 0);
2368
+ rb_define_method(cNokogiriXmlNode, "attribute_with_ns", rb_xml_node_attribute_with_ns, 2);
2369
+ rb_define_method(cNokogiriXmlNode, "blank?", rb_xml_node_blank_eh, 0);
2370
+ rb_define_method(cNokogiriXmlNode, "child", rb_xml_node_child, 0);
2371
+ rb_define_method(cNokogiriXmlNode, "children", rb_xml_node_children, 0);
2372
+ rb_define_method(cNokogiriXmlNode, "content", rb_xml_node_content, 0);
2373
+ rb_define_method(cNokogiriXmlNode, "create_external_subset", create_external_subset, 3);
2374
+ rb_define_method(cNokogiriXmlNode, "create_internal_subset", create_internal_subset, 3);
2375
+ rb_define_method(cNokogiriXmlNode, "document", rb_xml_node_document, 0);
2376
+ rb_define_method(cNokogiriXmlNode, "dup", duplicate_node, -1);
2377
+ rb_define_method(cNokogiriXmlNode, "element_children", rb_xml_node_element_children, 0);
2378
+ rb_define_method(cNokogiriXmlNode, "encode_special_chars", encode_special_chars, 1);
2379
+ rb_define_method(cNokogiriXmlNode, "external_subset", external_subset, 0);
2380
+ rb_define_method(cNokogiriXmlNode, "first_element_child", rb_xml_node_first_element_child, 0);
2381
+ rb_define_method(cNokogiriXmlNode, "internal_subset", internal_subset, 0);
2382
+ rb_define_method(cNokogiriXmlNode, "key?", key_eh, 1);
2383
+ rb_define_method(cNokogiriXmlNode, "lang", get_lang, 0);
2384
+ rb_define_method(cNokogiriXmlNode, "lang=", set_lang, 1);
2385
+ rb_define_method(cNokogiriXmlNode, "last_element_child", rb_xml_node_last_element_child, 0);
2386
+ rb_define_method(cNokogiriXmlNode, "line", rb_xml_node_line, 0);
2387
+ rb_define_method(cNokogiriXmlNode, "line=", rb_xml_node_line_set, 1);
2388
+ rb_define_method(cNokogiriXmlNode, "namespace", rb_xml_node_namespace, 0);
2389
+ rb_define_method(cNokogiriXmlNode, "namespace_definitions", namespace_definitions, 0);
2390
+ rb_define_method(cNokogiriXmlNode, "namespace_scopes", rb_xml_node_namespace_scopes, 0);
2391
+ rb_define_method(cNokogiriXmlNode, "namespaced_key?", namespaced_key_eh, 2);
2392
+ rb_define_method(cNokogiriXmlNode, "native_content=", set_native_content, 1);
2393
+ rb_define_method(cNokogiriXmlNode, "next_element", next_element, 0);
2394
+ rb_define_method(cNokogiriXmlNode, "next_sibling", next_sibling, 0);
2395
+ rb_define_method(cNokogiriXmlNode, "node_name", get_name, 0);
2396
+ rb_define_method(cNokogiriXmlNode, "node_name=", set_name, 1);
2397
+ rb_define_method(cNokogiriXmlNode, "node_type", node_type, 0);
2398
+ rb_define_method(cNokogiriXmlNode, "parent", get_parent, 0);
2399
+ rb_define_method(cNokogiriXmlNode, "path", rb_xml_node_path, 0);
2400
+ rb_define_method(cNokogiriXmlNode, "pointer_id", rb_xml_node_pointer_id, 0);
2401
+ rb_define_method(cNokogiriXmlNode, "previous_element", previous_element, 0);
2402
+ rb_define_method(cNokogiriXmlNode, "previous_sibling", previous_sibling, 0);
2403
+ rb_define_method(cNokogiriXmlNode, "unlink", unlink_node, 0);
2404
+
2405
+ rb_define_private_method(cNokogiriXmlNode, "add_child_node", add_child, 1);
2406
+ rb_define_private_method(cNokogiriXmlNode, "add_next_sibling_node", add_next_sibling, 1);
2407
+ rb_define_private_method(cNokogiriXmlNode, "add_previous_sibling_node", add_previous_sibling, 1);
2408
+ rb_define_private_method(cNokogiriXmlNode, "compare", compare, 1);
2409
+ rb_define_private_method(cNokogiriXmlNode, "dump_html", dump_html, 0);
2410
+ rb_define_private_method(cNokogiriXmlNode, "get", get, 1);
2411
+ rb_define_private_method(cNokogiriXmlNode, "in_context", in_context, 2);
2412
+ rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4);
2413
+ rb_define_private_method(cNokogiriXmlNode, "prepend_newline?", rb_prepend_newline, 0);
2414
+ rb_define_private_method(cNokogiriXmlNode, "html_standard_serialize", html_standard_serialize, 1);
2415
+ rb_define_private_method(cNokogiriXmlNode, "process_xincludes", process_xincludes, 1);
2416
+ rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1);
2417
+ rb_define_private_method(cNokogiriXmlNode, "set", set, 2);
2418
+ rb_define_private_method(cNokogiriXmlNode, "set_namespace", set_namespace, 1);
2419
+
2420
+ id_decorate = rb_intern("decorate");
2421
+ id_decorate_bang = rb_intern("decorate!");
2422
+ }