nokogiri 1.10.9 → 1.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (230)
  1. checksums.yaml +4 -4
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +1632 -1022
  4. data/LICENSE.md +1 -1
  5. data/README.md +190 -95
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +34 -66
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +909 -422
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  18. data/ext/nokogiri/nokogiri.c +258 -105
  19. data/ext/nokogiri/nokogiri.h +207 -90
  20. data/ext/nokogiri/test_global_handlers.c +40 -0
  21. data/ext/nokogiri/xml_attr.c +18 -18
  22. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  23. data/ext/nokogiri/xml_cdata.c +33 -33
  24. data/ext/nokogiri/xml_comment.c +19 -31
  25. data/ext/nokogiri/xml_document.c +499 -323
  26. data/ext/nokogiri/xml_document_fragment.c +17 -36
  27. data/ext/nokogiri/xml_dtd.c +65 -59
  28. data/ext/nokogiri/xml_element_content.c +63 -55
  29. data/ext/nokogiri/xml_element_decl.c +31 -31
  30. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  31. data/ext/nokogiri/xml_entity_decl.c +37 -35
  32. data/ext/nokogiri/xml_entity_reference.c +17 -19
  33. data/ext/nokogiri/xml_namespace.c +131 -61
  34. data/ext/nokogiri/xml_node.c +1429 -723
  35. data/ext/nokogiri/xml_node_set.c +257 -225
  36. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  37. data/ext/nokogiri/xml_reader.c +340 -231
  38. data/ext/nokogiri/xml_relax_ng.c +87 -99
  39. data/ext/nokogiri/xml_sax_parser.c +269 -176
  40. data/ext/nokogiri/xml_sax_parser_context.c +286 -152
  41. data/ext/nokogiri/xml_sax_push_parser.c +111 -64
  42. data/ext/nokogiri/xml_schema.c +132 -140
  43. data/ext/nokogiri/xml_syntax_error.c +52 -23
  44. data/ext/nokogiri/xml_text.c +37 -30
  45. data/ext/nokogiri/xml_xpath_context.c +373 -185
  46. data/ext/nokogiri/xslt_stylesheet.c +342 -191
  47. data/gumbo-parser/CHANGES.md +63 -0
  48. data/gumbo-parser/Makefile +129 -0
  49. data/gumbo-parser/THANKS +27 -0
  50. data/gumbo-parser/src/Makefile +34 -0
  51. data/gumbo-parser/src/README.md +41 -0
  52. data/gumbo-parser/src/ascii.c +75 -0
  53. data/gumbo-parser/src/ascii.h +115 -0
  54. data/gumbo-parser/src/attribute.c +42 -0
  55. data/gumbo-parser/src/attribute.h +17 -0
  56. data/gumbo-parser/src/char_ref.c +22225 -0
  57. data/gumbo-parser/src/char_ref.h +29 -0
  58. data/gumbo-parser/src/char_ref.rl +2154 -0
  59. data/gumbo-parser/src/error.c +658 -0
  60. data/gumbo-parser/src/error.h +152 -0
  61. data/gumbo-parser/src/foreign_attrs.c +103 -0
  62. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
  66. data/gumbo-parser/src/parser.c +4932 -0
  67. data/gumbo-parser/src/parser.h +41 -0
  68. data/gumbo-parser/src/replacement.h +33 -0
  69. data/gumbo-parser/src/string_buffer.c +103 -0
  70. data/gumbo-parser/src/string_buffer.h +68 -0
  71. data/gumbo-parser/src/string_piece.c +48 -0
  72. data/gumbo-parser/src/svg_attrs.c +174 -0
  73. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  74. data/gumbo-parser/src/svg_tags.c +137 -0
  75. data/gumbo-parser/src/svg_tags.gperf +55 -0
  76. data/gumbo-parser/src/tag.c +223 -0
  77. data/gumbo-parser/src/tag_lookup.c +382 -0
  78. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  79. data/gumbo-parser/src/tag_lookup.h +13 -0
  80. data/gumbo-parser/src/token_buffer.c +79 -0
  81. data/gumbo-parser/src/token_buffer.h +71 -0
  82. data/gumbo-parser/src/token_type.h +17 -0
  83. data/gumbo-parser/src/tokenizer.c +3464 -0
  84. data/gumbo-parser/src/tokenizer.h +112 -0
  85. data/gumbo-parser/src/tokenizer_states.h +339 -0
  86. data/gumbo-parser/src/utf8.c +245 -0
  87. data/gumbo-parser/src/utf8.h +164 -0
  88. data/gumbo-parser/src/util.c +66 -0
  89. data/gumbo-parser/src/util.h +34 -0
  90. data/gumbo-parser/src/vector.c +111 -0
  91. data/gumbo-parser/src/vector.h +45 -0
  92. data/lib/nokogiri/class_resolver.rb +67 -0
  93. data/lib/nokogiri/css/node.rb +14 -8
  94. data/lib/nokogiri/css/parser.rb +399 -377
  95. data/lib/nokogiri/css/parser.y +250 -245
  96. data/lib/nokogiri/css/parser_extras.rb +16 -71
  97. data/lib/nokogiri/css/selector_cache.rb +38 -0
  98. data/lib/nokogiri/css/syntax_error.rb +3 -1
  99. data/lib/nokogiri/css/tokenizer.rb +7 -5
  100. data/lib/nokogiri/css/tokenizer.rex +11 -9
  101. data/lib/nokogiri/css/xpath_visitor.rb +242 -96
  102. data/lib/nokogiri/css.rb +122 -17
  103. data/lib/nokogiri/decorators/slop.rb +11 -11
  104. data/lib/nokogiri/encoding_handler.rb +57 -0
  105. data/lib/nokogiri/extension.rb +32 -0
  106. data/lib/nokogiri/gumbo.rb +15 -0
  107. data/lib/nokogiri/html.rb +38 -27
  108. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  109. data/lib/nokogiri/html4/document.rb +235 -0
  110. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  111. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  112. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  113. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  114. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  115. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  116. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  117. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  118. data/lib/nokogiri/html4.rb +42 -0
  119. data/lib/nokogiri/html5/builder.rb +40 -0
  120. data/lib/nokogiri/html5/document.rb +199 -0
  121. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  122. data/lib/nokogiri/html5/node.rb +103 -0
  123. data/lib/nokogiri/html5.rb +368 -0
  124. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  125. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  126. data/lib/nokogiri/syntax_error.rb +2 -0
  127. data/lib/nokogiri/version/constant.rb +6 -0
  128. data/lib/nokogiri/version/info.rb +224 -0
  129. data/lib/nokogiri/version.rb +3 -108
  130. data/lib/nokogiri/xml/attr.rb +55 -3
  131. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  132. data/lib/nokogiri/xml/builder.rb +83 -35
  133. data/lib/nokogiri/xml/cdata.rb +3 -1
  134. data/lib/nokogiri/xml/character_data.rb +2 -0
  135. data/lib/nokogiri/xml/document.rb +359 -130
  136. data/lib/nokogiri/xml/document_fragment.rb +170 -54
  137. data/lib/nokogiri/xml/dtd.rb +4 -2
  138. data/lib/nokogiri/xml/element_content.rb +12 -2
  139. data/lib/nokogiri/xml/element_decl.rb +6 -2
  140. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  141. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  142. data/lib/nokogiri/xml/namespace.rb +44 -0
  143. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  144. data/lib/nokogiri/xml/node.rb +1168 -420
  145. data/lib/nokogiri/xml/node_set.rb +145 -67
  146. data/lib/nokogiri/xml/notation.rb +13 -0
  147. data/lib/nokogiri/xml/parse_options.rb +145 -52
  148. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  149. data/lib/nokogiri/xml/pp/node.rb +47 -30
  150. data/lib/nokogiri/xml/pp.rb +4 -2
  151. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  152. data/lib/nokogiri/xml/reader.rb +68 -41
  153. data/lib/nokogiri/xml/relax_ng.rb +60 -17
  154. data/lib/nokogiri/xml/sax/document.rb +198 -111
  155. data/lib/nokogiri/xml/sax/parser.rb +144 -67
  156. data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
  157. data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
  158. data/lib/nokogiri/xml/sax.rb +54 -4
  159. data/lib/nokogiri/xml/schema.rb +116 -39
  160. data/lib/nokogiri/xml/searchable.rb +139 -95
  161. data/lib/nokogiri/xml/syntax_error.rb +29 -5
  162. data/lib/nokogiri/xml/text.rb +2 -0
  163. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  164. data/lib/nokogiri/xml/xpath.rb +15 -4
  165. data/lib/nokogiri/xml/xpath_context.rb +15 -4
  166. data/lib/nokogiri/xml.rb +45 -55
  167. data/lib/nokogiri/xslt/stylesheet.rb +32 -8
  168. data/lib/nokogiri/xslt.rb +103 -30
  169. data/lib/nokogiri.rb +59 -75
  170. data/lib/xsd/xmlparser/nokogiri.rb +32 -29
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  175. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  176. data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
  177. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  178. metadata +123 -295
  179. data/ext/nokogiri/html_document.c +0 -170
  180. data/ext/nokogiri/html_document.h +0 -10
  181. data/ext/nokogiri/html_element_description.c +0 -279
  182. data/ext/nokogiri/html_element_description.h +0 -10
  183. data/ext/nokogiri/html_entity_lookup.c +0 -32
  184. data/ext/nokogiri/html_entity_lookup.h +0 -8
  185. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  186. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  187. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  188. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  189. data/ext/nokogiri/xml_attr.h +0 -9
  190. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  191. data/ext/nokogiri/xml_cdata.h +0 -9
  192. data/ext/nokogiri/xml_comment.h +0 -9
  193. data/ext/nokogiri/xml_document.h +0 -23
  194. data/ext/nokogiri/xml_document_fragment.h +0 -10
  195. data/ext/nokogiri/xml_dtd.h +0 -10
  196. data/ext/nokogiri/xml_element_content.h +0 -10
  197. data/ext/nokogiri/xml_element_decl.h +0 -9
  198. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  199. data/ext/nokogiri/xml_entity_decl.h +0 -10
  200. data/ext/nokogiri/xml_entity_reference.h +0 -9
  201. data/ext/nokogiri/xml_io.c +0 -61
  202. data/ext/nokogiri/xml_io.h +0 -11
  203. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  204. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  205. data/ext/nokogiri/xml_namespace.h +0 -14
  206. data/ext/nokogiri/xml_node.h +0 -13
  207. data/ext/nokogiri/xml_node_set.h +0 -12
  208. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  209. data/ext/nokogiri/xml_reader.h +0 -10
  210. data/ext/nokogiri/xml_relax_ng.h +0 -9
  211. data/ext/nokogiri/xml_sax_parser.h +0 -39
  212. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  213. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  214. data/ext/nokogiri/xml_schema.h +0 -9
  215. data/ext/nokogiri/xml_syntax_error.h +0 -13
  216. data/ext/nokogiri/xml_text.h +0 -9
  217. data/ext/nokogiri/xml_xpath_context.h +0 -10
  218. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  219. data/lib/nokogiri/html/document.rb +0 -335
  220. data/lib/nokogiri/html/document_fragment.rb +0 -49
  221. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  222. data/lib/nokogiri/html/sax/parser.rb +0 -62
  223. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  224. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  225. data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
  226. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  227. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  228. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  229. /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  230. /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,36 +1,75 @@
1
- #include <xml_node.h>
1
+ #include <nokogiri.h>
2
2
 
3
- static ID decorate, decorate_bang;
3
+ #include <stdbool.h>
4
4
 
5
- #ifdef DEBUG
6
- static void debug_node_dealloc(xmlNodePtr x)
7
- {
8
- NOKOGIRI_DEBUG_START(x)
9
- NOKOGIRI_DEBUG_END(x)
10
- }
11
- #else
12
- # define debug_node_dealloc 0
13
- #endif
5
+ // :stopdoc:
6
+
7
+ VALUE cNokogiriXmlNode ;
8
+ static ID id_decorate, id_decorate_bang;
14
9
 
15
- static void mark(xmlNodePtr node)
10
+ typedef xmlNodePtr(*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr);
11
+
12
+ static void
13
+ _xml_node_mark(void *ptr)
16
14
  {
15
+ xmlNodePtr node = ptr;
16
+
17
+ if (!DOC_RUBY_OBJECT_TEST(node->doc)) {
18
+ return;
19
+ }
20
+
17
21
  xmlDocPtr doc = node->doc;
18
- if(doc->type == XML_DOCUMENT_NODE || doc->type == XML_HTML_DOCUMENT_NODE) {
19
- if(DOC_RUBY_OBJECT_TEST(doc)) {
22
+ if (doc->type == XML_DOCUMENT_NODE || doc->type == XML_HTML_DOCUMENT_NODE) {
23
+ if (DOC_RUBY_OBJECT_TEST(doc)) {
20
24
  rb_gc_mark(DOC_RUBY_OBJECT(doc));
21
25
  }
22
- } else if(node->doc->_private) {
26
+ } else if (node->doc->_private) {
23
27
  rb_gc_mark((VALUE)doc->_private);
24
28
  }
25
29
  }
26
30
 
27
- /* :nodoc: */
28
- typedef xmlNodePtr (*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr);
31
+ static void
32
+ _xml_node_update_references(void *ptr)
33
+ {
34
+ xmlNodePtr node = ptr;
29
35
 
30
- /* :nodoc: */
31
- static void relink_namespace(xmlNodePtr reparented)
36
+ if (node->_private) {
37
+ node->_private = (void *)rb_gc_location((VALUE)node->_private);
38
+ }
39
+ }
40
+
41
+ static const rb_data_type_t xml_node_type = {
42
+ .wrap_struct_name = "xmlNode",
43
+ .function = {
44
+ .dmark = _xml_node_mark,
45
+ .dcompact = _xml_node_update_references,
46
+ },
47
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
48
+ };
49
+
50
+ static VALUE
51
+ _xml_node_alloc(VALUE klass)
52
+ {
53
+ return TypedData_Wrap_Struct(klass, &xml_node_type, NULL);
54
+ }
55
+
56
+ static void
57
+ _xml_node_data_ptr_set(VALUE rb_node, xmlNodePtr c_node)
58
+ {
59
+ assert(DATA_PTR(rb_node) == NULL);
60
+ assert(c_node->_private == NULL);
61
+
62
+ DATA_PTR(rb_node) = c_node;
63
+ c_node->_private = (void *)rb_node;
64
+
65
+ return;
66
+ }
67
+
68
+ static void
69
+ relink_namespace(xmlNodePtr reparented)
32
70
  {
33
71
  xmlNodePtr child;
72
+ xmlAttrPtr attr;
34
73
 
35
74
  if (reparented->type != XML_ATTRIBUTE_NODE &&
36
75
  reparented->type != XML_ELEMENT_NODE) { return; }
@@ -42,7 +81,7 @@ static void relink_namespace(xmlNodePtr reparented)
42
81
  name = xmlSplitQName2(reparented->name, &prefix);
43
82
 
44
83
  if (reparented->type == XML_ATTRIBUTE_NODE) {
45
- if (prefix == NULL || strcmp((char*)prefix, XMLNS_PREFIX) == 0) {
84
+ if (prefix == NULL || strcmp((char *)prefix, XMLNS_PREFIX) == 0) {
46
85
  xmlFree(name);
47
86
  xmlFree(prefix);
48
87
  return;
@@ -64,7 +103,9 @@ static void relink_namespace(xmlNodePtr reparented)
64
103
  if (reparented->type != XML_ELEMENT_NODE || !reparented->parent) { return; }
65
104
 
66
105
  /* Make sure that our reparented node has the correct namespaces */
67
- if (!reparented->ns && reparented->doc != (xmlDocPtr)reparented->parent) {
106
+ if (!reparented->ns &&
107
+ (reparented->doc != (xmlDocPtr)reparented->parent) &&
108
+ (rb_iv_get(DOC_RUBY_OBJECT(reparented->doc), "@namespace_inheritance") == Qtrue)) {
68
109
  xmlSetNs(reparented, reparented->parent->ns);
69
110
  }
70
111
 
@@ -87,7 +128,7 @@ static void relink_namespace(xmlNodePtr reparented)
87
128
  } else {
88
129
  reparented->nsDef = curr->next;
89
130
  }
90
- nokogiri_root_nsdef(curr, reparented->doc);
131
+ noko_xml_document_pin_namespace(curr, reparented->doc);
91
132
  } else {
92
133
  prev = curr;
93
134
  }
@@ -118,7 +159,7 @@ static void relink_namespace(xmlNodePtr reparented)
118
159
  /* reparent. */
119
160
  if (NULL == reparented->ns) { return; }
120
161
 
121
- /* When a node gets reparented, walk it's children to make sure that */
162
+ /* When a node gets reparented, walk its children to make sure that */
122
163
  /* their namespaces are reparented as well. */
123
164
  child = reparented->children;
124
165
  while (NULL != child) {
@@ -127,16 +168,19 @@ static void relink_namespace(xmlNodePtr reparented)
127
168
  }
128
169
 
129
170
  if (reparented->type == XML_ELEMENT_NODE) {
130
- child = (xmlNodePtr)((xmlElementPtr)reparented)->attributes;
131
- while(NULL != child) {
132
- relink_namespace(child);
133
- child = child->next;
171
+ attr = reparented->properties;
172
+ while (NULL != attr) {
173
+ relink_namespace((xmlNodePtr)attr);
174
+ attr = attr->next;
134
175
  }
135
176
  }
136
177
  }
137
178
 
138
- /* :nodoc: */
139
- static xmlNodePtr xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node)
179
+
180
+ /* internal function meant to wrap xmlReplaceNode
181
+ and fix some issues we have with libxml2 merging nodes */
182
+ static xmlNodePtr
183
+ xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node)
140
184
  {
141
185
  xmlNodePtr retval ;
142
186
 
@@ -159,22 +203,34 @@ static xmlNodePtr xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node)
159
203
  return retval ;
160
204
  }
161
205
 
162
- /* :nodoc: */
163
- static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func prf)
206
+
207
+ static void
208
+ raise_if_ancestor_of_self(xmlNodePtr self)
209
+ {
210
+ for (xmlNodePtr ancestor = self->parent ; ancestor ; ancestor = ancestor->parent) {
211
+ if (self == ancestor) {
212
+ rb_raise(rb_eRuntimeError, "cycle detected: node '%s' is an ancestor of itself", self->name);
213
+ }
214
+ }
215
+ }
216
+
217
+
218
+ static VALUE
219
+ reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func prf)
164
220
  {
165
221
  VALUE reparented_obj ;
166
- xmlNodePtr reparentee, pivot, reparented, next_text, new_next_text, parent ;
222
+ xmlNodePtr reparentee, original_reparentee, pivot, reparented, next_text, new_next_text, parent ;
167
223
  int original_ns_prefix_is_default = 0 ;
168
224
 
169
- if(!rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlNode)) {
225
+ if (!rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlNode)) {
170
226
  rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
171
227
  }
172
- if(rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlDocument)) {
228
+ if (rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlDocument)) {
173
229
  rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
174
230
  }
175
231
 
176
- Data_Get_Struct(reparentee_obj, xmlNode, reparentee);
177
- Data_Get_Struct(pivot_obj, xmlNode, pivot);
232
+ Noko_Node_Get_Struct(reparentee_obj, xmlNode, reparentee);
233
+ Noko_Node_Get_Struct(pivot_obj, xmlNode, pivot);
178
234
 
179
235
  /*
180
236
  * Check if nodes given are appropriate to have a parent-child
@@ -190,66 +246,66 @@ static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_rep
190
246
 
191
247
  if (parent) {
192
248
  switch (parent->type) {
193
- case XML_DOCUMENT_NODE:
194
- case XML_HTML_DOCUMENT_NODE:
195
- switch (reparentee->type) {
196
- case XML_ELEMENT_NODE:
197
- case XML_PI_NODE:
198
- case XML_COMMENT_NODE:
199
- case XML_DOCUMENT_TYPE_NODE:
200
- /*
201
- * The DOM specification says no to adding text-like nodes
202
- * directly to a document, but we allow it for compatibility.
203
- */
204
- case XML_TEXT_NODE:
205
- case XML_CDATA_SECTION_NODE:
206
- case XML_ENTITY_REF_NODE:
207
- goto ok;
208
- default:
249
+ case XML_DOCUMENT_NODE:
250
+ case XML_HTML_DOCUMENT_NODE:
251
+ switch (reparentee->type) {
252
+ case XML_ELEMENT_NODE:
253
+ case XML_PI_NODE:
254
+ case XML_COMMENT_NODE:
255
+ case XML_DOCUMENT_TYPE_NODE:
256
+ /*
257
+ * The DOM specification says no to adding text-like nodes
258
+ * directly to a document, but we allow it for compatibility.
259
+ */
260
+ case XML_TEXT_NODE:
261
+ case XML_CDATA_SECTION_NODE:
262
+ case XML_ENTITY_REF_NODE:
263
+ goto ok;
264
+ default:
265
+ break;
266
+ }
209
267
  break;
210
- }
211
- break;
212
- case XML_DOCUMENT_FRAG_NODE:
213
- case XML_ENTITY_REF_NODE:
214
- case XML_ELEMENT_NODE:
215
- switch (reparentee->type) {
216
- case XML_ELEMENT_NODE:
217
- case XML_PI_NODE:
218
- case XML_COMMENT_NODE:
219
- case XML_TEXT_NODE:
220
- case XML_CDATA_SECTION_NODE:
268
+ case XML_DOCUMENT_FRAG_NODE:
221
269
  case XML_ENTITY_REF_NODE:
222
- goto ok;
223
- default:
270
+ case XML_ELEMENT_NODE:
271
+ switch (reparentee->type) {
272
+ case XML_ELEMENT_NODE:
273
+ case XML_PI_NODE:
274
+ case XML_COMMENT_NODE:
275
+ case XML_TEXT_NODE:
276
+ case XML_CDATA_SECTION_NODE:
277
+ case XML_ENTITY_REF_NODE:
278
+ goto ok;
279
+ default:
280
+ break;
281
+ }
282
+ break;
283
+ case XML_ATTRIBUTE_NODE:
284
+ switch (reparentee->type) {
285
+ case XML_TEXT_NODE:
286
+ case XML_ENTITY_REF_NODE:
287
+ goto ok;
288
+ default:
289
+ break;
290
+ }
224
291
  break;
225
- }
226
- break;
227
- case XML_ATTRIBUTE_NODE:
228
- switch (reparentee->type) {
229
292
  case XML_TEXT_NODE:
230
- case XML_ENTITY_REF_NODE:
231
- goto ok;
293
+ /*
294
+ * xmlAddChild() breaks the DOM specification in that it allows
295
+ * adding a text node to another, in which case text nodes are
296
+ * coalesced, but since our JRuby version does not support such
297
+ * operation, we should inhibit it.
298
+ */
299
+ break;
232
300
  default:
233
301
  break;
234
- }
235
- break;
236
- case XML_TEXT_NODE:
237
- /*
238
- * xmlAddChild() breaks the DOM specification in that it allows
239
- * adding a text node to another, in which case text nodes are
240
- * coalesced, but since our JRuby version does not support such
241
- * operation, we should inhibit it.
242
- */
243
- break;
244
- default:
245
- break;
246
302
  }
247
303
 
248
304
  rb_raise(rb_eArgError, "cannot reparent %s there", rb_obj_classname(reparentee_obj));
249
305
  }
250
306
 
251
307
  ok:
252
- xmlUnlinkNode(reparentee);
308
+ original_reparentee = reparentee;
253
309
 
254
310
  if (reparentee->doc != pivot->doc || reparentee->type == XML_TEXT_NODE) {
255
311
  /*
@@ -290,7 +346,7 @@ ok:
290
346
  original_ns_prefix_is_default = 1;
291
347
  }
292
348
 
293
- nokogiri_root_node(reparentee);
349
+ noko_xml_document_pin_node(reparentee);
294
350
 
295
351
  if (!(reparentee = xmlDocCopyNode(reparentee, pivot->doc, 1))) {
296
352
  rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
@@ -301,12 +357,14 @@ ok:
301
357
  * issue #391, where new node's prefix may become the string "default"
302
358
  * see libxml2 tree.c xmlNewReconciliedNs which implements this behavior.
303
359
  */
304
- xmlFree(reparentee->ns->prefix);
360
+ xmlFree(DISCARD_CONST_QUAL_XMLCHAR(reparentee->ns->prefix));
305
361
  reparentee->ns->prefix = NULL;
306
362
  }
307
363
  }
308
364
 
309
- if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling
365
+ xmlUnlinkNode(original_reparentee);
366
+
367
+ if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling && prf != xmlAddChild
310
368
  && reparentee->type == XML_TEXT_NODE && pivot->next && pivot->next->type == XML_TEXT_NODE) {
311
369
  /*
312
370
  * libxml merges text nodes in a right-to-left fashion, meaning that if
@@ -330,12 +388,12 @@ ok:
330
388
  new_next_text = xmlDocCopyNode(next_text, pivot->doc, 1) ;
331
389
 
332
390
  xmlUnlinkNode(next_text);
333
- nokogiri_root_node(next_text);
391
+ noko_xml_document_pin_node(next_text);
334
392
 
335
393
  xmlAddNextSibling(pivot, new_next_text);
336
394
  }
337
395
 
338
- if(!(reparented = (*prf)(pivot, reparentee))) {
396
+ if (!(reparented = (*prf)(pivot, reparentee))) {
339
397
  rb_raise(rb_eRuntimeError, "Could not reparent node");
340
398
  }
341
399
 
@@ -345,57 +403,432 @@ ok:
345
403
  * adjacent text nodes.
346
404
  */
347
405
  DATA_PTR(reparentee_obj) = reparented ;
406
+ reparented_obj = noko_xml_node_wrap(Qnil, reparented);
348
407
 
349
- relink_namespace(reparented);
408
+ rb_funcall(reparented_obj, id_decorate_bang, 0);
350
409
 
351
- reparented_obj = Nokogiri_wrap_xml_node(Qnil, reparented);
410
+ /* if we've created a cycle, raise an exception */
411
+ raise_if_ancestor_of_self(reparented);
352
412
 
353
- rb_funcall(reparented_obj, decorate_bang, 0);
413
+ relink_namespace(reparented);
354
414
 
355
415
  return reparented_obj ;
356
416
  }
357
417
 
418
+ // :startdoc:
358
419
 
359
420
  /*
360
- * call-seq:
361
- * document
421
+ * :call-seq:
422
+ * add_namespace_definition(prefix, href) → Nokogiri::XML::Namespace
423
+ * add_namespace(prefix, href) → Nokogiri::XML::Namespace
424
+ *
425
+ * :category: Manipulating Document Structure
426
+ *
427
+ * Adds a namespace definition to this node with +prefix+ using +href+ value, as if this node had
428
+ * included an attribute "xmlns:prefix=href".
429
+ *
430
+ * A default namespace definition for this node can be added by passing +nil+ for +prefix+.
431
+ *
432
+ * [Parameters]
433
+ * - +prefix+ (String, +nil+) An {XML Name}[https://www.w3.org/TR/xml-names/#ns-decl]
434
+ * - +href+ (String) The {URI reference}[https://www.w3.org/TR/xml-names/#sec-namespaces]
435
+ *
436
+ * [Returns] The new Nokogiri::XML::Namespace
437
+ *
438
+ * *Example:* adding a non-default namespace definition
439
+ *
440
+ * doc = Nokogiri::XML("<store><inventory></inventory></store>")
441
+ * inventory = doc.at_css("inventory")
442
+ * inventory.add_namespace_definition("automobile", "http://alices-autos.com/")
443
+ * inventory.add_namespace_definition("bicycle", "http://bobs-bikes.com/")
444
+ * inventory.add_child("<automobile:tire>Michelin model XGV, size 75R</automobile:tire>")
445
+ * doc.to_xml
446
+ * # => "<?xml version=\"1.0\"?>\n" +
447
+ * # "<store>\n" +
448
+ * # " <inventory xmlns:automobile=\"http://alices-autos.com/\" xmlns:bicycle=\"http://bobs-bikes.com/\">\n" +
449
+ * # " <automobile:tire>Michelin model XGV, size 75R</automobile:tire>\n" +
450
+ * # " </inventory>\n" +
451
+ * # "</store>\n"
452
+ *
453
+ * *Example:* adding a default namespace definition
454
+ *
455
+ * doc = Nokogiri::XML("<store><inventory><tire>Michelin model XGV, size 75R</tire></inventory></store>")
456
+ * doc.at_css("tire").add_namespace_definition(nil, "http://bobs-bikes.com/")
457
+ * doc.to_xml
458
+ * # => "<?xml version=\"1.0\"?>\n" +
459
+ * # "<store>\n" +
460
+ * # " <inventory>\n" +
461
+ * # " <tire xmlns=\"http://bobs-bikes.com/\">Michelin model XGV, size 75R</tire>\n" +
462
+ * # " </inventory>\n" +
463
+ * # "</store>\n"
464
+ *
465
+ */
466
+ static VALUE
467
+ rb_xml_node_add_namespace_definition(VALUE rb_node, VALUE rb_prefix, VALUE rb_href)
468
+ {
469
+ xmlNodePtr c_node, element;
470
+ xmlNsPtr c_namespace;
471
+ const xmlChar *c_prefix = (const xmlChar *)(NIL_P(rb_prefix) ? NULL : StringValueCStr(rb_prefix));
472
+
473
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
474
+ element = c_node ;
475
+
476
+ c_namespace = xmlSearchNs(c_node->doc, c_node, c_prefix);
477
+
478
+ if (!c_namespace) {
479
+ if (c_node->type != XML_ELEMENT_NODE) {
480
+ element = c_node->parent;
481
+ }
482
+ c_namespace = xmlNewNs(element, (const xmlChar *)StringValueCStr(rb_href), c_prefix);
483
+ }
484
+
485
+ if (!c_namespace) {
486
+ return Qnil ;
487
+ }
488
+
489
+ if (NIL_P(rb_prefix) || c_node != element) {
490
+ xmlSetNs(c_node, c_namespace);
491
+ }
492
+
493
+ return noko_xml_namespace_wrap(c_namespace, c_node->doc);
494
+ }
495
+
496
+
497
+ /*
498
+ * :call-seq: attribute(name) → Nokogiri::XML::Attr
499
+ *
500
+ * :category: Working With Node Attributes
501
+ *
502
+ * [Returns] Attribute (Nokogiri::XML::Attr) belonging to this node with name +name+.
503
+ *
504
+ * ⚠ Note that attribute namespaces are ignored and only the simple (non-namespace-prefixed) name is
505
+ * used to find a matching attribute. In case of a simple name collision, only one of the matching
506
+ * attributes will be returned. In this case, you will need to use #attribute_with_ns.
507
+ *
508
+ * *Example:*
509
+ *
510
+ * doc = Nokogiri::XML("<root><child size='large' class='big wide tall'/></root>")
511
+ * child = doc.at_css("child")
512
+ * child.attribute("size") # => #<Nokogiri::XML::Attr:0x550 name="size" value="large">
513
+ * child.attribute("class") # => #<Nokogiri::XML::Attr:0x564 name="class" value="big wide tall">
514
+ *
515
+ * *Example* showing that namespaced attributes will not be returned:
516
+ *
517
+ * ⚠ Note that only one of the two matching attributes is returned.
518
+ *
519
+ * doc = Nokogiri::XML(<<~EOF)
520
+ * <root xmlns:width='http://example.com/widths'
521
+ * xmlns:height='http://example.com/heights'>
522
+ * <child width:size='broad' height:size='tall'/>
523
+ * </root>
524
+ * EOF
525
+ * doc.at_css("child").attribute("size")
526
+ * # => #(Attr:0x550 {
527
+ * # name = "size",
528
+ * # namespace = #(Namespace:0x564 {
529
+ * # prefix = "width",
530
+ * # href = "http://example.com/widths"
531
+ * # }),
532
+ * # value = "broad"
533
+ * # })
534
+ */
535
+ static VALUE
536
+ rb_xml_node_attribute(VALUE self, VALUE name)
537
+ {
538
+ xmlNodePtr node;
539
+ xmlAttrPtr prop;
540
+ Noko_Node_Get_Struct(self, xmlNode, node);
541
+ prop = xmlHasProp(node, (xmlChar *)StringValueCStr(name));
542
+
543
+ if (! prop) { return Qnil; }
544
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)prop);
545
+ }
546
+
547
+
548
+ /*
549
+ * :call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
550
+ *
551
+ * :category: Working With Node Attributes
552
+ *
553
+ * [Returns] Attributes (an Array of Nokogiri::XML::Attr) belonging to this node.
554
+ *
555
+ * Note that this is the preferred alternative to #attributes when the simple
556
+ * (non-namespace-prefixed) attribute names may collide.
557
+ *
558
+ * *Example:*
559
+ *
560
+ * Contrast this with the colliding-name example from #attributes.
561
+ *
562
+ * doc = Nokogiri::XML(<<~EOF)
563
+ * <root xmlns:width='http://example.com/widths'
564
+ * xmlns:height='http://example.com/heights'>
565
+ * <child width:size='broad' height:size='tall'/>
566
+ * </root>
567
+ * EOF
568
+ * doc.at_css("child").attribute_nodes
569
+ * # => [#(Attr:0x550 {
570
+ * # name = "size",
571
+ * # namespace = #(Namespace:0x564 {
572
+ * # prefix = "width",
573
+ * # href = "http://example.com/widths"
574
+ * # }),
575
+ * # value = "broad"
576
+ * # }),
577
+ * # #(Attr:0x578 {
578
+ * # name = "size",
579
+ * # namespace = #(Namespace:0x58c {
580
+ * # prefix = "height",
581
+ * # href = "http://example.com/heights"
582
+ * # }),
583
+ * # value = "tall"
584
+ * # })]
585
+ */
586
+ static VALUE
587
+ rb_xml_node_attribute_nodes(VALUE rb_node)
588
+ {
589
+ xmlNodePtr c_node;
590
+
591
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
592
+
593
+ return noko_xml_node_attrs(c_node);
594
+ }
595
+
596
+
597
+ /*
598
+ * :call-seq: attribute_with_ns(name, namespace) → Nokogiri::XML::Attr
599
+ *
600
+ * :category: Working With Node Attributes
601
+ *
602
+ * [Returns]
603
+ * Attribute (Nokogiri::XML::Attr) belonging to this node with matching +name+ and +namespace+.
604
+ *
605
+ * [Parameters]
606
+ * - +name+ (String): the simple (non-namespace-prefixed) name of the attribute
607
+ * - +namespace+ (String): the URI of the attribute's namespace
608
+ *
609
+ * See related: #attribute
610
+ *
611
+ * *Example:*
612
+ *
613
+ * doc = Nokogiri::XML(<<~EOF)
614
+ * <root xmlns:width='http://example.com/widths'
615
+ * xmlns:height='http://example.com/heights'>
616
+ * <child width:size='broad' height:size='tall'/>
617
+ * </root>
618
+ * EOF
619
+ * doc.at_css("child").attribute_with_ns("size", "http://example.com/widths")
620
+ * # => #(Attr:0x550 {
621
+ * # name = "size",
622
+ * # namespace = #(Namespace:0x564 {
623
+ * # prefix = "width",
624
+ * # href = "http://example.com/widths"
625
+ * # }),
626
+ * # value = "broad"
627
+ * # })
628
+ * doc.at_css("child").attribute_with_ns("size", "http://example.com/heights")
629
+ * # => #(Attr:0x578 {
630
+ * # name = "size",
631
+ * # namespace = #(Namespace:0x58c {
632
+ * # prefix = "height",
633
+ * # href = "http://example.com/heights"
634
+ * # }),
635
+ * # value = "tall"
636
+ * # })
637
+ */
638
+ static VALUE
639
+ rb_xml_node_attribute_with_ns(VALUE self, VALUE name, VALUE namespace)
640
+ {
641
+ xmlNodePtr node;
642
+ xmlAttrPtr prop;
643
+ Noko_Node_Get_Struct(self, xmlNode, node);
644
+ prop = xmlHasNsProp(node, (xmlChar *)StringValueCStr(name),
645
+ NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace));
646
+
647
+ if (! prop) { return Qnil; }
648
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)prop);
649
+ }
650
+
651
+
652
+
653
+ /*
654
+ * call-seq: blank? → Boolean
655
+ *
656
+ * [Returns] +true+ if the node is an empty or whitespace-only text or cdata node, else +false+.
657
+ *
658
+ * *Example:*
659
+ *
660
+ * Nokogiri("<root><child/></root>").root.child.blank? # => false
661
+ * Nokogiri("<root>\t \n</root>").root.child.blank? # => true
662
+ * Nokogiri("<root><![CDATA[\t \n]]></root>").root.child.blank? # => true
663
+ * Nokogiri("<root>not-blank</root>").root.child
664
+ * .tap { |n| n.content = "" }.blank? # => true
665
+ */
666
+ static VALUE
667
+ rb_xml_node_blank_eh(VALUE self)
668
+ {
669
+ xmlNodePtr node;
670
+ Noko_Node_Get_Struct(self, xmlNode, node);
671
+ return (1 == xmlIsBlankNode(node)) ? Qtrue : Qfalse ;
672
+ }
673
+
674
+
675
+ /*
676
+ * :call-seq: child() → Nokogiri::XML::Node
677
+ *
678
+ * :category: Traversing Document Structure
679
+ *
680
+ * [Returns] First of this node's children, or +nil+ if there are no children
681
+ *
682
+ * This is a convenience method and is equivalent to:
683
+ *
684
+ * node.children.first
685
+ *
686
+ * See related: #children
687
+ */
688
+ static VALUE
689
+ rb_xml_node_child(VALUE self)
690
+ {
691
+ xmlNodePtr node, child;
692
+ Noko_Node_Get_Struct(self, xmlNode, node);
693
+
694
+ child = node->children;
695
+ if (!child) { return Qnil; }
696
+
697
+ return noko_xml_node_wrap(Qnil, child);
698
+ }
699
+
700
+
701
+ /*
702
+ * :call-seq: children() → Nokogiri::XML::NodeSet
703
+ *
704
+ * :category: Traversing Document Structure
705
+ *
706
+ * [Returns] Nokogiri::XML::NodeSet containing this node's children.
707
+ */
708
+ static VALUE
709
+ rb_xml_node_children(VALUE self)
710
+ {
711
+ xmlNodePtr node;
712
+ xmlNodePtr child;
713
+ xmlNodeSetPtr set;
714
+ VALUE document;
715
+ VALUE node_set;
716
+
717
+ Noko_Node_Get_Struct(self, xmlNode, node);
718
+
719
+ child = node->children;
720
+ set = xmlXPathNodeSetCreate(child);
721
+
722
+ document = DOC_RUBY_OBJECT(node->doc);
723
+
724
+ if (!child) { return noko_xml_node_set_wrap(set, document); }
725
+
726
+ child = child->next;
727
+ while (NULL != child) {
728
+ xmlXPathNodeSetAddUnique(set, child);
729
+ child = child->next;
730
+ }
731
+
732
+ node_set = noko_xml_node_set_wrap(set, document);
733
+
734
+ return node_set;
735
+ }
736
+
737
+
738
+ /*
739
+ * :call-seq:
740
+ * content() → String
741
+ * inner_text() → String
742
+ * text() → String
743
+ * to_str() → String
744
+ *
745
+ * [Returns]
746
+ * Contents of all the text nodes in this node's subtree, concatenated together into a single
747
+ * String.
748
+ *
749
+ * ⚠ Note that entities will _always_ be expanded in the returned String.
750
+ *
751
+ * See related: #inner_html
752
+ *
753
+ * *Example* of how entities are handled:
754
+ *
755
+ * Note that <tt>&lt;</tt> becomes <tt><</tt> in the returned String.
756
+ *
757
+ * doc = Nokogiri::XML.fragment("<child>a &lt; b</child>")
758
+ * doc.at_css("child").content
759
+ * # => "a < b"
760
+ *
761
+ * *Example* of how a subtree is handled:
762
+ *
763
+ * Note that the <tt><span></tt> tags are omitted and only the text node contents are returned,
764
+ * concatenated into a single string.
765
+ *
766
+ * doc = Nokogiri::XML.fragment("<child><span>first</span> <span>second</span></child>")
767
+ * doc.at_css("child").content
768
+ * # => "first second"
769
+ */
770
+ static VALUE
771
+ rb_xml_node_content(VALUE self)
772
+ {
773
+ xmlNodePtr node;
774
+ xmlChar *content;
775
+
776
+ Noko_Node_Get_Struct(self, xmlNode, node);
777
+
778
+ content = xmlNodeGetContent(node);
779
+ if (content) {
780
+ VALUE rval = NOKOGIRI_STR_NEW2(content);
781
+ xmlFree(content);
782
+ return rval;
783
+ }
784
+ return Qnil;
785
+ }
786
+
787
+
788
+ /*
789
+ * :call-seq: document() → Nokogiri::XML::Document
790
+ *
791
+ * :category: Traversing Document Structure
362
792
  *
363
- * Get the document for this Node
793
+ * [Returns] Parent Nokogiri::XML::Document for this node
364
794
  */
365
- static VALUE document(VALUE self)
795
+ static VALUE
796
+ rb_xml_node_document(VALUE self)
366
797
  {
367
798
  xmlNodePtr node;
368
- Data_Get_Struct(self, xmlNode, node);
799
+ Noko_Node_Get_Struct(self, xmlNode, node);
369
800
  return DOC_RUBY_OBJECT(node->doc);
370
801
  }
371
802
 
372
803
  /*
373
- * call-seq:
374
- * pointer_id
804
+ * :call-seq: pointer_id() → Integer
375
805
  *
376
- * Get the internal pointer number
806
+ * [Returns]
807
+ * A unique id for this node based on the internal memory structures. This method is used by #==
808
+ * to determine node identity.
377
809
  */
378
- static VALUE pointer_id(VALUE self)
810
+ static VALUE
811
+ rb_xml_node_pointer_id(VALUE self)
379
812
  {
380
813
  xmlNodePtr node;
381
- Data_Get_Struct(self, xmlNode, node);
814
+ Noko_Node_Get_Struct(self, xmlNode, node);
382
815
 
383
- return INT2NUM((long)(node));
816
+ return rb_uint2inum((uintptr_t)(node));
384
817
  }
385
818
 
386
819
  /*
387
- * call-seq:
388
- * encode_special_chars(string)
820
+ * :call-seq: encode_special_chars(string) → String
389
821
  *
390
822
  * Encode any special characters in +string+
391
823
  */
392
- static VALUE encode_special_chars(VALUE self, VALUE string)
824
+ static VALUE
825
+ encode_special_chars(VALUE self, VALUE string)
393
826
  {
394
827
  xmlNodePtr node;
395
828
  xmlChar *encoded;
396
829
  VALUE encoded_str;
397
830
 
398
- Data_Get_Struct(self, xmlNode, node);
831
+ Noko_Node_Get_Struct(self, xmlNode, node);
399
832
  encoded = xmlEncodeSpecialChars(
400
833
  node->doc,
401
834
  (const xmlChar *)StringValueCStr(string)
@@ -408,8 +841,8 @@ static VALUE encode_special_chars(VALUE self, VALUE string)
408
841
  }
409
842
 
410
843
  /*
411
- * call-seq:
412
- * create_internal_subset(name, external_id, system_id)
844
+ * :call-seq:
845
+ * create_internal_subset(name, external_id, system_id)
413
846
  *
414
847
  * Create the internal subset of a document.
415
848
  *
@@ -419,17 +852,18 @@ static VALUE encode_special_chars(VALUE self, VALUE string)
419
852
  * doc.create_internal_subset("chapter", nil, "chapter.dtd")
420
853
  * # => <!DOCTYPE chapter SYSTEM "chapter.dtd">
421
854
  */
422
- static VALUE create_internal_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id)
855
+ static VALUE
856
+ create_internal_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id)
423
857
  {
424
858
  xmlNodePtr node;
425
859
  xmlDocPtr doc;
426
860
  xmlDtdPtr dtd;
427
861
 
428
- Data_Get_Struct(self, xmlNode, node);
862
+ Noko_Node_Get_Struct(self, xmlNode, node);
429
863
 
430
864
  doc = node->doc;
431
865
 
432
- if(xmlGetIntSubset(doc)) {
866
+ if (xmlGetIntSubset(doc)) {
433
867
  rb_raise(rb_eRuntimeError, "Document already has an internal subset");
434
868
  }
435
869
 
@@ -440,28 +874,29 @@ static VALUE create_internal_subset(VALUE self, VALUE name, VALUE external_id, V
440
874
  NIL_P(system_id) ? NULL : (const xmlChar *)StringValueCStr(system_id)
441
875
  );
442
876
 
443
- if(!dtd) { return Qnil; }
877
+ if (!dtd) { return Qnil; }
444
878
 
445
- return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)dtd);
879
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
446
880
  }
447
881
 
448
882
  /*
449
- * call-seq:
450
- * create_external_subset(name, external_id, system_id)
883
+ * :call-seq:
884
+ * create_external_subset(name, external_id, system_id)
451
885
  *
452
886
  * Create an external subset
453
887
  */
454
- static VALUE create_external_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id)
888
+ static VALUE
889
+ create_external_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id)
455
890
  {
456
891
  xmlNodePtr node;
457
892
  xmlDocPtr doc;
458
893
  xmlDtdPtr dtd;
459
894
 
460
- Data_Get_Struct(self, xmlNode, node);
895
+ Noko_Node_Get_Struct(self, xmlNode, node);
461
896
 
462
897
  doc = node->doc;
463
898
 
464
- if(doc->extSubset) {
899
+ if (doc->extSubset) {
465
900
  rb_raise(rb_eRuntimeError, "Document already has an external subset");
466
901
  }
467
902
 
@@ -472,129 +907,103 @@ static VALUE create_external_subset(VALUE self, VALUE name, VALUE external_id, V
472
907
  NIL_P(system_id) ? NULL : (const xmlChar *)StringValueCStr(system_id)
473
908
  );
474
909
 
475
- if(!dtd) { return Qnil; }
910
+ if (!dtd) { return Qnil; }
476
911
 
477
- return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)dtd);
912
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
478
913
  }
479
914
 
480
915
  /*
481
- * call-seq:
482
- * external_subset
916
+ * :call-seq:
917
+ * external_subset()
483
918
  *
484
919
  * Get the external subset
485
920
  */
486
- static VALUE external_subset(VALUE self)
921
+ static VALUE
922
+ external_subset(VALUE self)
487
923
  {
488
924
  xmlNodePtr node;
489
925
  xmlDocPtr doc;
490
926
  xmlDtdPtr dtd;
491
927
 
492
- Data_Get_Struct(self, xmlNode, node);
928
+ Noko_Node_Get_Struct(self, xmlNode, node);
493
929
 
494
- if(!node->doc) { return Qnil; }
930
+ if (!node->doc) { return Qnil; }
495
931
 
496
932
  doc = node->doc;
497
933
  dtd = doc->extSubset;
498
934
 
499
- if(!dtd) { return Qnil; }
935
+ if (!dtd) { return Qnil; }
500
936
 
501
- return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)dtd);
937
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
502
938
  }
503
939
 
504
940
  /*
505
- * call-seq:
506
- * internal_subset
941
+ * :call-seq:
942
+ * internal_subset()
507
943
  *
508
944
  * Get the internal subset
509
945
  */
510
- static VALUE internal_subset(VALUE self)
946
+ static VALUE
947
+ internal_subset(VALUE self)
511
948
  {
512
949
  xmlNodePtr node;
513
950
  xmlDocPtr doc;
514
951
  xmlDtdPtr dtd;
515
952
 
516
- Data_Get_Struct(self, xmlNode, node);
953
+ Noko_Node_Get_Struct(self, xmlNode, node);
517
954
 
518
- if(!node->doc) { return Qnil; }
955
+ if (!node->doc) { return Qnil; }
519
956
 
520
957
  doc = node->doc;
521
958
  dtd = xmlGetIntSubset(doc);
522
959
 
523
- if(!dtd) { return Qnil; }
960
+ if (!dtd) { return Qnil; }
524
961
 
525
- return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)dtd);
962
+ return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
526
963
  }
527
964
 
528
- /*
529
- * call-seq:
530
- * dup
531
- * dup(depth)
532
- * dup(depth, new_parent_doc)
533
- *
534
- * Copy this node.
535
- * An optional depth may be passed in. 0 is a shallow copy, 1 (the default) is a deep copy.
536
- * An optional new_parent_doc may also be passed in, which will be the new
537
- * node's parent document. Defaults to the current node's document.
538
- * current document.
539
- */
540
- static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
965
+ /* :nodoc: */
966
+ static VALUE
967
+ rb_xml_node_initialize_copy_with_args(VALUE rb_self, VALUE rb_other, VALUE rb_level, VALUE rb_new_parent_doc)
541
968
  {
542
- VALUE r_level, r_new_parent_doc;
543
- int level;
544
- int n_args;
545
- xmlDocPtr new_parent_doc;
546
- xmlNodePtr node, dup;
969
+ xmlNodePtr c_self, c_other;
970
+ int c_level;
971
+ xmlDocPtr c_new_parent_doc;
972
+ VALUE rb_node_cache;
547
973
 
548
- Data_Get_Struct(self, xmlNode, node);
974
+ Noko_Node_Get_Struct(rb_other, xmlNode, c_other);
975
+ c_level = (int)NUM2INT(rb_level);
976
+ c_new_parent_doc = noko_xml_document_unwrap(rb_new_parent_doc);
549
977
 
550
- n_args = rb_scan_args(argc, argv, "02", &r_level, &r_new_parent_doc);
978
+ c_self = xmlDocCopyNode(c_other, c_new_parent_doc, c_level);
979
+ if (c_self == NULL) { return Qnil; }
551
980
 
552
- if (n_args < 1) {
553
- r_level = INT2NUM((long)1);
554
- }
555
- level = (int)NUM2INT(r_level);
556
-
557
- if (n_args < 2) {
558
- new_parent_doc = node->doc;
559
- } else {
560
- Data_Get_Struct(r_new_parent_doc, xmlDoc, new_parent_doc);
561
- }
981
+ _xml_node_data_ptr_set(rb_self, c_self);
982
+ noko_xml_document_pin_node(c_self);
562
983
 
563
- dup = xmlDocCopyNode(node, new_parent_doc, level);
564
- if(dup == NULL) { return Qnil; }
984
+ rb_node_cache = DOC_NODE_CACHE(c_new_parent_doc);
985
+ rb_ary_push(rb_node_cache, rb_self);
986
+ rb_funcall(rb_new_parent_doc, id_decorate, 1, rb_self);
565
987
 
566
- nokogiri_root_node(dup);
567
-
568
- return Nokogiri_wrap_xml_node(rb_obj_class(self), dup);
988
+ return rb_self;
569
989
  }
570
990
 
571
991
  /*
572
- * call-seq:
573
- * unlink
992
+ * :call-seq:
993
+ * unlink() → self
574
994
  *
575
995
  * Unlink this node from its current context.
576
996
  */
577
- static VALUE unlink_node(VALUE self)
997
+ static VALUE
998
+ unlink_node(VALUE self)
578
999
  {
579
1000
  xmlNodePtr node;
580
- Data_Get_Struct(self, xmlNode, node);
1001
+ Noko_Node_Get_Struct(self, xmlNode, node);
581
1002
  xmlUnlinkNode(node);
582
- nokogiri_root_node(node);
1003
+ noko_xml_document_pin_node(node);
583
1004
  return self;
584
1005
  }
585
1006
 
586
- /*
587
- * call-seq:
588
- * blank?
589
- *
590
- * Is this node blank?
591
- */
592
- static VALUE blank_eh(VALUE self)
593
- {
594
- xmlNodePtr node;
595
- Data_Get_Struct(self, xmlNode, node);
596
- return (1 == xmlIsBlankNode(node)) ? Qtrue : Qfalse ;
597
- }
598
1007
 
599
1008
  /*
600
1009
  * call-seq:
@@ -602,15 +1011,16 @@ static VALUE blank_eh(VALUE self)
602
1011
  *
603
1012
  * Returns the next sibling node
604
1013
  */
605
- static VALUE next_sibling(VALUE self)
1014
+ static VALUE
1015
+ next_sibling(VALUE self)
606
1016
  {
607
1017
  xmlNodePtr node, sibling;
608
- Data_Get_Struct(self, xmlNode, node);
1018
+ Noko_Node_Get_Struct(self, xmlNode, node);
609
1019
 
610
1020
  sibling = node->next;
611
- if(!sibling) { return Qnil; }
1021
+ if (!sibling) { return Qnil; }
612
1022
 
613
- return Nokogiri_wrap_xml_node(Qnil, sibling) ;
1023
+ return noko_xml_node_wrap(Qnil, sibling) ;
614
1024
  }
615
1025
 
616
1026
  /*
@@ -619,15 +1029,16 @@ static VALUE next_sibling(VALUE self)
619
1029
  *
620
1030
  * Returns the previous sibling node
621
1031
  */
622
- static VALUE previous_sibling(VALUE self)
1032
+ static VALUE
1033
+ previous_sibling(VALUE self)
623
1034
  {
624
1035
  xmlNodePtr node, sibling;
625
- Data_Get_Struct(self, xmlNode, node);
1036
+ Noko_Node_Get_Struct(self, xmlNode, node);
626
1037
 
627
1038
  sibling = node->prev;
628
- if(!sibling) { return Qnil; }
1039
+ if (!sibling) { return Qnil; }
629
1040
 
630
- return Nokogiri_wrap_xml_node(Qnil, sibling);
1041
+ return noko_xml_node_wrap(Qnil, sibling);
631
1042
  }
632
1043
 
633
1044
  /*
@@ -636,15 +1047,16 @@ static VALUE previous_sibling(VALUE self)
636
1047
  *
637
1048
  * Returns the next Nokogiri::XML::Element type sibling node.
638
1049
  */
639
- static VALUE next_element(VALUE self)
1050
+ static VALUE
1051
+ next_element(VALUE self)
640
1052
  {
641
1053
  xmlNodePtr node, sibling;
642
- Data_Get_Struct(self, xmlNode, node);
1054
+ Noko_Node_Get_Struct(self, xmlNode, node);
643
1055
 
644
1056
  sibling = xmlNextElementSibling(node);
645
- if(!sibling) { return Qnil; }
1057
+ if (!sibling) { return Qnil; }
646
1058
 
647
- return Nokogiri_wrap_xml_node(Qnil, sibling);
1059
+ return noko_xml_node_wrap(Qnil, sibling);
648
1060
  }
649
1061
 
650
1062
  /*
@@ -653,82 +1065,53 @@ static VALUE next_element(VALUE self)
653
1065
  *
654
1066
  * Returns the previous Nokogiri::XML::Element type sibling node.
655
1067
  */
656
- static VALUE previous_element(VALUE self)
1068
+ static VALUE
1069
+ previous_element(VALUE self)
657
1070
  {
658
1071
  xmlNodePtr node, sibling;
659
- Data_Get_Struct(self, xmlNode, node);
1072
+ Noko_Node_Get_Struct(self, xmlNode, node);
660
1073
 
661
- /*
662
- * note that we don't use xmlPreviousElementSibling here because it's buggy pre-2.7.7.
663
- */
664
- sibling = node->prev;
665
- if(!sibling) { return Qnil; }
666
-
667
- while(sibling && sibling->type != XML_ELEMENT_NODE) {
668
- sibling = sibling->prev;
669
- }
1074
+ sibling = xmlPreviousElementSibling(node);
1075
+ if (!sibling) { return Qnil; }
670
1076
 
671
- return sibling ? Nokogiri_wrap_xml_node(Qnil, sibling) : Qnil ;
1077
+ return noko_xml_node_wrap(Qnil, sibling);
672
1078
  }
673
1079
 
674
1080
  /* :nodoc: */
675
- static VALUE replace(VALUE self, VALUE new_node)
1081
+ static VALUE
1082
+ replace(VALUE self, VALUE new_node)
676
1083
  {
677
1084
  VALUE reparent = reparent_node_with(self, new_node, xmlReplaceNodeWrapper);
678
1085
 
679
1086
  xmlNodePtr pivot;
680
- Data_Get_Struct(self, xmlNode, pivot);
681
- nokogiri_root_node(pivot);
1087
+ Noko_Node_Get_Struct(self, xmlNode, pivot);
1088
+ noko_xml_document_pin_node(pivot);
682
1089
 
683
1090
  return reparent;
684
1091
  }
685
1092
 
686
1093
  /*
687
- * call-seq:
688
- * children
1094
+ * :call-seq:
1095
+ * element_children() → NodeSet
1096
+ * elements() → NodeSet
689
1097
  *
690
- * Get the list of children for this node as a NodeSet
691
- */
692
- static VALUE children(VALUE self)
693
- {
694
- xmlNodePtr node;
695
- xmlNodePtr child;
696
- xmlNodeSetPtr set;
697
- VALUE document;
698
- VALUE node_set;
699
-
700
- Data_Get_Struct(self, xmlNode, node);
701
-
702
- child = node->children;
703
- set = xmlXPathNodeSetCreate(child);
704
-
705
- document = DOC_RUBY_OBJECT(node->doc);
706
-
707
- if(!child) { return Nokogiri_wrap_xml_node_set(set, document); }
708
-
709
- child = child->next;
710
- while(NULL != child) {
711
- xmlXPathNodeSetAddUnique(set, child);
712
- child = child->next;
713
- }
714
-
715
- node_set = Nokogiri_wrap_xml_node_set(set, document);
716
-
717
- return node_set;
718
- }
719
-
720
- /*
721
- * call-seq:
722
- * element_children
1098
+ * [Returns]
1099
+ * The node's child elements as a NodeSet. Only children that are elements will be returned, which
1100
+ * notably excludes Text nodes.
723
1101
  *
724
- * Get the list of children for this node as a NodeSet. All nodes will be
725
- * element nodes.
1102
+ * *Example:*
726
1103
  *
727
- * Example:
1104
+ * Note that #children returns the Text node "hello" while #element_children does not.
728
1105
  *
729
- * @doc.root.element_children.all? { |x| x.element? } # => true
1106
+ * div = Nokogiri::HTML5("<div>hello<span>world</span>").at_css("div")
1107
+ * div.element_children
1108
+ * # => [#<Nokogiri::XML::Element:0x50 name="span" children=[#<Nokogiri::XML::Text:0x3c "world">]>]
1109
+ * div.children
1110
+ * # => [#<Nokogiri::XML::Text:0x64 "hello">,
1111
+ * # #<Nokogiri::XML::Element:0x50 name="span" children=[#<Nokogiri::XML::Text:0x3c "world">]>]
730
1112
  */
731
- static VALUE element_children(VALUE self)
1113
+ static VALUE
1114
+ rb_xml_node_element_children(VALUE self)
732
1115
  {
733
1116
  xmlNodePtr node;
734
1117
  xmlNodePtr child;
@@ -736,83 +1119,78 @@ static VALUE element_children(VALUE self)
736
1119
  VALUE document;
737
1120
  VALUE node_set;
738
1121
 
739
- Data_Get_Struct(self, xmlNode, node);
1122
+ Noko_Node_Get_Struct(self, xmlNode, node);
740
1123
 
741
1124
  child = xmlFirstElementChild(node);
742
1125
  set = xmlXPathNodeSetCreate(child);
743
1126
 
744
1127
  document = DOC_RUBY_OBJECT(node->doc);
745
1128
 
746
- if(!child) { return Nokogiri_wrap_xml_node_set(set, document); }
1129
+ if (!child) { return noko_xml_node_set_wrap(set, document); }
747
1130
 
748
1131
  child = xmlNextElementSibling(child);
749
- while(NULL != child) {
1132
+ while (NULL != child) {
750
1133
  xmlXPathNodeSetAddUnique(set, child);
751
1134
  child = xmlNextElementSibling(child);
752
1135
  }
753
1136
 
754
- node_set = Nokogiri_wrap_xml_node_set(set, document);
1137
+ node_set = noko_xml_node_set_wrap(set, document);
755
1138
 
756
1139
  return node_set;
757
1140
  }
758
1141
 
759
1142
  /*
760
- * call-seq:
761
- * child
1143
+ * :call-seq:
1144
+ * first_element_child() → Node
762
1145
  *
763
- * Returns the child node
764
- */
765
- static VALUE child(VALUE self)
766
- {
767
- xmlNodePtr node, child;
768
- Data_Get_Struct(self, xmlNode, node);
769
-
770
- child = node->children;
771
- if(!child) { return Qnil; }
772
-
773
- return Nokogiri_wrap_xml_node(Qnil, child);
774
- }
775
-
776
- /*
777
- * call-seq:
778
- * first_element_child
1146
+ * [Returns] The first child Node that is an element.
779
1147
  *
780
- * Returns the first child node of this node that is an element.
1148
+ * *Example:*
781
1149
  *
782
- * Example:
1150
+ * Note that the "hello" child, which is a Text node, is skipped and the <tt><span></tt> element is
1151
+ * returned.
783
1152
  *
784
- * @doc.root.first_element_child.element? # => true
1153
+ * div = Nokogiri::HTML5("<div>hello<span>world</span>").at_css("div")
1154
+ * div.first_element_child
1155
+ * # => #(Element:0x3c { name = "span", children = [ #(Text "world")] })
785
1156
  */
786
- static VALUE first_element_child(VALUE self)
1157
+ static VALUE
1158
+ rb_xml_node_first_element_child(VALUE self)
787
1159
  {
788
1160
  xmlNodePtr node, child;
789
- Data_Get_Struct(self, xmlNode, node);
1161
+ Noko_Node_Get_Struct(self, xmlNode, node);
790
1162
 
791
1163
  child = xmlFirstElementChild(node);
792
- if(!child) { return Qnil; }
1164
+ if (!child) { return Qnil; }
793
1165
 
794
- return Nokogiri_wrap_xml_node(Qnil, child);
1166
+ return noko_xml_node_wrap(Qnil, child);
795
1167
  }
796
1168
 
797
1169
  /*
798
- * call-seq:
799
- * last_element_child
1170
+ * :call-seq:
1171
+ * last_element_child() → Node
800
1172
  *
801
- * Returns the last child node of this node that is an element.
1173
+ * [Returns] The last child Node that is an element.
802
1174
  *
803
- * Example:
1175
+ * *Example:*
804
1176
  *
805
- * @doc.root.last_element_child.element? # => true
1177
+ * Note that the "skip" child, which is a Text node, is skipped and the <tt><span>yes</span></tt>
1178
+ * element is returned.
1179
+ *
1180
+ * div = Nokogiri::HTML5("<div><span>no</span><span>yes</span>skip</div>").at_css("div")
1181
+ * div.last_element_child
1182
+ * # => #(Element:0x3c { name = "span", children = [ #(Text "yes")] })
806
1183
  */
807
- static VALUE last_element_child(VALUE self)
1184
+ static VALUE
1185
+ rb_xml_node_last_element_child(VALUE self)
808
1186
  {
809
1187
  xmlNodePtr node, child;
810
- Data_Get_Struct(self, xmlNode, node);
1188
+ Noko_Node_Get_Struct(self, xmlNode, node);
811
1189
 
812
1190
  child = xmlLastElementChild(node);
813
- if(!child) { return Qnil; }
1191
+ if (!child) { return Qnil; }
814
1192
 
815
- return Nokogiri_wrap_xml_node(Qnil, child);
1193
+ return noko_xml_node_wrap(Qnil, child);
816
1194
  }
817
1195
 
818
1196
  /*
@@ -821,11 +1199,12 @@ static VALUE last_element_child(VALUE self)
821
1199
  *
822
1200
  * Returns true if +attribute+ is set
823
1201
  */
824
- static VALUE key_eh(VALUE self, VALUE attribute)
1202
+ static VALUE
1203
+ key_eh(VALUE self, VALUE attribute)
825
1204
  {
826
1205
  xmlNodePtr node;
827
- Data_Get_Struct(self, xmlNode, node);
828
- if(xmlHasProp(node, (xmlChar *)StringValueCStr(attribute))) {
1206
+ Noko_Node_Get_Struct(self, xmlNode, node);
1207
+ if (xmlHasProp(node, (xmlChar *)StringValueCStr(attribute))) {
829
1208
  return Qtrue;
830
1209
  }
831
1210
  return Qfalse;
@@ -837,12 +1216,13 @@ static VALUE key_eh(VALUE self, VALUE attribute)
837
1216
  *
838
1217
  * Returns true if +attribute+ is set with +namespace+
839
1218
  */
840
- static VALUE namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace)
1219
+ static VALUE
1220
+ namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace)
841
1221
  {
842
1222
  xmlNodePtr node;
843
- Data_Get_Struct(self, xmlNode, node);
844
- if(xmlHasNsProp(node, (xmlChar *)StringValueCStr(attribute),
845
- NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace))) {
1223
+ Noko_Node_Get_Struct(self, xmlNode, node);
1224
+ if (xmlHasNsProp(node, (xmlChar *)StringValueCStr(attribute),
1225
+ NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace))) {
846
1226
  return Qtrue;
847
1227
  }
848
1228
  return Qfalse;
@@ -854,11 +1234,12 @@ static VALUE namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace)
854
1234
  *
855
1235
  * Set the +property+ to +value+
856
1236
  */
857
- static VALUE set(VALUE self, VALUE property, VALUE value)
1237
+ static VALUE
1238
+ set(VALUE self, VALUE property, VALUE value)
858
1239
  {
859
1240
  xmlNodePtr node, cur;
860
1241
  xmlAttrPtr prop;
861
- Data_Get_Struct(self, xmlNode, node);
1242
+ Noko_Node_Get_Struct(self, xmlNode, node);
862
1243
 
863
1244
  /* If a matching attribute node already exists, then xmlSetProp will destroy
864
1245
  * the existing node's children. However, if Nokogiri has a node object
@@ -867,13 +1248,13 @@ static VALUE set(VALUE self, VALUE property, VALUE value)
867
1248
  * We can avoid this by unlinking these nodes first.
868
1249
  */
869
1250
  if (node->type != XML_ELEMENT_NODE) {
870
- return(Qnil);
1251
+ return (Qnil);
871
1252
  }
872
1253
  prop = xmlHasProp(node, (xmlChar *)StringValueCStr(property));
873
1254
  if (prop && prop->children) {
874
1255
  for (cur = prop->children; cur; cur = cur->next) {
875
1256
  if (cur->_private) {
876
- nokogiri_root_node(cur);
1257
+ noko_xml_document_pin_node(cur);
877
1258
  xmlUnlinkNode(cur);
878
1259
  }
879
1260
  }
@@ -891,7 +1272,8 @@ static VALUE set(VALUE self, VALUE property, VALUE value)
891
1272
  *
892
1273
  * Get the value for +attribute+
893
1274
  */
894
- static VALUE get(VALUE self, VALUE rattribute)
1275
+ static VALUE
1276
+ get(VALUE self, VALUE rattribute)
895
1277
  {
896
1278
  xmlNodePtr node;
897
1279
  xmlChar *value = 0;
@@ -902,10 +1284,10 @@ static VALUE get(VALUE self, VALUE rattribute)
902
1284
 
903
1285
  if (NIL_P(rattribute)) { return Qnil; }
904
1286
 
905
- Data_Get_Struct(self, xmlNode, node);
1287
+ Noko_Node_Get_Struct(self, xmlNode, node);
906
1288
  attribute = xmlCharStrdup(StringValueCStr(rattribute));
907
1289
 
908
- colon = (xmlChar *)(uintptr_t)xmlStrchr(attribute, (const xmlChar)':');
1290
+ colon = DISCARD_CONST_QUAL_XMLCHAR(xmlStrchr(attribute, (const xmlChar)':'));
909
1291
  if (colon) {
910
1292
  /* split the attribute string into separate prefix and name by
911
1293
  * null-terminating the prefix at the colon */
@@ -917,7 +1299,7 @@ static VALUE get(VALUE self, VALUE rattribute)
917
1299
  if (ns) {
918
1300
  value = xmlGetNsProp(node, attr_name, ns->href);
919
1301
  } else {
920
- value = xmlGetProp(node, (xmlChar*)StringValueCStr(rattribute));
1302
+ value = xmlGetProp(node, (xmlChar *)StringValueCStr(rattribute));
921
1303
  }
922
1304
  } else {
923
1305
  value = xmlGetNoNsProp(node, attribute);
@@ -938,15 +1320,16 @@ static VALUE get(VALUE self, VALUE rattribute)
938
1320
  *
939
1321
  * Set the namespace to +namespace+
940
1322
  */
941
- static VALUE set_namespace(VALUE self, VALUE namespace)
1323
+ static VALUE
1324
+ set_namespace(VALUE self, VALUE namespace)
942
1325
  {
943
1326
  xmlNodePtr node;
944
1327
  xmlNsPtr ns = NULL;
945
1328
 
946
- Data_Get_Struct(self, xmlNode, node);
1329
+ Noko_Node_Get_Struct(self, xmlNode, node);
947
1330
 
948
- if(!NIL_P(namespace)) {
949
- Data_Get_Struct(namespace, xmlNs, ns);
1331
+ if (!NIL_P(namespace)) {
1332
+ Noko_Namespace_Get_Struct(namespace, xmlNs, ns);
950
1333
  }
951
1334
 
952
1335
  xmlSetNs(node, ns);
@@ -955,138 +1338,140 @@ static VALUE set_namespace(VALUE self, VALUE namespace)
955
1338
  }
956
1339
 
957
1340
  /*
958
- * call-seq:
959
- * attribute(name)
1341
+ * :call-seq:
1342
+ * namespace() → Namespace
960
1343
  *
961
- * Get the attribute node with +name+
962
- */
963
- static VALUE attr(VALUE self, VALUE name)
964
- {
965
- xmlNodePtr node;
966
- xmlAttrPtr prop;
967
- Data_Get_Struct(self, xmlNode, node);
968
- prop = xmlHasProp(node, (xmlChar *)StringValueCStr(name));
969
-
970
- if(! prop) { return Qnil; }
971
- return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)prop);
972
- }
973
-
974
- /*
975
- * call-seq:
976
- * attribute_with_ns(name, namespace)
1344
+ * [Returns] The Namespace of the element or attribute node, or +nil+ if there is no namespace.
977
1345
  *
978
- * Get the attribute node with +name+ and +namespace+
979
- */
980
- static VALUE attribute_with_ns(VALUE self, VALUE name, VALUE namespace)
981
- {
982
- xmlNodePtr node;
983
- xmlAttrPtr prop;
984
- Data_Get_Struct(self, xmlNode, node);
985
- prop = xmlHasNsProp(node, (xmlChar *)StringValueCStr(name),
986
- NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace));
987
-
988
- if(! prop) { return Qnil; }
989
- return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)prop);
990
- }
991
-
992
- /*
993
- * call-seq:
994
- * attribute_nodes()
1346
+ * *Example:*
995
1347
  *
996
- * returns a list containing the Node attributes.
1348
+ * doc = Nokogiri::XML(<<~EOF)
1349
+ * <root>
1350
+ * <first/>
1351
+ * <second xmlns="http://example.com/child"/>
1352
+ * <foo:third xmlns:foo="http://example.com/foo"/>
1353
+ * </root>
1354
+ * EOF
1355
+ * doc.at_xpath("//first").namespace
1356
+ * # => nil
1357
+ * doc.at_xpath("//xmlns:second", "xmlns" => "http://example.com/child").namespace
1358
+ * # => #(Namespace:0x3c { href = "http://example.com/child" })
1359
+ * doc.at_xpath("//foo:third", "foo" => "http://example.com/foo").namespace
1360
+ * # => #(Namespace:0x50 { prefix = "foo", href = "http://example.com/foo" })
997
1361
  */
998
- static VALUE attribute_nodes(VALUE self)
1362
+ static VALUE
1363
+ rb_xml_node_namespace(VALUE rb_node)
999
1364
  {
1000
- /* this code in the mode of xmlHasProp() */
1001
- xmlNodePtr node;
1002
- VALUE attr;
1003
-
1004
- Data_Get_Struct(self, xmlNode, node);
1365
+ xmlNodePtr c_node ;
1366
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1005
1367
 
1006
- attr = rb_ary_new();
1007
- Nokogiri_xml_node_properties(node, attr);
1368
+ if (c_node->ns) {
1369
+ return noko_xml_namespace_wrap(c_node->ns, c_node->doc);
1370
+ }
1008
1371
 
1009
- return attr ;
1372
+ return Qnil ;
1010
1373
  }
1011
1374
 
1012
-
1013
1375
  /*
1014
- * call-seq:
1015
- * namespace()
1376
+ * :call-seq:
1377
+ * namespace_definitions() → Array<Nokogiri::XML::Namespace>
1016
1378
  *
1017
- * returns the namespace of the element or attribute node as a Namespace
1018
- * object, or nil if there is no namespace for the element or attribute.
1019
- */
1020
- static VALUE namespace(VALUE self)
1021
- {
1022
- xmlNodePtr node ;
1023
- Data_Get_Struct(self, xmlNode, node);
1024
-
1025
- if (node->ns) {
1026
- return Nokogiri_wrap_xml_namespace(node->doc, node->ns);
1027
- }
1028
-
1029
- return Qnil ;
1030
- }
1031
-
1032
- /*
1033
- * call-seq:
1034
- * namespace_definitions()
1379
+ * [Returns]
1380
+ * Namespaces that are defined directly on this node, as an Array of Namespace objects. The array
1381
+ * will be empty if no namespaces are defined on this node.
1035
1382
  *
1036
- * returns namespaces defined on self element directly, as an array of Namespace objects. Includes both a default namespace (as in"xmlns="), and prefixed namespaces (as in "xmlns:prefix=").
1383
+ * *Example:*
1384
+ *
1385
+ * doc = Nokogiri::XML(<<~EOF)
1386
+ * <root xmlns="http://example.com/root">
1387
+ * <first/>
1388
+ * <second xmlns="http://example.com/child" xmlns:unused="http://example.com/unused"/>
1389
+ * <foo:third xmlns:foo="http://example.com/foo"/>
1390
+ * </root>
1391
+ * EOF
1392
+ * doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_definitions
1393
+ * # => []
1394
+ * doc.at_xpath("//xmlns:second", "xmlns" => "http://example.com/child").namespace_definitions
1395
+ * # => [#(Namespace:0x3c { href = "http://example.com/child" }),
1396
+ * # #(Namespace:0x50 {
1397
+ * # prefix = "unused",
1398
+ * # href = "http://example.com/unused"
1399
+ * # })]
1400
+ * doc.at_xpath("//foo:third", "foo" => "http://example.com/foo").namespace_definitions
1401
+ * # => [#(Namespace:0x64 { prefix = "foo", href = "http://example.com/foo" })]
1037
1402
  */
1038
- static VALUE namespace_definitions(VALUE self)
1403
+ static VALUE
1404
+ namespace_definitions(VALUE rb_node)
1039
1405
  {
1040
1406
  /* this code in the mode of xmlHasProp() */
1041
- xmlNodePtr node ;
1042
- VALUE list;
1043
- xmlNsPtr ns;
1044
-
1045
- Data_Get_Struct(self, xmlNode, node);
1046
-
1047
- list = rb_ary_new();
1407
+ xmlNodePtr c_node ;
1408
+ xmlNsPtr c_namespace;
1409
+ VALUE definitions = rb_ary_new();
1048
1410
 
1049
- ns = node->nsDef;
1411
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1050
1412
 
1051
- if(!ns) { return list; }
1413
+ c_namespace = c_node->nsDef;
1414
+ if (!c_namespace) {
1415
+ return definitions;
1416
+ }
1052
1417
 
1053
- while(NULL != ns) {
1054
- rb_ary_push(list, Nokogiri_wrap_xml_namespace(node->doc, ns));
1055
- ns = ns->next;
1418
+ while (c_namespace != NULL) {
1419
+ rb_ary_push(definitions, noko_xml_namespace_wrap(c_namespace, c_node->doc));
1420
+ c_namespace = c_namespace->next;
1056
1421
  }
1057
1422
 
1058
- return list;
1423
+ return definitions;
1059
1424
  }
1060
1425
 
1061
1426
  /*
1062
- * call-seq:
1063
- * namespace_scopes()
1427
+ * :call-seq:
1428
+ * namespace_scopes() → Array<Nokogiri::XML::Namespace>
1429
+ *
1430
+ * [Returns] Array of all the Namespaces on this node and its ancestors.
1431
+ *
1432
+ * See also #namespaces
1064
1433
  *
1065
- * returns namespaces in scope for self -- those defined on self element
1066
- * directly or any ancestor node -- as an array of Namespace objects. Default
1067
- * namespaces ("xmlns=" style) for self are included in this array; Default
1068
- * namespaces for ancestors, however, are not. See also #namespaces
1434
+ * *Example:*
1435
+ *
1436
+ * doc = Nokogiri::XML(<<~EOF)
1437
+ * <root xmlns="http://example.com/root" xmlns:bar="http://example.com/bar">
1438
+ * <first/>
1439
+ * <second xmlns="http://example.com/child"/>
1440
+ * <third xmlns:foo="http://example.com/foo"/>
1441
+ * </root>
1442
+ * EOF
1443
+ * doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_scopes
1444
+ * # => [#(Namespace:0x3c { href = "http://example.com/root" }),
1445
+ * # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })]
1446
+ * doc.at_xpath("//child:second", "child" => "http://example.com/child").namespace_scopes
1447
+ * # => [#(Namespace:0x64 { href = "http://example.com/child" }),
1448
+ * # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })]
1449
+ * doc.at_xpath("//root:third", "root" => "http://example.com/root").namespace_scopes
1450
+ * # => [#(Namespace:0x78 { prefix = "foo", href = "http://example.com/foo" }),
1451
+ * # #(Namespace:0x3c { href = "http://example.com/root" }),
1452
+ * # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })]
1069
1453
  */
1070
- static VALUE namespace_scopes(VALUE self)
1454
+ static VALUE
1455
+ rb_xml_node_namespace_scopes(VALUE rb_node)
1071
1456
  {
1072
- xmlNodePtr node ;
1073
- VALUE list;
1074
- xmlNsPtr *ns_list;
1457
+ xmlNodePtr c_node ;
1458
+ xmlNsPtr *namespaces;
1459
+ VALUE scopes = rb_ary_new();
1075
1460
  int j;
1076
1461
 
1077
- Data_Get_Struct(self, xmlNode, node);
1462
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1078
1463
 
1079
- list = rb_ary_new();
1080
- ns_list = xmlGetNsList(node->doc, node);
1081
-
1082
- if(!ns_list) { return list; }
1464
+ namespaces = xmlGetNsList(c_node->doc, c_node);
1465
+ if (!namespaces) {
1466
+ return scopes;
1467
+ }
1083
1468
 
1084
- for (j = 0 ; ns_list[j] != NULL ; ++j) {
1085
- rb_ary_push(list, Nokogiri_wrap_xml_namespace(node->doc, ns_list[j]));
1469
+ for (j = 0 ; namespaces[j] != NULL ; ++j) {
1470
+ rb_ary_push(scopes, noko_xml_namespace_wrap(namespaces[j], c_node->doc));
1086
1471
  }
1087
1472
 
1088
- xmlFree(ns_list);
1089
- return list;
1473
+ xmlFree(namespaces);
1474
+ return scopes;
1090
1475
  }
1091
1476
 
1092
1477
  /*
@@ -1095,29 +1480,66 @@ static VALUE namespace_scopes(VALUE self)
1095
1480
  *
1096
1481
  * Get the type for this Node
1097
1482
  */
1098
- static VALUE node_type(VALUE self)
1483
+ static VALUE
1484
+ node_type(VALUE self)
1099
1485
  {
1100
1486
  xmlNodePtr node;
1101
- Data_Get_Struct(self, xmlNode, node);
1102
- return INT2NUM((long)node->type);
1487
+ Noko_Node_Get_Struct(self, xmlNode, node);
1488
+ return INT2NUM(node->type);
1103
1489
  }
1104
1490
 
1105
1491
  /*
1106
1492
  * call-seq:
1107
- * content=
1493
+ * native_content=(input)
1494
+ *
1495
+ * Set the content of this node to +input+.
1496
+ *
1497
+ * [Parameters]
1498
+ * - +input+ (String) The new content for this node.
1499
+ *
1500
+ * ⚠ This method behaves differently depending on the node type. For Text, CDATA, Comment, and
1501
+ * ProcessingInstruction nodes, it treats the input as raw content, which means that the final DOM
1502
+ * will contain the entity-escaped version of the input (see example below). For Element and Attr
1503
+ * nodes, it treats the input as parsed content and expects it to be valid markup that is already
1504
+ * entity-escaped.
1505
+ *
1506
+ * 💡 Use Node#content= for a more consistent API across node types.
1507
+ *
1508
+ * [Example]
1509
+ * Note the behavior differences of this method between Text and Element nodes:
1510
+ *
1511
+ * doc = Nokogiri::HTML::Document.parse(<<~HTML)
1512
+ * <html>
1513
+ * <body>
1514
+ * <div id="first">asdf</div>
1515
+ * <div id="second">asdf</div>
1516
+ * HTML
1517
+ *
1518
+ * text_node = doc.at_css("div#first").children.first
1519
+ * div_node = doc.at_css("div#second")
1520
+ *
1521
+ * value = "You &amp; Me"
1108
1522
  *
1109
- * Set the content for this Node
1523
+ * text_node.native_content = value
1524
+ * div_node.native_content = value
1525
+ *
1526
+ * doc.css("div").to_html
1527
+ * # => "<div id=\"first\">You &amp;amp; Me</div>
1528
+ * # <div id=\"second\">You &amp; Me</div>"
1529
+ *
1530
+ * See also: #content=
1110
1531
  */
1111
- static VALUE set_native_content(VALUE self, VALUE content)
1532
+ static VALUE
1533
+ set_native_content(VALUE self, VALUE content)
1112
1534
  {
1113
1535
  xmlNodePtr node, child, next ;
1114
- Data_Get_Struct(self, xmlNode, node);
1536
+ Noko_Node_Get_Struct(self, xmlNode, node);
1115
1537
 
1116
1538
  child = node->children;
1117
1539
  while (NULL != child) {
1118
1540
  next = child->next ;
1119
1541
  xmlUnlinkNode(child) ;
1120
- nokogiri_root_node(child);
1542
+ noko_xml_document_pin_node(child);
1121
1543
  child = next ;
1122
1544
  }
1123
1545
 
@@ -1125,42 +1547,20 @@ static VALUE set_native_content(VALUE self, VALUE content)
1125
1547
  return content;
1126
1548
  }
1127
1549
 
1128
- /*
1129
- * call-seq:
1130
- * content
1131
- *
1132
- * Returns the plaintext content for this Node. Note that entities will always
1133
- * be expanded in the returned string.
1134
- */
1135
- static VALUE get_native_content(VALUE self)
1136
- {
1137
- xmlNodePtr node;
1138
- xmlChar * content;
1139
-
1140
- Data_Get_Struct(self, xmlNode, node);
1141
-
1142
- content = xmlNodeGetContent(node);
1143
- if(content) {
1144
- VALUE rval = NOKOGIRI_STR_NEW2(content);
1145
- xmlFree(content);
1146
- return rval;
1147
- }
1148
- return Qnil;
1149
- }
1150
-
1151
1550
  /*
1152
1551
  * call-seq:
1153
1552
  * lang=
1154
1553
  *
1155
1554
  * Set the language of a node, i.e. the values of the xml:lang attribute.
1156
1555
  */
1157
- static VALUE set_lang(VALUE self_rb, VALUE lang_rb)
1556
+ static VALUE
1557
+ set_lang(VALUE self_rb, VALUE lang_rb)
1158
1558
  {
1159
1559
  xmlNodePtr self ;
1160
- xmlChar* lang ;
1560
+ xmlChar *lang ;
1161
1561
 
1162
- Data_Get_Struct(self_rb, xmlNode, self);
1163
- lang = (xmlChar*)StringValueCStr(lang_rb);
1562
+ Noko_Node_Get_Struct(self_rb, xmlNode, self);
1563
+ lang = (xmlChar *)StringValueCStr(lang_rb);
1164
1564
 
1165
1565
  xmlNodeSetLang(self, lang);
1166
1566
 
@@ -1174,13 +1574,14 @@ static VALUE set_lang(VALUE self_rb, VALUE lang_rb)
1174
1574
  * Searches the language of a node, i.e. the values of the xml:lang attribute or
1175
1575
  * the one carried by the nearest ancestor.
1176
1576
  */
1177
- static VALUE get_lang(VALUE self_rb)
1577
+ static VALUE
1578
+ get_lang(VALUE self_rb)
1178
1579
  {
1179
1580
  xmlNodePtr self ;
1180
- xmlChar* lang ;
1581
+ xmlChar *lang ;
1181
1582
  VALUE lang_rb ;
1182
1583
 
1183
- Data_Get_Struct(self_rb, xmlNode, self);
1584
+ Noko_Node_Get_Struct(self_rb, xmlNode, self);
1184
1585
 
1185
1586
  lang = xmlNodeGetLang(self);
1186
1587
  if (lang) {
@@ -1193,7 +1594,8 @@ static VALUE get_lang(VALUE self_rb)
1193
1594
  }
1194
1595
 
1195
1596
  /* :nodoc: */
1196
- static VALUE add_child(VALUE self, VALUE new_child)
1597
+ static VALUE
1598
+ add_child(VALUE self, VALUE new_child)
1197
1599
  {
1198
1600
  return reparent_node_with(self, new_child, xmlAddChild);
1199
1601
  }
@@ -1204,15 +1606,16 @@ static VALUE add_child(VALUE self, VALUE new_child)
1204
1606
  *
1205
1607
  * Get the parent Node for this Node
1206
1608
  */
1207
- static VALUE get_parent(VALUE self)
1609
+ static VALUE
1610
+ get_parent(VALUE self)
1208
1611
  {
1209
1612
  xmlNodePtr node, parent;
1210
- Data_Get_Struct(self, xmlNode, node);
1613
+ Noko_Node_Get_Struct(self, xmlNode, node);
1211
1614
 
1212
1615
  parent = node->parent;
1213
- if(!parent) { return Qnil; }
1616
+ if (!parent) { return Qnil; }
1214
1617
 
1215
- return Nokogiri_wrap_xml_node(Qnil, parent) ;
1618
+ return noko_xml_node_wrap(Qnil, parent) ;
1216
1619
  }
1217
1620
 
1218
1621
  /*
@@ -1221,11 +1624,12 @@ static VALUE get_parent(VALUE self)
1221
1624
  *
1222
1625
  * Set the name for this Node
1223
1626
  */
1224
- static VALUE set_name(VALUE self, VALUE new_name)
1627
+ static VALUE
1628
+ set_name(VALUE self, VALUE new_name)
1225
1629
  {
1226
1630
  xmlNodePtr node;
1227
- Data_Get_Struct(self, xmlNode, node);
1228
- xmlNodeSetName(node, (xmlChar*)StringValueCStr(new_name));
1631
+ Noko_Node_Get_Struct(self, xmlNode, node);
1632
+ xmlNodeSetName(node, (xmlChar *)StringValueCStr(new_name));
1229
1633
  return new_name;
1230
1634
  }
1231
1635
 
@@ -1235,11 +1639,12 @@ static VALUE set_name(VALUE self, VALUE new_name)
1235
1639
  *
1236
1640
  * Returns the name for this Node
1237
1641
  */
1238
- static VALUE get_name(VALUE self)
1642
+ static VALUE
1643
+ get_name(VALUE self)
1239
1644
  {
1240
1645
  xmlNodePtr node;
1241
- Data_Get_Struct(self, xmlNode, node);
1242
- if(node->name) {
1646
+ Noko_Node_Get_Struct(self, xmlNode, node);
1647
+ if (node->name) {
1243
1648
  return NOKOGIRI_STR_NEW2(node->name);
1244
1649
  }
1245
1650
  return Qnil;
@@ -1251,28 +1656,39 @@ static VALUE get_name(VALUE self)
1251
1656
  *
1252
1657
  * Returns the path associated with this Node
1253
1658
  */
1254
- static VALUE path(VALUE self)
1659
+ static VALUE
1660
+ rb_xml_node_path(VALUE rb_node)
1255
1661
  {
1256
- xmlNodePtr node;
1257
- xmlChar *path ;
1662
+ xmlNodePtr c_node;
1663
+ xmlChar *c_path ;
1258
1664
  VALUE rval;
1259
1665
 
1260
- Data_Get_Struct(self, xmlNode, node);
1666
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1667
+
1668
+ c_path = xmlGetNodePath(c_node);
1669
+ if (c_path == NULL) {
1670
+ // see https://github.com/sparklemotion/nokogiri/issues/2250
1671
+ // this behavior is clearly undesirable, but is what libxml <= 2.9.10 returned, and so we
1672
+ // do this for now to preserve the behavior across libxml2 versions.
1673
+ rval = NOKOGIRI_STR_NEW2("?");
1674
+ } else {
1675
+ rval = NOKOGIRI_STR_NEW2(c_path);
1676
+ xmlFree(c_path);
1677
+ }
1261
1678
 
1262
- path = xmlGetNodePath(node);
1263
- rval = NOKOGIRI_STR_NEW2(path);
1264
- xmlFree(path);
1265
1679
  return rval ;
1266
1680
  }
1267
1681
 
1268
1682
  /* :nodoc: */
1269
- static VALUE add_next_sibling(VALUE self, VALUE new_sibling)
1683
+ static VALUE
1684
+ add_next_sibling(VALUE self, VALUE new_sibling)
1270
1685
  {
1271
1686
  return reparent_node_with(self, new_sibling, xmlAddNextSibling) ;
1272
1687
  }
1273
1688
 
1274
1689
  /* :nodoc: */
1275
- static VALUE add_previous_sibling(VALUE self, VALUE new_sibling)
1690
+ static VALUE
1691
+ add_previous_sibling(VALUE self, VALUE new_sibling)
1276
1692
  {
1277
1693
  return reparent_node_with(self, new_sibling, xmlAddPrevSibling) ;
1278
1694
  }
@@ -1283,7 +1699,8 @@ static VALUE add_previous_sibling(VALUE self, VALUE new_sibling)
1283
1699
  *
1284
1700
  * Write this Node to +io+ with +encoding+ and +options+
1285
1701
  */
1286
- static VALUE native_write_to(
1702
+ static VALUE
1703
+ native_write_to(
1287
1704
  VALUE self,
1288
1705
  VALUE io,
1289
1706
  VALUE encoding,
@@ -1292,10 +1709,10 @@ static VALUE native_write_to(
1292
1709
  )
1293
1710
  {
1294
1711
  xmlNodePtr node;
1295
- const char * before_indent;
1712
+ const char *before_indent;
1296
1713
  xmlSaveCtxtPtr savectx;
1297
1714
 
1298
- Data_Get_Struct(self, xmlNode, node);
1715
+ Noko_Node_Get_Struct(self, xmlNode, node);
1299
1716
 
1300
1717
  xmlIndentTreeOutput = 1;
1301
1718
 
@@ -1304,8 +1721,8 @@ static VALUE native_write_to(
1304
1721
  xmlTreeIndentString = StringValueCStr(indent_string);
1305
1722
 
1306
1723
  savectx = xmlSaveToIO(
1307
- (xmlOutputWriteCallback)io_write_callback,
1308
- (xmlOutputCloseCallback)io_close_callback,
1724
+ (xmlOutputWriteCallback)noko_io_write,
1725
+ (xmlOutputCloseCallback)noko_io_close,
1309
1726
  (void *)io,
1310
1727
  RTEST(encoding) ? StringValueCStr(encoding) : NULL,
1311
1728
  (int)NUM2INT(options)
@@ -1318,93 +1735,377 @@ static VALUE native_write_to(
1318
1735
  return io;
1319
1736
  }
1320
1737
 
1321
- /*
1322
- * call-seq:
1323
- * line
1324
- *
1325
- * Returns the line for this Node
1326
- */
1327
- static VALUE line(VALUE self)
1738
+
1739
+ static inline void
1740
+ output_partial_string(VALUE out, char const *str, size_t length)
1741
+ {
1742
+ if (length) {
1743
+ rb_enc_str_buf_cat(out, str, (long)length, rb_utf8_encoding());
1744
+ }
1745
+ }
1746
+
1747
+ static inline void
1748
+ output_char(VALUE out, char ch)
1749
+ {
1750
+ output_partial_string(out, &ch, 1);
1751
+ }
1752
+
1753
+ static inline void
1754
+ output_string(VALUE out, char const *str)
1755
+ {
1756
+ output_partial_string(out, str, strlen(str));
1757
+ }
1758
+
1759
+ static inline void
1760
+ output_tagname(VALUE out, xmlNodePtr elem)
1761
+ {
1762
+ // Elements in the HTML, MathML, and SVG namespaces do not use a namespace
1763
+ // prefix in the HTML syntax.
1764
+ char const *name = (char const *)elem->name;
1765
+ xmlNsPtr ns = elem->ns;
1766
+ if (ns && ns->href && ns->prefix
1767
+ && strcmp((char const *)ns->href, "http://www.w3.org/1999/xhtml")
1768
+ && strcmp((char const *)ns->href, "http://www.w3.org/1998/Math/MathML")
1769
+ && strcmp((char const *)ns->href, "http://www.w3.org/2000/svg")) {
1770
+ output_string(out, (char const *)elem->ns->prefix);
1771
+ output_char(out, ':');
1772
+ char const *colon = strchr(name, ':');
1773
+ if (colon) {
1774
+ name = colon + 1;
1775
+ }
1776
+ }
1777
+ output_string(out, name);
1778
+ }
1779
+
1780
+ static inline void
1781
+ output_attr_name(VALUE out, xmlAttrPtr attr)
1782
+ {
1783
+ xmlNsPtr ns = attr->ns;
1784
+ char const *name = (char const *)attr->name;
1785
+ if (ns && ns->href) {
1786
+ char const *uri = (char const *)ns->href;
1787
+ char const *localname = strchr(name, ':');
1788
+ if (localname) {
1789
+ ++localname;
1790
+ } else {
1791
+ localname = name;
1792
+ }
1793
+
1794
+ if (!strcmp(uri, "http://www.w3.org/XML/1998/namespace")) {
1795
+ output_string(out, "xml:");
1796
+ name = localname;
1797
+ } else if (!strcmp(uri, "http://www.w3.org/2000/xmlns/")) {
1798
+ // xmlns:xmlns -> xmlns
1799
+ // xmlns:foo -> xmlns:foo
1800
+ if (strcmp(localname, "xmlns")) {
1801
+ output_string(out, "xmlns:");
1802
+ }
1803
+ name = localname;
1804
+ } else if (!strcmp(uri, "http://www.w3.org/1999/xlink")) {
1805
+ output_string(out, "xlink:");
1806
+ name = localname;
1807
+ } else if (ns->prefix) {
1808
+ output_string(out, (char const *)ns->prefix);
1809
+ output_char(out, ':');
1810
+ name = localname;
1811
+ }
1812
+ }
1813
+ output_string(out, name);
1814
+ }
1815
+
1816
+ static void
1817
+ output_escaped_string(VALUE out, xmlChar const *start, bool attr)
1818
+ {
1819
+ xmlChar const *next = start;
1820
+ int ch;
1821
+
1822
+ while ((ch = *next) != 0) {
1823
+ char const *replacement = NULL;
1824
+ size_t replaced_bytes = 1;
1825
+ if (ch == '&') {
1826
+ replacement = "&amp;";
1827
+ } else if (ch == 0xC2 && next[1] == 0xA0) {
1828
+ // U+00A0 NO-BREAK SPACE has the UTF-8 encoding C2 A0.
1829
+ replacement = "&nbsp;";
1830
+ replaced_bytes = 2;
1831
+ } else if (attr && ch == '"') {
1832
+ replacement = "&quot;";
1833
+ } else if (!attr && ch == '<') {
1834
+ replacement = "&lt;";
1835
+ } else if (!attr && ch == '>') {
1836
+ replacement = "&gt;";
1837
+ } else {
1838
+ ++next;
1839
+ continue;
1840
+ }
1841
+ output_partial_string(out, (char const *)start, (size_t)(next - start));
1842
+ output_string(out, replacement);
1843
+ next += replaced_bytes;
1844
+ start = next;
1845
+ }
1846
+ output_partial_string(out, (char const *)start, (size_t)(next - start));
1847
+ }
1848
+
1849
+ static bool
1850
+ should_prepend_newline(xmlNodePtr node)
1851
+ {
1852
+ char const *name = (char const *)node->name;
1853
+ xmlNodePtr child = node->children;
1854
+
1855
+ if (!name || !child || (strcmp(name, "pre") && strcmp(name, "textarea") && strcmp(name, "listing"))) {
1856
+ return false;
1857
+ }
1858
+
1859
+ return child->type == XML_TEXT_NODE && child->content && child->content[0] == '\n';
1860
+ }
1861
+
1862
+ static VALUE
1863
+ rb_prepend_newline(VALUE self)
1328
1864
  {
1329
1865
  xmlNodePtr node;
1330
- Data_Get_Struct(self, xmlNode, node);
1866
+ Noko_Node_Get_Struct(self, xmlNode, node);
1867
+ return should_prepend_newline(node) ? Qtrue : Qfalse;
1868
+ }
1869
+
1870
+ static bool
1871
+ is_one_of(xmlNodePtr node, char const *const *tagnames, size_t num_tagnames)
1872
+ {
1873
+ char const *name = (char const *)node->name;
1874
+ if (name == NULL) { // fragments don't have a name
1875
+ return false;
1876
+ }
1877
+
1878
+ if (node->ns != NULL) {
1879
+ // if the node has a namespace, it's in a foreign context and is not one of the HTML tags we're
1880
+ // matching against.
1881
+ return false;
1882
+ }
1331
1883
 
1332
- return INT2NUM(xmlGetLineNo(node));
1884
+ for (size_t idx = 0; idx < num_tagnames; ++idx) {
1885
+ if (!strcmp(name, tagnames[idx])) {
1886
+ return true;
1887
+ }
1888
+ }
1889
+ return false;
1333
1890
  }
1334
1891
 
1335
- /*
1336
- * call-seq:
1337
- * add_namespace_definition(prefix, href)
1338
- *
1339
- * Adds a namespace definition with +prefix+ using +href+ value. The result is
1340
- * as if parsed XML for this node had included an attribute
1341
- * 'xmlns:prefix=value'. A default namespace for this node ("xmlns=") can be
1342
- * added by passing 'nil' for prefix. Namespaces added this way will not
1343
- * show up in #attributes, but they will be included as an xmlns attribute
1344
- * when the node is serialized to XML.
1345
- */
1346
- static VALUE add_namespace_definition(VALUE self, VALUE prefix, VALUE href)
1892
+ static void
1893
+ output_node(
1894
+ VALUE out,
1895
+ xmlNodePtr node,
1896
+ bool preserve_newline
1897
+ )
1347
1898
  {
1348
- xmlNodePtr node, namespace;
1349
- xmlNsPtr ns;
1899
+ static char const *const VOID_ELEMENTS[] = {
1900
+ "area", "base", "basefont", "bgsound", "br", "col", "embed", "frame", "hr",
1901
+ "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr",
1902
+ };
1903
+
1904
+ static char const *const UNESCAPED_TEXT_ELEMENTS[] = {
1905
+ "style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript",
1906
+ };
1350
1907
 
1351
- Data_Get_Struct(self, xmlNode, node);
1352
- namespace = node ;
1908
+ switch (node->type) {
1909
+ case XML_ELEMENT_NODE:
1910
+ // Serialize the start tag.
1911
+ output_char(out, '<');
1912
+ output_tagname(out, node);
1913
+
1914
+ // Add attributes.
1915
+ for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
1916
+ output_char(out, ' ');
1917
+ output_node(out, (xmlNodePtr)attr, preserve_newline);
1918
+ }
1919
+ output_char(out, '>');
1353
1920
 
1354
- ns = xmlSearchNs(
1355
- node->doc,
1356
- node,
1357
- (const xmlChar *)(NIL_P(prefix) ? NULL : StringValueCStr(prefix))
1358
- );
1921
+ // Add children and end tag if element is not void.
1922
+ if (!is_one_of(node, VOID_ELEMENTS, sizeof VOID_ELEMENTS / sizeof VOID_ELEMENTS[0])) {
1923
+ if (preserve_newline && should_prepend_newline(node)) {
1924
+ output_char(out, '\n');
1925
+ }
1926
+ for (xmlNodePtr child = node->children; child; child = child->next) {
1927
+ output_node(out, child, preserve_newline);
1928
+ }
1929
+ output_string(out, "</");
1930
+ output_tagname(out, node);
1931
+ output_char(out, '>');
1932
+ }
1933
+ break;
1359
1934
 
1360
- if(!ns) {
1361
- if (node->type != XML_ELEMENT_NODE) {
1362
- namespace = node->parent;
1935
+ case XML_ATTRIBUTE_NODE: {
1936
+ xmlAttrPtr attr = (xmlAttrPtr)node;
1937
+ output_attr_name(out, attr);
1938
+ if (attr->children) {
1939
+ output_string(out, "=\"");
1940
+ xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
1941
+ output_escaped_string(out, value, true);
1942
+ xmlFree(value);
1943
+ output_char(out, '"');
1944
+ } else {
1945
+ // Output name=""
1946
+ output_string(out, "=\"\"");
1947
+ }
1363
1948
  }
1364
- ns = xmlNewNs(
1365
- namespace,
1366
- (const xmlChar *)StringValueCStr(href),
1367
- (const xmlChar *)(NIL_P(prefix) ? NULL : StringValueCStr(prefix))
1368
- );
1949
+ break;
1950
+
1951
+ case XML_TEXT_NODE:
1952
+ if (node->parent
1953
+ && is_one_of(node->parent, UNESCAPED_TEXT_ELEMENTS,
1954
+ sizeof UNESCAPED_TEXT_ELEMENTS / sizeof UNESCAPED_TEXT_ELEMENTS[0])) {
1955
+ output_string(out, (char const *)node->content);
1956
+ } else {
1957
+ output_escaped_string(out, node->content, false);
1958
+ }
1959
+ break;
1960
+
1961
+ case XML_CDATA_SECTION_NODE:
1962
+ output_string(out, "<![CDATA[");
1963
+ output_string(out, (char const *)node->content);
1964
+ output_string(out, "]]>");
1965
+ break;
1966
+
1967
+ case XML_COMMENT_NODE:
1968
+ output_string(out, "<!--");
1969
+ output_string(out, (char const *)node->content);
1970
+ output_string(out, "-->");
1971
+ break;
1972
+
1973
+ case XML_PI_NODE:
1974
+ output_string(out, "<?");
1975
+ output_string(out, (char const *)node->content);
1976
+ output_char(out, '>');
1977
+ break;
1978
+
1979
+ case XML_DOCUMENT_TYPE_NODE:
1980
+ case XML_DTD_NODE:
1981
+ output_string(out, "<!DOCTYPE ");
1982
+ output_string(out, (char const *)node->name);
1983
+ output_string(out, ">");
1984
+ break;
1985
+
1986
+ case XML_DOCUMENT_NODE:
1987
+ case XML_DOCUMENT_FRAG_NODE:
1988
+ case XML_HTML_DOCUMENT_NODE:
1989
+ for (xmlNodePtr child = node->children; child; child = child->next) {
1990
+ output_node(out, child, preserve_newline);
1991
+ }
1992
+ break;
1993
+
1994
+ default:
1995
+ rb_raise(rb_eRuntimeError, "Unsupported document node (%d); this is a bug in Nokogiri", node->type);
1996
+ break;
1369
1997
  }
1998
+ }
1370
1999
 
1371
- if (!ns) { return Qnil ; }
2000
+ static VALUE
2001
+ html_standard_serialize(
2002
+ VALUE self,
2003
+ VALUE preserve_newline
2004
+ )
2005
+ {
2006
+ xmlNodePtr node;
2007
+ Noko_Node_Get_Struct(self, xmlNode, node);
2008
+ VALUE output = rb_str_buf_new(4096);
2009
+ output_node(output, node, RTEST(preserve_newline));
2010
+ return output;
2011
+ }
1372
2012
 
1373
- if(NIL_P(prefix) || node != namespace) { xmlSetNs(node, ns); }
2013
+ /*
2014
+ * :call-seq:
2015
+ * line() → Integer
2016
+ *
2017
+ * [Returns] The line number of this Node.
2018
+ *
2019
+ * ---
2020
+ *
2021
+ * <b> ⚠ The CRuby and JRuby implementations differ in important ways! </b>
2022
+ *
2023
+ * Semantic differences:
2024
+ * - The CRuby method reflects the node's line number <i>in the parsed string</i>
2025
+ * - The JRuby method reflects the node's line number <i>in the final DOM structure</i> after
2026
+ * corrections have been applied
2027
+ *
2028
+ * Performance differences:
2029
+ * - The CRuby method is {O(1)}[https://en.wikipedia.org/wiki/Time_complexity#Constant_time]
2030
+ * (constant time)
2031
+ * - The JRuby method is {O(n)}[https://en.wikipedia.org/wiki/Time_complexity#Linear_time] (linear
2032
+ * time, where n is the number of nodes before/above the element in the DOM)
2033
+ *
2034
+ * If you'd like to help improve the JRuby implementation, please review these issues and reach out
2035
+ * to the maintainers:
2036
+ * - https://github.com/sparklemotion/nokogiri/issues/1223
2037
+ * - https://github.com/sparklemotion/nokogiri/pull/2177
2038
+ * - https://github.com/sparklemotion/nokogiri/issues/2380
2039
+ */
2040
+ static VALUE
2041
+ rb_xml_node_line(VALUE rb_node)
2042
+ {
2043
+ xmlNodePtr c_node;
2044
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1374
2045
 
1375
- return Nokogiri_wrap_xml_namespace(node->doc, ns);
2046
+ return LONG2NUM(xmlGetLineNo(c_node));
1376
2047
  }
1377
2048
 
1378
2049
  /*
1379
2050
  * call-seq:
1380
- * new(name, document)
2051
+ * line=(num)
1381
2052
  *
1382
- * Create a new node with +name+ sharing GC lifecycle with +document+
2053
+ * Sets the line for this Node. num must be less than 65535.
1383
2054
  */
1384
- static VALUE new(int argc, VALUE *argv, VALUE klass)
2055
+ static VALUE
2056
+ rb_xml_node_line_set(VALUE rb_node, VALUE rb_line_number)
1385
2057
  {
1386
- xmlDocPtr doc;
1387
- xmlNodePtr node;
1388
- VALUE name;
1389
- VALUE document;
2058
+ xmlNodePtr c_node;
2059
+ int line_number = NUM2INT(rb_line_number);
2060
+
2061
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
2062
+
2063
+ // libxml2 optionally uses xmlNode.psvi to store longer line numbers, but only for text nodes.
2064
+ // search for "psvi" in SAX2.c and tree.c to learn more.
2065
+ if (line_number < 65535) {
2066
+ c_node->line = (short unsigned)line_number;
2067
+ } else {
2068
+ c_node->line = 65535;
2069
+ if (c_node->type == XML_TEXT_NODE) {
2070
+ c_node->psvi = (void *)(ptrdiff_t)line_number;
2071
+ }
2072
+ }
2073
+
2074
+ return rb_line_number;
2075
+ }
2076
+
2077
+ /* :nodoc: documented in lib/nokogiri/xml/node.rb */
2078
+ static VALUE
2079
+ rb_xml_node_new(int argc, VALUE *argv, VALUE klass)
2080
+ {
2081
+ xmlNodePtr c_document_node;
2082
+ xmlNodePtr c_node;
2083
+ VALUE rb_name;
2084
+ VALUE rb_document_node;
1390
2085
  VALUE rest;
1391
2086
  VALUE rb_node;
1392
2087
 
1393
- rb_scan_args(argc, argv, "2*", &name, &document, &rest);
2088
+ rb_scan_args(argc, argv, "2*", &rb_name, &rb_document_node, &rest);
1394
2089
 
1395
- Data_Get_Struct(document, xmlDoc, doc);
2090
+ if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlNode)) {
2091
+ rb_raise(rb_eArgError, "document must be a Nokogiri::XML::Node");
2092
+ }
2093
+ if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlDocument)) {
2094
+ NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.13.0, remove in v1.17.0
2095
+ }
2096
+ Noko_Node_Get_Struct(rb_document_node, xmlNode, c_document_node);
1396
2097
 
1397
- node = xmlNewNode(NULL, (xmlChar *)StringValueCStr(name));
1398
- node->doc = doc->doc;
1399
- nokogiri_root_node(node);
2098
+ c_node = xmlNewNode(NULL, (xmlChar *)StringValueCStr(rb_name));
2099
+ c_node->doc = c_document_node->doc;
2100
+ noko_xml_document_pin_node(c_node);
1400
2101
 
1401
- rb_node = Nokogiri_wrap_xml_node(
2102
+ rb_node = noko_xml_node_wrap(
1402
2103
  klass == cNokogiriXmlNode ? (VALUE)NULL : klass,
1403
- node
2104
+ c_node
1404
2105
  );
1405
2106
  rb_obj_call_init(rb_node, argc, argv);
1406
2107
 
1407
- if(rb_block_given_p()) { rb_yield(rb_node); }
2108
+ if (rb_block_given_p()) { rb_yield(rb_node); }
1408
2109
 
1409
2110
  return rb_node;
1410
2111
  }
@@ -1415,17 +2116,18 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
1415
2116
  *
1416
2117
  * Returns the Node as html.
1417
2118
  */
1418
- static VALUE dump_html(VALUE self)
2119
+ static VALUE
2120
+ dump_html(VALUE self)
1419
2121
  {
1420
2122
  xmlBufferPtr buf ;
1421
2123
  xmlNodePtr node ;
1422
2124
  VALUE html;
1423
2125
 
1424
- Data_Get_Struct(self, xmlNode, node);
2126
+ Noko_Node_Get_Struct(self, xmlNode, node);
1425
2127
 
1426
2128
  buf = xmlBufferCreate() ;
1427
2129
  htmlNodeDump(buf, node->doc, node);
1428
- html = NOKOGIRI_STR_NEW2(buf->content);
2130
+ html = NOKOGIRI_STR_NEW2(xmlBufferContent(buf));
1429
2131
  xmlBufferFree(buf);
1430
2132
  return html ;
1431
2133
  }
@@ -1436,52 +2138,57 @@ static VALUE dump_html(VALUE self)
1436
2138
  *
1437
2139
  * Compare this Node to +other+ with respect to their Document
1438
2140
  */
1439
- static VALUE compare(VALUE self, VALUE _other)
2141
+ static VALUE
2142
+ compare(VALUE self, VALUE _other)
1440
2143
  {
1441
2144
  xmlNodePtr node, other;
1442
- Data_Get_Struct(self, xmlNode, node);
1443
- Data_Get_Struct(_other, xmlNode, other);
2145
+ Noko_Node_Get_Struct(self, xmlNode, node);
2146
+ Noko_Node_Get_Struct(_other, xmlNode, other);
1444
2147
 
1445
- return INT2NUM((long)xmlXPathCmpNodes(other, node));
2148
+ return INT2NUM(xmlXPathCmpNodes(other, node));
1446
2149
  }
1447
2150
 
1448
2151
 
1449
2152
  /*
1450
2153
  * call-seq:
1451
- * process_xincludes(options)
2154
+ * process_xincludes(flags)
1452
2155
  *
1453
2156
  * Loads and substitutes all xinclude elements below the node. The
1454
- * parser context will be initialized with +options+.
2157
+ * parser context will be initialized with +flags+.
1455
2158
  */
1456
- static VALUE process_xincludes(VALUE self, VALUE options)
2159
+ static VALUE
2160
+ noko_xml_node__process_xincludes(VALUE rb_node, VALUE rb_flags)
1457
2161
  {
1458
- int rcode ;
1459
- xmlNodePtr node;
1460
- VALUE error_list = rb_ary_new();
2162
+ int status ;
2163
+ xmlNodePtr c_node;
2164
+ VALUE rb_errors = rb_ary_new();
2165
+ libxmlStructuredErrorHandlerState handler_state;
1461
2166
 
1462
- Data_Get_Struct(self, xmlNode, node);
2167
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1463
2168
 
1464
- xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
1465
- rcode = xmlXIncludeProcessTreeFlags(node, (int)NUM2INT(options));
1466
- xmlSetStructuredErrorFunc(NULL, NULL);
2169
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
1467
2170
 
1468
- if (rcode < 0) {
1469
- xmlErrorPtr error;
2171
+ status = xmlXIncludeProcessTreeFlags(c_node, (int)NUM2INT(rb_flags));
1470
2172
 
1471
- error = xmlGetLastError();
1472
- if(error) {
1473
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
2173
+ noko__structured_error_func_restore(&handler_state);
2174
+
2175
+ if (status < 0) {
2176
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
2177
+
2178
+ if (RB_TEST(exception)) {
2179
+ rb_exc_raise(exception);
1474
2180
  } else {
1475
2181
  rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
1476
2182
  }
1477
2183
  }
1478
2184
 
1479
- return self;
2185
+ return rb_node;
1480
2186
  }
1481
2187
 
1482
2188
 
1483
2189
  /* TODO: DOCUMENT ME */
1484
- static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
2190
+ static VALUE
2191
+ in_context(VALUE self, VALUE _str, VALUE _options)
1485
2192
  {
1486
2193
  xmlNodePtr node, list = 0, tmp, child_iter, node_children, doc_children;
1487
2194
  xmlNodeSetPtr set;
@@ -1489,7 +2196,7 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
1489
2196
  VALUE doc, err;
1490
2197
  int doc_is_empty;
1491
2198
 
1492
- Data_Get_Struct(self, xmlNode, node);
2199
+ Noko_Node_Get_Struct(self, xmlNode, node);
1493
2200
 
1494
2201
  doc = DOC_RUBY_OBJECT(node->doc);
1495
2202
  err = rb_iv_get(doc, "@errors");
@@ -1497,14 +2204,7 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
1497
2204
  node_children = node->children;
1498
2205
  doc_children = node->doc->children;
1499
2206
 
1500
- xmlSetStructuredErrorFunc((void *)err, Nokogiri_error_array_pusher);
1501
-
1502
- /* Twiddle global variable because of a bug in libxml2.
1503
- * http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
1504
- */
1505
- #ifndef HTML_PARSE_NOIMPLIED
1506
- htmlHandleOmittedElem(0);
1507
- #endif
2207
+ xmlSetStructuredErrorFunc((void *)err, noko__error_array_pusher);
1508
2208
 
1509
2209
  /* This function adds a fake node to the child of +node+. If the parser
1510
2210
  * does not exit cleanly with XML_ERR_OK, the list is freed. This can
@@ -1530,24 +2230,23 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
1530
2230
  */
1531
2231
  child_iter = node->doc->children ;
1532
2232
  while (child_iter) {
1533
- if (child_iter->parent != (xmlNodePtr)node->doc) {
1534
- child_iter->parent = (xmlNodePtr)node->doc;
1535
- }
2233
+ child_iter->parent = (xmlNodePtr)node->doc;
1536
2234
  child_iter = child_iter->next;
1537
2235
  }
1538
2236
 
1539
- #ifndef HTML_PARSE_NOIMPLIED
1540
- htmlHandleOmittedElem(1);
1541
- #endif
1542
-
1543
2237
  xmlSetStructuredErrorFunc(NULL, NULL);
1544
2238
 
1545
- /* Workaround for a libxml2 bug where a parsing error may leave a broken
2239
+ /*
2240
+ * Workaround for a libxml2 bug where a parsing error may leave a broken
1546
2241
  * node reference in node->doc->children.
2242
+ *
2243
+ * https://bugzilla.gnome.org/show_bug.cgi?id=668155
2244
+ *
1547
2245
  * This workaround is limited to when a parse error occurs, the document
1548
2246
  * went from having no children to having children, and the context node is
1549
2247
  * part of a document fragment.
1550
- * https://bugzilla.gnome.org/show_bug.cgi?id=668155
2248
+ *
2249
+ * TODO: This was fixed in libxml 2.8.0 by 71a243d
1551
2250
  */
1552
2251
  if (error != XML_ERR_OK && doc_is_empty && node->doc->children != NULL) {
1553
2252
  child_iter = node;
@@ -1562,12 +2261,12 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
1562
2261
 
1563
2262
  /* FIXME: This probably needs to handle more constants... */
1564
2263
  switch (error) {
1565
- case XML_ERR_INTERNAL_ERROR:
1566
- case XML_ERR_NO_MEMORY:
1567
- rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error);
1568
- break;
1569
- default:
1570
- break;
2264
+ case XML_ERR_INTERNAL_ERROR:
2265
+ case XML_ERR_NO_MEMORY:
2266
+ rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error);
2267
+ break;
2268
+ default:
2269
+ break;
1571
2270
  }
1572
2271
 
1573
2272
  set = xmlXPathNodeSetCreate(NULL);
@@ -1576,178 +2275,185 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
1576
2275
  tmp = list->next;
1577
2276
  list->next = NULL;
1578
2277
  xmlXPathNodeSetAddUnique(set, list);
1579
- nokogiri_root_node(list);
2278
+ noko_xml_document_pin_node(list);
1580
2279
  list = tmp;
1581
2280
  }
1582
2281
 
1583
- return Nokogiri_wrap_xml_node_set(set, doc);
2282
+ return noko_xml_node_set_wrap(set, doc);
1584
2283
  }
1585
2284
 
2285
+ /* :nodoc: */
2286
+ VALUE
2287
+ rb_xml_node_data_ptr_eh(VALUE self)
2288
+ {
2289
+ xmlNodePtr c_node;
2290
+ Noko_Node_Get_Struct(self, xmlNode, c_node);
2291
+ return c_node ? Qtrue : Qfalse;
2292
+ }
1586
2293
 
1587
- VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
2294
+ VALUE
2295
+ noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
1588
2296
  {
1589
- VALUE document = Qnil ;
1590
- VALUE node_cache = Qnil ;
1591
- VALUE rb_node = Qnil ;
2297
+ VALUE rb_document, rb_node_cache, rb_node;
1592
2298
  nokogiriTuplePtr node_has_a_document;
1593
- xmlDocPtr doc;
1594
- void (*mark_method)(xmlNodePtr) = NULL ;
2299
+ xmlDocPtr c_doc;
1595
2300
 
1596
- assert(node);
2301
+ assert(c_node);
1597
2302
 
1598
- if(node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE) {
1599
- return DOC_RUBY_OBJECT(node->doc);
2303
+ if (c_node->type == XML_DOCUMENT_NODE || c_node->type == XML_HTML_DOCUMENT_NODE) {
2304
+ return DOC_RUBY_OBJECT(c_node->doc);
1600
2305
  }
1601
2306
 
1602
- /* It's OK if the node doesn't have a fully-realized document (as in XML::Reader). */
1603
- /* see https://github.com/sparklemotion/nokogiri/issues/95 */
1604
- /* and https://github.com/sparklemotion/nokogiri/issues/439 */
1605
- doc = node->doc;
1606
- if (doc->type == XML_DOCUMENT_FRAG_NODE) { doc = doc->doc; }
1607
- node_has_a_document = DOC_RUBY_OBJECT_TEST(doc);
2307
+ c_doc = c_node->doc;
2308
+
2309
+ // Nodes yielded from XML::Reader don't have a fully-realized Document
2310
+ node_has_a_document = DOC_RUBY_OBJECT_TEST(c_doc);
1608
2311
 
1609
- if(node->_private && node_has_a_document) {
1610
- return (VALUE)node->_private;
2312
+ if (c_node->_private && node_has_a_document) {
2313
+ return (VALUE)c_node->_private;
1611
2314
  }
1612
2315
 
1613
- if(!RTEST(klass)) {
1614
- switch(node->type) {
1615
- case XML_ELEMENT_NODE:
1616
- klass = cNokogiriXmlElement;
1617
- break;
1618
- case XML_TEXT_NODE:
1619
- klass = cNokogiriXmlText;
1620
- break;
1621
- case XML_ATTRIBUTE_NODE:
1622
- klass = cNokogiriXmlAttr;
1623
- break;
1624
- case XML_ENTITY_REF_NODE:
1625
- klass = cNokogiriXmlEntityReference;
1626
- break;
1627
- case XML_COMMENT_NODE:
1628
- klass = cNokogiriXmlComment;
1629
- break;
1630
- case XML_DOCUMENT_FRAG_NODE:
1631
- klass = cNokogiriXmlDocumentFragment;
1632
- break;
1633
- case XML_PI_NODE:
1634
- klass = cNokogiriXmlProcessingInstruction;
1635
- break;
1636
- case XML_ENTITY_DECL:
1637
- klass = cNokogiriXmlEntityDecl;
1638
- break;
1639
- case XML_CDATA_SECTION_NODE:
1640
- klass = cNokogiriXmlCData;
1641
- break;
1642
- case XML_DTD_NODE:
1643
- klass = cNokogiriXmlDtd;
1644
- break;
1645
- case XML_ATTRIBUTE_DECL:
1646
- klass = cNokogiriXmlAttributeDecl;
1647
- break;
1648
- case XML_ELEMENT_DECL:
1649
- klass = cNokogiriXmlElementDecl;
1650
- break;
1651
- default:
1652
- klass = cNokogiriXmlNode;
2316
+ if (!RTEST(rb_class)) {
2317
+ switch (c_node->type) {
2318
+ case XML_ELEMENT_NODE:
2319
+ rb_class = cNokogiriXmlElement;
2320
+ break;
2321
+ case XML_TEXT_NODE:
2322
+ rb_class = cNokogiriXmlText;
2323
+ break;
2324
+ case XML_ATTRIBUTE_NODE:
2325
+ rb_class = cNokogiriXmlAttr;
2326
+ break;
2327
+ case XML_ENTITY_REF_NODE:
2328
+ rb_class = cNokogiriXmlEntityReference;
2329
+ break;
2330
+ case XML_COMMENT_NODE:
2331
+ rb_class = cNokogiriXmlComment;
2332
+ break;
2333
+ case XML_DOCUMENT_FRAG_NODE:
2334
+ rb_class = cNokogiriXmlDocumentFragment;
2335
+ break;
2336
+ case XML_PI_NODE:
2337
+ rb_class = cNokogiriXmlProcessingInstruction;
2338
+ break;
2339
+ case XML_ENTITY_DECL:
2340
+ rb_class = cNokogiriXmlEntityDecl;
2341
+ break;
2342
+ case XML_CDATA_SECTION_NODE:
2343
+ rb_class = cNokogiriXmlCData;
2344
+ break;
2345
+ case XML_DTD_NODE:
2346
+ rb_class = cNokogiriXmlDtd;
2347
+ break;
2348
+ case XML_ATTRIBUTE_DECL:
2349
+ rb_class = cNokogiriXmlAttributeDecl;
2350
+ break;
2351
+ case XML_ELEMENT_DECL:
2352
+ rb_class = cNokogiriXmlElementDecl;
2353
+ break;
2354
+ default:
2355
+ rb_class = cNokogiriXmlNode;
1653
2356
  }
1654
2357
  }
1655
2358
 
1656
- mark_method = node_has_a_document ? mark : NULL ;
1657
-
1658
- rb_node = Data_Wrap_Struct(klass, mark_method, debug_node_dealloc, node) ;
1659
- node->_private = (void *)rb_node;
2359
+ rb_node = _xml_node_alloc(rb_class);
2360
+ _xml_node_data_ptr_set(rb_node, c_node);
1660
2361
 
1661
2362
  if (node_has_a_document) {
1662
- document = DOC_RUBY_OBJECT(doc);
1663
- node_cache = DOC_NODE_CACHE(doc);
1664
- rb_ary_push(node_cache, rb_node);
1665
- rb_funcall(document, decorate, 1, rb_node);
2363
+ rb_document = DOC_RUBY_OBJECT(c_doc);
2364
+ rb_node_cache = DOC_NODE_CACHE(c_doc);
2365
+ rb_ary_push(rb_node_cache, rb_node);
2366
+ rb_funcall(rb_document, id_decorate, 1, rb_node);
1666
2367
  }
1667
2368
 
1668
2369
  return rb_node ;
1669
2370
  }
1670
2371
 
1671
2372
 
1672
- void Nokogiri_xml_node_properties(xmlNodePtr node, VALUE attr_list)
2373
+ /*
2374
+ * return Array<Nokogiri::XML::Attr> containing the node's attributes
2375
+ */
2376
+ VALUE
2377
+ noko_xml_node_attrs(xmlNodePtr c_node)
1673
2378
  {
1674
- xmlAttrPtr prop;
1675
- prop = node->properties ;
1676
- while (prop != NULL) {
1677
- rb_ary_push(attr_list, Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)prop));
1678
- prop = prop->next ;
2379
+ VALUE rb_properties = rb_ary_new();
2380
+ xmlAttrPtr c_property;
2381
+
2382
+ c_property = c_node->properties ;
2383
+ while (c_property != NULL) {
2384
+ rb_ary_push(rb_properties, noko_xml_node_wrap(Qnil, (xmlNodePtr)c_property));
2385
+ c_property = c_property->next ;
1679
2386
  }
2387
+
2388
+ return rb_properties;
1680
2389
  }
1681
2390
 
1682
- VALUE cNokogiriXmlNode ;
1683
- VALUE cNokogiriXmlElement ;
1684
-
1685
- void init_xml_node()
1686
- {
1687
- VALUE nokogiri = rb_define_module("Nokogiri");
1688
- VALUE xml = rb_define_module_under(nokogiri, "XML");
1689
- VALUE klass = rb_define_class_under(xml, "Node", rb_cObject);
1690
-
1691
- cNokogiriXmlNode = klass;
1692
-
1693
- cNokogiriXmlElement = rb_define_class_under(xml, "Element", klass);
1694
-
1695
- rb_define_singleton_method(klass, "new", new, -1);
1696
-
1697
- rb_define_method(klass, "add_namespace_definition", add_namespace_definition, 2);
1698
- rb_define_method(klass, "node_name", get_name, 0);
1699
- rb_define_method(klass, "document", document, 0);
1700
- rb_define_method(klass, "node_name=", set_name, 1);
1701
- rb_define_method(klass, "parent", get_parent, 0);
1702
- rb_define_method(klass, "child", child, 0);
1703
- rb_define_method(klass, "first_element_child", first_element_child, 0);
1704
- rb_define_method(klass, "last_element_child", last_element_child, 0);
1705
- rb_define_method(klass, "children", children, 0);
1706
- rb_define_method(klass, "element_children", element_children, 0);
1707
- rb_define_method(klass, "next_sibling", next_sibling, 0);
1708
- rb_define_method(klass, "previous_sibling", previous_sibling, 0);
1709
- rb_define_method(klass, "next_element", next_element, 0);
1710
- rb_define_method(klass, "previous_element", previous_element, 0);
1711
- rb_define_method(klass, "node_type", node_type, 0);
1712
- rb_define_method(klass, "path", path, 0);
1713
- rb_define_method(klass, "key?", key_eh, 1);
1714
- rb_define_method(klass, "namespaced_key?", namespaced_key_eh, 2);
1715
- rb_define_method(klass, "blank?", blank_eh, 0);
1716
- rb_define_method(klass, "attribute_nodes", attribute_nodes, 0);
1717
- rb_define_method(klass, "attribute", attr, 1);
1718
- rb_define_method(klass, "attribute_with_ns", attribute_with_ns, 2);
1719
- rb_define_method(klass, "namespace", namespace, 0);
1720
- rb_define_method(klass, "namespace_definitions", namespace_definitions, 0);
1721
- rb_define_method(klass, "namespace_scopes", namespace_scopes, 0);
1722
- rb_define_method(klass, "encode_special_chars", encode_special_chars, 1);
1723
- rb_define_method(klass, "dup", duplicate_node, -1);
1724
- rb_define_method(klass, "unlink", unlink_node, 0);
1725
- rb_define_method(klass, "internal_subset", internal_subset, 0);
1726
- rb_define_method(klass, "external_subset", external_subset, 0);
1727
- rb_define_method(klass, "create_internal_subset", create_internal_subset, 3);
1728
- rb_define_method(klass, "create_external_subset", create_external_subset, 3);
1729
- rb_define_method(klass, "pointer_id", pointer_id, 0);
1730
- rb_define_method(klass, "line", line, 0);
1731
- rb_define_method(klass, "content", get_native_content, 0);
1732
- rb_define_method(klass, "native_content=", set_native_content, 1);
1733
- rb_define_method(klass, "lang", get_lang, 0);
1734
- rb_define_method(klass, "lang=", set_lang, 1);
1735
-
1736
- rb_define_private_method(klass, "process_xincludes", process_xincludes, 1);
1737
- rb_define_private_method(klass, "in_context", in_context, 2);
1738
- rb_define_private_method(klass, "add_child_node", add_child, 1);
1739
- rb_define_private_method(klass, "add_previous_sibling_node", add_previous_sibling, 1);
1740
- rb_define_private_method(klass, "add_next_sibling_node", add_next_sibling, 1);
1741
- rb_define_private_method(klass, "replace_node", replace, 1);
1742
- rb_define_private_method(klass, "dump_html", dump_html, 0);
1743
- rb_define_private_method(klass, "native_write_to", native_write_to, 4);
1744
- rb_define_private_method(klass, "get", get, 1);
1745
- rb_define_private_method(klass, "set", set, 2);
1746
- rb_define_private_method(klass, "set_namespace", set_namespace, 1);
1747
- rb_define_private_method(klass, "compare", compare, 1);
1748
-
1749
- decorate = rb_intern("decorate");
1750
- decorate_bang = rb_intern("decorate!");
1751
- }
1752
-
1753
- /* vim: set noet sw=4 sws=4 */
2391
+ void
2392
+ noko_init_xml_node(void)
2393
+ {
2394
+ cNokogiriXmlNode = rb_define_class_under(mNokogiriXml, "Node", rb_cObject);
2395
+
2396
+ rb_define_alloc_func(cNokogiriXmlNode, _xml_node_alloc);
2397
+
2398
+ rb_define_singleton_method(cNokogiriXmlNode, "new", rb_xml_node_new, -1);
2399
+
2400
+ rb_define_method(cNokogiriXmlNode, "add_namespace_definition", rb_xml_node_add_namespace_definition, 2);
2401
+ rb_define_method(cNokogiriXmlNode, "attribute", rb_xml_node_attribute, 1);
2402
+ rb_define_method(cNokogiriXmlNode, "attribute_nodes", rb_xml_node_attribute_nodes, 0);
2403
+ rb_define_method(cNokogiriXmlNode, "attribute_with_ns", rb_xml_node_attribute_with_ns, 2);
2404
+ rb_define_method(cNokogiriXmlNode, "blank?", rb_xml_node_blank_eh, 0);
2405
+ rb_define_method(cNokogiriXmlNode, "child", rb_xml_node_child, 0);
2406
+ rb_define_method(cNokogiriXmlNode, "children", rb_xml_node_children, 0);
2407
+ rb_define_method(cNokogiriXmlNode, "content", rb_xml_node_content, 0);
2408
+ rb_define_method(cNokogiriXmlNode, "create_external_subset", create_external_subset, 3);
2409
+ rb_define_method(cNokogiriXmlNode, "create_internal_subset", create_internal_subset, 3);
2410
+ rb_define_method(cNokogiriXmlNode, "data_ptr?", rb_xml_node_data_ptr_eh, 0);
2411
+ rb_define_method(cNokogiriXmlNode, "document", rb_xml_node_document, 0);
2412
+ rb_define_method(cNokogiriXmlNode, "element_children", rb_xml_node_element_children, 0);
2413
+ rb_define_method(cNokogiriXmlNode, "encode_special_chars", encode_special_chars, 1);
2414
+ rb_define_method(cNokogiriXmlNode, "external_subset", external_subset, 0);
2415
+ rb_define_method(cNokogiriXmlNode, "first_element_child", rb_xml_node_first_element_child, 0);
2416
+ rb_define_method(cNokogiriXmlNode, "internal_subset", internal_subset, 0);
2417
+ rb_define_method(cNokogiriXmlNode, "key?", key_eh, 1);
2418
+ rb_define_method(cNokogiriXmlNode, "lang", get_lang, 0);
2419
+ rb_define_method(cNokogiriXmlNode, "lang=", set_lang, 1);
2420
+ rb_define_method(cNokogiriXmlNode, "last_element_child", rb_xml_node_last_element_child, 0);
2421
+ rb_define_method(cNokogiriXmlNode, "line", rb_xml_node_line, 0);
2422
+ rb_define_method(cNokogiriXmlNode, "line=", rb_xml_node_line_set, 1);
2423
+ rb_define_method(cNokogiriXmlNode, "namespace", rb_xml_node_namespace, 0);
2424
+ rb_define_method(cNokogiriXmlNode, "namespace_definitions", namespace_definitions, 0);
2425
+ rb_define_method(cNokogiriXmlNode, "namespace_scopes", rb_xml_node_namespace_scopes, 0);
2426
+ rb_define_method(cNokogiriXmlNode, "namespaced_key?", namespaced_key_eh, 2);
2427
+ rb_define_method(cNokogiriXmlNode, "native_content=", set_native_content, 1);
2428
+ rb_define_method(cNokogiriXmlNode, "next_element", next_element, 0);
2429
+ rb_define_method(cNokogiriXmlNode, "next_sibling", next_sibling, 0);
2430
+ rb_define_method(cNokogiriXmlNode, "node_name", get_name, 0);
2431
+ rb_define_method(cNokogiriXmlNode, "node_name=", set_name, 1);
2432
+ rb_define_method(cNokogiriXmlNode, "node_type", node_type, 0);
2433
+ rb_define_method(cNokogiriXmlNode, "parent", get_parent, 0);
2434
+ rb_define_method(cNokogiriXmlNode, "path", rb_xml_node_path, 0);
2435
+ rb_define_method(cNokogiriXmlNode, "pointer_id", rb_xml_node_pointer_id, 0);
2436
+ rb_define_method(cNokogiriXmlNode, "previous_element", previous_element, 0);
2437
+ rb_define_method(cNokogiriXmlNode, "previous_sibling", previous_sibling, 0);
2438
+ rb_define_method(cNokogiriXmlNode, "unlink", unlink_node, 0);
2439
+
2440
+ rb_define_protected_method(cNokogiriXmlNode, "initialize_copy_with_args", rb_xml_node_initialize_copy_with_args, 3);
2441
+
2442
+ rb_define_private_method(cNokogiriXmlNode, "add_child_node", add_child, 1);
2443
+ rb_define_private_method(cNokogiriXmlNode, "add_next_sibling_node", add_next_sibling, 1);
2444
+ rb_define_private_method(cNokogiriXmlNode, "add_previous_sibling_node", add_previous_sibling, 1);
2445
+ rb_define_private_method(cNokogiriXmlNode, "compare", compare, 1);
2446
+ rb_define_private_method(cNokogiriXmlNode, "dump_html", dump_html, 0);
2447
+ rb_define_private_method(cNokogiriXmlNode, "get", get, 1);
2448
+ rb_define_private_method(cNokogiriXmlNode, "in_context", in_context, 2);
2449
+ rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4);
2450
+ rb_define_private_method(cNokogiriXmlNode, "prepend_newline?", rb_prepend_newline, 0);
2451
+ rb_define_private_method(cNokogiriXmlNode, "html_standard_serialize", html_standard_serialize, 1);
2452
+ rb_define_private_method(cNokogiriXmlNode, "process_xincludes", noko_xml_node__process_xincludes, 1);
2453
+ rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1);
2454
+ rb_define_private_method(cNokogiriXmlNode, "set", set, 2);
2455
+ rb_define_private_method(cNokogiriXmlNode, "set_namespace", set_namespace, 1);
2456
+
2457
+ id_decorate = rb_intern("decorate");
2458
+ id_decorate_bang = rb_intern("decorate!");
2459
+ }