nokogiri 1.0.0 → 1.6.8.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (309) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +26 -0
  3. data/.cross_rubies +9 -0
  4. data/.editorconfig +17 -0
  5. data/.gemtest +0 -0
  6. data/.travis.yml +51 -0
  7. data/CHANGELOG.rdoc +1160 -0
  8. data/CONTRIBUTING.md +42 -0
  9. data/C_CODING_STYLE.rdoc +33 -0
  10. data/Gemfile +22 -0
  11. data/LICENSE.txt +31 -0
  12. data/Manifest.txt +284 -40
  13. data/README.md +166 -0
  14. data/ROADMAP.md +111 -0
  15. data/Rakefile +310 -199
  16. data/STANDARD_RESPONSES.md +47 -0
  17. data/Y_U_NO_GEMSPEC.md +155 -0
  18. data/appveyor.yml +22 -0
  19. data/bin/nokogiri +118 -0
  20. data/build_all +45 -0
  21. data/dependencies.yml +29 -0
  22. data/ext/nokogiri/depend +358 -0
  23. data/ext/nokogiri/extconf.rb +664 -34
  24. data/ext/nokogiri/html_document.c +120 -33
  25. data/ext/nokogiri/html_document.h +1 -1
  26. data/ext/nokogiri/html_element_description.c +279 -0
  27. data/ext/nokogiri/html_element_description.h +10 -0
  28. data/ext/nokogiri/html_entity_lookup.c +32 -0
  29. data/ext/nokogiri/html_entity_lookup.h +8 -0
  30. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  31. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  32. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  33. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  34. data/ext/nokogiri/nokogiri.c +145 -0
  35. data/ext/nokogiri/nokogiri.h +131 -0
  36. data/ext/nokogiri/xml_attr.c +94 -0
  37. data/ext/nokogiri/xml_attr.h +9 -0
  38. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  39. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  40. data/ext/nokogiri/xml_cdata.c +23 -19
  41. data/ext/nokogiri/xml_cdata.h +1 -1
  42. data/ext/nokogiri/xml_comment.c +69 -0
  43. data/ext/nokogiri/xml_comment.h +9 -0
  44. data/ext/nokogiri/xml_document.c +501 -54
  45. data/ext/nokogiri/xml_document.h +14 -1
  46. data/ext/nokogiri/xml_document_fragment.c +48 -0
  47. data/ext/nokogiri/xml_document_fragment.h +10 -0
  48. data/ext/nokogiri/xml_dtd.c +109 -24
  49. data/ext/nokogiri/xml_dtd.h +3 -1
  50. data/ext/nokogiri/xml_element_content.c +123 -0
  51. data/ext/nokogiri/xml_element_content.h +10 -0
  52. data/ext/nokogiri/xml_element_decl.c +69 -0
  53. data/ext/nokogiri/xml_element_decl.h +9 -0
  54. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  55. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  56. data/ext/nokogiri/xml_entity_decl.c +110 -0
  57. data/ext/nokogiri/xml_entity_decl.h +10 -0
  58. data/ext/nokogiri/xml_entity_reference.c +52 -0
  59. data/ext/nokogiri/xml_entity_reference.h +9 -0
  60. data/ext/nokogiri/xml_io.c +60 -0
  61. data/ext/nokogiri/xml_io.h +11 -0
  62. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  63. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  64. data/ext/nokogiri/xml_namespace.c +117 -0
  65. data/ext/nokogiri/xml_namespace.h +13 -0
  66. data/ext/nokogiri/xml_node.c +1285 -315
  67. data/ext/nokogiri/xml_node.h +4 -6
  68. data/ext/nokogiri/xml_node_set.c +415 -54
  69. data/ext/nokogiri/xml_node_set.h +6 -2
  70. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  71. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  72. data/ext/nokogiri/xml_reader.c +316 -77
  73. data/ext/nokogiri/xml_reader.h +1 -1
  74. data/ext/nokogiri/xml_relax_ng.c +161 -0
  75. data/ext/nokogiri/xml_relax_ng.h +9 -0
  76. data/ext/nokogiri/xml_sax_parser.c +215 -80
  77. data/ext/nokogiri/xml_sax_parser.h +30 -1
  78. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  79. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  80. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  81. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  82. data/ext/nokogiri/xml_schema.c +205 -0
  83. data/ext/nokogiri/xml_schema.h +9 -0
  84. data/ext/nokogiri/xml_syntax_error.c +45 -175
  85. data/ext/nokogiri/xml_syntax_error.h +4 -2
  86. data/ext/nokogiri/xml_text.c +37 -14
  87. data/ext/nokogiri/xml_text.h +1 -1
  88. data/ext/nokogiri/xml_xpath_context.c +230 -13
  89. data/ext/nokogiri/xml_xpath_context.h +2 -1
  90. data/ext/nokogiri/xslt_stylesheet.c +196 -34
  91. data/ext/nokogiri/xslt_stylesheet.h +6 -1
  92. data/lib/nokogiri/css/node.rb +18 -61
  93. data/lib/nokogiri/css/parser.rb +725 -17
  94. data/lib/nokogiri/css/parser.y +126 -63
  95. data/lib/nokogiri/css/parser_extras.rb +91 -0
  96. data/lib/nokogiri/css/syntax_error.rb +7 -0
  97. data/lib/nokogiri/css/tokenizer.rb +148 -5
  98. data/lib/nokogiri/css/tokenizer.rex +31 -39
  99. data/lib/nokogiri/css/xpath_visitor.rb +109 -51
  100. data/lib/nokogiri/css.rb +24 -3
  101. data/lib/nokogiri/decorators/slop.rb +42 -0
  102. data/lib/nokogiri/html/builder.rb +27 -1
  103. data/lib/nokogiri/html/document.rb +329 -3
  104. data/lib/nokogiri/html/document_fragment.rb +39 -0
  105. data/lib/nokogiri/html/element_description.rb +23 -0
  106. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  107. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  108. data/lib/nokogiri/html/sax/parser.rb +35 -4
  109. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  110. data/lib/nokogiri/html/sax/push_parser.rb +36 -0
  111. data/lib/nokogiri/html.rb +18 -76
  112. data/lib/nokogiri/syntax_error.rb +4 -0
  113. data/lib/nokogiri/version.rb +106 -1
  114. data/lib/nokogiri/xml/attr.rb +14 -0
  115. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  116. data/lib/nokogiri/xml/builder.rb +395 -31
  117. data/lib/nokogiri/xml/cdata.rb +4 -2
  118. data/lib/nokogiri/xml/character_data.rb +7 -0
  119. data/lib/nokogiri/xml/document.rb +267 -12
  120. data/lib/nokogiri/xml/document_fragment.rb +149 -0
  121. data/lib/nokogiri/xml/dtd.rb +27 -1
  122. data/lib/nokogiri/xml/element_content.rb +36 -0
  123. data/lib/nokogiri/xml/element_decl.rb +13 -0
  124. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  125. data/lib/nokogiri/xml/namespace.rb +13 -0
  126. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  127. data/lib/nokogiri/xml/node.rb +748 -109
  128. data/lib/nokogiri/xml/node_set.rb +200 -72
  129. data/lib/nokogiri/xml/parse_options.rb +120 -0
  130. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  131. data/lib/nokogiri/xml/pp/node.rb +56 -0
  132. data/lib/nokogiri/xml/pp.rb +2 -0
  133. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  134. data/lib/nokogiri/xml/reader.rb +102 -4
  135. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  136. data/lib/nokogiri/xml/sax/document.rb +114 -2
  137. data/lib/nokogiri/xml/sax/parser.rb +97 -7
  138. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  139. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  140. data/lib/nokogiri/xml/sax.rb +2 -7
  141. data/lib/nokogiri/xml/schema.rb +63 -0
  142. data/lib/nokogiri/xml/searchable.rb +221 -0
  143. data/lib/nokogiri/xml/syntax_error.rb +27 -1
  144. data/lib/nokogiri/xml/text.rb +4 -1
  145. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  146. data/lib/nokogiri/xml/xpath.rb +4 -0
  147. data/lib/nokogiri/xml/xpath_context.rb +3 -1
  148. data/lib/nokogiri/xml.rb +45 -38
  149. data/lib/nokogiri/xslt/stylesheet.rb +19 -0
  150. data/lib/nokogiri/xslt.rb +47 -2
  151. data/lib/nokogiri.rb +117 -24
  152. data/lib/xsd/xmlparser/nokogiri.rb +102 -0
  153. data/patches/sort-patches-by-date +25 -0
  154. data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
  155. data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
  156. data/suppressions/README.txt +1 -0
  157. data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
  158. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  159. data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
  160. data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
  161. data/tasks/test.rb +100 -0
  162. data/test/css/test_nthiness.rb +73 -6
  163. data/test/css/test_parser.rb +184 -39
  164. data/test/css/test_tokenizer.rb +72 -19
  165. data/test/css/test_xpath_visitor.rb +44 -2
  166. data/test/decorators/test_slop.rb +20 -0
  167. data/test/files/2ch.html +108 -0
  168. data/test/files/GH_1042.html +18 -0
  169. data/test/files/address_book.rlx +12 -0
  170. data/test/files/address_book.xml +10 -0
  171. data/test/files/atom.xml +344 -0
  172. data/test/files/bar/bar.xsd +4 -0
  173. data/test/files/bogus.xml +0 -0
  174. data/test/files/dont_hurt_em_why.xml +422 -0
  175. data/test/files/encoding.html +82 -0
  176. data/test/files/encoding.xhtml +84 -0
  177. data/test/files/exslt.xml +8 -0
  178. data/test/files/exslt.xslt +35 -0
  179. data/test/files/foo/foo.xsd +4 -0
  180. data/test/files/metacharset.html +10 -0
  181. data/test/files/namespace_pressure_test.xml +1684 -0
  182. data/test/files/noencoding.html +47 -0
  183. data/test/files/po.xml +32 -0
  184. data/test/files/po.xsd +66 -0
  185. data/test/files/saml/saml20assertion_schema.xsd +283 -0
  186. data/test/files/saml/saml20protocol_schema.xsd +302 -0
  187. data/test/files/saml/xenc_schema.xsd +146 -0
  188. data/test/files/saml/xmldsig_schema.xsd +318 -0
  189. data/test/files/shift_jis.html +10 -0
  190. data/test/files/shift_jis.xml +5 -0
  191. data/test/files/shift_jis_no_charset.html +9 -0
  192. data/test/files/slow-xpath.xml +25509 -0
  193. data/test/files/snuggles.xml +3 -0
  194. data/test/files/staff.dtd +10 -0
  195. data/test/files/test_document_url/bar.xml +2 -0
  196. data/test/files/test_document_url/document.dtd +4 -0
  197. data/test/files/test_document_url/document.xml +6 -0
  198. data/test/files/tlm.html +2 -1
  199. data/test/files/to_be_xincluded.xml +2 -0
  200. data/test/files/valid_bar.xml +2 -0
  201. data/test/files/xinclude.xml +4 -0
  202. data/test/helper.rb +124 -13
  203. data/test/html/sax/test_parser.rb +118 -4
  204. data/test/html/sax/test_parser_context.rb +46 -0
  205. data/test/html/sax/test_push_parser.rb +87 -0
  206. data/test/html/test_builder.rb +94 -8
  207. data/test/html/test_document.rb +626 -11
  208. data/test/html/test_document_encoding.rb +145 -0
  209. data/test/html/test_document_fragment.rb +301 -0
  210. data/test/html/test_element_description.rb +105 -0
  211. data/test/html/test_named_characters.rb +14 -0
  212. data/test/html/test_node.rb +212 -0
  213. data/test/html/test_node_encoding.rb +85 -0
  214. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
  215. data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
  216. data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
  217. data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
  218. data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
  219. data/test/namespaces/test_namespaces_in_parsed_doc.rb +80 -0
  220. data/test/namespaces/test_namespaces_preservation.rb +31 -0
  221. data/test/test_convert_xpath.rb +2 -47
  222. data/test/test_css_cache.rb +45 -0
  223. data/test/test_encoding_handler.rb +48 -0
  224. data/test/test_memory_leak.rb +156 -0
  225. data/test/test_nokogiri.rb +103 -1
  226. data/test/test_soap4r_sax.rb +52 -0
  227. data/test/test_xslt_transforms.rb +293 -8
  228. data/test/xml/node/test_save_options.rb +28 -0
  229. data/test/xml/node/test_subclass.rb +44 -0
  230. data/test/xml/sax/test_parser.rb +309 -8
  231. data/test/xml/sax/test_parser_context.rb +115 -0
  232. data/test/xml/sax/test_push_parser.rb +157 -0
  233. data/test/xml/test_attr.rb +67 -0
  234. data/test/xml/test_attribute_decl.rb +86 -0
  235. data/test/xml/test_builder.rb +327 -2
  236. data/test/xml/test_c14n.rb +180 -0
  237. data/test/xml/test_cdata.rb +32 -2
  238. data/test/xml/test_comment.rb +40 -0
  239. data/test/xml/test_document.rb +846 -35
  240. data/test/xml/test_document_encoding.rb +31 -0
  241. data/test/xml/test_document_fragment.rb +271 -0
  242. data/test/xml/test_dtd.rb +153 -9
  243. data/test/xml/test_dtd_encoding.rb +31 -0
  244. data/test/xml/test_element_content.rb +56 -0
  245. data/test/xml/test_element_decl.rb +73 -0
  246. data/test/xml/test_entity_decl.rb +122 -0
  247. data/test/xml/test_entity_reference.rb +251 -0
  248. data/test/xml/test_namespace.rb +96 -0
  249. data/test/xml/test_node.rb +1126 -105
  250. data/test/xml/test_node_attributes.rb +115 -0
  251. data/test/xml/test_node_encoding.rb +69 -0
  252. data/test/xml/test_node_inheritance.rb +32 -0
  253. data/test/xml/test_node_reparenting.rb +549 -0
  254. data/test/xml/test_node_set.rb +668 -9
  255. data/test/xml/test_parse_options.rb +64 -0
  256. data/test/xml/test_processing_instruction.rb +30 -0
  257. data/test/xml/test_reader.rb +589 -0
  258. data/test/xml/test_reader_encoding.rb +134 -0
  259. data/test/xml/test_relax_ng.rb +60 -0
  260. data/test/xml/test_schema.rb +142 -0
  261. data/test/xml/test_syntax_error.rb +30 -0
  262. data/test/xml/test_text.rb +49 -2
  263. data/test/xml/test_unparented_node.rb +440 -0
  264. data/test/xml/test_xinclude.rb +83 -0
  265. data/test/xml/test_xpath.rb +445 -0
  266. data/test/xslt/test_custom_functions.rb +133 -0
  267. data/test/xslt/test_exception_handling.rb +37 -0
  268. data/test_all +107 -0
  269. metadata +459 -115
  270. data/History.txt +0 -6
  271. data/README.ja.txt +0 -86
  272. data/README.txt +0 -87
  273. data/ext/nokogiri/html_sax_parser.c +0 -32
  274. data/ext/nokogiri/html_sax_parser.h +0 -11
  275. data/ext/nokogiri/native.c +0 -40
  276. data/ext/nokogiri/native.h +0 -51
  277. data/ext/nokogiri/xml_xpath.c +0 -46
  278. data/ext/nokogiri/xml_xpath.h +0 -11
  279. data/lib/nokogiri/css/generated_parser.rb +0 -653
  280. data/lib/nokogiri/css/generated_tokenizer.rb +0 -159
  281. data/lib/nokogiri/decorators/hpricot/node.rb +0 -58
  282. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -14
  283. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -17
  284. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  285. data/lib/nokogiri/decorators.rb +0 -1
  286. data/lib/nokogiri/hpricot.rb +0 -47
  287. data/lib/nokogiri/xml/after_handler.rb +0 -18
  288. data/lib/nokogiri/xml/before_handler.rb +0 -32
  289. data/lib/nokogiri/xml/element.rb +0 -6
  290. data/lib/nokogiri/xml/entity_declaration.rb +0 -9
  291. data/nokogiri.gemspec +0 -34
  292. data/test/hpricot/files/basic.xhtml +0 -17
  293. data/test/hpricot/files/boingboing.html +0 -2266
  294. data/test/hpricot/files/cy0.html +0 -3653
  295. data/test/hpricot/files/immob.html +0 -400
  296. data/test/hpricot/files/pace_application.html +0 -1320
  297. data/test/hpricot/files/tenderlove.html +0 -16
  298. data/test/hpricot/files/uswebgen.html +0 -220
  299. data/test/hpricot/files/utf8.html +0 -1054
  300. data/test/hpricot/files/week9.html +0 -1723
  301. data/test/hpricot/files/why.xml +0 -19
  302. data/test/hpricot/load_files.rb +0 -7
  303. data/test/hpricot/test_alter.rb +0 -67
  304. data/test/hpricot/test_builder.rb +0 -27
  305. data/test/hpricot/test_parser.rb +0 -423
  306. data/test/hpricot/test_paths.rb +0 -15
  307. data/test/hpricot/test_preserved.rb +0 -78
  308. data/test/hpricot/test_xml.rb +0 -30
  309. data/test/test_reader.rb +0 -222
@@ -1,5 +1,351 @@
1
1
  #include <xml_node.h>
2
2
 
3
+ static ID decorate, decorate_bang;
4
+
5
+ #ifdef DEBUG
6
+ static void debug_node_dealloc(xmlNodePtr x)
7
+ {
8
+ NOKOGIRI_DEBUG_START(x)
9
+ NOKOGIRI_DEBUG_END(x)
10
+ }
11
+ #else
12
+ # define debug_node_dealloc 0
13
+ #endif
14
+
15
+ static void mark(xmlNodePtr node)
16
+ {
17
+ xmlDocPtr doc = node->doc;
18
+ if(doc->type == XML_DOCUMENT_NODE || doc->type == XML_HTML_DOCUMENT_NODE) {
19
+ if(DOC_RUBY_OBJECT_TEST(doc)) {
20
+ rb_gc_mark(DOC_RUBY_OBJECT(doc));
21
+ }
22
+ } else if(node->doc->_private) {
23
+ rb_gc_mark((VALUE)doc->_private);
24
+ }
25
+ }
26
+
27
+ /* :nodoc: */
28
+ typedef xmlNodePtr (*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr);
29
+
30
+ /* :nodoc: */
31
+ static void relink_namespace(xmlNodePtr reparented)
32
+ {
33
+ xmlChar *name, *prefix;
34
+ xmlNodePtr child;
35
+ xmlNsPtr ns;
36
+
37
+ if (reparented->type != XML_ATTRIBUTE_NODE &&
38
+ reparented->type != XML_ELEMENT_NODE) { return; }
39
+
40
+ if (reparented->ns == NULL || reparented->ns->prefix == NULL) {
41
+ name = xmlSplitQName2(reparented->name, &prefix);
42
+
43
+ if(reparented->type == XML_ATTRIBUTE_NODE) {
44
+ if (prefix == NULL || strcmp((char*)prefix, XMLNS_PREFIX) == 0) { return; }
45
+ }
46
+
47
+ ns = xmlSearchNs(reparented->doc, reparented, prefix);
48
+
49
+ if (ns == NULL && reparented->parent) {
50
+ ns = xmlSearchNs(reparented->doc, reparented->parent, prefix);
51
+ }
52
+
53
+ if (ns != NULL) {
54
+ xmlNodeSetName(reparented, name);
55
+ xmlSetNs(reparented, ns);
56
+ }
57
+ }
58
+
59
+ /* Avoid segv when relinking against unlinked nodes. */
60
+ if (reparented->type != XML_ELEMENT_NODE || !reparented->parent) { return; }
61
+
62
+ /* Make sure that our reparented node has the correct namespaces */
63
+ if (!reparented->ns && reparented->doc != (xmlDocPtr)reparented->parent) {
64
+ xmlSetNs(reparented, reparented->parent->ns);
65
+ }
66
+
67
+ /* Search our parents for an existing definition */
68
+ if (reparented->nsDef) {
69
+ xmlNsPtr curr = reparented->nsDef;
70
+ xmlNsPtr prev = NULL;
71
+
72
+ while (curr) {
73
+ xmlNsPtr ns = xmlSearchNsByHref(
74
+ reparented->doc,
75
+ reparented->parent,
76
+ curr->href
77
+ );
78
+ /* If we find the namespace is already declared, remove it from this
79
+ * definition list. */
80
+ if (ns && ns != curr && xmlStrEqual(ns->prefix, curr->prefix)) {
81
+ if (prev) {
82
+ prev->next = curr->next;
83
+ } else {
84
+ reparented->nsDef = curr->next;
85
+ }
86
+ nokogiri_root_nsdef(curr, reparented->doc);
87
+ } else {
88
+ prev = curr;
89
+ }
90
+ curr = curr->next;
91
+ }
92
+ }
93
+
94
+ /* Only walk all children if there actually is a namespace we need to */
95
+ /* reparent. */
96
+ if (NULL == reparented->ns) { return; }
97
+
98
+ /* When a node gets reparented, walk it's children to make sure that */
99
+ /* their namespaces are reparented as well. */
100
+ child = reparented->children;
101
+ while (NULL != child) {
102
+ relink_namespace(child);
103
+ child = child->next;
104
+ }
105
+
106
+ if (reparented->type == XML_ELEMENT_NODE) {
107
+ child = (xmlNodePtr)((xmlElementPtr)reparented)->attributes;
108
+ while(NULL != child) {
109
+ relink_namespace(child);
110
+ child = child->next;
111
+ }
112
+ }
113
+ }
114
+
115
+ /* :nodoc: */
116
+ static xmlNodePtr xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node)
117
+ {
118
+ xmlNodePtr retval ;
119
+
120
+ retval = xmlReplaceNode(pivot, new_node) ;
121
+
122
+ if (retval == pivot) {
123
+ retval = new_node ; /* return semantics for reparent_node_with */
124
+ }
125
+
126
+ /* work around libxml2 issue: https://bugzilla.gnome.org/show_bug.cgi?id=615612 */
127
+ if (retval && retval->type == XML_TEXT_NODE) {
128
+ if (retval->prev && retval->prev->type == XML_TEXT_NODE) {
129
+ retval = xmlTextMerge(retval->prev, retval);
130
+ }
131
+ if (retval->next && retval->next->type == XML_TEXT_NODE) {
132
+ retval = xmlTextMerge(retval, retval->next);
133
+ }
134
+ }
135
+
136
+ return retval ;
137
+ }
138
+
139
+ /* :nodoc: */
140
+ static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func prf)
141
+ {
142
+ VALUE reparented_obj ;
143
+ xmlNodePtr reparentee, pivot, reparented, next_text, new_next_text, parent ;
144
+ int original_ns_prefix_is_default = 0 ;
145
+
146
+ if(!rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlNode))
147
+ rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
148
+ if(rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlDocument))
149
+ rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
150
+
151
+ Data_Get_Struct(reparentee_obj, xmlNode, reparentee);
152
+ Data_Get_Struct(pivot_obj, xmlNode, pivot);
153
+
154
+ /*
155
+ * Check if nodes given are appropriate to have a parent-child
156
+ * relationship, based on the DOM specification.
157
+ *
158
+ * cf. http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/core.html#ID-1590626202
159
+ */
160
+ if (prf == xmlAddChild) {
161
+ parent = pivot;
162
+ } else {
163
+ parent = pivot->parent;
164
+ }
165
+
166
+ if (parent) {
167
+ switch (parent->type) {
168
+ case XML_DOCUMENT_NODE:
169
+ case XML_HTML_DOCUMENT_NODE:
170
+ switch (reparentee->type) {
171
+ case XML_ELEMENT_NODE:
172
+ case XML_PI_NODE:
173
+ case XML_COMMENT_NODE:
174
+ case XML_DOCUMENT_TYPE_NODE:
175
+ /*
176
+ * The DOM specification says no to adding text-like nodes
177
+ * directly to a document, but we allow it for compatibility.
178
+ */
179
+ case XML_TEXT_NODE:
180
+ case XML_CDATA_SECTION_NODE:
181
+ case XML_ENTITY_REF_NODE:
182
+ goto ok;
183
+ }
184
+ break;
185
+ case XML_DOCUMENT_FRAG_NODE:
186
+ case XML_ENTITY_REF_NODE:
187
+ case XML_ELEMENT_NODE:
188
+ switch (reparentee->type) {
189
+ case XML_ELEMENT_NODE:
190
+ case XML_PI_NODE:
191
+ case XML_COMMENT_NODE:
192
+ case XML_TEXT_NODE:
193
+ case XML_CDATA_SECTION_NODE:
194
+ case XML_ENTITY_REF_NODE:
195
+ goto ok;
196
+ }
197
+ break;
198
+ case XML_ATTRIBUTE_NODE:
199
+ switch (reparentee->type) {
200
+ case XML_TEXT_NODE:
201
+ case XML_ENTITY_REF_NODE:
202
+ goto ok;
203
+ }
204
+ break;
205
+ case XML_TEXT_NODE:
206
+ /*
207
+ * xmlAddChild() breaks the DOM specification in that it allows
208
+ * adding a text node to another, in which case text nodes are
209
+ * coalesced, but since our JRuby version does not support such an
210
+ * operation, we should inhibit it.
211
+ */
212
+ break;
213
+ }
214
+
215
+ rb_raise(rb_eArgError, "cannot reparent %s there", rb_obj_classname(reparentee_obj));
216
+ }
217
+
218
+ ok:
219
+ xmlUnlinkNode(reparentee);
220
+
221
+ if (reparentee->doc != pivot->doc || reparentee->type == XML_TEXT_NODE) {
222
+ /*
223
+ * if the reparentee is a text node, there's a very good chance it will be
224
+ * merged with an adjacent text node after being reparented, and in that case
225
+ * libxml will free the underlying C struct.
226
+ *
227
+ * since we clearly have a ruby object which references the underlying
228
+ * memory, we can't let the C struct get freed. let's pickle the original
229
+ * reparentee by rooting it; and then we'll reparent a duplicate of the
230
+ * node that we don't care about preserving.
231
+ *
232
+ * alternatively, if the reparentee is from a different document than the
233
+ * pivot node, libxml2 is going to get confused about which document's
234
+ * "dictionary" the node's strings belong to (this is an otherwise
235
+ * uninteresting libxml2 implementation detail). as a result, we cannot
236
+ * reparent the actual reparentee, so we reparent a duplicate.
237
+ */
238
+ if (reparentee->type == XML_TEXT_NODE && reparentee->_private) {
239
+ /*
240
+ * additionally, since we know this C struct isn't going to be related to
241
+ * a Ruby object anymore, let's break the relationship on this end as
242
+ * well.
243
+ *
244
+ * this is not absolutely necessary unless libxml-ruby is also in effect,
245
+ * in which case its global callback `rxml_node_deregisterNode` will try
246
+ * to do things to our data.
247
+ *
248
+ * for more details on this particular (and particularly nasty) edge
249
+ * case, see:
250
+ *
251
+ * https://github.com/sparklemotion/nokogiri/issues/1426
252
+ */
253
+ reparentee->_private = NULL ;
254
+ }
255
+
256
+ if (reparentee->ns != NULL && reparentee->ns->prefix == NULL) {
257
+ original_ns_prefix_is_default = 1;
258
+ }
259
+
260
+ nokogiri_root_node(reparentee);
261
+
262
+ if (!(reparentee = xmlDocCopyNode(reparentee, pivot->doc, 1))) {
263
+ rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
264
+ }
265
+
266
+ if (original_ns_prefix_is_default && reparentee->ns != NULL && reparentee->ns->prefix != NULL) {
267
+ /* issue #391, where new node's prefix may become the string "default" */
268
+ reparentee->ns->prefix = NULL;
269
+ }
270
+ }
271
+
272
+ if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling
273
+ && reparentee->type == XML_TEXT_NODE && pivot->next && pivot->next->type == XML_TEXT_NODE) {
274
+ /*
275
+ * libxml merges text nodes in a right-to-left fashion, meaning that if
276
+ * there are two text nodes who would be adjacent, the right (or following,
277
+ * or next) node will be merged into the left (or preceding, or previous)
278
+ * node.
279
+ *
280
+ * and by "merged" I mean the string contents will be concatenated onto the
281
+ * left node's contents, and then the node will be freed.
282
+ *
283
+ * which means that if we have a ruby object wrapped around the right node,
284
+ * its memory would be freed out from under it.
285
+ *
286
+ * so, we detect this edge case and unlink-and-root the text node before it gets
287
+ * merged. then we dup the node and insert that duplicate back into the
288
+ * document where the real node was.
289
+ *
290
+ * yes, this is totally lame.
291
+ */
292
+ next_text = pivot->next ;
293
+ new_next_text = xmlDocCopyNode(next_text, pivot->doc, 1) ;
294
+
295
+ xmlUnlinkNode(next_text);
296
+ nokogiri_root_node(next_text);
297
+
298
+ xmlAddNextSibling(pivot, new_next_text);
299
+ }
300
+
301
+ if(!(reparented = (*prf)(pivot, reparentee))) {
302
+ rb_raise(rb_eRuntimeError, "Could not reparent node");
303
+ }
304
+
305
+ /*
306
+ * make sure the ruby object is pointed at the just-reparented node, which
307
+ * might be a duplicate (see above) or might be the result of merging
308
+ * adjacent text nodes.
309
+ */
310
+ DATA_PTR(reparentee_obj) = reparented ;
311
+
312
+ relink_namespace(reparented);
313
+
314
+ reparented_obj = Nokogiri_wrap_xml_node(Qnil, reparented);
315
+
316
+ rb_funcall(reparented_obj, decorate_bang, 0);
317
+
318
+ return reparented_obj ;
319
+ }
320
+
321
+
322
+ /*
323
+ * call-seq:
324
+ * document
325
+ *
326
+ * Get the document for this Node
327
+ */
328
+ static VALUE document(VALUE self)
329
+ {
330
+ xmlNodePtr node;
331
+ Data_Get_Struct(self, xmlNode, node);
332
+ return DOC_RUBY_OBJECT(node->doc);
333
+ }
334
+
335
+ /*
336
+ * call-seq:
337
+ * pointer_id
338
+ *
339
+ * Get the internal pointer number
340
+ */
341
+ static VALUE pointer_id(VALUE self)
342
+ {
343
+ xmlNodePtr node;
344
+ Data_Get_Struct(self, xmlNode, node);
345
+
346
+ return INT2NUM((long)(node));
347
+ }
348
+
3
349
  /*
4
350
  * call-seq:
5
351
  * encode_special_chars(string)
@@ -9,18 +355,113 @@
9
355
  static VALUE encode_special_chars(VALUE self, VALUE string)
10
356
  {
11
357
  xmlNodePtr node;
358
+ xmlChar *encoded;
359
+ VALUE encoded_str;
360
+
12
361
  Data_Get_Struct(self, xmlNode, node);
13
- xmlChar * encoded = xmlEncodeSpecialChars(
362
+ encoded = xmlEncodeSpecialChars(
14
363
  node->doc,
15
- (const xmlChar *)StringValuePtr(string)
364
+ (const xmlChar *)StringValueCStr(string)
16
365
  );
17
366
 
18
- VALUE encoded_str = rb_str_new2((const char *)encoded);
19
- free(encoded);
367
+ encoded_str = NOKOGIRI_STR_NEW2(encoded);
368
+ xmlFree(encoded);
20
369
 
21
370
  return encoded_str;
22
371
  }
23
372
 
373
+ /*
374
+ * call-seq:
375
+ * create_internal_subset(name, external_id, system_id)
376
+ *
377
+ * Create the internal subset of a document.
378
+ *
379
+ * doc.create_internal_subset("chapter", "-//OASIS//DTD DocBook XML//EN", "chapter.dtd")
380
+ * # => <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML//EN" "chapter.dtd">
381
+ *
382
+ * doc.create_internal_subset("chapter", nil, "chapter.dtd")
383
+ * # => <!DOCTYPE chapter SYSTEM "chapter.dtd">
384
+ */
385
+ static VALUE create_internal_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id)
386
+ {
387
+ xmlNodePtr node;
388
+ xmlDocPtr doc;
389
+ xmlDtdPtr dtd;
390
+
391
+ Data_Get_Struct(self, xmlNode, node);
392
+
393
+ doc = node->doc;
394
+
395
+ if(xmlGetIntSubset(doc))
396
+ rb_raise(rb_eRuntimeError, "Document already has an internal subset");
397
+
398
+ dtd = xmlCreateIntSubset(
399
+ doc,
400
+ NIL_P(name) ? NULL : (const xmlChar *)StringValueCStr(name),
401
+ NIL_P(external_id) ? NULL : (const xmlChar *)StringValueCStr(external_id),
402
+ NIL_P(system_id) ? NULL : (const xmlChar *)StringValueCStr(system_id)
403
+ );
404
+
405
+ if(!dtd) return Qnil;
406
+
407
+ return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)dtd);
408
+ }
409
+
410
+ /*
411
+ * call-seq:
412
+ * create_external_subset(name, external_id, system_id)
413
+ *
414
+ * Create an external subset
415
+ */
416
+ static VALUE create_external_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id)
417
+ {
418
+ xmlNodePtr node;
419
+ xmlDocPtr doc;
420
+ xmlDtdPtr dtd;
421
+
422
+ Data_Get_Struct(self, xmlNode, node);
423
+
424
+ doc = node->doc;
425
+
426
+ if(doc->extSubset)
427
+ rb_raise(rb_eRuntimeError, "Document already has an external subset");
428
+
429
+ dtd = xmlNewDtd(
430
+ doc,
431
+ NIL_P(name) ? NULL : (const xmlChar *)StringValueCStr(name),
432
+ NIL_P(external_id) ? NULL : (const xmlChar *)StringValueCStr(external_id),
433
+ NIL_P(system_id) ? NULL : (const xmlChar *)StringValueCStr(system_id)
434
+ );
435
+
436
+ if(!dtd) return Qnil;
437
+
438
+ return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)dtd);
439
+ }
440
+
441
+ /*
442
+ * call-seq:
443
+ * external_subset
444
+ *
445
+ * Get the external subset
446
+ */
447
+ static VALUE external_subset(VALUE self)
448
+ {
449
+ xmlNodePtr node;
450
+ xmlDocPtr doc;
451
+ xmlDtdPtr dtd;
452
+
453
+ Data_Get_Struct(self, xmlNode, node);
454
+
455
+ if(!node->doc) return Qnil;
456
+
457
+ doc = node->doc;
458
+ dtd = doc->extSubset;
459
+
460
+ if(!dtd) return Qnil;
461
+
462
+ return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)dtd);
463
+ }
464
+
24
465
  /*
25
466
  * call-seq:
26
467
  * internal_subset
@@ -31,32 +472,43 @@ static VALUE internal_subset(VALUE self)
31
472
  {
32
473
  xmlNodePtr node;
33
474
  xmlDocPtr doc;
475
+ xmlDtdPtr dtd;
476
+
34
477
  Data_Get_Struct(self, xmlNode, node);
35
478
 
36
479
  if(!node->doc) return Qnil;
37
480
 
38
481
  doc = node->doc;
482
+ dtd = xmlGetIntSubset(doc);
39
483
 
40
- if(!doc->intSubset) return Qnil;
484
+ if(!dtd) return Qnil;
41
485
 
42
- return Nokogiri_wrap_xml_node((xmlNodePtr)doc->intSubset);
486
+ return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)dtd);
43
487
  }
44
488
 
45
489
  /*
46
490
  * call-seq:
47
491
  * dup
48
492
  *
49
- * Copy this node
493
+ * Copy this node. An optional depth may be passed in, but it defaults
494
+ * to a deep copy. 0 is a shallow copy, 1 is a deep copy.
50
495
  */
51
- static VALUE duplicate_node(VALUE self)
496
+ static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
52
497
  {
498
+ VALUE level;
53
499
  xmlNodePtr node, dup;
500
+
501
+ if(rb_scan_args(argc, argv, "01", &level) == 0)
502
+ level = INT2NUM((long)1);
503
+
54
504
  Data_Get_Struct(self, xmlNode, node);
55
505
 
56
- dup = xmlCopyNode(node, 1);
506
+ dup = xmlDocCopyNode(node, node->doc, (int)NUM2INT(level));
57
507
  if(dup == NULL) return Qnil;
58
508
 
59
- return Nokogiri_wrap_xml_node(dup);
509
+ nokogiri_root_node(dup);
510
+
511
+ return Nokogiri_wrap_xml_node(rb_obj_class(self), dup);
60
512
  }
61
513
 
62
514
  /*
@@ -70,7 +522,7 @@ static VALUE unlink_node(VALUE self)
70
522
  xmlNodePtr node;
71
523
  Data_Get_Struct(self, xmlNode, node);
72
524
  xmlUnlinkNode(node);
73
- Nokogiri_xml_node_owned_set(node);
525
+ nokogiri_root_node(node);
74
526
  return self;
75
527
  }
76
528
 
@@ -84,9 +536,7 @@ static VALUE blank_eh(VALUE self)
84
536
  {
85
537
  xmlNodePtr node;
86
538
  Data_Get_Struct(self, xmlNode, node);
87
- if(1 == xmlIsBlankNode(node))
88
- return Qtrue;
89
- return Qfalse;
539
+ return (1 == xmlIsBlankNode(node)) ? Qtrue : Qfalse ;
90
540
  }
91
541
 
92
542
  /*
@@ -103,7 +553,7 @@ static VALUE next_sibling(VALUE self)
103
553
  sibling = node->next;
104
554
  if(!sibling) return Qnil;
105
555
 
106
- return Nokogiri_wrap_xml_node(sibling) ;
556
+ return Nokogiri_wrap_xml_node(Qnil, sibling) ;
107
557
  }
108
558
 
109
559
  /*
@@ -117,164 +567,473 @@ static VALUE previous_sibling(VALUE self)
117
567
  xmlNodePtr node, sibling;
118
568
  Data_Get_Struct(self, xmlNode, node);
119
569
 
120
- sibling = node->prev;
121
- if(!sibling) return Qnil;
570
+ sibling = node->prev;
571
+ if(!sibling) return Qnil;
572
+
573
+ return Nokogiri_wrap_xml_node(Qnil, sibling);
574
+ }
575
+
576
+ /*
577
+ * call-seq:
578
+ * next_element
579
+ *
580
+ * Returns the next Nokogiri::XML::Element type sibling node.
581
+ */
582
+ static VALUE next_element(VALUE self)
583
+ {
584
+ xmlNodePtr node, sibling;
585
+ Data_Get_Struct(self, xmlNode, node);
586
+
587
+ sibling = xmlNextElementSibling(node);
588
+ if(!sibling) return Qnil;
589
+
590
+ return Nokogiri_wrap_xml_node(Qnil, sibling);
591
+ }
592
+
593
+ /*
594
+ * call-seq:
595
+ * previous_element
596
+ *
597
+ * Returns the previous Nokogiri::XML::Element type sibling node.
598
+ */
599
+ static VALUE previous_element(VALUE self)
600
+ {
601
+ xmlNodePtr node, sibling;
602
+ Data_Get_Struct(self, xmlNode, node);
603
+
604
+ /*
605
+ * note that we don't use xmlPreviousElementSibling here because it's buggy pre-2.7.7.
606
+ */
607
+ sibling = node->prev;
608
+ if(!sibling) return Qnil;
609
+
610
+ while(sibling && sibling->type != XML_ELEMENT_NODE)
611
+ sibling = sibling->prev;
612
+
613
+ return sibling ? Nokogiri_wrap_xml_node(Qnil, sibling) : Qnil ;
614
+ }
615
+
616
+ /* :nodoc: */
617
+ static VALUE replace(VALUE self, VALUE new_node)
618
+ {
619
+ VALUE reparent = reparent_node_with(self, new_node, xmlReplaceNodeWrapper);
620
+
621
+ xmlNodePtr pivot;
622
+ Data_Get_Struct(self, xmlNode, pivot);
623
+ nokogiri_root_node(pivot);
624
+
625
+ return reparent;
626
+ }
627
+
628
+ /*
629
+ * call-seq:
630
+ * children
631
+ *
632
+ * Get the list of children for this node as a NodeSet
633
+ */
634
+ static VALUE children(VALUE self)
635
+ {
636
+ xmlNodePtr node;
637
+ xmlNodePtr child;
638
+ xmlNodeSetPtr set;
639
+ VALUE document;
640
+ VALUE node_set;
641
+
642
+ Data_Get_Struct(self, xmlNode, node);
643
+
644
+ child = node->children;
645
+ set = xmlXPathNodeSetCreate(child);
646
+
647
+ document = DOC_RUBY_OBJECT(node->doc);
648
+
649
+ if(!child) return Nokogiri_wrap_xml_node_set(set, document);
650
+
651
+ child = child->next;
652
+ while(NULL != child) {
653
+ xmlXPathNodeSetAddUnique(set, child);
654
+ child = child->next;
655
+ }
656
+
657
+ node_set = Nokogiri_wrap_xml_node_set(set, document);
658
+
659
+ return node_set;
660
+ }
661
+
662
+ /*
663
+ * call-seq:
664
+ * element_children
665
+ *
666
+ * Get the list of children for this node as a NodeSet. All nodes will be
667
+ * element nodes.
668
+ *
669
+ * Example:
670
+ *
671
+ * @doc.root.element_children.all? { |x| x.element? } # => true
672
+ */
673
+ static VALUE element_children(VALUE self)
674
+ {
675
+ xmlNodePtr node;
676
+ xmlNodePtr child;
677
+ xmlNodeSetPtr set;
678
+ VALUE document;
679
+ VALUE node_set;
680
+
681
+ Data_Get_Struct(self, xmlNode, node);
682
+
683
+ child = xmlFirstElementChild(node);
684
+ set = xmlXPathNodeSetCreate(child);
685
+
686
+ document = DOC_RUBY_OBJECT(node->doc);
687
+
688
+ if(!child) return Nokogiri_wrap_xml_node_set(set, document);
689
+
690
+ child = xmlNextElementSibling(child);
691
+ while(NULL != child) {
692
+ xmlXPathNodeSetAddUnique(set, child);
693
+ child = xmlNextElementSibling(child);
694
+ }
695
+
696
+ node_set = Nokogiri_wrap_xml_node_set(set, document);
697
+
698
+ return node_set;
699
+ }
700
+
701
+ /*
702
+ * call-seq:
703
+ * child
704
+ *
705
+ * Returns the child node
706
+ */
707
+ static VALUE child(VALUE self)
708
+ {
709
+ xmlNodePtr node, child;
710
+ Data_Get_Struct(self, xmlNode, node);
711
+
712
+ child = node->children;
713
+ if(!child) return Qnil;
714
+
715
+ return Nokogiri_wrap_xml_node(Qnil, child);
716
+ }
717
+
718
+ /*
719
+ * call-seq:
720
+ * first_element_child
721
+ *
722
+ * Returns the first child node of this node that is an element.
723
+ *
724
+ * Example:
725
+ *
726
+ * @doc.root.first_element_child.element? # => true
727
+ */
728
+ static VALUE first_element_child(VALUE self)
729
+ {
730
+ xmlNodePtr node, child;
731
+ Data_Get_Struct(self, xmlNode, node);
732
+
733
+ child = xmlFirstElementChild(node);
734
+ if(!child) return Qnil;
735
+
736
+ return Nokogiri_wrap_xml_node(Qnil, child);
737
+ }
738
+
739
+ /*
740
+ * call-seq:
741
+ * last_element_child
742
+ *
743
+ * Returns the last child node of this node that is an element.
744
+ *
745
+ * Example:
746
+ *
747
+ * @doc.root.last_element_child.element? # => true
748
+ */
749
+ static VALUE last_element_child(VALUE self)
750
+ {
751
+ xmlNodePtr node, child;
752
+ Data_Get_Struct(self, xmlNode, node);
753
+
754
+ child = xmlLastElementChild(node);
755
+ if(!child) return Qnil;
756
+
757
+ return Nokogiri_wrap_xml_node(Qnil, child);
758
+ }
759
+
760
+ /*
761
+ * call-seq:
762
+ * key?(attribute)
763
+ *
764
+ * Returns true if +attribute+ is set
765
+ */
766
+ static VALUE key_eh(VALUE self, VALUE attribute)
767
+ {
768
+ xmlNodePtr node;
769
+ Data_Get_Struct(self, xmlNode, node);
770
+ if(xmlHasProp(node, (xmlChar *)StringValueCStr(attribute)))
771
+ return Qtrue;
772
+ return Qfalse;
773
+ }
774
+
775
+ /*
776
+ * call-seq:
777
+ * namespaced_key?(attribute, namespace)
778
+ *
779
+ * Returns true if +attribute+ is set with +namespace+
780
+ */
781
+ static VALUE namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace)
782
+ {
783
+ xmlNodePtr node;
784
+ Data_Get_Struct(self, xmlNode, node);
785
+ if(xmlHasNsProp(node, (xmlChar *)StringValueCStr(attribute),
786
+ NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace)))
787
+ return Qtrue;
788
+ return Qfalse;
789
+ }
790
+
791
+ /*
792
+ * call-seq:
793
+ * []=(property, value)
794
+ *
795
+ * Set the +property+ to +value+
796
+ */
797
+ static VALUE set(VALUE self, VALUE property, VALUE value)
798
+ {
799
+ xmlNodePtr node, cur;
800
+ xmlAttrPtr prop;
801
+ Data_Get_Struct(self, xmlNode, node);
802
+
803
+ /* If a matching attribute node already exists, then xmlSetProp will destroy
804
+ * the existing node's children. However, if Nokogiri has a node object
805
+ * pointing to one of those children, we are left with a broken reference.
806
+ *
807
+ * We can avoid this by unlinking these nodes first.
808
+ */
809
+ if (node->type != XML_ELEMENT_NODE)
810
+ return(Qnil);
811
+ prop = xmlHasProp(node, (xmlChar *)StringValueCStr(property));
812
+ if (prop && prop->children) {
813
+ for (cur = prop->children; cur; cur = cur->next) {
814
+ if (cur->_private) {
815
+ nokogiri_root_node(cur);
816
+ xmlUnlinkNode(cur);
817
+ }
818
+ }
819
+ }
820
+
821
+ xmlSetProp(node, (xmlChar *)StringValueCStr(property),
822
+ (xmlChar *)StringValueCStr(value));
122
823
 
123
- return Nokogiri_wrap_xml_node(sibling);
824
+ return value;
124
825
  }
125
826
 
126
827
  /*
127
- * call-seq:
128
- * replace(new_node)
828
+ * call-seq:
829
+ * get(attribute)
129
830
  *
130
- * replace node with the new node in the document.
831
+ * Get the value for +attribute+
131
832
  */
132
- static VALUE replace(VALUE self, VALUE _new_node)
833
+ static VALUE get(VALUE self, VALUE rattribute)
133
834
  {
134
- xmlNodePtr node, new_node;
835
+ xmlNodePtr node;
836
+ xmlChar* value = 0;
837
+ VALUE rvalue ;
838
+ char* attribute = 0;
839
+ char *colon = 0, *attr_name = 0, *prefix = 0;
840
+ xmlNsPtr ns;
841
+
842
+ if (NIL_P(rattribute)) return Qnil;
843
+
135
844
  Data_Get_Struct(self, xmlNode, node);
136
- Data_Get_Struct(_new_node, xmlNode, new_node);
845
+ attribute = strdup(StringValueCStr(rattribute));
846
+
847
+ colon = strchr(attribute, ':');
848
+ if (colon) {
849
+ (*colon) = 0 ; /* create two null-terminated strings of the prefix and attribute name */
850
+ prefix = attribute ;
851
+ attr_name = colon + 1 ;
852
+ ns = xmlSearchNs(node->doc, node, (const xmlChar *)(prefix));
853
+ if (ns) {
854
+ value = xmlGetNsProp(node, (xmlChar*)(attr_name), ns->href);
855
+ } else {
856
+ value = xmlGetProp(node, (xmlChar*)StringValueCStr(rattribute));
857
+ }
858
+ } else {
859
+ value = xmlGetNoNsProp(node, (xmlChar*)attribute);
860
+ }
137
861
 
138
- xmlReplaceNode(node, new_node);
139
- Nokogiri_xml_node_owned_set(node);
140
- Nokogiri_xml_node_owned_set(new_node);
141
- return self ;
142
- }
862
+ free(attribute);
863
+ if (!value) return Qnil;
864
+
865
+ rvalue = NOKOGIRI_STR_NEW2(value);
866
+ xmlFree(value);
143
867
 
868
+ return rvalue ;
869
+ }
144
870
 
145
871
  /*
146
872
  * call-seq:
147
- * child
873
+ * set_namespace(namespace)
148
874
  *
149
- * Returns the child node
875
+ * Set the namespace to +namespace+
150
876
  */
151
- static VALUE child(VALUE self)
877
+ static VALUE set_namespace(VALUE self, VALUE namespace)
152
878
  {
153
- xmlNodePtr node, child;
879
+ xmlNodePtr node;
880
+ xmlNsPtr ns = NULL;
881
+
154
882
  Data_Get_Struct(self, xmlNode, node);
155
883
 
156
- child = node->children;
157
- if(!child) return Qnil;
884
+ if(!NIL_P(namespace))
885
+ Data_Get_Struct(namespace, xmlNs, ns);
886
+
887
+ xmlSetNs(node, ns);
158
888
 
159
- return Nokogiri_wrap_xml_node(child);
889
+ return self;
160
890
  }
161
891
 
162
892
  /*
163
893
  * call-seq:
164
- * key?(attribute)
894
+ * attribute(name)
165
895
  *
166
- * Returns true if +attribute+ is set
896
+ * Get the attribute node with +name+
167
897
  */
168
- static VALUE key_eh(VALUE self, VALUE attribute)
898
+ static VALUE attr(VALUE self, VALUE name)
169
899
  {
170
900
  xmlNodePtr node;
901
+ xmlAttrPtr prop;
171
902
  Data_Get_Struct(self, xmlNode, node);
172
- if(xmlHasProp(node, (xmlChar *)StringValuePtr(attribute)))
173
- return Qtrue;
174
- return Qfalse;
903
+ prop = xmlHasProp(node, (xmlChar *)StringValueCStr(name));
904
+
905
+ if(! prop) return Qnil;
906
+ return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)prop);
175
907
  }
176
908
 
177
909
  /*
178
910
  * call-seq:
179
- * []=(property, value)
911
+ * attribute_with_ns(name, namespace)
180
912
  *
181
- * Set the +property+ to +value+
913
+ * Get the attribute node with +name+ and +namespace+
182
914
  */
183
- static VALUE set(VALUE self, VALUE property, VALUE value)
915
+ static VALUE attribute_with_ns(VALUE self, VALUE name, VALUE namespace)
184
916
  {
185
917
  xmlNodePtr node;
918
+ xmlAttrPtr prop;
186
919
  Data_Get_Struct(self, xmlNode, node);
187
- xmlSetProp(node, (xmlChar *)StringValuePtr(property),
188
- (xmlChar *)StringValuePtr(value));
920
+ prop = xmlHasNsProp(node, (xmlChar *)StringValueCStr(name),
921
+ NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace));
189
922
 
190
- return value;
923
+ if(! prop) return Qnil;
924
+ return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)prop);
191
925
  }
192
926
 
193
927
  /*
194
928
  * call-seq:
195
- * remove_attribute(property)
929
+ * attribute_nodes()
196
930
  *
197
- * remove the property +property+
931
+ * returns a list containing the Node attributes.
198
932
  */
199
- static VALUE remove_prop(VALUE self, VALUE property)
933
+ static VALUE attribute_nodes(VALUE self)
200
934
  {
201
- xmlNodePtr node;
202
- xmlAttrPtr attr ;
203
- Data_Get_Struct(self, xmlNode, node);
204
- attr = xmlHasProp(node, (xmlChar *)StringValuePtr(property));
205
- if (attr) { xmlRemoveProp(attr); }
206
- return Qnil;
935
+ /* this code in the mode of xmlHasProp() */
936
+ xmlNodePtr node;
937
+ VALUE attr;
938
+
939
+ Data_Get_Struct(self, xmlNode, node);
940
+
941
+ attr = rb_ary_new();
942
+ Nokogiri_xml_node_properties(node, attr);
943
+
944
+ return attr ;
207
945
  }
208
946
 
947
+
209
948
  /*
210
- * call-seq:
211
- * get(attribute)
949
+ * call-seq:
950
+ * namespace()
212
951
  *
213
- * Get the value for +attribute+
952
+ * returns the namespace of the element or attribute node as a Namespace
953
+ * object, or nil if there is no namespace for the element or attribute.
214
954
  */
215
- static VALUE get(VALUE self, VALUE attribute)
955
+ static VALUE namespace(VALUE self)
216
956
  {
217
- xmlNodePtr node;
218
- xmlChar* propstr ;
219
- VALUE rval ;
957
+ xmlNodePtr node ;
220
958
  Data_Get_Struct(self, xmlNode, node);
221
- propstr = xmlGetProp(node, (xmlChar *)StringValuePtr(attribute));
222
- rval = rb_str_new2((char *)propstr) ;
223
- xmlFree(propstr);
224
- return rval ;
959
+
960
+ if (node->ns)
961
+ return Nokogiri_wrap_xml_namespace(node->doc, node->ns);
962
+
963
+ return Qnil ;
225
964
  }
226
965
 
227
966
  /*
228
967
  * call-seq:
229
- * attributes()
968
+ * namespace_definitions()
230
969
  *
231
- * returns a hash containing the node's attributes.
970
+ * returns namespaces defined on self element directly, as an array of Namespace objects. Includes both a default namespace (as in"xmlns="), and prefixed namespaces (as in "xmlns:prefix=").
232
971
  */
233
- static VALUE attributes(VALUE self)
972
+ static VALUE namespace_definitions(VALUE self)
234
973
  {
235
- /* this code in the mode of xmlHasProp() */
236
- xmlNodePtr node ;
237
- VALUE attr ;
974
+ /* this code in the mode of xmlHasProp() */
975
+ xmlNodePtr node ;
976
+ VALUE list;
977
+ xmlNsPtr ns;
238
978
 
239
- attr = rb_hash_new() ;
240
- Data_Get_Struct(self, xmlNode, node);
979
+ Data_Get_Struct(self, xmlNode, node);
241
980
 
242
- Nokogiri_xml_node_properties(node, attr);
981
+ list = rb_ary_new();
243
982
 
244
- return attr ;
983
+ ns = node->nsDef;
984
+
985
+ if(!ns) return list;
986
+
987
+ while(NULL != ns) {
988
+ rb_ary_push(list, Nokogiri_wrap_xml_namespace(node->doc, ns));
989
+ ns = ns->next;
990
+ }
991
+
992
+ return list;
245
993
  }
246
994
 
247
995
  /*
248
996
  * call-seq:
249
- * namespaces()
997
+ * namespace_scopes()
250
998
  *
251
- * returns a hash containing the node's namespaces.
999
+ * returns namespaces in scope for self -- those defined on self element
1000
+ * directly or any ancestor node -- as an array of Namespace objects. Default
1001
+ * namespaces ("xmlns=" style) for self are included in this array; Default
1002
+ * namespaces for ancestors, however, are not. See also #namespaces
252
1003
  */
253
- static VALUE namespaces(VALUE self)
1004
+ static VALUE namespace_scopes(VALUE self)
254
1005
  {
255
- /* this code in the mode of xmlHasProp() */
256
- xmlNodePtr node ;
257
- VALUE attr ;
1006
+ xmlNodePtr node ;
1007
+ VALUE list;
1008
+ xmlNsPtr *ns_list;
1009
+ int j;
258
1010
 
259
- attr = rb_hash_new() ;
260
- Data_Get_Struct(self, xmlNode, node);
1011
+ Data_Get_Struct(self, xmlNode, node);
261
1012
 
262
- Nokogiri_xml_node_namespaces(node, attr);
1013
+ list = rb_ary_new();
1014
+ ns_list = xmlGetNsList(node->doc, node);
263
1015
 
264
- return attr ;
1016
+ if(!ns_list) return list;
1017
+
1018
+ for (j = 0 ; ns_list[j] != NULL ; ++j) {
1019
+ rb_ary_push(list, Nokogiri_wrap_xml_namespace(node->doc, ns_list[j]));
1020
+ }
1021
+
1022
+ xmlFree(ns_list);
1023
+ return list;
265
1024
  }
266
1025
 
267
1026
  /*
268
1027
  * call-seq:
269
- * type
1028
+ * node_type
270
1029
  *
271
- * Get the type for this node
1030
+ * Get the type for this Node
272
1031
  */
273
- static VALUE type(VALUE self)
1032
+ static VALUE node_type(VALUE self)
274
1033
  {
275
1034
  xmlNodePtr node;
276
1035
  Data_Get_Struct(self, xmlNode, node);
277
- return INT2NUM((int)node->type);
1036
+ return INT2NUM((long)node->type);
278
1037
  }
279
1038
 
280
1039
  /*
@@ -283,11 +1042,20 @@ static VALUE type(VALUE self)
283
1042
  *
284
1043
  * Set the content for this Node
285
1044
  */
286
- static VALUE set_content(VALUE self, VALUE content)
1045
+ static VALUE set_native_content(VALUE self, VALUE content)
287
1046
  {
288
- xmlNodePtr node;
1047
+ xmlNodePtr node, child, next ;
289
1048
  Data_Get_Struct(self, xmlNode, node);
290
- xmlNodeSetContent(node, (xmlChar *)StringValuePtr(content));
1049
+
1050
+ child = node->children;
1051
+ while (NULL != child) {
1052
+ next = child->next ;
1053
+ xmlUnlinkNode(child) ;
1054
+ nokogiri_root_node(child);
1055
+ child = next ;
1056
+ }
1057
+
1058
+ xmlNodeSetContent(node, (xmlChar *)StringValueCStr(content));
291
1059
  return content;
292
1060
  }
293
1061
 
@@ -297,14 +1065,16 @@ static VALUE set_content(VALUE self, VALUE content)
297
1065
  *
298
1066
  * Returns the content for this Node
299
1067
  */
300
- static VALUE get_content(VALUE self)
1068
+ static VALUE get_native_content(VALUE self)
301
1069
  {
302
1070
  xmlNodePtr node;
1071
+ xmlChar * content;
1072
+
303
1073
  Data_Get_Struct(self, xmlNode, node);
304
1074
 
305
- xmlChar * content = xmlNodeGetContent(node);
1075
+ content = xmlNodeGetContent(node);
306
1076
  if(content) {
307
- VALUE rval = rb_str_new2((char *)content);
1077
+ VALUE rval = NOKOGIRI_STR_NEW2(content);
308
1078
  xmlFree(content);
309
1079
  return rval;
310
1080
  }
@@ -313,19 +1083,52 @@ static VALUE get_content(VALUE self)
313
1083
 
314
1084
  /*
315
1085
  * call-seq:
316
- * parent=(parent_node)
1086
+ * lang=
317
1087
  *
318
- * Set the parent Node for this Node
1088
+ * Set the language of a node, i.e. the values of the xml:lang attribute.
319
1089
  */
320
- static VALUE set_parent(VALUE self, VALUE parent_node)
1090
+ static VALUE set_lang(VALUE self_rb, VALUE lang_rb)
321
1091
  {
322
- xmlNodePtr node, parent;
323
- Data_Get_Struct(self, xmlNode, node);
324
- Data_Get_Struct(parent_node, xmlNode, parent);
1092
+ xmlNodePtr self ;
1093
+ xmlChar* lang ;
1094
+
1095
+ Data_Get_Struct(self_rb, xmlNode, self);
1096
+ lang = (xmlChar*)StringValueCStr(lang_rb);
1097
+
1098
+ xmlNodeSetLang(self, lang);
1099
+
1100
+ return Qnil ;
1101
+ }
1102
+
1103
+ /*
1104
+ * call-seq:
1105
+ * lang
1106
+ *
1107
+ * Searches the language of a node, i.e. the values of the xml:lang attribute or
1108
+ * the one carried by the nearest ancestor.
1109
+ */
1110
+ static VALUE get_lang(VALUE self_rb)
1111
+ {
1112
+ xmlNodePtr self ;
1113
+ xmlChar* lang ;
1114
+ VALUE lang_rb ;
1115
+
1116
+ Data_Get_Struct(self_rb, xmlNode, self);
1117
+
1118
+ lang = xmlNodeGetLang(self);
1119
+ if (lang) {
1120
+ lang_rb = NOKOGIRI_STR_NEW2(lang);
1121
+ xmlFree(lang);
1122
+ return lang_rb ;
1123
+ }
1124
+
1125
+ return Qnil ;
1126
+ }
325
1127
 
326
- xmlAddChild(parent, node);
327
- Nokogiri_xml_node_owned_set(node);
328
- return parent_node;
1128
+ /* :nodoc: */
1129
+ static VALUE add_child(VALUE self, VALUE new_child)
1130
+ {
1131
+ return reparent_node_with(self, new_child, xmlAddChild);
329
1132
  }
330
1133
 
331
1134
  /*
@@ -342,7 +1145,7 @@ static VALUE get_parent(VALUE self)
342
1145
  parent = node->parent;
343
1146
  if(!parent) return Qnil;
344
1147
 
345
- return Nokogiri_wrap_xml_node(parent) ;
1148
+ return Nokogiri_wrap_xml_node(Qnil, parent) ;
346
1149
  }
347
1150
 
348
1151
  /*
@@ -355,7 +1158,7 @@ static VALUE set_name(VALUE self, VALUE new_name)
355
1158
  {
356
1159
  xmlNodePtr node;
357
1160
  Data_Get_Struct(self, xmlNode, node);
358
- xmlNodeSetName(node, (xmlChar*)StringValuePtr(new_name));
1161
+ xmlNodeSetName(node, (xmlChar*)StringValueCStr(new_name));
359
1162
  return new_name;
360
1163
  }
361
1164
 
@@ -369,7 +1172,9 @@ static VALUE get_name(VALUE self)
369
1172
  {
370
1173
  xmlNodePtr node;
371
1174
  Data_Get_Struct(self, xmlNode, node);
372
- return rb_str_new2((const char *)node->name);
1175
+ if(node->name)
1176
+ return NOKOGIRI_STR_NEW2(node->name);
1177
+ return Qnil;
373
1178
  }
374
1179
 
375
1180
  /*
@@ -382,328 +1187,493 @@ static VALUE path(VALUE self)
382
1187
  {
383
1188
  xmlNodePtr node;
384
1189
  xmlChar *path ;
385
- VALUE rval ;
1190
+ VALUE rval;
1191
+
386
1192
  Data_Get_Struct(self, xmlNode, node);
387
-
1193
+
388
1194
  path = xmlGetNodePath(node);
389
- rval = rb_str_new2((char *)path);
1195
+ rval = NOKOGIRI_STR_NEW2(path);
390
1196
  xmlFree(path);
391
1197
  return rval ;
392
1198
  }
393
1199
 
1200
+ /* :nodoc: */
1201
+ static VALUE add_next_sibling(VALUE self, VALUE new_sibling)
1202
+ {
1203
+ return reparent_node_with(self, new_sibling, xmlAddNextSibling) ;
1204
+ }
1205
+
1206
+ /* :nodoc: */
1207
+ static VALUE add_previous_sibling(VALUE self, VALUE new_sibling)
1208
+ {
1209
+ return reparent_node_with(self, new_sibling, xmlAddPrevSibling) ;
1210
+ }
1211
+
394
1212
  /*
395
1213
  * call-seq:
396
- * document
1214
+ * native_write_to(io, encoding, options)
397
1215
  *
398
- * Returns the Nokogiri::XML::Document associated with this Node
1216
+ * Write this Node to +io+ with +encoding+ and +options+
399
1217
  */
400
- static VALUE document(VALUE self)
1218
+ static VALUE native_write_to(
1219
+ VALUE self,
1220
+ VALUE io,
1221
+ VALUE encoding,
1222
+ VALUE indent_string,
1223
+ VALUE options
1224
+ ) {
1225
+ xmlNodePtr node;
1226
+ const char * before_indent;
1227
+ xmlSaveCtxtPtr savectx;
1228
+
1229
+ Data_Get_Struct(self, xmlNode, node);
1230
+
1231
+ xmlIndentTreeOutput = 1;
1232
+
1233
+ before_indent = xmlTreeIndentString;
1234
+
1235
+ xmlTreeIndentString = StringValueCStr(indent_string);
1236
+
1237
+ savectx = xmlSaveToIO(
1238
+ (xmlOutputWriteCallback)io_write_callback,
1239
+ (xmlOutputCloseCallback)io_close_callback,
1240
+ (void *)io,
1241
+ RTEST(encoding) ? StringValueCStr(encoding) : NULL,
1242
+ (int)NUM2INT(options)
1243
+ );
1244
+
1245
+ xmlSaveTree(savectx, node);
1246
+ xmlSaveClose(savectx);
1247
+
1248
+ xmlTreeIndentString = before_indent;
1249
+ return io;
1250
+ }
1251
+
1252
+ /*
1253
+ * call-seq:
1254
+ * line
1255
+ *
1256
+ * Returns the line for this Node
1257
+ */
1258
+ static VALUE line(VALUE self)
401
1259
  {
402
1260
  xmlNodePtr node;
403
1261
  Data_Get_Struct(self, xmlNode, node);
404
1262
 
405
- if(!node->doc) return Qnil;
406
- return (VALUE)node->doc->_private;
1263
+ return INT2NUM(xmlGetLineNo(node));
407
1264
  }
408
1265
 
409
1266
  /*
410
- * call-seq:
411
- * add_next_sibling(node)
1267
+ * call-seq:
1268
+ * add_namespace_definition(prefix, href)
412
1269
  *
413
- * Insert +node+ after this node (as a sibling).
1270
+ * Adds a namespace definition with +prefix+ using +href+ value. The result is
1271
+ * as if parsed XML for this node had included an attribute
1272
+ * 'xmlns:prefix=value'. A default namespace for this node ("xmlns=") can be
1273
+ * added by passing 'nil' for prefix. Namespaces added this way will not
1274
+ * show up in #attributes, but they will be included as an xmlns attribute
1275
+ * when the node is serialized to XML.
414
1276
  */
415
- static VALUE add_next_sibling(VALUE self, VALUE rb_node)
1277
+ static VALUE add_namespace_definition(VALUE self, VALUE prefix, VALUE href)
416
1278
  {
417
- xmlNodePtr node, new_sibling;
1279
+ xmlNodePtr node, namespacee;
1280
+ xmlNsPtr ns;
1281
+
418
1282
  Data_Get_Struct(self, xmlNode, node);
419
- Data_Get_Struct(rb_node, xmlNode, new_sibling);
420
- xmlAddNextSibling(node, new_sibling);
1283
+ namespacee = node ;
421
1284
 
422
- rb_funcall(rb_node, rb_intern("decorate!"), 0);
423
- Nokogiri_xml_node_owned_set(new_sibling);
1285
+ ns = xmlSearchNs(
1286
+ node->doc,
1287
+ node,
1288
+ (const xmlChar *)(NIL_P(prefix) ? NULL : StringValueCStr(prefix))
1289
+ );
424
1290
 
425
- return rb_node;
1291
+ if(!ns) {
1292
+ if (node->type != XML_ELEMENT_NODE) {
1293
+ namespacee = node->parent;
1294
+ }
1295
+ ns = xmlNewNs(
1296
+ namespacee,
1297
+ (const xmlChar *)StringValueCStr(href),
1298
+ (const xmlChar *)(NIL_P(prefix) ? NULL : StringValueCStr(prefix))
1299
+ );
1300
+ }
1301
+
1302
+ if (!ns) return Qnil ;
1303
+
1304
+ if(NIL_P(prefix) || node != namespacee) xmlSetNs(node, ns);
1305
+
1306
+ return Nokogiri_wrap_xml_namespace(node->doc, ns);
426
1307
  }
427
1308
 
428
1309
  /*
429
1310
  * call-seq:
430
- * add_previous_sibling(node)
1311
+ * new(name, document)
431
1312
  *
432
- * Insert +node+ before this node (as a sibling).
1313
+ * Create a new node with +name+ sharing GC lifecycle with +document+
433
1314
  */
434
- static VALUE add_previous_sibling(VALUE self, VALUE rb_node)
1315
+ static VALUE new(int argc, VALUE *argv, VALUE klass)
435
1316
  {
436
- xmlNodePtr node, new_sibling;
437
- Data_Get_Struct(self, xmlNode, node);
438
- Data_Get_Struct(rb_node, xmlNode, new_sibling);
439
- xmlAddPrevSibling(node, new_sibling);
1317
+ xmlDocPtr doc;
1318
+ xmlNodePtr node;
1319
+ VALUE name;
1320
+ VALUE document;
1321
+ VALUE rest;
1322
+ VALUE rb_node;
1323
+
1324
+ rb_scan_args(argc, argv, "2*", &name, &document, &rest);
1325
+
1326
+ Data_Get_Struct(document, xmlDoc, doc);
1327
+
1328
+ node = xmlNewNode(NULL, (xmlChar *)StringValueCStr(name));
1329
+ node->doc = doc->doc;
1330
+ nokogiri_root_node(node);
1331
+
1332
+ rb_node = Nokogiri_wrap_xml_node(
1333
+ klass == cNokogiriXmlNode ? (VALUE)NULL : klass,
1334
+ node
1335
+ );
1336
+ rb_obj_call_init(rb_node, argc, argv);
440
1337
 
441
- rb_funcall(rb_node, rb_intern("decorate!"), 0);
442
- Nokogiri_xml_node_owned_set(new_sibling);
1338
+ if(rb_block_given_p()) rb_yield(rb_node);
443
1339
 
444
1340
  return rb_node;
445
1341
  }
446
1342
 
447
1343
  /*
448
1344
  * call-seq:
449
- * to_xml
1345
+ * dump_html
450
1346
  *
451
- * Returns this node as XML
1347
+ * Returns the Node as html.
452
1348
  */
453
- static VALUE to_xml(VALUE self)
1349
+ static VALUE dump_html(VALUE self)
454
1350
  {
455
1351
  xmlBufferPtr buf ;
456
1352
  xmlNodePtr node ;
457
- VALUE xml ;
1353
+ VALUE html;
458
1354
 
459
1355
  Data_Get_Struct(self, xmlNode, node);
460
1356
 
461
1357
  buf = xmlBufferCreate() ;
462
- xmlNodeDump(buf, node->doc, node, 2, 1);
463
- xml = rb_str_new2((char*)buf->content);
1358
+ htmlNodeDump(buf, node->doc, node);
1359
+ html = NOKOGIRI_STR_NEW2(buf->content);
464
1360
  xmlBufferFree(buf);
465
- return xml ;
1361
+ return html ;
466
1362
  }
467
1363
 
468
-
469
1364
  /*
470
1365
  * call-seq:
471
- * new(name)
1366
+ * compare(other)
472
1367
  *
473
- * Create a new node with +name+
1368
+ * Compare this Node to +other+ with respect to their Document
474
1369
  */
475
- static VALUE new(VALUE klass, VALUE name)
1370
+ static VALUE compare(VALUE self, VALUE _other)
476
1371
  {
477
- xmlNodePtr node = xmlNewNode(NULL, (xmlChar *)StringValuePtr(name));
478
- VALUE rb_node = Nokogiri_wrap_xml_node(node) ;
479
-
480
- if(rb_block_given_p()) rb_yield(rb_node);
481
-
482
- Nokogiri_xml_node_owned_set(node);
1372
+ xmlNodePtr node, other;
1373
+ Data_Get_Struct(self, xmlNode, node);
1374
+ Data_Get_Struct(_other, xmlNode, other);
483
1375
 
484
- return rb_node;
1376
+ return INT2NUM((long)xmlXPathCmpNodes(other, node));
485
1377
  }
486
1378
 
487
1379
 
488
1380
  /*
489
1381
  * call-seq:
490
- * new_from_str(string)
1382
+ * process_xincludes(options)
491
1383
  *
492
- * Create a new node by parsing +string+
1384
+ * Loads and substitutes all xinclude elements below the node. The
1385
+ * parser context will be initialized with +options+.
493
1386
  */
494
- static VALUE new_from_str(VALUE klass, VALUE xml)
1387
+ static VALUE process_xincludes(VALUE self, VALUE options)
495
1388
  {
496
- /*
497
- * I couldn't find a more efficient way to do this. So we create a new
498
- * document and copy (recursively) the root node.
499
- */
500
- VALUE rb_doc ;
501
- xmlDocPtr doc ;
502
- xmlNodePtr node ;
1389
+ int rcode ;
1390
+ xmlNodePtr node;
1391
+ VALUE error_list = rb_ary_new();
503
1392
 
504
- rb_doc = rb_funcall(cNokogiriXmlDocument, rb_intern("read_memory"), 4,
505
- xml, Qnil, Qnil, INT2NUM(0));
506
- Data_Get_Struct(rb_doc, xmlDoc, doc);
507
- node = xmlCopyNode(xmlDocGetRootElement(doc), 1); /* 1 => recursive */
508
- return Nokogiri_wrap_xml_node(node);
509
- }
1393
+ Data_Get_Struct(self, xmlNode, node);
510
1394
 
511
- static void deallocate(xmlNodePtr node)
512
- {
513
- if (! Nokogiri_xml_node_owned_get(node)) {
514
- NOKOGIRI_DEBUG_START_NODE(node);
515
- xmlFreeNode(node);
516
- NOKOGIRI_DEBUG_END(node);
1395
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
1396
+ rcode = xmlXIncludeProcessTreeFlags(node, (int)NUM2INT(options));
1397
+ xmlSetStructuredErrorFunc(NULL, NULL);
1398
+
1399
+ if (rcode < 0) {
1400
+ xmlErrorPtr error;
1401
+
1402
+ error = xmlGetLastError();
1403
+ if(error)
1404
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
1405
+ else
1406
+ rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
517
1407
  }
1408
+
1409
+ return self;
518
1410
  }
519
1411
 
520
- static void gc_mark_node(xmlNodePtr node)
1412
+
1413
+ /* TODO: DOCUMENT ME */
1414
+ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
521
1415
  {
522
- xmlNodePtr child ;
523
- /* mark document */
524
- if (node && node->doc && node->doc->_private)
525
- rb_gc_mark((VALUE)node->doc->_private);
526
- /* mark parent node */
527
- if (node && node->parent && node->parent->_private)
528
- rb_gc_mark((VALUE)node->parent->_private);
529
- /* mark children nodes */
530
- for (child = node->children ; child ; child = child->next) {
531
- if (child->_private)
532
- rb_gc_mark((VALUE)child->_private);
533
- }
534
- /* mark sibling nodes */
535
- if (node->next && node->next->_private)
536
- rb_gc_mark((VALUE)node->next->_private) ;
537
- if (node->prev && node->prev->_private)
538
- rb_gc_mark((VALUE)node->prev->_private) ;
1416
+ xmlNodePtr node, list = 0, tmp, child_iter, node_children, doc_children;
1417
+ xmlNodeSetPtr set;
1418
+ xmlParserErrors error;
1419
+ VALUE doc, err;
1420
+ int doc_is_empty;
1421
+
1422
+ Data_Get_Struct(self, xmlNode, node);
1423
+
1424
+ doc = DOC_RUBY_OBJECT(node->doc);
1425
+ err = rb_iv_get(doc, "@errors");
1426
+ doc_is_empty = (node->doc->children == NULL) ? 1 : 0;
1427
+ node_children = node->children;
1428
+ doc_children = node->doc->children;
1429
+
1430
+ xmlSetStructuredErrorFunc((void *)err, Nokogiri_error_array_pusher);
1431
+
1432
+ /* Twiddle global variable because of a bug in libxml2.
1433
+ * http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
1434
+ */
1435
+ #ifndef HTML_PARSE_NOIMPLIED
1436
+ htmlHandleOmittedElem(0);
1437
+ #endif
1438
+
1439
+ /* This function adds a fake node to the child of +node+. If the parser
1440
+ * does not exit cleanly with XML_ERR_OK, the list is freed. This can
1441
+ * leave the child pointers in a bad state if they were originally empty.
1442
+ *
1443
+ * http://git.gnome.org/browse/libxml2/tree/parser.c#n13177
1444
+ * */
1445
+ error = xmlParseInNodeContext(node, StringValuePtr(_str),
1446
+ (int)RSTRING_LEN(_str),
1447
+ (int)NUM2INT(_options), &list);
1448
+
1449
+ /* xmlParseInNodeContext should not mutate the original document or node,
1450
+ * so reassigning these pointers should be OK. The reason we're reassigning
1451
+ * is because if there were errors, it's possible for the child pointers
1452
+ * to be manipulated. */
1453
+ if (error != XML_ERR_OK) {
1454
+ node->doc->children = doc_children;
1455
+ node->children = node_children;
1456
+ }
1457
+
1458
+ /* make sure parent/child pointers are coherent so an unlink will work
1459
+ * properly (#331)
1460
+ */
1461
+ child_iter = node->doc->children ;
1462
+ while (child_iter) {
1463
+ if (child_iter->parent != (xmlNodePtr)node->doc)
1464
+ child_iter->parent = (xmlNodePtr)node->doc;
1465
+ child_iter = child_iter->next;
1466
+ }
1467
+
1468
+ #ifndef HTML_PARSE_NOIMPLIED
1469
+ htmlHandleOmittedElem(1);
1470
+ #endif
1471
+
1472
+ xmlSetStructuredErrorFunc(NULL, NULL);
1473
+
1474
+ /* Workaround for a libxml2 bug where a parsing error may leave a broken
1475
+ * node reference in node->doc->children.
1476
+ * This workaround is limited to when a parse error occurs, the document
1477
+ * went from having no children to having children, and the context node is
1478
+ * part of a document fragment.
1479
+ * https://bugzilla.gnome.org/show_bug.cgi?id=668155
1480
+ */
1481
+ if (error != XML_ERR_OK && doc_is_empty && node->doc->children != NULL) {
1482
+ child_iter = node;
1483
+ while (child_iter->parent)
1484
+ child_iter = child_iter->parent;
1485
+
1486
+ if (child_iter->type == XML_DOCUMENT_FRAG_NODE)
1487
+ node->doc->children = NULL;
1488
+ }
1489
+
1490
+ /* FIXME: This probably needs to handle more constants... */
1491
+ switch (error) {
1492
+ case XML_ERR_INTERNAL_ERROR:
1493
+ case XML_ERR_NO_MEMORY:
1494
+ rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error);
1495
+ break;
1496
+ default:
1497
+ break;
1498
+ }
1499
+
1500
+ set = xmlXPathNodeSetCreate(NULL);
1501
+
1502
+ while (list) {
1503
+ tmp = list->next;
1504
+ list->next = NULL;
1505
+ xmlXPathNodeSetAddUnique(set, list);
1506
+ nokogiri_root_node(list);
1507
+ list = tmp;
1508
+ }
1509
+
1510
+ return Nokogiri_wrap_xml_node_set(set, doc);
539
1511
  }
540
1512
 
541
- VALUE Nokogiri_wrap_xml_node(xmlNodePtr node)
1513
+ /* Returns a Ruby object wrapping +node+. Document nodes return DOC_RUBY_OBJECT(node->doc). Otherwise a wrapper cached in node->_private is reused when node_has_a_document is set; failing that, a new object is created with class +klass+, or, when +klass+ is nil or false, with a class selected from node->type. */
1514
+ VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
542
1515
  {
543
- if (node->_private)
544
- return (VALUE)node->_private ;
1516
+ VALUE document = Qnil ;
1517
+ VALUE node_cache = Qnil ;
1518
+ VALUE rb_node = Qnil ;
1519
+ nokogiriTuplePtr node_has_a_document;
1520
+ xmlDocPtr doc;
1521
+ void (*mark_method)(xmlNodePtr) = NULL ;
1522
+
1523
+ assert(node); /* node must be non-NULL: it is dereferenced on the next statement */
1524
+
1525
+ if(node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE)
1526
+ return DOC_RUBY_OBJECT(node->doc); /* document nodes never get a new wrapper here */
1527
+
1528
+ /* It's OK if the node doesn't have a fully-realized document (as in XML::Reader). */
1529
+ /* see https://github.com/sparklemotion/nokogiri/issues/95 */
1530
+ /* and https://github.com/sparklemotion/nokogiri/issues/439 */
1531
+ doc = node->doc;
1532
+ if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc; /* when node->doc is itself fragment-typed, follow its ->doc link instead */
1533
+ node_has_a_document = DOC_RUBY_OBJECT_TEST(doc); /* only used as a truth value below. NOTE(review): presumably non-NULL when doc has a Ruby-side document object; confirm against the macro */
545
1534
 
546
- VALUE rb_node = Qnil;
547
-
548
- switch(node->type)
549
- {
550
- VALUE klass;
1535
+ if(node->_private && node_has_a_document)
1536
+ return (VALUE)node->_private; /* reuse the wrapper stored in _private by an earlier call */
551
1537
 
1538
+ if(!RTEST(klass)) { /* caller passed nil or false: derive the Ruby class from the libxml2 node type */
1539
+ switch(node->type)
1540
+ {
1541
+ case XML_ELEMENT_NODE:
1542
+ klass = cNokogiriXmlElement;
1543
+ break;
552
1544
  case XML_TEXT_NODE:
553
- klass = rb_const_get(mNokogiriXml, rb_intern("Text"));
554
- rb_node = Data_Wrap_Struct(klass, gc_mark_node, deallocate, node) ;
1545
+ klass = cNokogiriXmlText;
555
1546
  break;
556
- case XML_ELEMENT_NODE:
557
- klass = rb_const_get(mNokogiriXml, rb_intern("Element"));
558
- rb_node = Data_Wrap_Struct(klass, gc_mark_node, deallocate, node) ;
1547
+ case XML_ATTRIBUTE_NODE:
1548
+ klass = cNokogiriXmlAttr;
1549
+ break;
1550
+ case XML_ENTITY_REF_NODE:
1551
+ klass = cNokogiriXmlEntityReference;
1552
+ break;
1553
+ case XML_COMMENT_NODE:
1554
+ klass = cNokogiriXmlComment;
1555
+ break;
1556
+ case XML_DOCUMENT_FRAG_NODE:
1557
+ klass = cNokogiriXmlDocumentFragment;
1558
+ break;
1559
+ case XML_PI_NODE:
1560
+ klass = cNokogiriXmlProcessingInstruction;
559
1561
  break;
560
1562
  case XML_ENTITY_DECL:
561
- klass = rb_const_get(mNokogiriXml, rb_intern("EntityDeclaration"));
562
- rb_node = Data_Wrap_Struct(klass, gc_mark_node, deallocate, node) ;
1563
+ klass = cNokogiriXmlEntityDecl;
563
1564
  break;
564
1565
  case XML_CDATA_SECTION_NODE:
565
- klass = rb_const_get(mNokogiriXml, rb_intern("CDATA"));
566
- rb_node = Data_Wrap_Struct(klass, gc_mark_node, deallocate, node) ;
1566
+ klass = cNokogiriXmlCData;
567
1567
  break;
568
1568
  case XML_DTD_NODE:
569
- klass = rb_const_get(mNokogiriXml, rb_intern("DTD"));
570
- rb_node = Data_Wrap_Struct(klass, gc_mark_node, deallocate, node) ;
1569
+ klass = cNokogiriXmlDtd;
1570
+ break;
1571
+ case XML_ATTRIBUTE_DECL:
1572
+ klass = cNokogiriXmlAttributeDecl;
1573
+ break;
1574
+ case XML_ELEMENT_DECL:
1575
+ klass = cNokogiriXmlElementDecl;
571
1576
  break;
572
1577
  default:
573
- rb_node = Data_Wrap_Struct(cNokogiriXmlNode, gc_mark_node, deallocate, node) ;
1578
+ klass = cNokogiriXmlNode; /* node types not listed above use the generic Node class */
1579
+ }
1580
+ }
1581
+
1582
+ mark_method = node_has_a_document ? mark : NULL ; /* passed as the mark argument of Data_Wrap_Struct below; NULL when there is no document object */
1583
+
1584
+ rb_node = Data_Wrap_Struct(klass, mark_method, debug_node_dealloc, node) ; /* create the Ruby object that carries the xmlNodePtr */
1585
+ node->_private = (void *)rb_node; /* remember the wrapper on the node so the _private check above can return it next time */
1586
+
1587
+ if (node_has_a_document) { /* push the wrapper onto DOC_NODE_CACHE(doc) and call the decorate method of the document object on it */
1588
+ document = DOC_RUBY_OBJECT(doc);
1589
+ node_cache = DOC_NODE_CACHE(doc);
1590
+ rb_ary_push(node_cache, rb_node);
1591
+ rb_funcall(document, decorate, 1, rb_node);
574
1592
  }
575
1593
 
576
- node->_private = (void*)rb_node ;
577
- rb_funcall(rb_node, rb_intern("decorate!"), 0);
578
- Nokogiri_xml_node_owned_set(node);
579
1594
  return rb_node ;
580
1595
  }
581
1596
 
582
1597
 
583
- void Nokogiri_xml_node_properties(xmlNodePtr node, VALUE attr_hash)
1598
+ void Nokogiri_xml_node_properties(xmlNodePtr node, VALUE attr_list) /* Walks node->properties and appends one wrapped Ruby object per attribute to attr_list, in list order. */
584
1599
  {
585
1600
  xmlAttrPtr prop;
586
- xmlChar* propstr ;
587
1601
  prop = node->properties ;
588
1602
  while (prop != NULL) {
589
- propstr = xmlGetProp(node, prop->name) ;
590
- rb_hash_aset(attr_hash, rb_str_new2((const char*)prop->name),
591
- rb_str_new2((char*)propstr));
592
- xmlFree(propstr);
1603
+ rb_ary_push(attr_list, Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)prop)); /* Qnil as klass makes Nokogiri_wrap_xml_node choose the class from the node type */
593
1604
  prop = prop->next ;
594
1605
  }
595
1606
  }
596
1607
 
597
-
598
- #define XMLNS_PREFIX "xmlns"
599
- #define XMLNS_PREFIX_LEN 6 /* including either colon or \0 */
600
- #define XMLNS_BUFFER_LEN 128
601
- void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash) /* (removed by this change) For element nodes, stores every nsDef entry in attr_hash under the key xmlns or xmlns:PREFIX with ns->href as the value. */
602
- {
603
- xmlNsPtr ns;
604
- static char buffer[XMLNS_BUFFER_LEN] ; /* static scratch buffer reused for every key that fits; longer keys are heap-allocated below */
605
- char *key ;
606
- size_t keylen ;
607
-
608
- if (node->type != XML_ELEMENT_NODE) return ; /* nothing is added for non-element nodes */
609
-
610
- ns = node->nsDef;
611
- while (ns != NULL) {
612
-
613
- keylen = XMLNS_PREFIX_LEN + (ns->prefix ? (strlen((const char*)ns->prefix) + 1) : 0) ; /* XMLNS_PREFIX_LEN covers xmlns plus one byte; the extra +1 with a prefix leaves room for both the colon and the terminator */
614
- if (keylen > XMLNS_BUFFER_LEN) {
615
- key = (char*)malloc(keylen) ; /* NOTE(review): the result is passed to sprintf below without a NULL check */
616
- } else {
617
- key = buffer ;
618
- }
619
-
620
- if (ns->prefix) {
621
- sprintf(key, "%s:%s", XMLNS_PREFIX, ns->prefix);
622
- } else {
623
- sprintf(key, "%s", XMLNS_PREFIX);
624
- }
625
-
626
- rb_hash_aset(attr_hash, rb_str_new2(key), rb_str_new2((const char*)ns->href)) ;
627
- if (key != buffer) { /* only the heap-allocated key is freed */
628
- free(key);
629
- }
630
- ns = ns->next ;
631
- }
632
- }
633
-
634
-
635
- void Nokogiri_xml_node_owned_set(xmlNodePtr node) /* (removed by this change) Stores Qtrue or Qfalse in the @@owned class-variable hash of Node, depending on whether node->parent is set. */
636
- {
637
- VALUE hash = rb_cvar_get(cNokogiriXmlNode, rb_intern("@@owned"));
638
- rb_hash_aset(hash, INT2NUM((long)node), node->parent ? Qtrue : Qfalse) ; /* the hash key is the node pointer cast to long */
639
- }
640
-
641
- int Nokogiri_xml_node_owned_get(xmlNodePtr node) /* (removed by this change) Looks the node pointer up in the @@owned hash; yields Qtrue only when the stored value is Qtrue. */
642
- {
643
- VALUE hash = rb_cvar_get(cNokogiriXmlNode, rb_intern("@@owned"));
644
- VALUE q = rb_hash_aref(hash, INT2NUM((long)node)) ; /* same pointer-as-long key that Nokogiri_xml_node_owned_set writes */
645
- return q == Qtrue ? Qtrue : Qfalse ; /* NOTE(review): Ruby constants are returned through an int; presumably callers rely on Qfalse being zero; confirm */
646
- }
647
-
648
- /*
649
- * call-seq:
650
- * owned?
651
- *
652
- * Is this node owned by a document?
653
- */
654
- static VALUE owned_eh(VALUE self) /* (removed by this change) Reads the flag for this node from the @@owned hash. NOTE(review): Nokogiri_xml_node_owned_set derives that flag from node->parent, so it reflects having a parent rather than document membership. */
655
- {
656
- xmlNodePtr node ;
657
- VALUE hash = rb_cvar_get(cNokogiriXmlNode, rb_intern("@@owned"));
658
- Data_Get_Struct(self, xmlNode, node);
659
- return rb_hash_aref(hash, INT2NUM((long)node)) == Qtrue ? Qtrue : Qfalse ; /* a missing or non-true entry reads as Qfalse */
660
- }
661
-
662
-
663
1608
  VALUE cNokogiriXmlNode ;
1609
+ VALUE cNokogiriXmlElement ;
1610
+ /* init_xml_node: defines Nokogiri::XML::Node and its subclass Nokogiri::XML::Element, stores both in the globals above, and registers the C functions of this file as methods of Node. */
664
1611
  void init_xml_node()
665
1612
  {
666
- /*
667
- * HACK. This is so that rdoc will work with this C file.
668
- */
669
- /*
670
1613
  VALUE nokogiri = rb_define_module("Nokogiri");
671
1614
  VALUE xml = rb_define_module_under(nokogiri, "XML");
672
1615
  VALUE klass = rb_define_class_under(xml, "Node", rb_cObject);
673
- */
674
1616
 
675
- VALUE klass = cNokogiriXmlNode = rb_const_get(mNokogiriXml, rb_intern("Node"));
1617
+ cNokogiriXmlNode = klass; /* publish the Node class through the file-level global */
676
1618
 
677
- rb_define_singleton_method(klass, "new", new, 1);
678
- rb_define_singleton_method(klass, "new_from_str", new_from_str, 1);
1619
+ cNokogiriXmlElement = rb_define_class_under(xml, "Element", klass); /* Element is defined with Node as its superclass */
679
1620
 
1621
+ rb_define_singleton_method(klass, "new", new, -1); /* argc -1: the C function receives (argc, argv, self), so the argument count is variable */
1622
+ /* public instance methods of Node */
1623
+ rb_define_method(klass, "add_namespace_definition", add_namespace_definition, 2);
1624
+ rb_define_method(klass, "node_name", get_name, 0);
680
1625
  rb_define_method(klass, "document", document, 0);
681
- rb_define_method(klass, "name", get_name, 0);
682
- rb_define_method(klass, "name=", set_name, 1);
683
- rb_define_method(klass, "parent=", set_parent, 1);
1626
+ rb_define_method(klass, "node_name=", set_name, 1);
684
1627
  rb_define_method(klass, "parent", get_parent, 0);
685
1628
  rb_define_method(klass, "child", child, 0);
1629
+ rb_define_method(klass, "first_element_child", first_element_child, 0);
1630
+ rb_define_method(klass, "last_element_child", last_element_child, 0);
1631
+ rb_define_method(klass, "children", children, 0);
1632
+ rb_define_method(klass, "element_children", element_children, 0);
686
1633
  rb_define_method(klass, "next_sibling", next_sibling, 0);
687
1634
  rb_define_method(klass, "previous_sibling", previous_sibling, 0);
688
- rb_define_method(klass, "replace", replace, 1);
689
- rb_define_method(klass, "type", type, 0);
690
- rb_define_method(klass, "content", get_content, 0);
1635
+ rb_define_method(klass, "next_element", next_element, 0);
1636
+ rb_define_method(klass, "previous_element", previous_element, 0);
1637
+ rb_define_method(klass, "node_type", node_type, 0);
691
1638
  rb_define_method(klass, "path", path, 0);
692
1639
  rb_define_method(klass, "key?", key_eh, 1);
1640
+ rb_define_method(klass, "namespaced_key?", namespaced_key_eh, 2);
693
1641
  rb_define_method(klass, "blank?", blank_eh, 0);
694
- rb_define_method(klass, "[]=", set, 2);
695
- rb_define_method(klass, "remove_attribute", remove_prop, 1);
696
- rb_define_method(klass, "attributes", attributes, 0);
697
- rb_define_method(klass, "namespaces", namespaces, 0);
698
- rb_define_method(klass, "add_previous_sibling", add_previous_sibling, 1);
699
- rb_define_method(klass, "add_next_sibling", add_next_sibling, 1);
1642
+ rb_define_method(klass, "attribute_nodes", attribute_nodes, 0);
1643
+ rb_define_method(klass, "attribute", attr, 1);
1644
+ rb_define_method(klass, "attribute_with_ns", attribute_with_ns, 2);
1645
+ rb_define_method(klass, "namespace", namespace, 0);
1646
+ rb_define_method(klass, "namespace_definitions", namespace_definitions, 0);
1647
+ rb_define_method(klass, "namespace_scopes", namespace_scopes, 0);
700
1648
  rb_define_method(klass, "encode_special_chars", encode_special_chars, 1);
701
- rb_define_method(klass, "to_xml", to_xml, 0);
702
- rb_define_method(klass, "dup", duplicate_node, 0);
1649
+ rb_define_method(klass, "dup", duplicate_node, -1); /* argc -1: variable argument count, as for new above */
703
1650
  rb_define_method(klass, "unlink", unlink_node, 0);
704
1651
  rb_define_method(klass, "internal_subset", internal_subset, 0);
705
-
706
- rb_define_private_method(klass, "native_content=", set_content, 1);
1652
+ rb_define_method(klass, "external_subset", external_subset, 0);
1653
+ rb_define_method(klass, "create_internal_subset", create_internal_subset, 3);
1654
+ rb_define_method(klass, "create_external_subset", create_external_subset, 3);
1655
+ rb_define_method(klass, "pointer_id", pointer_id, 0);
1656
+ rb_define_method(klass, "line", line, 0);
1657
+ rb_define_method(klass, "content", get_native_content, 0);
1658
+ rb_define_method(klass, "native_content=", set_native_content, 1);
1659
+ rb_define_method(klass, "lang", get_lang, 0);
1660
+ rb_define_method(klass, "lang=", set_lang, 1);
1661
+ /* private methods. NOTE(review): presumably the primitives behind the public Ruby-side Node API; confirm against the Ruby sources */
1662
+ rb_define_private_method(klass, "process_xincludes", process_xincludes, 1);
1663
+ rb_define_private_method(klass, "in_context", in_context, 2);
1664
+ rb_define_private_method(klass, "add_child_node", add_child, 1);
1665
+ rb_define_private_method(klass, "add_previous_sibling_node", add_previous_sibling, 1);
1666
+ rb_define_private_method(klass, "add_next_sibling_node", add_next_sibling, 1);
1667
+ rb_define_private_method(klass, "replace_node", replace, 1);
1668
+ rb_define_private_method(klass, "dump_html", dump_html, 0);
1669
+ rb_define_private_method(klass, "native_write_to", native_write_to, 4);
707
1670
  rb_define_private_method(klass, "get", get, 1);
708
- rb_define_private_method(klass, "owned?", owned_eh, 0);
1671
+ rb_define_private_method(klass, "set", set, 2);
1672
+ rb_define_private_method(klass, "set_namespace", set_namespace, 1);
1673
+ rb_define_private_method(klass, "compare", compare, 1);
1674
+ /* intern the method names once; the decorate ID is what Nokogiri_wrap_xml_node passes to rb_funcall */
1675
+ decorate = rb_intern("decorate");
1676
+ decorate_bang = rb_intern("decorate!");
709
1677
  }
1678
+
1679
+ /* vim: set noet sw=4 sws=4 */