nokogiri 1.8.5 → 1.15.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (358) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +40 -18
  3. data/LICENSE-DEPENDENCIES.md +1636 -1024
  4. data/LICENSE.md +5 -28
  5. data/README.md +203 -90
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -61
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +867 -417
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +215 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +42 -37
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +40 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +401 -237
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +136 -62
  33. data/ext/nokogiri/xml_node.c +1387 -678
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +347 -212
  37. data/ext/nokogiri/xml_relax_ng.c +86 -77
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +145 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +64 -36
  41. data/ext/nokogiri/xml_schema.c +138 -81
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +36 -26
  44. data/ext/nokogiri/xml_xpath_context.c +366 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +111 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +224 -95
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +392 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +223 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +98 -54
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -126
  133. data/lib/nokogiri/xml/document_fragment.rb +104 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +45 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1093 -411
  142. data/lib/nokogiri/xml/node_set.rb +173 -67
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +38 -37
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  169. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  170. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  175. data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
  176. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
  177. metadata +126 -399
  178. data/.autotest +0 -22
  179. data/.cross_rubies +0 -8
  180. data/.editorconfig +0 -17
  181. data/.gemtest +0 -0
  182. data/.travis.yml +0 -63
  183. data/CHANGELOG.md +0 -1368
  184. data/CONTRIBUTING.md +0 -42
  185. data/C_CODING_STYLE.rdoc +0 -33
  186. data/Gemfile-libxml-ruby +0 -3
  187. data/Manifest.txt +0 -370
  188. data/ROADMAP.md +0 -111
  189. data/Rakefile +0 -348
  190. data/SECURITY.md +0 -19
  191. data/STANDARD_RESPONSES.md +0 -47
  192. data/Y_U_NO_GEMSPEC.md +0 -155
  193. data/appveyor.yml +0 -29
  194. data/build_all +0 -44
  195. data/ext/nokogiri/html_document.c +0 -170
  196. data/ext/nokogiri/html_document.h +0 -10
  197. data/ext/nokogiri/html_element_description.c +0 -279
  198. data/ext/nokogiri/html_element_description.h +0 -10
  199. data/ext/nokogiri/html_entity_lookup.c +0 -32
  200. data/ext/nokogiri/html_entity_lookup.h +0 -8
  201. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  202. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  203. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  204. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  205. data/ext/nokogiri/xml_attr.h +0 -9
  206. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  207. data/ext/nokogiri/xml_cdata.h +0 -9
  208. data/ext/nokogiri/xml_comment.h +0 -9
  209. data/ext/nokogiri/xml_document.h +0 -23
  210. data/ext/nokogiri/xml_document_fragment.h +0 -10
  211. data/ext/nokogiri/xml_dtd.h +0 -10
  212. data/ext/nokogiri/xml_element_content.h +0 -10
  213. data/ext/nokogiri/xml_element_decl.h +0 -9
  214. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  215. data/ext/nokogiri/xml_entity_decl.h +0 -10
  216. data/ext/nokogiri/xml_entity_reference.h +0 -9
  217. data/ext/nokogiri/xml_io.c +0 -61
  218. data/ext/nokogiri/xml_io.h +0 -11
  219. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  220. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  221. data/ext/nokogiri/xml_namespace.h +0 -15
  222. data/ext/nokogiri/xml_node.h +0 -13
  223. data/ext/nokogiri/xml_node_set.h +0 -12
  224. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  225. data/ext/nokogiri/xml_reader.h +0 -10
  226. data/ext/nokogiri/xml_relax_ng.h +0 -9
  227. data/ext/nokogiri/xml_sax_parser.h +0 -39
  228. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  229. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  230. data/ext/nokogiri/xml_schema.h +0 -9
  231. data/ext/nokogiri/xml_syntax_error.h +0 -13
  232. data/ext/nokogiri/xml_text.h +0 -9
  233. data/ext/nokogiri/xml_xpath_context.h +0 -10
  234. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  235. data/lib/nokogiri/html/document.rb +0 -335
  236. data/lib/nokogiri/html/document_fragment.rb +0 -49
  237. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  238. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  239. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  240. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  241. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  242. data/patches/sort-patches-by-date +0 -25
  243. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  244. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
  245. data/suppressions/README.txt +0 -1
  246. data/suppressions/nokogiri_ruby-2.supp +0 -10
  247. data/tasks/test.rb +0 -100
  248. data/test/css/test_nthiness.rb +0 -226
  249. data/test/css/test_parser.rb +0 -386
  250. data/test/css/test_tokenizer.rb +0 -215
  251. data/test/css/test_xpath_visitor.rb +0 -96
  252. data/test/decorators/test_slop.rb +0 -23
  253. data/test/files/2ch.html +0 -108
  254. data/test/files/GH_1042.html +0 -18
  255. data/test/files/address_book.rlx +0 -12
  256. data/test/files/address_book.xml +0 -10
  257. data/test/files/atom.xml +0 -344
  258. data/test/files/bar/bar.xsd +0 -4
  259. data/test/files/bogus.xml +0 -0
  260. data/test/files/dont_hurt_em_why.xml +0 -422
  261. data/test/files/encoding.html +0 -82
  262. data/test/files/encoding.xhtml +0 -84
  263. data/test/files/exslt.xml +0 -8
  264. data/test/files/exslt.xslt +0 -35
  265. data/test/files/foo/foo.xsd +0 -4
  266. data/test/files/metacharset.html +0 -10
  267. data/test/files/namespace_pressure_test.xml +0 -1684
  268. data/test/files/noencoding.html +0 -47
  269. data/test/files/po.xml +0 -32
  270. data/test/files/po.xsd +0 -66
  271. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  272. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  273. data/test/files/saml/xenc_schema.xsd +0 -146
  274. data/test/files/saml/xmldsig_schema.xsd +0 -318
  275. data/test/files/shift_jis.html +0 -10
  276. data/test/files/shift_jis.xml +0 -5
  277. data/test/files/shift_jis_no_charset.html +0 -9
  278. data/test/files/slow-xpath.xml +0 -25509
  279. data/test/files/snuggles.xml +0 -3
  280. data/test/files/staff.dtd +0 -10
  281. data/test/files/staff.xml +0 -59
  282. data/test/files/staff.xslt +0 -32
  283. data/test/files/test_document_url/bar.xml +0 -2
  284. data/test/files/test_document_url/document.dtd +0 -4
  285. data/test/files/test_document_url/document.xml +0 -6
  286. data/test/files/tlm.html +0 -851
  287. data/test/files/to_be_xincluded.xml +0 -2
  288. data/test/files/valid_bar.xml +0 -2
  289. data/test/files/xinclude.xml +0 -4
  290. data/test/helper.rb +0 -271
  291. data/test/html/sax/test_parser.rb +0 -168
  292. data/test/html/sax/test_parser_context.rb +0 -46
  293. data/test/html/sax/test_parser_text.rb +0 -163
  294. data/test/html/sax/test_push_parser.rb +0 -87
  295. data/test/html/test_attributes.rb +0 -85
  296. data/test/html/test_builder.rb +0 -164
  297. data/test/html/test_document.rb +0 -712
  298. data/test/html/test_document_encoding.rb +0 -143
  299. data/test/html/test_document_fragment.rb +0 -310
  300. data/test/html/test_element_description.rb +0 -105
  301. data/test/html/test_named_characters.rb +0 -14
  302. data/test/html/test_node.rb +0 -212
  303. data/test/html/test_node_encoding.rb +0 -91
  304. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  305. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  306. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  307. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  308. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  309. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  310. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  311. data/test/test_convert_xpath.rb +0 -135
  312. data/test/test_css_cache.rb +0 -47
  313. data/test/test_encoding_handler.rb +0 -48
  314. data/test/test_memory_leak.rb +0 -156
  315. data/test/test_nokogiri.rb +0 -138
  316. data/test/test_soap4r_sax.rb +0 -52
  317. data/test/test_xslt_transforms.rb +0 -314
  318. data/test/xml/node/test_save_options.rb +0 -28
  319. data/test/xml/node/test_subclass.rb +0 -44
  320. data/test/xml/sax/test_parser.rb +0 -402
  321. data/test/xml/sax/test_parser_context.rb +0 -115
  322. data/test/xml/sax/test_parser_text.rb +0 -202
  323. data/test/xml/sax/test_push_parser.rb +0 -265
  324. data/test/xml/test_attr.rb +0 -74
  325. data/test/xml/test_attribute_decl.rb +0 -86
  326. data/test/xml/test_builder.rb +0 -341
  327. data/test/xml/test_c14n.rb +0 -180
  328. data/test/xml/test_cdata.rb +0 -54
  329. data/test/xml/test_comment.rb +0 -40
  330. data/test/xml/test_document.rb +0 -982
  331. data/test/xml/test_document_encoding.rb +0 -31
  332. data/test/xml/test_document_fragment.rb +0 -298
  333. data/test/xml/test_dtd.rb +0 -187
  334. data/test/xml/test_dtd_encoding.rb +0 -31
  335. data/test/xml/test_element_content.rb +0 -56
  336. data/test/xml/test_element_decl.rb +0 -73
  337. data/test/xml/test_entity_decl.rb +0 -122
  338. data/test/xml/test_entity_reference.rb +0 -262
  339. data/test/xml/test_namespace.rb +0 -96
  340. data/test/xml/test_node.rb +0 -1325
  341. data/test/xml/test_node_attributes.rb +0 -115
  342. data/test/xml/test_node_encoding.rb +0 -75
  343. data/test/xml/test_node_inheritance.rb +0 -32
  344. data/test/xml/test_node_reparenting.rb +0 -592
  345. data/test/xml/test_node_set.rb +0 -809
  346. data/test/xml/test_parse_options.rb +0 -64
  347. data/test/xml/test_processing_instruction.rb +0 -30
  348. data/test/xml/test_reader.rb +0 -620
  349. data/test/xml/test_reader_encoding.rb +0 -134
  350. data/test/xml/test_relax_ng.rb +0 -60
  351. data/test/xml/test_schema.rb +0 -142
  352. data/test/xml/test_syntax_error.rb +0 -36
  353. data/test/xml/test_text.rb +0 -60
  354. data/test/xml/test_unparented_node.rb +0 -483
  355. data/test/xml/test_xinclude.rb +0 -83
  356. data/test/xml/test_xpath.rb +0 -470
  357. data/test/xslt/test_custom_functions.rb +0 -133
  358. data/test/xslt/test_exception_handling.rb +0 -37
@@ -1,88 +1,168 @@
1
- #include <xml_document.h>
1
+ #include <nokogiri.h>
2
2
 
3
- static int dealloc_node_i(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
3
+ VALUE cNokogiriXmlDocument ;
4
+
5
+ static int
6
+ dealloc_node_i2(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
4
7
  {
5
- switch(node->type) {
6
- case XML_ATTRIBUTE_NODE:
7
- xmlFreePropList((xmlAttrPtr)node);
8
- break;
9
- case XML_NAMESPACE_DECL:
10
- xmlFree(node);
11
- break;
12
- default:
13
- if(node->parent == NULL) {
14
- xmlAddChild((xmlNodePtr)doc, node);
15
- }
8
+ switch (node->type) {
9
+ case XML_ATTRIBUTE_NODE:
10
+ xmlFreePropList((xmlAttrPtr)node);
11
+ break;
12
+ case XML_NAMESPACE_DECL:
13
+ xmlFreeNs((xmlNsPtr)node);
14
+ break;
15
+ case XML_DTD_NODE:
16
+ xmlFreeDtd((xmlDtdPtr)node);
17
+ break;
18
+ default:
19
+ if (node->parent == NULL) {
20
+ xmlAddChild((xmlNodePtr)doc, node);
21
+ }
16
22
  }
17
23
  return ST_CONTINUE;
18
24
  }
19
25
 
20
- static void remove_private(xmlNodePtr node)
26
+ static int
27
+ dealloc_node_i(st_data_t key, st_data_t node, st_data_t doc)
28
+ {
29
+ return dealloc_node_i2((xmlNodePtr)key, (xmlNodePtr)node, (xmlDocPtr)doc);
30
+ }
31
+
32
+ static void
33
+ remove_private(xmlNodePtr node)
21
34
  {
22
35
  xmlNodePtr child;
23
36
 
24
- for (child = node->children; child; child = child->next)
37
+ for (child = node->children; child; child = child->next) {
25
38
  remove_private(child);
39
+ }
26
40
 
27
41
  if ((node->type == XML_ELEMENT_NODE ||
28
42
  node->type == XML_XINCLUDE_START ||
29
43
  node->type == XML_XINCLUDE_END) &&
30
44
  node->properties) {
31
- for (child = (xmlNodePtr)node->properties; child; child = child->next)
45
+ for (child = (xmlNodePtr)node->properties; child; child = child->next) {
32
46
  remove_private(child);
47
+ }
33
48
  }
34
49
 
35
50
  node->_private = NULL;
36
51
  }
37
52
 
38
- static void dealloc(xmlDocPtr doc)
53
+ static void
54
+ mark(void *data)
39
55
  {
40
- st_table *node_hash;
56
+ xmlDocPtr doc = (xmlDocPtr)data;
57
+ nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
58
+ if (tuple) {
59
+ rb_gc_mark(tuple->doc);
60
+ rb_gc_mark(tuple->node_cache);
61
+ }
62
+ }
41
63
 
42
- NOKOGIRI_DEBUG_START(doc);
64
+ static void
65
+ dealloc(void *data)
66
+ {
67
+ xmlDocPtr doc = (xmlDocPtr)data;
68
+ st_table *node_hash;
43
69
 
44
70
  node_hash = DOC_UNLINKED_NODE_HASH(doc);
45
71
 
46
72
  st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);
47
73
  st_free_table(node_hash);
48
74
 
49
- free(doc->_private);
75
+ ruby_xfree(doc->_private);
50
76
 
51
- /* When both Nokogiri and libxml-ruby are loaded, make sure that all nodes
52
- * have their _private pointers cleared. This is to avoid libxml-ruby's
53
- * xmlDeregisterNode callback from accessing VALUE pointers from ruby's GC
54
- * free context, which can result in segfaults.
77
+ #pragma GCC diagnostic push
78
+ #pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
79
+ /*
80
+ * libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
81
+ * versions, the registered callback from libxml-ruby will access the _private pointers set by
82
+ * nokogiri, which will result in segfaults.
83
+ *
84
+ * To avoid this, we need to clear the _private pointers from all nodes in this document tree
85
+ * before that callback gets invoked.
86
+ *
87
+ * libxml-ruby 3.0.0 was released in 2017-02, so at some point we can probably safely remove this
88
+ * safeguard (though probably pairing with a runtime check on the libxml-ruby version).
55
89
  */
56
- if (xmlDeregisterNodeDefaultValue)
90
+ if (xmlDeregisterNodeDefaultValue) {
57
91
  remove_private((xmlNodePtr)doc);
92
+ }
93
+ #pragma GCC diagnostic pop
58
94
 
59
95
  xmlFreeDoc(doc);
96
+ }
97
+
98
+ static size_t
99
+ memsize_node(const xmlNodePtr node)
100
+ {
101
+ /* note we don't count namespace definitions, just going for a good-enough number here */
102
+ xmlNodePtr child;
103
+ size_t memsize = 0;
104
+
105
+ memsize += xmlStrlen(node->name);
106
+ for (child = (xmlNodePtr)node->properties; child; child = child->next) {
107
+ memsize += sizeof(xmlAttr) + memsize_node(child);
108
+ }
109
+ if (node->type == XML_TEXT_NODE) {
110
+ memsize += xmlStrlen(node->content);
111
+ }
112
+ for (child = node->children; child; child = child->next) {
113
+ memsize += sizeof(xmlNode) + memsize_node(child);
114
+ }
115
+ return memsize;
116
+ }
60
117
 
61
- NOKOGIRI_DEBUG_END(doc);
118
+ static size_t
119
+ memsize(const void *data)
120
+ {
121
+ xmlDocPtr doc = (const xmlDocPtr)data;
122
+ size_t memsize = sizeof(xmlDoc);
123
+ /* This may not account for all memory use */
124
+ memsize += memsize_node((xmlNodePtr)doc);
125
+ return memsize;
62
126
  }
63
127
 
64
- static void recursively_remove_namespaces_from_node(xmlNodePtr node)
128
+ static const rb_data_type_t noko_xml_document_data_type = {
129
+ .wrap_struct_name = "Nokogiri::XML::Document",
130
+ .function = {
131
+ .dmark = mark,
132
+ .dfree = dealloc,
133
+ .dsize = memsize,
134
+ },
135
+ // .flags = RUBY_TYPED_FREE_IMMEDIATELY, // TODO see https://github.com/sparklemotion/nokogiri/issues/2822
136
+ };
137
+
138
+ static void
139
+ recursively_remove_namespaces_from_node(xmlNodePtr node)
65
140
  {
66
141
  xmlNodePtr child ;
67
142
  xmlAttrPtr property ;
68
143
 
69
144
  xmlSetNs(node, NULL);
70
145
 
71
- for (child = node->children ; child ; child = child->next)
146
+ for (child = node->children ; child ; child = child->next) {
72
147
  recursively_remove_namespaces_from_node(child);
148
+ }
73
149
 
74
150
  if (((node->type == XML_ELEMENT_NODE) ||
75
151
  (node->type == XML_XINCLUDE_START) ||
76
152
  (node->type == XML_XINCLUDE_END)) &&
77
153
  node->nsDef) {
78
- xmlFreeNsList(node->nsDef);
154
+ xmlNsPtr curr = node->nsDef;
155
+ while (curr) {
156
+ noko_xml_document_pin_namespace(curr, node->doc);
157
+ curr = curr->next;
158
+ }
79
159
  node->nsDef = NULL;
80
160
  }
81
161
 
82
162
  if (node->type == XML_ELEMENT_NODE && node->properties != NULL) {
83
163
  property = node->properties ;
84
164
  while (property != NULL) {
85
- if (property->ns) property->ns = NULL ;
165
+ if (property->ns) { property->ns = NULL ; }
86
166
  property = property->next ;
87
167
  }
88
168
  }
@@ -94,12 +174,12 @@ static void recursively_remove_namespaces_from_node(xmlNodePtr node)
94
174
  *
95
175
  * Get the url name for this document.
96
176
  */
97
- static VALUE url(VALUE self)
177
+ static VALUE
178
+ url(VALUE self)
98
179
  {
99
- xmlDocPtr doc;
100
- Data_Get_Struct(self, xmlDoc, doc);
180
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
101
181
 
102
- if(doc->URL) return NOKOGIRI_STR_NEW2(doc->URL);
182
+ if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
103
183
 
104
184
  return Qnil;
105
185
  }
@@ -110,42 +190,42 @@ static VALUE url(VALUE self)
110
190
  *
111
191
  * Set the root element on this document
112
192
  */
113
- static VALUE set_root(VALUE self, VALUE root)
193
+ static VALUE
194
+ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
114
195
  {
115
- xmlDocPtr doc;
116
- xmlNodePtr new_root;
117
- xmlNodePtr old_root;
118
-
119
- Data_Get_Struct(self, xmlDoc, doc);
196
+ xmlDocPtr c_document;
197
+ xmlNodePtr c_new_root = NULL, c_current_root;
120
198
 
121
- old_root = NULL;
199
+ c_document = noko_xml_document_unwrap(self);
122
200
 
123
- if(NIL_P(root)) {
124
- old_root = xmlDocGetRootElement(doc);
125
-
126
- if(old_root) {
127
- xmlUnlinkNode(old_root);
128
- nokogiri_root_node(old_root);
129
- }
130
-
131
- return root;
201
+ c_current_root = xmlDocGetRootElement(c_document);
202
+ if (c_current_root) {
203
+ xmlUnlinkNode(c_current_root);
204
+ noko_xml_document_pin_node(c_current_root);
132
205
  }
133
206
 
134
- Data_Get_Struct(root, xmlNode, new_root);
207
+ if (!NIL_P(rb_new_root)) {
208
+ if (!rb_obj_is_kind_of(rb_new_root, cNokogiriXmlNode)) {
209
+ rb_raise(rb_eArgError,
210
+ "expected Nokogiri::XML::Node but received %"PRIsVALUE,
211
+ rb_obj_class(rb_new_root));
212
+ }
135
213
 
214
+ Noko_Node_Get_Struct(rb_new_root, xmlNode, c_new_root);
136
215
 
137
- /* If the new root's document is not the same as the current document,
138
- * then we need to dup the node in to this document. */
139
- if(new_root->doc != doc) {
140
- old_root = xmlDocGetRootElement(doc);
141
- if (!(new_root = xmlDocCopyNode(new_root, doc, 1))) {
142
- rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
216
+ /* If the new root's document is not the same as the current document,
217
+ * then we need to dup the node in to this document. */
218
+ if (c_new_root->doc != c_document) {
219
+ c_new_root = xmlDocCopyNode(c_new_root, c_document, 1);
220
+ if (!c_new_root) {
221
+ rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
222
+ }
143
223
  }
144
224
  }
145
225
 
146
- xmlDocSetRootElement(doc, new_root);
147
- if(old_root) nokogiri_root_node(old_root);
148
- return root;
226
+ xmlDocSetRootElement(c_document, c_new_root);
227
+
228
+ return rb_new_root;
149
229
  }
150
230
 
151
231
  /*
@@ -154,17 +234,20 @@ static VALUE set_root(VALUE self, VALUE root)
154
234
  *
155
235
  * Get the root node for this document.
156
236
  */
157
- static VALUE root(VALUE self)
237
+ static VALUE
238
+ rb_xml_document_root(VALUE self)
158
239
  {
159
- xmlDocPtr doc;
160
- xmlNodePtr root;
240
+ xmlDocPtr c_document;
241
+ xmlNodePtr c_root;
161
242
 
162
- Data_Get_Struct(self, xmlDoc, doc);
243
+ c_document = noko_xml_document_unwrap(self);
163
244
 
164
- root = xmlDocGetRootElement(doc);
245
+ c_root = xmlDocGetRootElement(c_document);
246
+ if (!c_root) {
247
+ return Qnil;
248
+ }
165
249
 
166
- if(!root) return Qnil;
167
- return Nokogiri_wrap_xml_node(Qnil, root) ;
250
+ return noko_xml_node_wrap(Qnil, c_root) ;
168
251
  }
169
252
 
170
253
  /*
@@ -173,13 +256,14 @@ static VALUE root(VALUE self)
173
256
  *
174
257
  * Set the encoding string for this Document
175
258
  */
176
- static VALUE set_encoding(VALUE self, VALUE encoding)
259
+ static VALUE
260
+ set_encoding(VALUE self, VALUE encoding)
177
261
  {
178
- xmlDocPtr doc;
179
- Data_Get_Struct(self, xmlDoc, doc);
262
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
180
263
 
181
- if (doc->encoding)
182
- free((char *)(uintptr_t) doc->encoding); /* avoid gcc cast warning */
264
+ if (doc->encoding) {
265
+ xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
266
+ }
183
267
 
184
268
  doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding));
185
269
 
@@ -192,12 +276,12 @@ static VALUE set_encoding(VALUE self, VALUE encoding)
192
276
  *
193
277
  * Get the encoding for this Document
194
278
  */
195
- static VALUE encoding(VALUE self)
279
+ static VALUE
280
+ encoding(VALUE self)
196
281
  {
197
- xmlDocPtr doc;
198
- Data_Get_Struct(self, xmlDoc, doc);
282
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
199
283
 
200
- if(!doc->encoding) return Qnil;
284
+ if (!doc->encoding) { return Qnil; }
201
285
  return NOKOGIRI_STR_NEW2(doc->encoding);
202
286
  }
203
287
 
@@ -207,12 +291,12 @@ static VALUE encoding(VALUE self)
207
291
  *
208
292
  * Get the XML version for this Document
209
293
  */
210
- static VALUE version(VALUE self)
294
+ static VALUE
295
+ version(VALUE self)
211
296
  {
212
- xmlDocPtr doc;
213
- Data_Get_Struct(self, xmlDoc, doc);
297
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
214
298
 
215
- if(!doc->version) return Qnil;
299
+ if (!doc->version) { return Qnil; }
216
300
  return NOKOGIRI_STR_NEW2(doc->version);
217
301
  }
218
302
 
@@ -222,14 +306,15 @@ static VALUE version(VALUE self)
222
306
  *
223
307
  * Create a new document from an IO object
224
308
  */
225
- static VALUE read_io( VALUE klass,
226
- VALUE io,
227
- VALUE url,
228
- VALUE encoding,
229
- VALUE options )
309
+ static VALUE
310
+ read_io(VALUE klass,
311
+ VALUE io,
312
+ VALUE url,
313
+ VALUE encoding,
314
+ VALUE options)
230
315
  {
231
- const char * c_url = NIL_P(url) ? NULL : StringValueCStr(url);
232
- const char * c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
316
+ const char *c_url = NIL_P(url) ? NULL : StringValueCStr(url);
317
+ const char *c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
233
318
  VALUE error_list = rb_ary_new();
234
319
  VALUE document;
235
320
  xmlDocPtr doc;
@@ -238,30 +323,31 @@ static VALUE read_io( VALUE klass,
238
323
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
239
324
 
240
325
  doc = xmlReadIO(
241
- (xmlInputReadCallback)io_read_callback,
242
- (xmlInputCloseCallback)io_close_callback,
243
- (void *)io,
244
- c_url,
245
- c_enc,
246
- (int)NUM2INT(options)
247
- );
326
+ (xmlInputReadCallback)noko_io_read,
327
+ (xmlInputCloseCallback)noko_io_close,
328
+ (void *)io,
329
+ c_url,
330
+ c_enc,
331
+ (int)NUM2INT(options)
332
+ );
248
333
  xmlSetStructuredErrorFunc(NULL, NULL);
249
334
 
250
- if(doc == NULL) {
335
+ if (doc == NULL) {
251
336
  xmlErrorPtr error;
252
337
 
253
338
  xmlFreeDoc(doc);
254
339
 
255
340
  error = xmlGetLastError();
256
- if(error)
341
+ if (error) {
257
342
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
258
- else
343
+ } else {
259
344
  rb_raise(rb_eRuntimeError, "Could not parse document");
345
+ }
260
346
 
261
347
  return Qnil;
262
348
  }
263
349
 
264
- document = Nokogiri_wrap_xml_document(klass, doc);
350
+ document = noko_xml_document_wrap(klass, doc);
265
351
  rb_iv_set(document, "@errors", error_list);
266
352
  return document;
267
353
  }
@@ -272,15 +358,16 @@ static VALUE read_io( VALUE klass,
272
358
  *
273
359
  * Create a new document from a String
274
360
  */
275
- static VALUE read_memory( VALUE klass,
276
- VALUE string,
277
- VALUE url,
278
- VALUE encoding,
279
- VALUE options )
361
+ static VALUE
362
+ read_memory(VALUE klass,
363
+ VALUE string,
364
+ VALUE url,
365
+ VALUE encoding,
366
+ VALUE options)
280
367
  {
281
- const char * c_buffer = StringValuePtr(string);
282
- const char * c_url = NIL_P(url) ? NULL : StringValueCStr(url);
283
- const char * c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
368
+ const char *c_buffer = StringValuePtr(string);
369
+ const char *c_url = NIL_P(url) ? NULL : StringValueCStr(url);
370
+ const char *c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
284
371
  int len = (int)RSTRING_LEN(string);
285
372
  VALUE error_list = rb_ary_new();
286
373
  VALUE document;
@@ -291,21 +378,22 @@ static VALUE read_memory( VALUE klass,
291
378
  doc = xmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options));
292
379
  xmlSetStructuredErrorFunc(NULL, NULL);
293
380
 
294
- if(doc == NULL) {
381
+ if (doc == NULL) {
295
382
  xmlErrorPtr error;
296
383
 
297
384
  xmlFreeDoc(doc);
298
385
 
299
386
  error = xmlGetLastError();
300
- if(error)
387
+ if (error) {
301
388
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
302
- else
389
+ } else {
303
390
  rb_raise(rb_eRuntimeError, "Could not parse document");
391
+ }
304
392
 
305
393
  return Qnil;
306
394
  }
307
395
 
308
- document = Nokogiri_wrap_xml_document(klass, doc);
396
+ document = noko_xml_document_wrap(klass, doc);
309
397
  rb_iv_set(document, "@errors", error_list);
310
398
  return document;
311
399
  }
@@ -317,26 +405,26 @@ static VALUE read_memory( VALUE klass,
317
405
  * Copy this Document. An optional depth may be passed in, but it defaults
318
406
  * to a deep copy. 0 is a shallow copy, 1 is a deep copy.
319
407
  */
320
- static VALUE duplicate_document(int argc, VALUE *argv, VALUE self)
408
+ static VALUE
409
+ duplicate_document(int argc, VALUE *argv, VALUE self)
321
410
  {
322
411
  xmlDocPtr doc, dup;
323
412
  VALUE copy;
324
413
  VALUE level;
325
- VALUE error_list;
326
414
 
327
- if(rb_scan_args(argc, argv, "01", &level) == 0)
415
+ if (rb_scan_args(argc, argv, "01", &level) == 0) {
328
416
  level = INT2NUM((long)1);
417
+ }
329
418
 
330
- Data_Get_Struct(self, xmlDoc, doc);
419
+ doc = noko_xml_document_unwrap(self);
331
420
 
332
421
  dup = xmlCopyDoc(doc, (int)NUM2INT(level));
333
422
 
334
- if(dup == NULL) return Qnil;
423
+ if (dup == NULL) { return Qnil; }
335
424
 
336
425
  dup->type = doc->type;
337
- copy = Nokogiri_wrap_xml_document(rb_obj_class(self), dup);
338
- error_list = rb_iv_get(self, "@errors");
339
- rb_iv_set(copy, "@errors", error_list);
426
+ copy = noko_xml_document_wrap(rb_obj_class(self), dup);
427
+ rb_iv_set(copy, "@errors", rb_iv_get(self, "@errors"));
340
428
  return copy ;
341
429
  }
342
430
 
@@ -346,18 +434,18 @@ static VALUE duplicate_document(int argc, VALUE *argv, VALUE self)
346
434
  *
347
435
  * Create a new document with +version+ (defaults to "1.0")
348
436
  */
349
- static VALUE new(int argc, VALUE *argv, VALUE klass)
437
+ static VALUE
438
+ new (int argc, VALUE *argv, VALUE klass)
350
439
  {
351
440
  xmlDocPtr doc;
352
441
  VALUE version, rest, rb_doc ;
353
442
 
354
443
  rb_scan_args(argc, argv, "0*", &rest);
355
444
  version = rb_ary_entry(rest, (long)0);
356
- if (NIL_P(version)) version = rb_str_new2("1.0");
445
+ if (NIL_P(version)) { version = rb_str_new2("1.0"); }
357
446
 
358
447
  doc = xmlNewDoc((xmlChar *)StringValueCStr(version));
359
- rb_doc = Nokogiri_wrap_xml_document(klass, doc);
360
- rb_obj_call_init(rb_doc, argc, argv);
448
+ rb_doc = noko_xml_document_wrap_with_init_args(klass, doc, argc, argv);
361
449
  return rb_doc ;
362
450
  }
363
451
 
@@ -398,10 +486,10 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
398
486
  * please direct your browser to
399
487
  * http://tenderlovemaking.com/2009/04/23/namespaces-in-xml.html
400
488
  */
401
- VALUE remove_namespaces_bang(VALUE self)
489
+ static VALUE
490
+ remove_namespaces_bang(VALUE self)
402
491
  {
403
- xmlDocPtr doc ;
404
- Data_Get_Struct(self, xmlDoc, doc);
492
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
405
493
 
406
494
  recursively_remove_namespaces_from_node((xmlNodePtr)doc);
407
495
  return self;
@@ -418,7 +506,8 @@ VALUE remove_namespaces_bang(VALUE self)
418
506
  * +external_id+, +system_id+, and +content+ set the External ID, System ID,
419
507
  * and content respectively. All of these parameters are optional.
420
508
  */
421
- static VALUE create_entity(int argc, VALUE *argv, VALUE self)
509
+ static VALUE
510
+ create_entity(int argc, VALUE *argv, VALUE self)
422
511
  {
423
512
  VALUE name;
424
513
  VALUE type;
@@ -428,55 +517,53 @@ static VALUE create_entity(int argc, VALUE *argv, VALUE self)
428
517
  xmlEntityPtr ptr;
429
518
  xmlDocPtr doc ;
430
519
 
431
- Data_Get_Struct(self, xmlDoc, doc);
520
+ doc = noko_xml_document_unwrap(self);
432
521
 
433
522
  rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
434
- &content);
523
+ &content);
435
524
 
436
525
  xmlResetLastError();
437
526
  ptr = xmlAddDocEntity(
438
- doc,
439
- (xmlChar *)(NIL_P(name) ? NULL : StringValueCStr(name)),
440
- (int) (NIL_P(type) ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(type)),
441
- (xmlChar *)(NIL_P(external_id) ? NULL : StringValueCStr(external_id)),
442
- (xmlChar *)(NIL_P(system_id) ? NULL : StringValueCStr(system_id)),
443
- (xmlChar *)(NIL_P(content) ? NULL : StringValueCStr(content))
444
- );
445
-
446
- if(NULL == ptr) {
527
+ doc,
528
+ (xmlChar *)(NIL_P(name) ? NULL : StringValueCStr(name)),
529
+ (int)(NIL_P(type) ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(type)),
530
+ (xmlChar *)(NIL_P(external_id) ? NULL : StringValueCStr(external_id)),
531
+ (xmlChar *)(NIL_P(system_id) ? NULL : StringValueCStr(system_id)),
532
+ (xmlChar *)(NIL_P(content) ? NULL : StringValueCStr(content))
533
+ );
534
+
535
+ if (NULL == ptr) {
447
536
  xmlErrorPtr error = xmlGetLastError();
448
- if(error)
537
+ if (error) {
449
538
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
450
- else
539
+ } else {
451
540
  rb_raise(rb_eRuntimeError, "Could not create entity");
541
+ }
452
542
 
453
543
  return Qnil;
454
544
  }
455
545
 
456
- return Nokogiri_wrap_xml_node(cNokogiriXmlEntityDecl, (xmlNodePtr)ptr);
546
+ return noko_xml_node_wrap(cNokogiriXmlEntityDecl, (xmlNodePtr)ptr);
457
547
  }
458
548
 
459
- static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
549
+ static int
550
+ block_caller(void *ctx, xmlNodePtr c_node, xmlNodePtr c_parent_node)
460
551
  {
461
- VALUE block;
462
- VALUE node;
463
- VALUE parent;
552
+ VALUE block = (VALUE)ctx;
553
+ VALUE rb_node;
554
+ VALUE rb_parent_node;
464
555
  VALUE ret;
465
556
 
466
- if(_node->type == XML_NAMESPACE_DECL){
467
- node = Nokogiri_wrap_xml_namespace(_parent->doc, (xmlNsPtr) _node);
468
- }
469
- else{
470
- node = Nokogiri_wrap_xml_node(Qnil, _node);
557
+ if (c_node->type == XML_NAMESPACE_DECL) {
558
+ rb_node = noko_xml_namespace_wrap((xmlNsPtr)c_node, c_parent_node->doc);
559
+ } else {
560
+ rb_node = noko_xml_node_wrap(Qnil, c_node);
471
561
  }
472
- parent = _parent ? Nokogiri_wrap_xml_node(Qnil, _parent) : Qnil;
473
- block = (VALUE)ctx;
562
+ rb_parent_node = c_parent_node ? noko_xml_node_wrap(Qnil, c_parent_node) : Qnil;
474
563
 
475
- ret = rb_funcall(block, rb_intern("call"), 2, node, parent);
564
+ ret = rb_funcall(block, rb_intern("call"), 2, rb_node, rb_parent_node);
476
565
 
477
- if(Qfalse == ret || Qnil == ret) return 0;
478
-
479
- return 1;
566
+ return (Qfalse == ret || Qnil == ret) ? 0 : 1;
480
567
  }
481
568
 
482
569
  /* call-seq:
@@ -489,117 +576,194 @@ static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
489
576
  * The block must return a non-nil, non-false value if the +obj+ passed in
490
577
  * should be included in the canonicalized document.
491
578
  */
492
- static VALUE canonicalize(int argc, VALUE* argv, VALUE self)
579
+ static VALUE
580
+ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
493
581
  {
494
- VALUE mode;
495
- VALUE incl_ns;
496
- VALUE with_comments;
497
- xmlChar **ns;
498
- long ns_len, i;
582
+ VALUE rb_mode;
583
+ VALUE rb_namespaces;
584
+ VALUE rb_comments_p;
585
+ int c_mode = 0;
586
+ xmlChar **c_namespaces;
499
587
 
500
- xmlDocPtr doc;
501
- xmlOutputBufferPtr buf;
502
- xmlC14NIsVisibleCallback cb = NULL;
503
- void * ctx = NULL;
588
+ xmlDocPtr c_doc;
589
+ xmlOutputBufferPtr c_obuf;
590
+ xmlC14NIsVisibleCallback c_callback_wrapper = NULL;
591
+ void *rb_callback = NULL;
504
592
 
505
593
  VALUE rb_cStringIO;
506
- VALUE io;
594
+ VALUE rb_io;
507
595
 
508
- rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments);
596
+ rb_scan_args(argc, argv, "03", &rb_mode, &rb_namespaces, &rb_comments_p);
597
+ if (!NIL_P(rb_mode)) {
598
+ Check_Type(rb_mode, T_FIXNUM);
599
+ c_mode = NUM2INT(rb_mode);
600
+ }
601
+ if (!NIL_P(rb_namespaces)) {
602
+ Check_Type(rb_namespaces, T_ARRAY);
603
+ if (c_mode == XML_C14N_1_0 || c_mode == XML_C14N_1_1) {
604
+ rb_raise(rb_eRuntimeError, "This canonicalizer does not support this operation");
605
+ }
606
+ }
509
607
 
510
- Data_Get_Struct(self, xmlDoc, doc);
608
+ c_doc = noko_xml_document_unwrap(self);
511
609
 
512
610
  rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
513
- io = rb_class_new_instance(0, 0, rb_cStringIO);
514
- buf = xmlAllocOutputBuffer(NULL);
611
+ rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
612
+ c_obuf = xmlAllocOutputBuffer(NULL);
515
613
 
516
- buf->writecallback = (xmlOutputWriteCallback)io_write_callback;
517
- buf->closecallback = (xmlOutputCloseCallback)io_close_callback;
518
- buf->context = (void *)io;
614
+ c_obuf->writecallback = (xmlOutputWriteCallback)noko_io_write;
615
+ c_obuf->closecallback = (xmlOutputCloseCallback)noko_io_close;
616
+ c_obuf->context = (void *)rb_io;
519
617
 
520
- if(rb_block_given_p()) {
521
- cb = block_caller;
522
- ctx = (void *)rb_block_proc();
618
+ if (rb_block_given_p()) {
619
+ c_callback_wrapper = block_caller;
620
+ rb_callback = (void *)rb_block_proc();
523
621
  }
524
622
 
525
- if(NIL_P(incl_ns)){
526
- ns = NULL;
527
- }
528
- else{
529
- Check_Type(incl_ns, T_ARRAY);
530
- ns_len = RARRAY_LEN(incl_ns);
531
- ns = calloc((size_t)ns_len+1, sizeof(xmlChar *));
532
- for (i = 0 ; i < ns_len ; i++) {
533
- VALUE entry = rb_ary_entry(incl_ns, i);
534
- ns[i] = (xmlChar*)StringValueCStr(entry);
623
+ if (NIL_P(rb_namespaces)) {
624
+ c_namespaces = NULL;
625
+ } else {
626
+ long ns_len = RARRAY_LEN(rb_namespaces);
627
+ c_namespaces = ruby_xcalloc((size_t)ns_len + 1, sizeof(xmlChar *));
628
+ for (int j = 0 ; j < ns_len ; j++) {
629
+ VALUE entry = rb_ary_entry(rb_namespaces, j);
630
+ c_namespaces[j] = (xmlChar *)StringValueCStr(entry);
535
631
  }
536
632
  }
537
633
 
634
+ xmlC14NExecute(c_doc, c_callback_wrapper, rb_callback,
635
+ c_mode,
636
+ c_namespaces,
637
+ (int)RTEST(rb_comments_p),
638
+ c_obuf);
639
+
640
+ ruby_xfree(c_namespaces);
641
+ xmlOutputBufferClose(c_obuf);
642
+
643
+ return rb_funcall(rb_io, rb_intern("string"), 0);
644
+ }
645
+
646
+ VALUE
647
+ noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int argc, VALUE *argv)
648
+ {
649
+ VALUE rb_document;
650
+ nokogiriTuplePtr tuple;
651
+
652
+ if (!klass) {
653
+ klass = cNokogiriXmlDocument;
654
+ }
655
+
656
+ rb_document = TypedData_Wrap_Struct(klass, &noko_xml_document_data_type, c_document);
657
+
658
+ tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
659
+ tuple->doc = rb_document;
660
+ tuple->unlinkedNodes = st_init_numtable_with_size(128);
661
+ tuple->node_cache = rb_ary_new();
662
+
663
+ c_document->_private = tuple ;
538
664
 
539
- xmlC14NExecute(doc, cb, ctx,
540
- (int) (NIL_P(mode) ? 0 : NUM2INT(mode)),
541
- ns,
542
- (int) RTEST(with_comments),
543
- buf);
665
+ rb_iv_set(rb_document, "@decorators", Qnil);
666
+ rb_iv_set(rb_document, "@errors", Qnil);
667
+ rb_iv_set(rb_document, "@node_cache", tuple->node_cache);
544
668
 
545
- xmlOutputBufferClose(buf);
669
+ rb_obj_call_init(rb_document, argc, argv);
546
670
 
547
- return rb_funcall(io, rb_intern("string"), 0);
671
+ return rb_document ;
548
672
  }
549
673
 
550
- VALUE cNokogiriXmlDocument ;
551
- void init_xml_document()
674
+
675
+ /* deprecated. use noko_xml_document_wrap() instead. */
676
+ VALUE
677
+ Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
552
678
  {
553
- VALUE nokogiri = rb_define_module("Nokogiri");
554
- VALUE xml = rb_define_module_under(nokogiri, "XML");
555
- VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
679
+ /* TODO: deprecate this method in v2.0 */
680
+ return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
681
+ }
556
682
 
557
- /*
558
- * Nokogiri::XML::Document wraps an xml document.
559
- */
560
- VALUE klass = rb_define_class_under(xml, "Document", node);
561
-
562
- cNokogiriXmlDocument = klass;
563
-
564
- rb_define_singleton_method(klass, "read_memory", read_memory, 4);
565
- rb_define_singleton_method(klass, "read_io", read_io, 4);
566
- rb_define_singleton_method(klass, "new", new, -1);
567
-
568
- rb_define_method(klass, "root", root, 0);
569
- rb_define_method(klass, "root=", set_root, 1);
570
- rb_define_method(klass, "encoding", encoding, 0);
571
- rb_define_method(klass, "encoding=", set_encoding, 1);
572
- rb_define_method(klass, "version", version, 0);
573
- rb_define_method(klass, "canonicalize", canonicalize, -1);
574
- rb_define_method(klass, "dup", duplicate_document, -1);
575
- rb_define_method(klass, "url", url, 0);
576
- rb_define_method(klass, "create_entity", create_entity, -1);
577
- rb_define_method(klass, "remove_namespaces!", remove_namespaces_bang, 0);
683
+ VALUE
684
+ noko_xml_document_wrap(VALUE klass, xmlDocPtr doc)
685
+ {
686
+ return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
578
687
  }
579
688
 
689
+ xmlDocPtr
690
+ noko_xml_document_unwrap(VALUE rb_document)
691
+ {
692
+ xmlDocPtr c_document;
693
+ TypedData_Get_Struct(rb_document, xmlDoc, &noko_xml_document_data_type, c_document);
694
+ return c_document;
695
+ }
580
696
 
581
- /* this takes klass as a param because it's used for HtmlDocument, too. */
582
- VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
697
+ /* Schema creation will remove and deallocate "blank" nodes.
698
+ * If those blank nodes have been exposed to Ruby, they could get freed
699
+ * out from under the VALUE pointer. This function checks to see if any of
700
+ * those nodes have been exposed to Ruby, and if so we should raise an exception.
701
+ */
702
+ int
703
+ noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document)
583
704
  {
584
- nokogiriTuplePtr tuple = (nokogiriTuplePtr)malloc(sizeof(nokogiriTuple));
705
+ VALUE cache = DOC_NODE_CACHE(c_document);
585
706
 
586
- VALUE rb_doc = Data_Wrap_Struct(
587
- klass ? klass : cNokogiriXmlDocument,
588
- 0,
589
- dealloc,
590
- doc
591
- );
707
+ if (NIL_P(cache)) {
708
+ return 0;
709
+ }
592
710
 
593
- VALUE cache = rb_ary_new();
594
- rb_iv_set(rb_doc, "@decorators", Qnil);
595
- rb_iv_set(rb_doc, "@node_cache", cache);
711
+ for (long jnode = 0; jnode < RARRAY_LEN(cache); jnode++) {
712
+ xmlNodePtr node;
713
+ VALUE element = rb_ary_entry(cache, jnode);
596
714
 
597
- tuple->doc = rb_doc;
598
- tuple->unlinkedNodes = st_init_numtable_with_size(128);
599
- tuple->node_cache = cache;
600
- doc->_private = tuple ;
715
+ Noko_Node_Get_Struct(element, xmlNode, node);
716
+ if (xmlIsBlankNode(node)) {
717
+ return 1;
718
+ }
719
+ }
601
720
 
602
- rb_obj_call_init(rb_doc, 0, NULL);
721
+ return 0;
722
+ }
603
723
 
604
- return rb_doc ;
724
+ void
725
+ noko_xml_document_pin_node(xmlNodePtr node)
726
+ {
727
+ xmlDocPtr doc;
728
+ nokogiriTuplePtr tuple;
729
+
730
+ doc = node->doc;
731
+ tuple = (nokogiriTuplePtr)doc->_private;
732
+ st_insert(tuple->unlinkedNodes, (st_data_t)node, (st_data_t)node);
733
+ }
734
+
735
+
736
+ void
737
+ noko_xml_document_pin_namespace(xmlNsPtr ns, xmlDocPtr doc)
738
+ {
739
+ nokogiriTuplePtr tuple;
740
+
741
+ tuple = (nokogiriTuplePtr)doc->_private;
742
+ st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns);
743
+ }
744
+
745
+
746
+ void
747
+ noko_init_xml_document(void)
748
+ {
749
+ assert(cNokogiriXmlNode);
750
+ /*
751
+ * Nokogiri::XML::Document wraps an xml document.
752
+ */
753
+ cNokogiriXmlDocument = rb_define_class_under(mNokogiriXml, "Document", cNokogiriXmlNode);
754
+
755
+ rb_define_singleton_method(cNokogiriXmlDocument, "read_memory", read_memory, 4);
756
+ rb_define_singleton_method(cNokogiriXmlDocument, "read_io", read_io, 4);
757
+ rb_define_singleton_method(cNokogiriXmlDocument, "new", new, -1);
758
+
759
+ rb_define_method(cNokogiriXmlDocument, "root", rb_xml_document_root, 0);
760
+ rb_define_method(cNokogiriXmlDocument, "root=", rb_xml_document_root_set, 1);
761
+ rb_define_method(cNokogiriXmlDocument, "encoding", encoding, 0);
762
+ rb_define_method(cNokogiriXmlDocument, "encoding=", set_encoding, 1);
763
+ rb_define_method(cNokogiriXmlDocument, "version", version, 0);
764
+ rb_define_method(cNokogiriXmlDocument, "canonicalize", rb_xml_document_canonicalize, -1);
765
+ rb_define_method(cNokogiriXmlDocument, "dup", duplicate_document, -1);
766
+ rb_define_method(cNokogiriXmlDocument, "url", url, 0);
767
+ rb_define_method(cNokogiriXmlDocument, "create_entity", create_entity, -1);
768
+ rb_define_method(cNokogiriXmlDocument, "remove_namespaces!", remove_namespaces_bang, 0);
605
769
  }