nokogiri 1.8.5 → 1.15.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (358)
  1. checksums.yaml +4 -4
  2. data/Gemfile +40 -18
  3. data/LICENSE-DEPENDENCIES.md +1636 -1024
  4. data/LICENSE.md +5 -28
  5. data/README.md +203 -90
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -61
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +867 -417
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +215 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +42 -37
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +40 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +401 -237
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +136 -62
  33. data/ext/nokogiri/xml_node.c +1387 -678
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +347 -212
  37. data/ext/nokogiri/xml_relax_ng.c +86 -77
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +145 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +64 -36
  41. data/ext/nokogiri/xml_schema.c +138 -81
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +36 -26
  44. data/ext/nokogiri/xml_xpath_context.c +366 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +111 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +224 -95
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +392 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +223 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +98 -54
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -126
  133. data/lib/nokogiri/xml/document_fragment.rb +104 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +45 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1093 -411
  142. data/lib/nokogiri/xml/node_set.rb +173 -67
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +38 -37
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  169. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  170. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  175. data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
  176. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
  177. metadata +126 -399
  178. data/.autotest +0 -22
  179. data/.cross_rubies +0 -8
  180. data/.editorconfig +0 -17
  181. data/.gemtest +0 -0
  182. data/.travis.yml +0 -63
  183. data/CHANGELOG.md +0 -1368
  184. data/CONTRIBUTING.md +0 -42
  185. data/C_CODING_STYLE.rdoc +0 -33
  186. data/Gemfile-libxml-ruby +0 -3
  187. data/Manifest.txt +0 -370
  188. data/ROADMAP.md +0 -111
  189. data/Rakefile +0 -348
  190. data/SECURITY.md +0 -19
  191. data/STANDARD_RESPONSES.md +0 -47
  192. data/Y_U_NO_GEMSPEC.md +0 -155
  193. data/appveyor.yml +0 -29
  194. data/build_all +0 -44
  195. data/ext/nokogiri/html_document.c +0 -170
  196. data/ext/nokogiri/html_document.h +0 -10
  197. data/ext/nokogiri/html_element_description.c +0 -279
  198. data/ext/nokogiri/html_element_description.h +0 -10
  199. data/ext/nokogiri/html_entity_lookup.c +0 -32
  200. data/ext/nokogiri/html_entity_lookup.h +0 -8
  201. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  202. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  203. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  204. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  205. data/ext/nokogiri/xml_attr.h +0 -9
  206. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  207. data/ext/nokogiri/xml_cdata.h +0 -9
  208. data/ext/nokogiri/xml_comment.h +0 -9
  209. data/ext/nokogiri/xml_document.h +0 -23
  210. data/ext/nokogiri/xml_document_fragment.h +0 -10
  211. data/ext/nokogiri/xml_dtd.h +0 -10
  212. data/ext/nokogiri/xml_element_content.h +0 -10
  213. data/ext/nokogiri/xml_element_decl.h +0 -9
  214. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  215. data/ext/nokogiri/xml_entity_decl.h +0 -10
  216. data/ext/nokogiri/xml_entity_reference.h +0 -9
  217. data/ext/nokogiri/xml_io.c +0 -61
  218. data/ext/nokogiri/xml_io.h +0 -11
  219. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  220. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  221. data/ext/nokogiri/xml_namespace.h +0 -15
  222. data/ext/nokogiri/xml_node.h +0 -13
  223. data/ext/nokogiri/xml_node_set.h +0 -12
  224. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  225. data/ext/nokogiri/xml_reader.h +0 -10
  226. data/ext/nokogiri/xml_relax_ng.h +0 -9
  227. data/ext/nokogiri/xml_sax_parser.h +0 -39
  228. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  229. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  230. data/ext/nokogiri/xml_schema.h +0 -9
  231. data/ext/nokogiri/xml_syntax_error.h +0 -13
  232. data/ext/nokogiri/xml_text.h +0 -9
  233. data/ext/nokogiri/xml_xpath_context.h +0 -10
  234. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  235. data/lib/nokogiri/html/document.rb +0 -335
  236. data/lib/nokogiri/html/document_fragment.rb +0 -49
  237. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  238. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  239. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  240. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  241. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  242. data/patches/sort-patches-by-date +0 -25
  243. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  244. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
  245. data/suppressions/README.txt +0 -1
  246. data/suppressions/nokogiri_ruby-2.supp +0 -10
  247. data/tasks/test.rb +0 -100
  248. data/test/css/test_nthiness.rb +0 -226
  249. data/test/css/test_parser.rb +0 -386
  250. data/test/css/test_tokenizer.rb +0 -215
  251. data/test/css/test_xpath_visitor.rb +0 -96
  252. data/test/decorators/test_slop.rb +0 -23
  253. data/test/files/2ch.html +0 -108
  254. data/test/files/GH_1042.html +0 -18
  255. data/test/files/address_book.rlx +0 -12
  256. data/test/files/address_book.xml +0 -10
  257. data/test/files/atom.xml +0 -344
  258. data/test/files/bar/bar.xsd +0 -4
  259. data/test/files/bogus.xml +0 -0
  260. data/test/files/dont_hurt_em_why.xml +0 -422
  261. data/test/files/encoding.html +0 -82
  262. data/test/files/encoding.xhtml +0 -84
  263. data/test/files/exslt.xml +0 -8
  264. data/test/files/exslt.xslt +0 -35
  265. data/test/files/foo/foo.xsd +0 -4
  266. data/test/files/metacharset.html +0 -10
  267. data/test/files/namespace_pressure_test.xml +0 -1684
  268. data/test/files/noencoding.html +0 -47
  269. data/test/files/po.xml +0 -32
  270. data/test/files/po.xsd +0 -66
  271. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  272. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  273. data/test/files/saml/xenc_schema.xsd +0 -146
  274. data/test/files/saml/xmldsig_schema.xsd +0 -318
  275. data/test/files/shift_jis.html +0 -10
  276. data/test/files/shift_jis.xml +0 -5
  277. data/test/files/shift_jis_no_charset.html +0 -9
  278. data/test/files/slow-xpath.xml +0 -25509
  279. data/test/files/snuggles.xml +0 -3
  280. data/test/files/staff.dtd +0 -10
  281. data/test/files/staff.xml +0 -59
  282. data/test/files/staff.xslt +0 -32
  283. data/test/files/test_document_url/bar.xml +0 -2
  284. data/test/files/test_document_url/document.dtd +0 -4
  285. data/test/files/test_document_url/document.xml +0 -6
  286. data/test/files/tlm.html +0 -851
  287. data/test/files/to_be_xincluded.xml +0 -2
  288. data/test/files/valid_bar.xml +0 -2
  289. data/test/files/xinclude.xml +0 -4
  290. data/test/helper.rb +0 -271
  291. data/test/html/sax/test_parser.rb +0 -168
  292. data/test/html/sax/test_parser_context.rb +0 -46
  293. data/test/html/sax/test_parser_text.rb +0 -163
  294. data/test/html/sax/test_push_parser.rb +0 -87
  295. data/test/html/test_attributes.rb +0 -85
  296. data/test/html/test_builder.rb +0 -164
  297. data/test/html/test_document.rb +0 -712
  298. data/test/html/test_document_encoding.rb +0 -143
  299. data/test/html/test_document_fragment.rb +0 -310
  300. data/test/html/test_element_description.rb +0 -105
  301. data/test/html/test_named_characters.rb +0 -14
  302. data/test/html/test_node.rb +0 -212
  303. data/test/html/test_node_encoding.rb +0 -91
  304. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  305. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  306. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  307. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  308. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  309. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  310. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  311. data/test/test_convert_xpath.rb +0 -135
  312. data/test/test_css_cache.rb +0 -47
  313. data/test/test_encoding_handler.rb +0 -48
  314. data/test/test_memory_leak.rb +0 -156
  315. data/test/test_nokogiri.rb +0 -138
  316. data/test/test_soap4r_sax.rb +0 -52
  317. data/test/test_xslt_transforms.rb +0 -314
  318. data/test/xml/node/test_save_options.rb +0 -28
  319. data/test/xml/node/test_subclass.rb +0 -44
  320. data/test/xml/sax/test_parser.rb +0 -402
  321. data/test/xml/sax/test_parser_context.rb +0 -115
  322. data/test/xml/sax/test_parser_text.rb +0 -202
  323. data/test/xml/sax/test_push_parser.rb +0 -265
  324. data/test/xml/test_attr.rb +0 -74
  325. data/test/xml/test_attribute_decl.rb +0 -86
  326. data/test/xml/test_builder.rb +0 -341
  327. data/test/xml/test_c14n.rb +0 -180
  328. data/test/xml/test_cdata.rb +0 -54
  329. data/test/xml/test_comment.rb +0 -40
  330. data/test/xml/test_document.rb +0 -982
  331. data/test/xml/test_document_encoding.rb +0 -31
  332. data/test/xml/test_document_fragment.rb +0 -298
  333. data/test/xml/test_dtd.rb +0 -187
  334. data/test/xml/test_dtd_encoding.rb +0 -31
  335. data/test/xml/test_element_content.rb +0 -56
  336. data/test/xml/test_element_decl.rb +0 -73
  337. data/test/xml/test_entity_decl.rb +0 -122
  338. data/test/xml/test_entity_reference.rb +0 -262
  339. data/test/xml/test_namespace.rb +0 -96
  340. data/test/xml/test_node.rb +0 -1325
  341. data/test/xml/test_node_attributes.rb +0 -115
  342. data/test/xml/test_node_encoding.rb +0 -75
  343. data/test/xml/test_node_inheritance.rb +0 -32
  344. data/test/xml/test_node_reparenting.rb +0 -592
  345. data/test/xml/test_node_set.rb +0 -809
  346. data/test/xml/test_parse_options.rb +0 -64
  347. data/test/xml/test_processing_instruction.rb +0 -30
  348. data/test/xml/test_reader.rb +0 -620
  349. data/test/xml/test_reader_encoding.rb +0 -134
  350. data/test/xml/test_relax_ng.rb +0 -60
  351. data/test/xml/test_schema.rb +0 -142
  352. data/test/xml/test_syntax_error.rb +0 -36
  353. data/test/xml/test_text.rb +0 -60
  354. data/test/xml/test_unparented_node.rb +0 -483
  355. data/test/xml/test_xinclude.rb +0 -83
  356. data/test/xml/test_xpath.rb +0 -470
  357. data/test/xslt/test_custom_functions.rb +0 -133
  358. data/test/xslt/test_exception_handling.rb +0 -37
@@ -1,13 +1,24 @@
1
- #include <xml_reader.h>
1
+ #include <nokogiri.h>
2
2
 
3
- static void dealloc(xmlTextReaderPtr reader)
3
+ VALUE cNokogiriXmlReader;
4
+
5
+ static void
6
+ xml_reader_deallocate(void *data)
4
7
  {
5
- NOKOGIRI_DEBUG_START(reader);
8
+ xmlTextReaderPtr reader = data;
6
9
  xmlFreeTextReader(reader);
7
- NOKOGIRI_DEBUG_END(reader);
8
10
  }
9
11
 
10
- static int has_attributes(xmlTextReaderPtr reader)
12
+ static const rb_data_type_t xml_reader_type = {
13
+ .wrap_struct_name = "Nokogiri::XML::Reader",
14
+ .function = {
15
+ .dfree = xml_reader_deallocate,
16
+ },
17
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
18
+ };
19
+
20
+ static int
21
+ has_attributes(xmlTextReaderPtr reader)
11
22
  {
12
23
  /*
13
24
  * this implementation of xmlTextReaderHasAttributes explicitly includes
@@ -16,47 +27,40 @@ static int has_attributes(xmlTextReaderPtr reader)
16
27
  */
17
28
  xmlNodePtr node ;
18
29
  node = xmlTextReaderCurrentNode(reader);
19
- if (node == NULL)
20
- return(0);
30
+ if (node == NULL) {
31
+ return (0);
32
+ }
21
33
 
22
34
  if ((node->type == XML_ELEMENT_NODE) &&
23
- ((node->properties != NULL) || (node->nsDef != NULL)))
24
- return(1);
25
- return(0);
35
+ ((node->properties != NULL) || (node->nsDef != NULL))) {
36
+ return (1);
37
+ }
38
+ return (0);
26
39
  }
27
40
 
28
- static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
41
+ // TODO: merge this function into the `namespaces` method implementation
42
+ static void
43
+ Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
29
44
  {
30
45
  xmlNsPtr ns;
31
- static char buffer[XMLNS_BUFFER_LEN] ;
32
- char *key ;
33
- size_t keylen ;
46
+ VALUE key;
34
47
 
35
- if (node->type != XML_ELEMENT_NODE) return ;
48
+ if (node->type != XML_ELEMENT_NODE) { return ; }
36
49
 
37
50
  ns = node->nsDef;
38
51
  while (ns != NULL) {
39
52
 
40
- keylen = XMLNS_PREFIX_LEN + (ns->prefix ? (strlen((const char*)ns->prefix) + 1) : 0) ;
41
- if (keylen > XMLNS_BUFFER_LEN) {
42
- key = (char*)malloc(keylen) ;
43
- } else {
44
- key = buffer ;
45
- }
46
-
53
+ key = rb_enc_str_new_cstr(XMLNS_PREFIX, rb_utf8_encoding());
47
54
  if (ns->prefix) {
48
- sprintf(key, "%s:%s", XMLNS_PREFIX, ns->prefix);
49
- } else {
50
- sprintf(key, "%s", XMLNS_PREFIX);
55
+ rb_str_cat_cstr(key, ":");
56
+ rb_str_cat_cstr(key, (const char *)ns->prefix);
51
57
  }
52
58
 
59
+ key = rb_str_conv_enc(key, rb_utf8_encoding(), rb_default_internal_encoding());
53
60
  rb_hash_aset(attr_hash,
54
- NOKOGIRI_STR_NEW2(key),
55
- (ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil)
56
- );
57
- if (key != buffer) {
58
- free(key);
59
- }
61
+ key,
62
+ (ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil)
63
+ );
60
64
  ns = ns->next ;
61
65
  }
62
66
  }
@@ -68,15 +72,16 @@ static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
68
72
  *
69
73
  * Was an attribute generated from the default value in the DTD or schema?
70
74
  */
71
- static VALUE default_eh(VALUE self)
75
+ static VALUE
76
+ default_eh(VALUE self)
72
77
  {
73
78
  xmlTextReaderPtr reader;
74
79
  int eh;
75
80
 
76
- Data_Get_Struct(self, xmlTextReader, reader);
81
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
77
82
  eh = xmlTextReaderIsDefault(reader);
78
- if(eh == 0) return Qfalse;
79
- if(eh == 1) return Qtrue;
83
+ if (eh == 0) { return Qfalse; }
84
+ if (eh == 1) { return Qtrue; }
80
85
 
81
86
  return Qnil;
82
87
  }
@@ -87,15 +92,16 @@ static VALUE default_eh(VALUE self)
87
92
  *
88
93
  * Does this node have a text value?
89
94
  */
90
- static VALUE value_eh(VALUE self)
95
+ static VALUE
96
+ value_eh(VALUE self)
91
97
  {
92
98
  xmlTextReaderPtr reader;
93
99
  int eh;
94
100
 
95
- Data_Get_Struct(self, xmlTextReader, reader);
101
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
96
102
  eh = xmlTextReaderHasValue(reader);
97
- if(eh == 0) return Qfalse;
98
- if(eh == 1) return Qtrue;
103
+ if (eh == 0) { return Qfalse; }
104
+ if (eh == 1) { return Qtrue; }
99
105
 
100
106
  return Qnil;
101
107
  }
@@ -106,15 +112,16 @@ static VALUE value_eh(VALUE self)
106
112
  *
107
113
  * Does this node have attributes?
108
114
  */
109
- static VALUE attributes_eh(VALUE self)
115
+ static VALUE
116
+ attributes_eh(VALUE self)
110
117
  {
111
118
  xmlTextReaderPtr reader;
112
119
  int eh;
113
120
 
114
- Data_Get_Struct(self, xmlTextReader, reader);
121
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
115
122
  eh = has_attributes(reader);
116
- if(eh == 0) return Qfalse;
117
- if(eh == 1) return Qtrue;
123
+ if (eh == 0) { return Qfalse; }
124
+ if (eh == 1) { return Qtrue; }
118
125
 
119
126
  return Qnil;
120
127
  }
@@ -125,52 +132,136 @@ static VALUE attributes_eh(VALUE self)
125
132
  *
126
133
  * Get a hash of namespaces for this Node
127
134
  */
128
- static VALUE namespaces(VALUE self)
135
+ static VALUE
136
+ rb_xml_reader_namespaces(VALUE rb_reader)
129
137
  {
130
- xmlTextReaderPtr reader;
131
- xmlNodePtr ptr;
132
- VALUE attr ;
138
+ VALUE rb_namespaces = rb_hash_new() ;
139
+ xmlTextReaderPtr c_reader;
140
+ xmlNodePtr c_node;
141
+ VALUE rb_errors;
142
+
143
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
133
144
 
134
- Data_Get_Struct(self, xmlTextReader, reader);
145
+ if (! has_attributes(c_reader)) {
146
+ return rb_namespaces ;
147
+ }
135
148
 
136
- attr = rb_hash_new() ;
149
+ rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
137
150
 
138
- if (! has_attributes(reader))
139
- return attr ;
151
+ xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
152
+ c_node = xmlTextReaderExpand(c_reader);
153
+ xmlSetStructuredErrorFunc(NULL, NULL);
140
154
 
141
- ptr = xmlTextReaderExpand(reader);
142
- if(ptr == NULL) return Qnil;
155
+ if (c_node == NULL) {
156
+ if (RARRAY_LEN(rb_errors) > 0) {
157
+ VALUE rb_error = rb_ary_entry(rb_errors, 0);
158
+ VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
159
+ rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
160
+ }
161
+ return Qnil;
162
+ }
143
163
 
144
- Nokogiri_xml_node_namespaces(ptr, attr);
164
+ Nokogiri_xml_node_namespaces(c_node, rb_namespaces);
145
165
 
146
- return attr ;
166
+ return rb_namespaces ;
147
167
  }
148
168
 
149
169
  /*
150
- * call-seq:
151
- * attribute_nodes
152
- *
153
- * Get a list of attributes for this Node
170
+ :call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
171
+
172
+ Get the attributes of the current node as an Array of XML:Attr
173
+
174
+ ⚠ This method is deprecated and unsafe to use. It will be removed in a future version of Nokogiri.
175
+
176
+ See related: #attribute_hash, #attributes
154
177
  */
155
- static VALUE attribute_nodes(VALUE self)
178
+ static VALUE
179
+ rb_xml_reader_attribute_nodes(VALUE rb_reader)
156
180
  {
157
- xmlTextReaderPtr reader;
158
- xmlNodePtr ptr;
159
- VALUE attr ;
181
+ xmlTextReaderPtr c_reader;
182
+ xmlNodePtr c_node;
183
+ VALUE attr_nodes;
184
+ int j;
160
185
 
161
- Data_Get_Struct(self, xmlTextReader, reader);
186
+ // TODO: deprecated, remove in Nokogiri v1.15, see https://github.com/sparklemotion/nokogiri/issues/2598
187
+ // After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c
188
+ NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead.");
162
189
 
163
- attr = rb_ary_new() ;
190
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
164
191
 
165
- if (! has_attributes(reader))
166
- return attr ;
192
+ if (! has_attributes(c_reader)) {
193
+ return rb_ary_new() ;
194
+ }
195
+
196
+ c_node = xmlTextReaderExpand(c_reader);
197
+ if (c_node == NULL) {
198
+ return Qnil;
199
+ }
167
200
 
168
- ptr = xmlTextReaderExpand(reader);
169
- if(ptr == NULL) return Qnil;
201
+ attr_nodes = noko_xml_node_attrs(c_node);
170
202
 
171
- Nokogiri_xml_node_properties(ptr, attr);
203
+ /* ensure that the Reader won't be GCed as long as a node is referenced */
204
+ for (j = 0 ; j < RARRAY_LEN(attr_nodes) ; j++) {
205
+ rb_iv_set(rb_ary_entry(attr_nodes, j), "@reader", rb_reader);
206
+ }
172
207
 
173
- return attr ;
208
+ return attr_nodes;
209
+ }
210
+
211
+ /*
212
+ :call-seq: attribute_hash() → Hash<String ⇒ String>
213
+
214
+ Get the attributes of the current node as a Hash of names and values.
215
+
216
+ See related: #attributes and #namespaces
217
+ */
218
+ static VALUE
219
+ rb_xml_reader_attribute_hash(VALUE rb_reader)
220
+ {
221
+ VALUE rb_attributes = rb_hash_new();
222
+ xmlTextReaderPtr c_reader;
223
+ xmlNodePtr c_node;
224
+ xmlAttrPtr c_property;
225
+ VALUE rb_errors;
226
+
227
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
228
+
229
+ if (!has_attributes(c_reader)) {
230
+ return rb_attributes;
231
+ }
232
+
233
+ rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
234
+
235
+ xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
236
+ c_node = xmlTextReaderExpand(c_reader);
237
+ xmlSetStructuredErrorFunc(NULL, NULL);
238
+
239
+ if (c_node == NULL) {
240
+ if (RARRAY_LEN(rb_errors) > 0) {
241
+ VALUE rb_error = rb_ary_entry(rb_errors, 0);
242
+ VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
243
+ rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
244
+ }
245
+ return Qnil;
246
+ }
247
+
248
+ c_property = c_node->properties;
249
+ while (c_property != NULL) {
250
+ VALUE rb_name = NOKOGIRI_STR_NEW2(c_property->name);
251
+ VALUE rb_value = Qnil;
252
+ xmlChar *c_value = xmlNodeGetContent((xmlNode *)c_property);
253
+
254
+ if (c_value) {
255
+ rb_value = NOKOGIRI_STR_NEW2(c_value);
256
+ xmlFree(c_value);
257
+ }
258
+
259
+ rb_hash_aset(rb_attributes, rb_name, rb_value);
260
+
261
+ c_property = c_property->next;
262
+ }
263
+
264
+ return rb_attributes;
174
265
  }
175
266
 
176
267
  /*
@@ -179,22 +270,23 @@ static VALUE attribute_nodes(VALUE self)
179
270
  *
180
271
  * Get the value of attribute at +index+
181
272
  */
182
- static VALUE attribute_at(VALUE self, VALUE index)
273
+ static VALUE
274
+ attribute_at(VALUE self, VALUE index)
183
275
  {
184
276
  xmlTextReaderPtr reader;
185
277
  xmlChar *value;
186
278
  VALUE rb_value;
187
279
 
188
- Data_Get_Struct(self, xmlTextReader, reader);
280
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
189
281
 
190
- if(NIL_P(index)) return Qnil;
282
+ if (NIL_P(index)) { return Qnil; }
191
283
  index = rb_Integer(index);
192
284
 
193
285
  value = xmlTextReaderGetAttributeNo(
194
- reader,
195
- (int)NUM2INT(index)
196
- );
197
- if(value == NULL) return Qnil;
286
+ reader,
287
+ (int)NUM2INT(index)
288
+ );
289
+ if (value == NULL) { return Qnil; }
198
290
 
199
291
  rb_value = NOKOGIRI_STR_NEW2(value);
200
292
  xmlFree(value);
@@ -207,19 +299,20 @@ static VALUE attribute_at(VALUE self, VALUE index)
207
299
  *
208
300
  * Get the value of attribute named +name+
209
301
  */
210
- static VALUE reader_attribute(VALUE self, VALUE name)
302
+ static VALUE
303
+ reader_attribute(VALUE self, VALUE name)
211
304
  {
212
305
  xmlTextReaderPtr reader;
213
306
  xmlChar *value ;
214
307
  VALUE rb_value;
215
308
 
216
- Data_Get_Struct(self, xmlTextReader, reader);
309
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
217
310
 
218
- if(NIL_P(name)) return Qnil;
311
+ if (NIL_P(name)) { return Qnil; }
219
312
  name = StringValue(name) ;
220
313
 
221
- value = xmlTextReaderGetAttribute(reader, (xmlChar*)StringValueCStr(name));
222
- if(value == NULL) return Qnil;
314
+ value = xmlTextReaderGetAttribute(reader, (xmlChar *)StringValueCStr(name));
315
+ if (value == NULL) { return Qnil; }
223
316
 
224
317
  rb_value = NOKOGIRI_STR_NEW2(value);
225
318
  xmlFree(value);
@@ -232,16 +325,17 @@ static VALUE reader_attribute(VALUE self, VALUE name)
232
325
  *
233
326
  * Get the number of attributes for the current node
234
327
  */
235
- static VALUE attribute_count(VALUE self)
328
+ static VALUE
329
+ attribute_count(VALUE self)
236
330
  {
237
331
  xmlTextReaderPtr reader;
238
332
  int count;
239
333
 
240
- Data_Get_Struct(self, xmlTextReader, reader);
334
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
241
335
  count = xmlTextReaderAttributeCount(reader);
242
- if(count == -1) return Qnil;
336
+ if (count == -1) { return Qnil; }
243
337
 
244
- return INT2NUM((long)count);
338
+ return INT2NUM(count);
245
339
  }
246
340
 
247
341
  /*
@@ -250,16 +344,17 @@ static VALUE attribute_count(VALUE self)
250
344
  *
251
345
  * Get the depth of the node
252
346
  */
253
- static VALUE depth(VALUE self)
347
+ static VALUE
348
+ depth(VALUE self)
254
349
  {
255
350
  xmlTextReaderPtr reader;
256
351
  int depth;
257
352
 
258
- Data_Get_Struct(self, xmlTextReader, reader);
353
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
259
354
  depth = xmlTextReaderDepth(reader);
260
- if(depth == -1) return Qnil;
355
+ if (depth == -1) { return Qnil; }
261
356
 
262
- return INT2NUM((long)depth);
357
+ return INT2NUM(depth);
263
358
  }
264
359
 
265
360
  /*
@@ -268,14 +363,15 @@ static VALUE depth(VALUE self)
268
363
  *
269
364
  * Get the XML version of the document being read
270
365
  */
271
- static VALUE xml_version(VALUE self)
366
+ static VALUE
367
+ xml_version(VALUE self)
272
368
  {
273
369
  xmlTextReaderPtr reader;
274
370
  const char *version;
275
371
 
276
- Data_Get_Struct(self, xmlTextReader, reader);
372
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
277
373
  version = (const char *)xmlTextReaderConstXmlVersion(reader);
278
- if(version == NULL) return Qnil;
374
+ if (version == NULL) { return Qnil; }
279
375
 
280
376
  return NOKOGIRI_STR_NEW2(version);
281
377
  }
@@ -286,14 +382,15 @@ static VALUE xml_version(VALUE self)
286
382
  *
287
383
  * Get the xml:lang scope within which the node resides.
288
384
  */
289
- static VALUE lang(VALUE self)
385
+ static VALUE
386
+ lang(VALUE self)
290
387
  {
291
388
  xmlTextReaderPtr reader;
292
389
  const char *lang;
293
390
 
294
- Data_Get_Struct(self, xmlTextReader, reader);
391
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
295
392
  lang = (const char *)xmlTextReaderConstXmlLang(reader);
296
- if(lang == NULL) return Qnil;
393
+ if (lang == NULL) { return Qnil; }
297
394
 
298
395
  return NOKOGIRI_STR_NEW2(lang);
299
396
  }
@@ -304,14 +401,15 @@ static VALUE lang(VALUE self)
304
401
  *
305
402
  * Get the text value of the node if present. Returns a utf-8 encoded string.
306
403
  */
307
- static VALUE value(VALUE self)
404
+ static VALUE
405
+ value(VALUE self)
308
406
  {
309
407
  xmlTextReaderPtr reader;
310
408
  const char *value;
311
409
 
312
- Data_Get_Struct(self, xmlTextReader, reader);
410
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
313
411
  value = (const char *)xmlTextReaderConstValue(reader);
314
- if(value == NULL) return Qnil;
412
+ if (value == NULL) { return Qnil; }
315
413
 
316
414
  return NOKOGIRI_STR_NEW2(value);
317
415
  }
@@ -322,14 +420,15 @@ static VALUE value(VALUE self)
322
420
  *
323
421
  * Get the shorthand reference to the namespace associated with the node.
324
422
  */
325
- static VALUE prefix(VALUE self)
423
+ static VALUE
424
+ prefix(VALUE self)
326
425
  {
327
426
  xmlTextReaderPtr reader;
328
427
  const char *prefix;
329
428
 
330
- Data_Get_Struct(self, xmlTextReader, reader);
429
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
331
430
  prefix = (const char *)xmlTextReaderConstPrefix(reader);
332
- if(prefix == NULL) return Qnil;
431
+ if (prefix == NULL) { return Qnil; }
333
432
 
334
433
  return NOKOGIRI_STR_NEW2(prefix);
335
434
  }
@@ -340,14 +439,15 @@ static VALUE prefix(VALUE self)
340
439
  *
341
440
  * Get the URI defining the namespace associated with the node
342
441
  */
343
- static VALUE namespace_uri(VALUE self)
442
+ static VALUE
443
+ namespace_uri(VALUE self)
344
444
  {
345
445
  xmlTextReaderPtr reader;
346
446
  const char *uri;
347
447
 
348
- Data_Get_Struct(self, xmlTextReader, reader);
448
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
349
449
  uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
350
- if(uri == NULL) return Qnil;
450
+ if (uri == NULL) { return Qnil; }
351
451
 
352
452
  return NOKOGIRI_STR_NEW2(uri);
353
453
  }
@@ -358,14 +458,15 @@ static VALUE namespace_uri(VALUE self)
358
458
  *
359
459
  * Get the local name of the node
360
460
  */
361
- static VALUE local_name(VALUE self)
461
+ static VALUE
462
+ local_name(VALUE self)
362
463
  {
363
464
  xmlTextReaderPtr reader;
364
465
  const char *name;
365
466
 
366
- Data_Get_Struct(self, xmlTextReader, reader);
467
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
367
468
  name = (const char *)xmlTextReaderConstLocalName(reader);
368
- if(name == NULL) return Qnil;
469
+ if (name == NULL) { return Qnil; }
369
470
 
370
471
  return NOKOGIRI_STR_NEW2(name);
371
472
  }
@@ -376,14 +477,15 @@ static VALUE local_name(VALUE self)
376
477
  *
377
478
  * Get the name of the node. Returns a utf-8 encoded string.
378
479
  */
379
- static VALUE name(VALUE self)
480
+ static VALUE
481
+ name(VALUE self)
380
482
  {
381
483
  xmlTextReaderPtr reader;
382
484
  const char *name;
383
485
 
384
- Data_Get_Struct(self, xmlTextReader, reader);
486
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
385
487
  name = (const char *)xmlTextReaderConstName(reader);
386
- if(name == NULL) return Qnil;
488
+ if (name == NULL) { return Qnil; }
387
489
 
388
490
  return NOKOGIRI_STR_NEW2(name);
389
491
  }
@@ -394,16 +496,24 @@ static VALUE name(VALUE self)
394
496
  *
395
497
  * Get the xml:base of the node
396
498
  */
397
- static VALUE base_uri(VALUE self)
499
+ static VALUE
500
+ rb_xml_reader_base_uri(VALUE rb_reader)
398
501
  {
399
- xmlTextReaderPtr reader;
400
- const char * base_uri;
502
+ VALUE rb_base_uri;
503
+ xmlTextReaderPtr c_reader;
504
+ xmlChar *c_base_uri;
505
+
506
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
507
+
508
+ c_base_uri = xmlTextReaderBaseUri(c_reader);
509
+ if (c_base_uri == NULL) {
510
+ return Qnil;
511
+ }
401
512
 
402
- Data_Get_Struct(self, xmlTextReader, reader);
403
- base_uri = (const char *)xmlTextReaderBaseUri(reader);
404
- if (base_uri == NULL) return Qnil;
513
+ rb_base_uri = NOKOGIRI_STR_NEW2(c_base_uri);
514
+ xmlFree(c_base_uri);
405
515
 
406
- return NOKOGIRI_STR_NEW2(base_uri);
516
+ return rb_base_uri;
407
517
  }
408
518
 
409
519
  /*
@@ -412,11 +522,12 @@ static VALUE base_uri(VALUE self)
412
522
  *
413
523
  * Get the state of the reader
414
524
  */
415
- static VALUE state(VALUE self)
525
+ static VALUE
526
+ state(VALUE self)
416
527
  {
417
528
  xmlTextReaderPtr reader;
418
- Data_Get_Struct(self, xmlTextReader, reader);
419
- return INT2NUM((long)xmlTextReaderReadState(reader));
529
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
530
+ return INT2NUM(xmlTextReaderReadState(reader));
420
531
  }
421
532
 
422
533
  /*
@@ -425,11 +536,12 @@ static VALUE state(VALUE self)
425
536
  *
426
537
  * Get the type of readers current node
427
538
  */
428
- static VALUE node_type(VALUE self)
539
+ static VALUE
540
+ node_type(VALUE self)
429
541
  {
430
542
  xmlTextReaderPtr reader;
431
- Data_Get_Struct(self, xmlTextReader, reader);
432
- return INT2NUM((long)xmlTextReaderNodeType(reader));
543
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
544
+ return INT2NUM(xmlTextReaderNodeType(reader));
433
545
  }
434
546
 
435
547
  /*
@@ -438,14 +550,15 @@ static VALUE node_type(VALUE self)
438
550
  *
439
551
  * Move the Reader forward through the XML document.
440
552
  */
441
- static VALUE read_more(VALUE self)
553
+ static VALUE
554
+ read_more(VALUE self)
442
555
  {
443
556
  xmlTextReaderPtr reader;
444
557
  xmlErrorPtr error;
445
558
  VALUE error_list;
446
559
  int ret;
447
560
 
448
- Data_Get_Struct(self, xmlTextReader, reader);
561
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
449
562
 
450
563
  error_list = rb_funcall(self, rb_intern("errors"), 0);
451
564
 
@@ -453,14 +566,15 @@ static VALUE read_more(VALUE self)
453
566
  ret = xmlTextReaderRead(reader);
454
567
  xmlSetStructuredErrorFunc(NULL, NULL);
455
568
 
456
- if(ret == 1) return self;
457
- if(ret == 0) return Qnil;
569
+ if (ret == 1) { return self; }
570
+ if (ret == 0) { return Qnil; }
458
571
 
459
572
  error = xmlGetLastError();
460
- if(error)
573
+ if (error) {
461
574
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
462
- else
575
+ } else {
463
576
  rb_raise(rb_eRuntimeError, "Error pulling: %d", ret);
577
+ }
464
578
 
465
579
  return Qnil;
466
580
  }
@@ -472,19 +586,20 @@ static VALUE read_more(VALUE self)
472
586
  * Read the contents of the current node, including child nodes and markup.
473
587
  * Returns a utf-8 encoded string.
474
588
  */
475
- static VALUE inner_xml(VALUE self)
589
+ static VALUE
590
+ inner_xml(VALUE self)
476
591
  {
477
592
  xmlTextReaderPtr reader;
478
- xmlChar* value;
593
+ xmlChar *value;
479
594
  VALUE str;
480
595
 
481
- Data_Get_Struct(self, xmlTextReader, reader);
596
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
482
597
 
483
598
  value = xmlTextReaderReadInnerXml(reader);
484
599
 
485
600
  str = Qnil;
486
- if(value) {
487
- str = NOKOGIRI_STR_NEW2((char*)value);
601
+ if (value) {
602
+ str = NOKOGIRI_STR_NEW2((char *)value);
488
603
  xmlFree(value);
489
604
  }
490
605
 
@@ -498,18 +613,19 @@ static VALUE inner_xml(VALUE self)
498
613
  * Read the current node and its contents, including child nodes and markup.
499
614
  * Returns a utf-8 encoded string.
500
615
  */
501
- static VALUE outer_xml(VALUE self)
616
+ static VALUE
617
+ outer_xml(VALUE self)
502
618
  {
503
619
  xmlTextReaderPtr reader;
504
620
  xmlChar *value;
505
621
  VALUE str = Qnil;
506
622
 
507
- Data_Get_Struct(self, xmlTextReader, reader);
623
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
508
624
 
509
625
  value = xmlTextReaderReadOuterXml(reader);
510
626
 
511
- if(value) {
512
- str = NOKOGIRI_STR_NEW2((char*)value);
627
+ if (value) {
628
+ str = NOKOGIRI_STR_NEW2((char *)value);
513
629
  xmlFree(value);
514
630
  }
515
631
  return str;
@@ -521,36 +637,37 @@ static VALUE outer_xml(VALUE self)
521
637
  *
522
638
  * Create a new reader that parses +string+
523
639
  */
524
- static VALUE from_memory(int argc, VALUE *argv, VALUE klass)
640
+ static VALUE
641
+ from_memory(int argc, VALUE *argv, VALUE klass)
525
642
  {
526
643
  VALUE rb_buffer, rb_url, encoding, rb_options;
527
644
  xmlTextReaderPtr reader;
528
- const char * c_url = NULL;
529
- const char * c_encoding = NULL;
645
+ const char *c_url = NULL;
646
+ const char *c_encoding = NULL;
530
647
  int c_options = 0;
531
648
  VALUE rb_reader, args[3];
532
649
 
533
650
  rb_scan_args(argc, argv, "13", &rb_buffer, &rb_url, &encoding, &rb_options);
534
651
 
535
- if (!RTEST(rb_buffer)) rb_raise(rb_eArgError, "string cannot be nil");
536
- if (RTEST(rb_url)) c_url = StringValueCStr(rb_url);
537
- if (RTEST(encoding)) c_encoding = StringValueCStr(encoding);
538
- if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);
652
+ if (!RTEST(rb_buffer)) { rb_raise(rb_eArgError, "string cannot be nil"); }
653
+ if (RTEST(rb_url)) { c_url = StringValueCStr(rb_url); }
654
+ if (RTEST(encoding)) { c_encoding = StringValueCStr(encoding); }
655
+ if (RTEST(rb_options)) { c_options = (int)NUM2INT(rb_options); }
539
656
 
540
657
  reader = xmlReaderForMemory(
541
- StringValuePtr(rb_buffer),
542
- (int)RSTRING_LEN(rb_buffer),
543
- c_url,
544
- c_encoding,
545
- c_options
546
- );
547
-
548
- if(reader == NULL) {
658
+ StringValuePtr(rb_buffer),
659
+ (int)RSTRING_LEN(rb_buffer),
660
+ c_url,
661
+ c_encoding,
662
+ c_options
663
+ );
664
+
665
+ if (reader == NULL) {
549
666
  xmlFreeTextReader(reader);
550
667
  rb_raise(rb_eRuntimeError, "couldn't create a parser");
551
668
  }
552
669
 
553
- rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
670
+ rb_reader = TypedData_Wrap_Struct(klass, &xml_reader_type, reader);
554
671
  args[0] = rb_buffer;
555
672
  args[1] = rb_url;
556
673
  args[2] = encoding;
@@ -565,37 +682,38 @@ static VALUE from_memory(int argc, VALUE *argv, VALUE klass)
565
682
  *
566
683
  * Create a new reader that parses +io+
567
684
  */
568
- static VALUE from_io(int argc, VALUE *argv, VALUE klass)
685
+ static VALUE
686
+ from_io(int argc, VALUE *argv, VALUE klass)
569
687
  {
570
688
  VALUE rb_io, rb_url, encoding, rb_options;
571
689
  xmlTextReaderPtr reader;
572
- const char * c_url = NULL;
573
- const char * c_encoding = NULL;
690
+ const char *c_url = NULL;
691
+ const char *c_encoding = NULL;
574
692
  int c_options = 0;
575
693
  VALUE rb_reader, args[3];
576
694
 
577
695
  rb_scan_args(argc, argv, "13", &rb_io, &rb_url, &encoding, &rb_options);
578
696
 
579
- if (!RTEST(rb_io)) rb_raise(rb_eArgError, "io cannot be nil");
580
- if (RTEST(rb_url)) c_url = StringValueCStr(rb_url);
581
- if (RTEST(encoding)) c_encoding = StringValueCStr(encoding);
582
- if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);
697
+ if (!RTEST(rb_io)) { rb_raise(rb_eArgError, "io cannot be nil"); }
698
+ if (RTEST(rb_url)) { c_url = StringValueCStr(rb_url); }
699
+ if (RTEST(encoding)) { c_encoding = StringValueCStr(encoding); }
700
+ if (RTEST(rb_options)) { c_options = (int)NUM2INT(rb_options); }
583
701
 
584
702
  reader = xmlReaderForIO(
585
- (xmlInputReadCallback)io_read_callback,
586
- (xmlInputCloseCallback)io_close_callback,
587
- (void *)rb_io,
588
- c_url,
589
- c_encoding,
590
- c_options
591
- );
592
-
593
- if(reader == NULL) {
703
+ (xmlInputReadCallback)noko_io_read,
704
+ (xmlInputCloseCallback)noko_io_close,
705
+ (void *)rb_io,
706
+ c_url,
707
+ c_encoding,
708
+ c_options
709
+ );
710
+
711
+ if (reader == NULL) {
594
712
  xmlFreeTextReader(reader);
595
713
  rb_raise(rb_eRuntimeError, "couldn't create a parser");
596
714
  }
597
715
 
598
- rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
716
+ rb_reader = TypedData_Wrap_Struct(klass, &xml_reader_type, reader);
599
717
  args[0] = rb_io;
600
718
  args[1] = rb_url;
601
719
  args[2] = encoding;
@@ -610,59 +728,76 @@ static VALUE from_io(int argc, VALUE *argv, VALUE klass)
610
728
  *
611
729
  * Returns true if the current node is empty, otherwise false.
612
730
  */
613
- static VALUE empty_element_p(VALUE self)
731
+ static VALUE
732
+ empty_element_p(VALUE self)
614
733
  {
615
734
  xmlTextReaderPtr reader;
616
735
 
617
- Data_Get_Struct(self, xmlTextReader, reader);
736
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
618
737
 
619
- if(xmlTextReaderIsEmptyElement(reader))
738
+ if (xmlTextReaderIsEmptyElement(reader)) {
620
739
  return Qtrue;
740
+ }
621
741
 
622
742
  return Qfalse;
623
743
  }
624
744
 
625
- VALUE cNokogiriXmlReader;
626
-
627
- void init_xml_reader()
745
+ static VALUE
746
+ rb_xml_reader_encoding(VALUE rb_reader)
628
747
  {
629
- VALUE module = rb_define_module("Nokogiri");
630
- VALUE xml = rb_define_module_under(module, "XML");
748
+ xmlTextReaderPtr c_reader;
749
+ const char *parser_encoding;
750
+ VALUE constructor_encoding;
751
+
752
+ constructor_encoding = rb_iv_get(rb_reader, "@encoding");
753
+ if (RTEST(constructor_encoding)) {
754
+ return constructor_encoding;
755
+ }
631
756
 
757
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
758
+ parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
759
+ if (parser_encoding == NULL) { return Qnil; }
760
+ return NOKOGIRI_STR_NEW2(parser_encoding);
761
+ }
762
+
763
+ void
764
+ noko_init_xml_reader(void)
765
+ {
632
766
  /*
633
767
  * The Reader parser allows you to effectively pull parse an XML document.
634
768
  * Once instantiated, call Nokogiri::XML::Reader#each to iterate over each
635
769
  * node. Note that you may only iterate over the document once!
636
770
  */
637
- VALUE klass = rb_define_class_under(xml, "Reader", rb_cObject);
638
-
639
- cNokogiriXmlReader = klass;
640
-
641
- rb_define_singleton_method(klass, "from_memory", from_memory, -1);
642
- rb_define_singleton_method(klass, "from_io", from_io, -1);
643
-
644
- rb_define_method(klass, "read", read_more, 0);
645
- rb_define_method(klass, "inner_xml", inner_xml, 0);
646
- rb_define_method(klass, "outer_xml", outer_xml, 0);
647
- rb_define_method(klass, "state", state, 0);
648
- rb_define_method(klass, "node_type", node_type, 0);
649
- rb_define_method(klass, "name", name, 0);
650
- rb_define_method(klass, "local_name", local_name, 0);
651
- rb_define_method(klass, "namespace_uri", namespace_uri, 0);
652
- rb_define_method(klass, "prefix", prefix, 0);
653
- rb_define_method(klass, "value", value, 0);
654
- rb_define_method(klass, "lang", lang, 0);
655
- rb_define_method(klass, "xml_version", xml_version, 0);
656
- rb_define_method(klass, "depth", depth, 0);
657
- rb_define_method(klass, "attribute_count", attribute_count, 0);
658
- rb_define_method(klass, "attribute", reader_attribute, 1);
659
- rb_define_method(klass, "namespaces", namespaces, 0);
660
- rb_define_method(klass, "attribute_at", attribute_at, 1);
661
- rb_define_method(klass, "empty_element?", empty_element_p, 0);
662
- rb_define_method(klass, "attributes?", attributes_eh, 0);
663
- rb_define_method(klass, "value?", value_eh, 0);
664
- rb_define_method(klass, "default?", default_eh, 0);
665
- rb_define_method(klass, "base_uri", base_uri, 0);
666
-
667
- rb_define_private_method(klass, "attr_nodes", attribute_nodes, 0);
771
+ cNokogiriXmlReader = rb_define_class_under(mNokogiriXml, "Reader", rb_cObject);
772
+
773
+ rb_undef_alloc_func(cNokogiriXmlReader);
774
+
775
+ rb_define_singleton_method(cNokogiriXmlReader, "from_memory", from_memory, -1);
776
+ rb_define_singleton_method(cNokogiriXmlReader, "from_io", from_io, -1);
777
+
778
+ rb_define_method(cNokogiriXmlReader, "attribute", reader_attribute, 1);
779
+ rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1);
780
+ rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
781
+ rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0);
782
+ rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0);
783
+ rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
784
+ rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
785
+ rb_define_method(cNokogiriXmlReader, "default?", default_eh, 0);
786
+ rb_define_method(cNokogiriXmlReader, "depth", depth, 0);
787
+ rb_define_method(cNokogiriXmlReader, "empty_element?", empty_element_p, 0);
788
+ rb_define_method(cNokogiriXmlReader, "encoding", rb_xml_reader_encoding, 0);
789
+ rb_define_method(cNokogiriXmlReader, "inner_xml", inner_xml, 0);
790
+ rb_define_method(cNokogiriXmlReader, "lang", lang, 0);
791
+ rb_define_method(cNokogiriXmlReader, "local_name", local_name, 0);
792
+ rb_define_method(cNokogiriXmlReader, "name", name, 0);
793
+ rb_define_method(cNokogiriXmlReader, "namespace_uri", namespace_uri, 0);
794
+ rb_define_method(cNokogiriXmlReader, "namespaces", rb_xml_reader_namespaces, 0);
795
+ rb_define_method(cNokogiriXmlReader, "node_type", node_type, 0);
796
+ rb_define_method(cNokogiriXmlReader, "outer_xml", outer_xml, 0);
797
+ rb_define_method(cNokogiriXmlReader, "prefix", prefix, 0);
798
+ rb_define_method(cNokogiriXmlReader, "read", read_more, 0);
799
+ rb_define_method(cNokogiriXmlReader, "state", state, 0);
800
+ rb_define_method(cNokogiriXmlReader, "value", value, 0);
801
+ rb_define_method(cNokogiriXmlReader, "value?", value_eh, 0);
802
+ rb_define_method(cNokogiriXmlReader, "xml_version", xml_version, 0);
668
803
  }