nokogiri 1.8.5 → 1.13.6

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (356) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -21
  3. data/LICENSE-DEPENDENCIES.md +1159 -868
  4. data/LICENSE.md +5 -28
  5. data/README.md +196 -90
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +13 -59
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +750 -420
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +119 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +228 -91
  18. data/ext/nokogiri/nokogiri.h +191 -89
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +41 -36
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +13 -18
  23. data/ext/nokogiri/xml_comment.c +19 -26
  24. data/ext/nokogiri/xml_document.c +291 -216
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +56 -50
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +43 -18
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +61 -52
  33. data/ext/nokogiri/xml_node.c +1044 -616
  34. data/ext/nokogiri/xml_node_set.c +174 -162
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +226 -175
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +112 -112
  39. data/ext/nokogiri/xml_sax_parser_context.c +112 -86
  40. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +13 -17
  44. data/ext/nokogiri/xml_xpath_context.c +223 -115
  45. data/ext/nokogiri/xslt_stylesheet.c +265 -173
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +218 -91
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/{html → html4}/document.rb +103 -105
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +91 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +100 -0
  118. data/lib/nokogiri/html5.rb +478 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +222 -0
  123. data/lib/nokogiri/version.rb +3 -108
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +97 -53
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +224 -86
  130. data/lib/nokogiri/xml/document_fragment.rb +57 -44
  131. data/lib/nokogiri/xml/dtd.rb +4 -2
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +10 -5
  138. data/lib/nokogiri/xml/node.rb +895 -377
  139. data/lib/nokogiri/xml/node_set.rb +92 -65
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +22 -8
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +23 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +38 -34
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +112 -72
  155. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -37
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +29 -20
  163. data/lib/nokogiri.rb +49 -65
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  169. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  170. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
  171. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  172. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  173. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
  174. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  175. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
  176. metadata +220 -266
  177. data/.autotest +0 -22
  178. data/.cross_rubies +0 -8
  179. data/.editorconfig +0 -17
  180. data/.gemtest +0 -0
  181. data/.travis.yml +0 -63
  182. data/CHANGELOG.md +0 -1368
  183. data/CONTRIBUTING.md +0 -42
  184. data/C_CODING_STYLE.rdoc +0 -33
  185. data/Gemfile-libxml-ruby +0 -3
  186. data/Manifest.txt +0 -370
  187. data/ROADMAP.md +0 -111
  188. data/Rakefile +0 -348
  189. data/SECURITY.md +0 -19
  190. data/STANDARD_RESPONSES.md +0 -47
  191. data/Y_U_NO_GEMSPEC.md +0 -155
  192. data/appveyor.yml +0 -29
  193. data/build_all +0 -44
  194. data/ext/nokogiri/html_document.c +0 -170
  195. data/ext/nokogiri/html_document.h +0 -10
  196. data/ext/nokogiri/html_element_description.c +0 -279
  197. data/ext/nokogiri/html_element_description.h +0 -10
  198. data/ext/nokogiri/html_entity_lookup.c +0 -32
  199. data/ext/nokogiri/html_entity_lookup.h +0 -8
  200. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  201. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  202. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  203. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  204. data/ext/nokogiri/xml_attr.h +0 -9
  205. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  206. data/ext/nokogiri/xml_cdata.h +0 -9
  207. data/ext/nokogiri/xml_comment.h +0 -9
  208. data/ext/nokogiri/xml_document.h +0 -23
  209. data/ext/nokogiri/xml_document_fragment.h +0 -10
  210. data/ext/nokogiri/xml_dtd.h +0 -10
  211. data/ext/nokogiri/xml_element_content.h +0 -10
  212. data/ext/nokogiri/xml_element_decl.h +0 -9
  213. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  214. data/ext/nokogiri/xml_entity_decl.h +0 -10
  215. data/ext/nokogiri/xml_entity_reference.h +0 -9
  216. data/ext/nokogiri/xml_io.c +0 -61
  217. data/ext/nokogiri/xml_io.h +0 -11
  218. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  219. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  220. data/ext/nokogiri/xml_namespace.h +0 -15
  221. data/ext/nokogiri/xml_node.h +0 -13
  222. data/ext/nokogiri/xml_node_set.h +0 -12
  223. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  224. data/ext/nokogiri/xml_reader.h +0 -10
  225. data/ext/nokogiri/xml_relax_ng.h +0 -9
  226. data/ext/nokogiri/xml_sax_parser.h +0 -39
  227. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  228. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  229. data/ext/nokogiri/xml_schema.h +0 -9
  230. data/ext/nokogiri/xml_syntax_error.h +0 -13
  231. data/ext/nokogiri/xml_text.h +0 -9
  232. data/ext/nokogiri/xml_xpath_context.h +0 -10
  233. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  234. data/lib/nokogiri/html/document_fragment.rb +0 -49
  235. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  236. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  237. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  238. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  239. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  240. data/patches/sort-patches-by-date +0 -25
  241. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  242. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
  243. data/suppressions/README.txt +0 -1
  244. data/suppressions/nokogiri_ruby-2.supp +0 -10
  245. data/tasks/test.rb +0 -100
  246. data/test/css/test_nthiness.rb +0 -226
  247. data/test/css/test_parser.rb +0 -386
  248. data/test/css/test_tokenizer.rb +0 -215
  249. data/test/css/test_xpath_visitor.rb +0 -96
  250. data/test/decorators/test_slop.rb +0 -23
  251. data/test/files/2ch.html +0 -108
  252. data/test/files/GH_1042.html +0 -18
  253. data/test/files/address_book.rlx +0 -12
  254. data/test/files/address_book.xml +0 -10
  255. data/test/files/atom.xml +0 -344
  256. data/test/files/bar/bar.xsd +0 -4
  257. data/test/files/bogus.xml +0 -0
  258. data/test/files/dont_hurt_em_why.xml +0 -422
  259. data/test/files/encoding.html +0 -82
  260. data/test/files/encoding.xhtml +0 -84
  261. data/test/files/exslt.xml +0 -8
  262. data/test/files/exslt.xslt +0 -35
  263. data/test/files/foo/foo.xsd +0 -4
  264. data/test/files/metacharset.html +0 -10
  265. data/test/files/namespace_pressure_test.xml +0 -1684
  266. data/test/files/noencoding.html +0 -47
  267. data/test/files/po.xml +0 -32
  268. data/test/files/po.xsd +0 -66
  269. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  270. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  271. data/test/files/saml/xenc_schema.xsd +0 -146
  272. data/test/files/saml/xmldsig_schema.xsd +0 -318
  273. data/test/files/shift_jis.html +0 -10
  274. data/test/files/shift_jis.xml +0 -5
  275. data/test/files/shift_jis_no_charset.html +0 -9
  276. data/test/files/slow-xpath.xml +0 -25509
  277. data/test/files/snuggles.xml +0 -3
  278. data/test/files/staff.dtd +0 -10
  279. data/test/files/staff.xml +0 -59
  280. data/test/files/staff.xslt +0 -32
  281. data/test/files/test_document_url/bar.xml +0 -2
  282. data/test/files/test_document_url/document.dtd +0 -4
  283. data/test/files/test_document_url/document.xml +0 -6
  284. data/test/files/tlm.html +0 -851
  285. data/test/files/to_be_xincluded.xml +0 -2
  286. data/test/files/valid_bar.xml +0 -2
  287. data/test/files/xinclude.xml +0 -4
  288. data/test/helper.rb +0 -271
  289. data/test/html/sax/test_parser.rb +0 -168
  290. data/test/html/sax/test_parser_context.rb +0 -46
  291. data/test/html/sax/test_parser_text.rb +0 -163
  292. data/test/html/sax/test_push_parser.rb +0 -87
  293. data/test/html/test_attributes.rb +0 -85
  294. data/test/html/test_builder.rb +0 -164
  295. data/test/html/test_document.rb +0 -712
  296. data/test/html/test_document_encoding.rb +0 -143
  297. data/test/html/test_document_fragment.rb +0 -310
  298. data/test/html/test_element_description.rb +0 -105
  299. data/test/html/test_named_characters.rb +0 -14
  300. data/test/html/test_node.rb +0 -212
  301. data/test/html/test_node_encoding.rb +0 -91
  302. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  303. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  304. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  305. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  306. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  307. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  308. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  309. data/test/test_convert_xpath.rb +0 -135
  310. data/test/test_css_cache.rb +0 -47
  311. data/test/test_encoding_handler.rb +0 -48
  312. data/test/test_memory_leak.rb +0 -156
  313. data/test/test_nokogiri.rb +0 -138
  314. data/test/test_soap4r_sax.rb +0 -52
  315. data/test/test_xslt_transforms.rb +0 -314
  316. data/test/xml/node/test_save_options.rb +0 -28
  317. data/test/xml/node/test_subclass.rb +0 -44
  318. data/test/xml/sax/test_parser.rb +0 -402
  319. data/test/xml/sax/test_parser_context.rb +0 -115
  320. data/test/xml/sax/test_parser_text.rb +0 -202
  321. data/test/xml/sax/test_push_parser.rb +0 -265
  322. data/test/xml/test_attr.rb +0 -74
  323. data/test/xml/test_attribute_decl.rb +0 -86
  324. data/test/xml/test_builder.rb +0 -341
  325. data/test/xml/test_c14n.rb +0 -180
  326. data/test/xml/test_cdata.rb +0 -54
  327. data/test/xml/test_comment.rb +0 -40
  328. data/test/xml/test_document.rb +0 -982
  329. data/test/xml/test_document_encoding.rb +0 -31
  330. data/test/xml/test_document_fragment.rb +0 -298
  331. data/test/xml/test_dtd.rb +0 -187
  332. data/test/xml/test_dtd_encoding.rb +0 -31
  333. data/test/xml/test_element_content.rb +0 -56
  334. data/test/xml/test_element_decl.rb +0 -73
  335. data/test/xml/test_entity_decl.rb +0 -122
  336. data/test/xml/test_entity_reference.rb +0 -262
  337. data/test/xml/test_namespace.rb +0 -96
  338. data/test/xml/test_node.rb +0 -1325
  339. data/test/xml/test_node_attributes.rb +0 -115
  340. data/test/xml/test_node_encoding.rb +0 -75
  341. data/test/xml/test_node_inheritance.rb +0 -32
  342. data/test/xml/test_node_reparenting.rb +0 -592
  343. data/test/xml/test_node_set.rb +0 -809
  344. data/test/xml/test_parse_options.rb +0 -64
  345. data/test/xml/test_processing_instruction.rb +0 -30
  346. data/test/xml/test_reader.rb +0 -620
  347. data/test/xml/test_reader_encoding.rb +0 -134
  348. data/test/xml/test_relax_ng.rb +0 -60
  349. data/test/xml/test_schema.rb +0 -142
  350. data/test/xml/test_syntax_error.rb +0 -36
  351. data/test/xml/test_text.rb +0 -60
  352. data/test/xml/test_unparented_node.rb +0 -483
  353. data/test/xml/test_xinclude.rb +0 -83
  354. data/test/xml/test_xpath.rb +0 -470
  355. data/test/xslt/test_custom_functions.rb +0 -133
  356. data/test/xslt/test_exception_handling.rb +0 -37
@@ -0,0 +1,121 @@
1
+ #ifndef HAVE_XMLFIRSTELEMENTCHILD
2
+ #include <nokogiri.h>
3
+ /**
4
+ * xmlFirstElementChild:
5
+ * @parent: the parent node
6
+ *
7
+ * Finds the first child node of that element which is a Element node
8
+ * Note the handling of entities references is different than in
9
+ * the W3C DOM element traversal spec since we don't have back reference
10
+ * from entities content to entities references.
11
+ *
12
+ * Returns the first element child or NULL if not available
13
+ */
14
+ xmlNodePtr
15
+ xmlFirstElementChild(xmlNodePtr parent)
16
+ {
17
+ xmlNodePtr cur = NULL;
18
+
19
+ if (parent == NULL) {
20
+ return (NULL);
21
+ }
22
+ switch (parent->type) {
23
+ case XML_ELEMENT_NODE:
24
+ case XML_ENTITY_NODE:
25
+ case XML_DOCUMENT_NODE:
26
+ case XML_HTML_DOCUMENT_NODE:
27
+ cur = parent->children;
28
+ break;
29
+ default:
30
+ return (NULL);
31
+ }
32
+ while (cur != NULL) {
33
+ if (cur->type == XML_ELEMENT_NODE) {
34
+ return (cur);
35
+ }
36
+ cur = cur->next;
37
+ }
38
+ return (NULL);
39
+ }
40
+
41
+ /**
42
+ * xmlNextElementSibling:
43
+ * @node: the current node
44
+ *
45
+ * Finds the first closest next sibling of the node which is an
46
+ * element node.
47
+ * Note the handling of entities references is different than in
48
+ * the W3C DOM element traversal spec since we don't have back reference
49
+ * from entities content to entities references.
50
+ *
51
+ * Returns the next element sibling or NULL if not available
52
+ */
53
+ xmlNodePtr
54
+ xmlNextElementSibling(xmlNodePtr node)
55
+ {
56
+ if (node == NULL) {
57
+ return (NULL);
58
+ }
59
+ switch (node->type) {
60
+ case XML_ELEMENT_NODE:
61
+ case XML_TEXT_NODE:
62
+ case XML_CDATA_SECTION_NODE:
63
+ case XML_ENTITY_REF_NODE:
64
+ case XML_ENTITY_NODE:
65
+ case XML_PI_NODE:
66
+ case XML_COMMENT_NODE:
67
+ case XML_DTD_NODE:
68
+ case XML_XINCLUDE_START:
69
+ case XML_XINCLUDE_END:
70
+ node = node->next;
71
+ break;
72
+ default:
73
+ return (NULL);
74
+ }
75
+ while (node != NULL) {
76
+ if (node->type == XML_ELEMENT_NODE) {
77
+ return (node);
78
+ }
79
+ node = node->next;
80
+ }
81
+ return (NULL);
82
+ }
83
+
84
+ /**
85
+ * xmlLastElementChild:
86
+ * @parent: the parent node
87
+ *
88
+ * Finds the last child node of that element which is a Element node
89
+ * Note the handling of entities references is different than in
90
+ * the W3C DOM element traversal spec since we don't have back reference
91
+ * from entities content to entities references.
92
+ *
93
+ * Returns the last element child or NULL if not available
94
+ */
95
+ xmlNodePtr
96
+ xmlLastElementChild(xmlNodePtr parent)
97
+ {
98
+ xmlNodePtr cur = NULL;
99
+
100
+ if (parent == NULL) {
101
+ return (NULL);
102
+ }
103
+ switch (parent->type) {
104
+ case XML_ELEMENT_NODE:
105
+ case XML_ENTITY_NODE:
106
+ case XML_DOCUMENT_NODE:
107
+ case XML_HTML_DOCUMENT_NODE:
108
+ cur = parent->last;
109
+ break;
110
+ default:
111
+ return (NULL);
112
+ }
113
+ while (cur != NULL) {
114
+ if (cur->type == XML_ELEMENT_NODE) {
115
+ return (cur);
116
+ }
117
+ cur = cur->prev;
118
+ }
119
+ return (NULL);
120
+ }
121
+ #endif
@@ -1,101 +1,189 @@
1
1
  #include <nokogiri.h>
2
2
 
3
3
  VALUE mNokogiri ;
4
+ VALUE mNokogiriGumbo ;
5
+ VALUE mNokogiriHtml4 ;
6
+ VALUE mNokogiriHtml4Sax ;
7
+ VALUE mNokogiriHtml5 ;
4
8
  VALUE mNokogiriXml ;
5
- VALUE mNokogiriHtml ;
6
- VALUE mNokogiriXslt ;
7
9
  VALUE mNokogiriXmlSax ;
8
- VALUE mNokogiriHtmlSax ;
10
+ VALUE mNokogiriXmlXpath ;
11
+ VALUE mNokogiriXslt ;
12
+
13
+ VALUE cNokogiriSyntaxError;
14
+ VALUE cNokogiriXmlCharacterData;
15
+ VALUE cNokogiriXmlElement;
16
+ VALUE cNokogiriXmlXpathSyntaxError;
17
+
18
+ void noko_init_xml_attr(void);
19
+ void noko_init_xml_attribute_decl(void);
20
+ void noko_init_xml_cdata(void);
21
+ void noko_init_xml_comment(void);
22
+ void noko_init_xml_document(void);
23
+ void noko_init_xml_document_fragment(void);
24
+ void noko_init_xml_dtd(void);
25
+ void noko_init_xml_element_content(void);
26
+ void noko_init_xml_element_decl(void);
27
+ void noko_init_xml_encoding_handler(void);
28
+ void noko_init_xml_entity_decl(void);
29
+ void noko_init_xml_entity_reference(void);
30
+ void noko_init_xml_namespace(void);
31
+ void noko_init_xml_node(void);
32
+ void noko_init_xml_node_set(void);
33
+ void noko_init_xml_processing_instruction(void);
34
+ void noko_init_xml_reader(void);
35
+ void noko_init_xml_relax_ng(void);
36
+ void noko_init_xml_sax_parser(void);
37
+ void noko_init_xml_sax_parser_context(void);
38
+ void noko_init_xml_sax_push_parser(void);
39
+ void noko_init_xml_schema(void);
40
+ void noko_init_xml_syntax_error(void);
41
+ void noko_init_xml_text(void);
42
+ void noko_init_xml_xpath_context(void);
43
+ void noko_init_xslt_stylesheet(void);
44
+ void noko_init_html_document(void);
45
+ void noko_init_html_element_description(void);
46
+ void noko_init_html_entity_lookup(void);
47
+ void noko_init_html_sax_parser_context(void);
48
+ void noko_init_html_sax_push_parser(void);
49
+ void noko_init_gumbo(void);
50
+ void noko_init_test_global_handlers(void);
9
51
 
10
- #ifdef USE_INCLUDED_VASPRINTF
52
+ static ID id_read, id_write;
53
+
54
+
55
+ #ifndef HAVE_VASPRINTF
11
56
  /*
12
- * I srsly hate windows. it doesn't have vasprintf.
13
57
  * Thank you Geoffroy Couprie for this implementation of vasprintf!
14
58
  */
15
- int vasprintf (char **strp, const char *fmt, va_list ap)
59
+ int
60
+ vasprintf(char **strp, const char *fmt, va_list ap)
16
61
  {
17
62
  /* Mingw32/64 have a broken vsnprintf implementation that fails when
18
63
  * using a zero-byte limit in order to retrieve the required size for malloc.
19
64
  * So we use a one byte buffer instead.
20
65
  */
21
66
  char tmp[1];
22
- int len = vsnprintf (tmp, 1, fmt, ap) + 1;
67
+ int len = vsnprintf(tmp, 1, fmt, ap) + 1;
23
68
  char *res = (char *)malloc((unsigned int)len);
24
- if (res == NULL)
25
- return -1;
69
+ if (res == NULL) {
70
+ return -1;
71
+ }
26
72
  *strp = res;
27
73
  return vsnprintf(res, (unsigned int)len, fmt, ap);
28
74
  }
29
75
  #endif
30
76
 
31
- void vasprintf_free (void *p)
77
+
78
+ static VALUE
79
+ read_check(VALUE val)
32
80
  {
33
- free(p);
81
+ VALUE *args = (VALUE *)val;
82
+ return rb_funcall(args[0], id_read, 1, args[1]);
34
83
  }
35
84
 
36
- #ifdef HAVE_RUBY_UTIL_H
37
- #include "ruby/util.h"
38
- #else
39
- #include "util.h"
40
- #endif
41
85
 
42
- void nokogiri_root_node(xmlNodePtr node)
86
+ static VALUE
87
+ read_failed(VALUE arg, VALUE exc)
43
88
  {
44
- xmlDocPtr doc;
45
- nokogiriTuplePtr tuple;
89
+ return Qundef;
90
+ }
91
+
92
+
93
+ int
94
+ noko_io_read(void *ctx, char *buffer, int len)
95
+ {
96
+ VALUE string, args[2];
97
+ size_t str_len, safe_len;
98
+
99
+ args[0] = (VALUE)ctx;
100
+ args[1] = INT2NUM(len);
46
101
 
47
- doc = node->doc;
48
- if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc;
49
- tuple = (nokogiriTuplePtr)doc->_private;
50
- st_insert(tuple->unlinkedNodes, (st_data_t)node, (st_data_t)node);
102
+ string = rb_rescue(read_check, (VALUE)args, read_failed, 0);
103
+
104
+ if (NIL_P(string)) { return 0; }
105
+ if (string == Qundef) { return -1; }
106
+ if (TYPE(string) != T_STRING) { return -1; }
107
+
108
+ str_len = (size_t)RSTRING_LEN(string);
109
+ safe_len = str_len > (size_t)len ? (size_t)len : str_len;
110
+ memcpy(buffer, StringValuePtr(string), safe_len);
111
+
112
+ return (int)safe_len;
51
113
  }
52
114
 
53
- void nokogiri_root_nsdef(xmlNsPtr ns, xmlDocPtr doc)
115
+
116
+ static VALUE
117
+ write_check(VALUE val)
54
118
  {
55
- nokogiriTuplePtr tuple;
119
+ VALUE *args = (VALUE *)val;
120
+ return rb_funcall(args[0], id_write, 1, args[1]);
121
+ }
122
+
56
123
 
57
- if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc;
58
- tuple = (nokogiriTuplePtr)doc->_private;
59
- st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns);
124
+ static VALUE
125
+ write_failed(VALUE arg, VALUE exc)
126
+ {
127
+ return Qundef;
60
128
  }
61
129
 
62
- void Init_nokogiri()
130
+
131
+ int
132
+ noko_io_write(void *ctx, char *buffer, int len)
63
133
  {
64
- xmlMemSetup(
65
- (xmlFreeFunc)ruby_xfree,
66
- (xmlMallocFunc)ruby_xmalloc,
67
- (xmlReallocFunc)ruby_xrealloc,
68
- ruby_strdup
69
- );
134
+ VALUE args[2], size;
135
+
136
+ args[0] = (VALUE)ctx;
137
+ args[1] = rb_str_new(buffer, (long)len);
138
+
139
+ size = rb_rescue(write_check, (VALUE)args, write_failed, 0);
140
+
141
+ if (size == Qundef) { return -1; }
142
+
143
+ return NUM2INT(size);
144
+ }
70
145
 
146
+
147
+ int
148
+ noko_io_close(void *ctx)
149
+ {
150
+ return 0;
151
+ }
152
+
153
+
154
+ void
155
+ Init_nokogiri()
156
+ {
71
157
  mNokogiri = rb_define_module("Nokogiri");
158
+ mNokogiriGumbo = rb_define_module_under(mNokogiri, "Gumbo");
159
+ mNokogiriHtml4 = rb_define_module_under(mNokogiri, "HTML4");
160
+ mNokogiriHtml4Sax = rb_define_module_under(mNokogiriHtml4, "SAX");
161
+ mNokogiriHtml5 = rb_define_module_under(mNokogiri, "HTML5");
72
162
  mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
73
- mNokogiriHtml = rb_define_module_under(mNokogiri, "HTML");
74
- mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
75
163
  mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
76
- mNokogiriHtmlSax = rb_define_module_under(mNokogiriHtml, "SAX");
77
-
78
- rb_const_set( mNokogiri,
79
- rb_intern("LIBXML_VERSION"),
80
- NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION)
81
- );
82
- rb_const_set( mNokogiri,
83
- rb_intern("LIBXML_PARSER_VERSION"),
84
- NOKOGIRI_STR_NEW2(xmlParserVersion)
85
- );
86
-
87
- #ifdef NOKOGIRI_USE_PACKAGED_LIBRARIES
88
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_USE_PACKAGED_LIBRARIES"), Qtrue);
89
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATH"), NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATH));
90
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATH"), NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATH));
91
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATCHES), " "));
92
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATCHES), " "));
164
+ mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
165
+ mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
166
+
167
+ rb_const_set(mNokogiri, rb_intern("LIBXML_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION));
168
+ rb_const_set(mNokogiri, rb_intern("LIBXML_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xmlParserVersion));
169
+
170
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXSLT_DOTTED_VERSION));
171
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xsltEngineVersion));
172
+
173
+ #ifdef NOKOGIRI_PACKAGED_LIBRARIES
174
+ rb_const_set(mNokogiri, rb_intern("PACKAGED_LIBRARIES"), Qtrue);
175
+ # ifdef NOKOGIRI_PRECOMPILED_LIBRARIES
176
+ rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qtrue);
177
+ # else
178
+ rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qfalse);
179
+ # endif
180
+ rb_const_set(mNokogiri, rb_intern("LIBXML2_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATCHES), " "));
181
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATCHES), " "));
93
182
  #else
94
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_USE_PACKAGED_LIBRARIES"), Qfalse);
95
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATH"), Qnil);
96
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATH"), Qnil);
97
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATCHES"), Qnil);
98
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATCHES"), Qnil);
183
+ rb_const_set(mNokogiri, rb_intern("PACKAGED_LIBRARIES"), Qfalse);
184
+ rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qfalse);
185
+ rb_const_set(mNokogiri, rb_intern("LIBXML2_PATCHES"), Qnil);
186
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_PATCHES"), Qnil);
99
187
  #endif
100
188
 
101
189
  #ifdef LIBXML_ICONV_ENABLED
@@ -104,38 +192,87 @@ void Init_nokogiri()
104
192
  rb_const_set(mNokogiri, rb_intern("LIBXML_ICONV_ENABLED"), Qfalse);
105
193
  #endif
106
194
 
195
+ #ifdef NOKOGIRI_OTHER_LIBRARY_VERSIONS
196
+ rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS));
197
+ #endif
198
+
199
+ #if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
200
+ /*
201
+ * We choose *not* to do use Ruby's memory management functions with windows DLLs because of this
202
+ * issue in libxml 2.9.12:
203
+ *
204
+ * https://github.com/sparklemotion/nokogiri/issues/2241
205
+ *
206
+ * If the atexit() issue gets fixed in a future version of libxml2, then we may be able to skip
207
+ * this config only for the specific libxml2 versions 2.9.12.
208
+ *
209
+ * Alternatively, now that Ruby has a generational GC, it might be OK to let libxml2 use its
210
+ * default memory management functions (recall that this config was introduced to reduce memory
211
+ * bloat and allow Ruby to GC more often); but we should *really* test with production workloads
212
+ * before making that kind of a potentially-invasive change.
213
+ */
214
+ rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
215
+ #else
216
+ rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
217
+ xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
218
+ #endif
219
+
107
220
  xmlInitParser();
221
+ exsltRegisterAll();
222
+
223
+ if (xsltExtModuleFunctionLookup((const xmlChar *)"date-time", EXSLT_DATE_NAMESPACE)) {
224
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qtrue);
225
+ } else {
226
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qfalse);
227
+ }
228
+
229
+ cNokogiriSyntaxError = rb_define_class_under(mNokogiri, "SyntaxError", rb_eStandardError);
230
+ noko_init_xml_syntax_error();
231
+ assert(cNokogiriXmlSyntaxError);
232
+ cNokogiriXmlXpathSyntaxError = rb_define_class_under(mNokogiriXmlXpath, "SyntaxError", cNokogiriXmlSyntaxError);
233
+
234
+ noko_init_xml_element_content();
235
+ noko_init_xml_encoding_handler();
236
+ noko_init_xml_namespace();
237
+ noko_init_xml_node_set();
238
+ noko_init_xml_reader();
239
+ noko_init_xml_sax_parser();
240
+ noko_init_xml_xpath_context();
241
+ noko_init_xslt_stylesheet();
242
+ noko_init_html_element_description();
243
+ noko_init_html_entity_lookup();
244
+
245
+ noko_init_xml_schema();
246
+ noko_init_xml_relax_ng();
247
+
248
+ noko_init_xml_sax_parser_context();
249
+ noko_init_html_sax_parser_context();
250
+
251
+ noko_init_xml_sax_push_parser();
252
+ noko_init_html_sax_push_parser();
253
+
254
+ noko_init_xml_node();
255
+ noko_init_xml_attr();
256
+ noko_init_xml_attribute_decl();
257
+ noko_init_xml_dtd();
258
+ noko_init_xml_element_decl();
259
+ noko_init_xml_entity_decl();
260
+ noko_init_xml_entity_reference();
261
+ noko_init_xml_processing_instruction();
262
+ assert(cNokogiriXmlNode);
263
+ cNokogiriXmlElement = rb_define_class_under(mNokogiriXml, "Element", cNokogiriXmlNode);
264
+ cNokogiriXmlCharacterData = rb_define_class_under(mNokogiriXml, "CharacterData", cNokogiriXmlNode);
265
+ noko_init_xml_comment();
266
+ noko_init_xml_text();
267
+ noko_init_xml_cdata();
268
+
269
+ noko_init_xml_document_fragment();
270
+ noko_init_xml_document();
271
+ noko_init_html_document();
272
+ noko_init_gumbo();
273
+
274
+ noko_init_test_global_handlers();
108
275
 
109
- init_xml_document();
110
- init_html_document();
111
- init_xml_node();
112
- init_xml_document_fragment();
113
- init_xml_text();
114
- init_xml_cdata();
115
- init_xml_processing_instruction();
116
- init_xml_attr();
117
- init_xml_entity_reference();
118
- init_xml_comment();
119
- init_xml_node_set();
120
- init_xml_xpath_context();
121
- init_xml_sax_parser_context();
122
- init_xml_sax_parser();
123
- init_xml_sax_push_parser();
124
- init_xml_reader();
125
- init_xml_dtd();
126
- init_xml_element_content();
127
- init_xml_attribute_decl();
128
- init_xml_element_decl();
129
- init_xml_entity_decl();
130
- init_xml_namespace();
131
- init_html_sax_parser_context();
132
- init_html_sax_push_parser();
133
- init_xslt_stylesheet();
134
- init_xml_syntax_error();
135
- init_html_entity_lookup();
136
- init_html_element_description();
137
- init_xml_schema();
138
- init_xml_relax_ng();
139
- init_nokogiri_io();
140
- init_xml_encoding_handler();
276
+ id_read = rb_intern("read");
277
+ id_write = rb_intern("write");
141
278
  }