nokogiri 1.8.5 → 1.15.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (358) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +40 -18
  3. data/LICENSE-DEPENDENCIES.md +1636 -1024
  4. data/LICENSE.md +5 -28
  5. data/README.md +203 -90
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -61
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +867 -417
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +215 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +42 -37
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +40 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +401 -237
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +136 -62
  33. data/ext/nokogiri/xml_node.c +1387 -678
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +347 -212
  37. data/ext/nokogiri/xml_relax_ng.c +86 -77
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +145 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +64 -36
  41. data/ext/nokogiri/xml_schema.c +138 -81
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +36 -26
  44. data/ext/nokogiri/xml_xpath_context.c +366 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +111 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +224 -95
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +392 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +223 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +98 -54
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -126
  133. data/lib/nokogiri/xml/document_fragment.rb +104 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +45 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1093 -411
  142. data/lib/nokogiri/xml/node_set.rb +173 -67
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +38 -37
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  169. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  170. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  175. data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
  176. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
  177. metadata +126 -399
  178. data/.autotest +0 -22
  179. data/.cross_rubies +0 -8
  180. data/.editorconfig +0 -17
  181. data/.gemtest +0 -0
  182. data/.travis.yml +0 -63
  183. data/CHANGELOG.md +0 -1368
  184. data/CONTRIBUTING.md +0 -42
  185. data/C_CODING_STYLE.rdoc +0 -33
  186. data/Gemfile-libxml-ruby +0 -3
  187. data/Manifest.txt +0 -370
  188. data/ROADMAP.md +0 -111
  189. data/Rakefile +0 -348
  190. data/SECURITY.md +0 -19
  191. data/STANDARD_RESPONSES.md +0 -47
  192. data/Y_U_NO_GEMSPEC.md +0 -155
  193. data/appveyor.yml +0 -29
  194. data/build_all +0 -44
  195. data/ext/nokogiri/html_document.c +0 -170
  196. data/ext/nokogiri/html_document.h +0 -10
  197. data/ext/nokogiri/html_element_description.c +0 -279
  198. data/ext/nokogiri/html_element_description.h +0 -10
  199. data/ext/nokogiri/html_entity_lookup.c +0 -32
  200. data/ext/nokogiri/html_entity_lookup.h +0 -8
  201. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  202. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  203. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  204. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  205. data/ext/nokogiri/xml_attr.h +0 -9
  206. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  207. data/ext/nokogiri/xml_cdata.h +0 -9
  208. data/ext/nokogiri/xml_comment.h +0 -9
  209. data/ext/nokogiri/xml_document.h +0 -23
  210. data/ext/nokogiri/xml_document_fragment.h +0 -10
  211. data/ext/nokogiri/xml_dtd.h +0 -10
  212. data/ext/nokogiri/xml_element_content.h +0 -10
  213. data/ext/nokogiri/xml_element_decl.h +0 -9
  214. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  215. data/ext/nokogiri/xml_entity_decl.h +0 -10
  216. data/ext/nokogiri/xml_entity_reference.h +0 -9
  217. data/ext/nokogiri/xml_io.c +0 -61
  218. data/ext/nokogiri/xml_io.h +0 -11
  219. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  220. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  221. data/ext/nokogiri/xml_namespace.h +0 -15
  222. data/ext/nokogiri/xml_node.h +0 -13
  223. data/ext/nokogiri/xml_node_set.h +0 -12
  224. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  225. data/ext/nokogiri/xml_reader.h +0 -10
  226. data/ext/nokogiri/xml_relax_ng.h +0 -9
  227. data/ext/nokogiri/xml_sax_parser.h +0 -39
  228. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  229. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  230. data/ext/nokogiri/xml_schema.h +0 -9
  231. data/ext/nokogiri/xml_syntax_error.h +0 -13
  232. data/ext/nokogiri/xml_text.h +0 -9
  233. data/ext/nokogiri/xml_xpath_context.h +0 -10
  234. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  235. data/lib/nokogiri/html/document.rb +0 -335
  236. data/lib/nokogiri/html/document_fragment.rb +0 -49
  237. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  238. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  239. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  240. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  241. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  242. data/patches/sort-patches-by-date +0 -25
  243. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  244. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
  245. data/suppressions/README.txt +0 -1
  246. data/suppressions/nokogiri_ruby-2.supp +0 -10
  247. data/tasks/test.rb +0 -100
  248. data/test/css/test_nthiness.rb +0 -226
  249. data/test/css/test_parser.rb +0 -386
  250. data/test/css/test_tokenizer.rb +0 -215
  251. data/test/css/test_xpath_visitor.rb +0 -96
  252. data/test/decorators/test_slop.rb +0 -23
  253. data/test/files/2ch.html +0 -108
  254. data/test/files/GH_1042.html +0 -18
  255. data/test/files/address_book.rlx +0 -12
  256. data/test/files/address_book.xml +0 -10
  257. data/test/files/atom.xml +0 -344
  258. data/test/files/bar/bar.xsd +0 -4
  259. data/test/files/bogus.xml +0 -0
  260. data/test/files/dont_hurt_em_why.xml +0 -422
  261. data/test/files/encoding.html +0 -82
  262. data/test/files/encoding.xhtml +0 -84
  263. data/test/files/exslt.xml +0 -8
  264. data/test/files/exslt.xslt +0 -35
  265. data/test/files/foo/foo.xsd +0 -4
  266. data/test/files/metacharset.html +0 -10
  267. data/test/files/namespace_pressure_test.xml +0 -1684
  268. data/test/files/noencoding.html +0 -47
  269. data/test/files/po.xml +0 -32
  270. data/test/files/po.xsd +0 -66
  271. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  272. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  273. data/test/files/saml/xenc_schema.xsd +0 -146
  274. data/test/files/saml/xmldsig_schema.xsd +0 -318
  275. data/test/files/shift_jis.html +0 -10
  276. data/test/files/shift_jis.xml +0 -5
  277. data/test/files/shift_jis_no_charset.html +0 -9
  278. data/test/files/slow-xpath.xml +0 -25509
  279. data/test/files/snuggles.xml +0 -3
  280. data/test/files/staff.dtd +0 -10
  281. data/test/files/staff.xml +0 -59
  282. data/test/files/staff.xslt +0 -32
  283. data/test/files/test_document_url/bar.xml +0 -2
  284. data/test/files/test_document_url/document.dtd +0 -4
  285. data/test/files/test_document_url/document.xml +0 -6
  286. data/test/files/tlm.html +0 -851
  287. data/test/files/to_be_xincluded.xml +0 -2
  288. data/test/files/valid_bar.xml +0 -2
  289. data/test/files/xinclude.xml +0 -4
  290. data/test/helper.rb +0 -271
  291. data/test/html/sax/test_parser.rb +0 -168
  292. data/test/html/sax/test_parser_context.rb +0 -46
  293. data/test/html/sax/test_parser_text.rb +0 -163
  294. data/test/html/sax/test_push_parser.rb +0 -87
  295. data/test/html/test_attributes.rb +0 -85
  296. data/test/html/test_builder.rb +0 -164
  297. data/test/html/test_document.rb +0 -712
  298. data/test/html/test_document_encoding.rb +0 -143
  299. data/test/html/test_document_fragment.rb +0 -310
  300. data/test/html/test_element_description.rb +0 -105
  301. data/test/html/test_named_characters.rb +0 -14
  302. data/test/html/test_node.rb +0 -212
  303. data/test/html/test_node_encoding.rb +0 -91
  304. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  305. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  306. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  307. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  308. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  309. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  310. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  311. data/test/test_convert_xpath.rb +0 -135
  312. data/test/test_css_cache.rb +0 -47
  313. data/test/test_encoding_handler.rb +0 -48
  314. data/test/test_memory_leak.rb +0 -156
  315. data/test/test_nokogiri.rb +0 -138
  316. data/test/test_soap4r_sax.rb +0 -52
  317. data/test/test_xslt_transforms.rb +0 -314
  318. data/test/xml/node/test_save_options.rb +0 -28
  319. data/test/xml/node/test_subclass.rb +0 -44
  320. data/test/xml/sax/test_parser.rb +0 -402
  321. data/test/xml/sax/test_parser_context.rb +0 -115
  322. data/test/xml/sax/test_parser_text.rb +0 -202
  323. data/test/xml/sax/test_push_parser.rb +0 -265
  324. data/test/xml/test_attr.rb +0 -74
  325. data/test/xml/test_attribute_decl.rb +0 -86
  326. data/test/xml/test_builder.rb +0 -341
  327. data/test/xml/test_c14n.rb +0 -180
  328. data/test/xml/test_cdata.rb +0 -54
  329. data/test/xml/test_comment.rb +0 -40
  330. data/test/xml/test_document.rb +0 -982
  331. data/test/xml/test_document_encoding.rb +0 -31
  332. data/test/xml/test_document_fragment.rb +0 -298
  333. data/test/xml/test_dtd.rb +0 -187
  334. data/test/xml/test_dtd_encoding.rb +0 -31
  335. data/test/xml/test_element_content.rb +0 -56
  336. data/test/xml/test_element_decl.rb +0 -73
  337. data/test/xml/test_entity_decl.rb +0 -122
  338. data/test/xml/test_entity_reference.rb +0 -262
  339. data/test/xml/test_namespace.rb +0 -96
  340. data/test/xml/test_node.rb +0 -1325
  341. data/test/xml/test_node_attributes.rb +0 -115
  342. data/test/xml/test_node_encoding.rb +0 -75
  343. data/test/xml/test_node_inheritance.rb +0 -32
  344. data/test/xml/test_node_reparenting.rb +0 -592
  345. data/test/xml/test_node_set.rb +0 -809
  346. data/test/xml/test_parse_options.rb +0 -64
  347. data/test/xml/test_processing_instruction.rb +0 -30
  348. data/test/xml/test_reader.rb +0 -620
  349. data/test/xml/test_reader_encoding.rb +0 -134
  350. data/test/xml/test_relax_ng.rb +0 -60
  351. data/test/xml/test_schema.rb +0 -142
  352. data/test/xml/test_syntax_error.rb +0 -36
  353. data/test/xml/test_text.rb +0 -60
  354. data/test/xml/test_unparented_node.rb +0 -483
  355. data/test/xml/test_xinclude.rb +0 -83
  356. data/test/xml/test_xpath.rb +0 -470
  357. data/test/xslt/test_custom_functions.rb +0 -133
  358. data/test/xslt/test_exception_handling.rb +0 -37
@@ -0,0 +1,121 @@
1
+ #ifndef HAVE_XMLFIRSTELEMENTCHILD
2
+ #include <nokogiri.h>
3
+ /**
4
+ * xmlFirstElementChild:
5
+ * @parent: the parent node
6
+ *
7
+ * Finds the first child node of that element which is a Element node
8
+ * Note the handling of entities references is different than in
9
+ * the W3C DOM element traversal spec since we don't have back reference
10
+ * from entities content to entities references.
11
+ *
12
+ * Returns the first element child or NULL if not available
13
+ */
14
+ xmlNodePtr
15
+ xmlFirstElementChild(xmlNodePtr parent)
16
+ {
17
+ xmlNodePtr cur = NULL;
18
+
19
+ if (parent == NULL) {
20
+ return (NULL);
21
+ }
22
+ switch (parent->type) {
23
+ case XML_ELEMENT_NODE:
24
+ case XML_ENTITY_NODE:
25
+ case XML_DOCUMENT_NODE:
26
+ case XML_HTML_DOCUMENT_NODE:
27
+ cur = parent->children;
28
+ break;
29
+ default:
30
+ return (NULL);
31
+ }
32
+ while (cur != NULL) {
33
+ if (cur->type == XML_ELEMENT_NODE) {
34
+ return (cur);
35
+ }
36
+ cur = cur->next;
37
+ }
38
+ return (NULL);
39
+ }
40
+
41
+ /**
42
+ * xmlNextElementSibling:
43
+ * @node: the current node
44
+ *
45
+ * Finds the first closest next sibling of the node which is an
46
+ * element node.
47
+ * Note the handling of entities references is different than in
48
+ * the W3C DOM element traversal spec since we don't have back reference
49
+ * from entities content to entities references.
50
+ *
51
+ * Returns the next element sibling or NULL if not available
52
+ */
53
+ xmlNodePtr
54
+ xmlNextElementSibling(xmlNodePtr node)
55
+ {
56
+ if (node == NULL) {
57
+ return (NULL);
58
+ }
59
+ switch (node->type) {
60
+ case XML_ELEMENT_NODE:
61
+ case XML_TEXT_NODE:
62
+ case XML_CDATA_SECTION_NODE:
63
+ case XML_ENTITY_REF_NODE:
64
+ case XML_ENTITY_NODE:
65
+ case XML_PI_NODE:
66
+ case XML_COMMENT_NODE:
67
+ case XML_DTD_NODE:
68
+ case XML_XINCLUDE_START:
69
+ case XML_XINCLUDE_END:
70
+ node = node->next;
71
+ break;
72
+ default:
73
+ return (NULL);
74
+ }
75
+ while (node != NULL) {
76
+ if (node->type == XML_ELEMENT_NODE) {
77
+ return (node);
78
+ }
79
+ node = node->next;
80
+ }
81
+ return (NULL);
82
+ }
83
+
84
+ /**
85
+ * xmlLastElementChild:
86
+ * @parent: the parent node
87
+ *
88
+ * Finds the last child node of that element which is a Element node
89
+ * Note the handling of entities references is different than in
90
+ * the W3C DOM element traversal spec since we don't have back reference
91
+ * from entities content to entities references.
92
+ *
93
+ * Returns the last element child or NULL if not available
94
+ */
95
+ xmlNodePtr
96
+ xmlLastElementChild(xmlNodePtr parent)
97
+ {
98
+ xmlNodePtr cur = NULL;
99
+
100
+ if (parent == NULL) {
101
+ return (NULL);
102
+ }
103
+ switch (parent->type) {
104
+ case XML_ELEMENT_NODE:
105
+ case XML_ENTITY_NODE:
106
+ case XML_DOCUMENT_NODE:
107
+ case XML_HTML_DOCUMENT_NODE:
108
+ cur = parent->last;
109
+ break;
110
+ default:
111
+ return (NULL);
112
+ }
113
+ while (cur != NULL) {
114
+ if (cur->type == XML_ELEMENT_NODE) {
115
+ return (cur);
116
+ }
117
+ cur = cur->prev;
118
+ }
119
+ return (NULL);
120
+ }
121
+ #endif
@@ -1,101 +1,221 @@
1
1
  #include <nokogiri.h>
2
2
 
3
3
  VALUE mNokogiri ;
4
+ VALUE mNokogiriGumbo ;
5
+ VALUE mNokogiriHtml4 ;
6
+ VALUE mNokogiriHtml4Sax ;
7
+ VALUE mNokogiriHtml5 ;
4
8
  VALUE mNokogiriXml ;
5
- VALUE mNokogiriHtml ;
6
- VALUE mNokogiriXslt ;
7
9
  VALUE mNokogiriXmlSax ;
8
- VALUE mNokogiriHtmlSax ;
9
-
10
- #ifdef USE_INCLUDED_VASPRINTF
11
- /*
12
- * I srsly hate windows. it doesn't have vasprintf.
13
- * Thank you Geoffroy Couprie for this implementation of vasprintf!
14
- */
15
- int vasprintf (char **strp, const char *fmt, va_list ap)
10
+ VALUE mNokogiriXmlXpath ;
11
+ VALUE mNokogiriXslt ;
12
+
13
+ VALUE cNokogiriSyntaxError;
14
+ VALUE cNokogiriXmlCharacterData;
15
+ VALUE cNokogiriXmlElement;
16
+ VALUE cNokogiriXmlXpathSyntaxError;
17
+
18
+ void noko_init_xml_attr(void);
19
+ void noko_init_xml_attribute_decl(void);
20
+ void noko_init_xml_cdata(void);
21
+ void noko_init_xml_comment(void);
22
+ void noko_init_xml_document(void);
23
+ void noko_init_xml_document_fragment(void);
24
+ void noko_init_xml_dtd(void);
25
+ void noko_init_xml_element_content(void);
26
+ void noko_init_xml_element_decl(void);
27
+ void noko_init_xml_encoding_handler(void);
28
+ void noko_init_xml_entity_decl(void);
29
+ void noko_init_xml_entity_reference(void);
30
+ void noko_init_xml_namespace(void);
31
+ void noko_init_xml_node(void);
32
+ void noko_init_xml_node_set(void);
33
+ void noko_init_xml_processing_instruction(void);
34
+ void noko_init_xml_reader(void);
35
+ void noko_init_xml_relax_ng(void);
36
+ void noko_init_xml_sax_parser(void);
37
+ void noko_init_xml_sax_parser_context(void);
38
+ void noko_init_xml_sax_push_parser(void);
39
+ void noko_init_xml_schema(void);
40
+ void noko_init_xml_syntax_error(void);
41
+ void noko_init_xml_text(void);
42
+ void noko_init_xml_xpath_context(void);
43
+ void noko_init_xslt_stylesheet(void);
44
+ void noko_init_html_document(void);
45
+ void noko_init_html_element_description(void);
46
+ void noko_init_html_entity_lookup(void);
47
+ void noko_init_html_sax_parser_context(void);
48
+ void noko_init_html_sax_push_parser(void);
49
+ void noko_init_gumbo(void);
50
+ void noko_init_test_global_handlers(void);
51
+
52
+ static ID id_read, id_write, id_external_encoding;
53
+
54
+
55
+ static VALUE
56
+ noko_io_read_check(VALUE val)
16
57
  {
17
- /* Mingw32/64 have a broken vsnprintf implementation that fails when
18
- * using a zero-byte limit in order to retrieve the required size for malloc.
19
- * So we use a one byte buffer instead.
20
- */
21
- char tmp[1];
22
- int len = vsnprintf (tmp, 1, fmt, ap) + 1;
23
- char *res = (char *)malloc((unsigned int)len);
24
- if (res == NULL)
25
- return -1;
26
- *strp = res;
27
- return vsnprintf(res, (unsigned int)len, fmt, ap);
58
+ VALUE *args = (VALUE *)val;
59
+ return rb_funcall(args[0], id_read, 1, args[1]);
28
60
  }
29
- #endif
30
61
 
31
- void vasprintf_free (void *p)
62
+
63
+ static VALUE
64
+ noko_io_read_failed(VALUE arg, VALUE exc)
32
65
  {
33
- free(p);
66
+ return Qundef;
34
67
  }
35
68
 
36
- #ifdef HAVE_RUBY_UTIL_H
37
- #include "ruby/util.h"
38
- #else
39
- #include "util.h"
40
- #endif
41
69
 
42
- void nokogiri_root_node(xmlNodePtr node)
70
+ int
71
+ noko_io_read(void *io, char *c_buffer, int c_buffer_len)
72
+ {
73
+ VALUE rb_io = (VALUE)io;
74
+ VALUE rb_read_string, rb_args[2];
75
+ size_t n_bytes_read, safe_len;
76
+
77
+ rb_args[0] = rb_io;
78
+ rb_args[1] = INT2NUM(c_buffer_len);
79
+
80
+ rb_read_string = rb_rescue(noko_io_read_check, (VALUE)rb_args, noko_io_read_failed, 0);
81
+
82
+ if (NIL_P(rb_read_string)) { return 0; }
83
+ if (rb_read_string == Qundef) { return -1; }
84
+ if (TYPE(rb_read_string) != T_STRING) { return -1; }
85
+
86
+ n_bytes_read = (size_t)RSTRING_LEN(rb_read_string);
87
+ safe_len = (n_bytes_read > (size_t)c_buffer_len) ? (size_t)c_buffer_len : n_bytes_read;
88
+ memcpy(c_buffer, StringValuePtr(rb_read_string), safe_len);
89
+
90
+ return (int)safe_len;
91
+ }
92
+
93
+
94
+ static VALUE
95
+ noko_io_write_check(VALUE rb_args)
43
96
  {
44
- xmlDocPtr doc;
45
- nokogiriTuplePtr tuple;
97
+ VALUE rb_io = ((VALUE *)rb_args)[0];
98
+ VALUE rb_output = ((VALUE *)rb_args)[1];
99
+ return rb_funcall(rb_io, id_write, 1, rb_output);
100
+ }
46
101
 
47
- doc = node->doc;
48
- if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc;
49
- tuple = (nokogiriTuplePtr)doc->_private;
50
- st_insert(tuple->unlinkedNodes, (st_data_t)node, (st_data_t)node);
102
+
103
+ static VALUE
104
+ noko_io_write_failed(VALUE arg, VALUE exc)
105
+ {
106
+ return Qundef;
51
107
  }
52
108
 
53
- void nokogiri_root_nsdef(xmlNsPtr ns, xmlDocPtr doc)
109
+
110
+ int
111
+ noko_io_write(void *io, char *c_buffer, int c_buffer_len)
54
112
  {
55
- nokogiriTuplePtr tuple;
113
+ VALUE rb_args[2], rb_n_bytes_written;
114
+ VALUE rb_io = (VALUE)io;
115
+ VALUE rb_enc = Qnil;
116
+ rb_encoding *io_encoding;
117
+
118
+ if (rb_respond_to(rb_io, id_external_encoding)) {
119
+ rb_enc = rb_funcall(rb_io, id_external_encoding, 0);
120
+ }
121
+ io_encoding = RB_NIL_P(rb_enc) ? rb_ascii8bit_encoding() : rb_to_encoding(rb_enc);
122
+
123
+ rb_args[0] = rb_io;
124
+ rb_args[1] = rb_enc_str_new(c_buffer, (long)c_buffer_len, io_encoding);
125
+
126
+ rb_n_bytes_written = rb_rescue(noko_io_write_check, (VALUE)rb_args, noko_io_write_failed, 0);
127
+ if (rb_n_bytes_written == Qundef) { return -1; }
128
+
129
+ return NUM2INT(rb_n_bytes_written);
130
+ }
131
+
56
132
 
57
- if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc;
58
- tuple = (nokogiriTuplePtr)doc->_private;
59
- st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns);
133
+ int
134
+ noko_io_close(void *io)
135
+ {
136
+ return 0;
60
137
  }
61
138
 
62
- void Init_nokogiri()
139
+
140
+ #if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
141
+ # define NOKOGIRI_WINDOWS_DLLS 1
142
+ #else
143
+ # define NOKOGIRI_WINDOWS_DLLS 0
144
+ #endif
145
+
146
+ //
147
+ // | dlls || true | false |
148
+ // | nlmm || | |
149
+ // |-----------++---------+---------|
150
+ // | NULL || default | ruby |
151
+ // | "random" || default | ruby |
152
+ // | "ruby" || ruby | ruby |
153
+ // | "default" || default | default |
154
+ //
155
+ // We choose *not* to use Ruby's memory management functions with windows DLLs because of this
156
+ // issue: https://github.com/sparklemotion/nokogiri/issues/2241
157
+ //
158
+ static void
159
+ set_libxml_memory_management(void)
63
160
  {
64
- xmlMemSetup(
65
- (xmlFreeFunc)ruby_xfree,
66
- (xmlMallocFunc)ruby_xmalloc,
67
- (xmlReallocFunc)ruby_xrealloc,
68
- ruby_strdup
69
- );
161
+ const char *nlmm = getenv("NOKOGIRI_LIBXML_MEMORY_MANAGEMENT");
162
+ if (nlmm) {
163
+ if (strcmp(nlmm, "default") == 0) {
164
+ goto libxml_uses_default_memory_management;
165
+ } else if (strcmp(nlmm, "ruby") == 0) {
166
+ goto libxml_uses_ruby_memory_management;
167
+ }
168
+ }
169
+ if (NOKOGIRI_WINDOWS_DLLS) {
170
+ libxml_uses_default_memory_management:
171
+ rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
172
+ return;
173
+ } else {
174
+ libxml_uses_ruby_memory_management:
175
+ rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
176
+ xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
177
+ return;
178
+ }
179
+ }
180
+
70
181
 
182
+ void
183
+ Init_nokogiri(void)
184
+ {
71
185
  mNokogiri = rb_define_module("Nokogiri");
186
+ mNokogiriGumbo = rb_define_module_under(mNokogiri, "Gumbo");
187
+ mNokogiriHtml4 = rb_define_module_under(mNokogiri, "HTML4");
188
+ mNokogiriHtml4Sax = rb_define_module_under(mNokogiriHtml4, "SAX");
189
+ mNokogiriHtml5 = rb_define_module_under(mNokogiri, "HTML5");
72
190
  mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
73
- mNokogiriHtml = rb_define_module_under(mNokogiri, "HTML");
74
- mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
75
191
  mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
76
- mNokogiriHtmlSax = rb_define_module_under(mNokogiriHtml, "SAX");
77
-
78
- rb_const_set( mNokogiri,
79
- rb_intern("LIBXML_VERSION"),
80
- NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION)
81
- );
82
- rb_const_set( mNokogiri,
83
- rb_intern("LIBXML_PARSER_VERSION"),
84
- NOKOGIRI_STR_NEW2(xmlParserVersion)
85
- );
86
-
87
- #ifdef NOKOGIRI_USE_PACKAGED_LIBRARIES
88
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_USE_PACKAGED_LIBRARIES"), Qtrue);
89
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATH"), NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATH));
90
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATH"), NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATH));
91
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATCHES), " "));
92
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATCHES), " "));
192
+ mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
193
+ mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
194
+
195
+ set_libxml_memory_management(); /* must be before any function calls that might invoke xmlInitParser() */
196
+ xmlInitParser();
197
+ exsltRegisterAll();
198
+
199
+ rb_const_set(mNokogiri, rb_intern("LIBXML_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION));
200
+ rb_const_set(mNokogiri, rb_intern("LIBXML_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xmlParserVersion));
201
+
202
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXSLT_DOTTED_VERSION));
203
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xsltEngineVersion));
204
+
205
+ #ifdef NOKOGIRI_PACKAGED_LIBRARIES
206
+ rb_const_set(mNokogiri, rb_intern("PACKAGED_LIBRARIES"), Qtrue);
207
+ # ifdef NOKOGIRI_PRECOMPILED_LIBRARIES
208
+ rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qtrue);
209
+ # else
210
+ rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qfalse);
211
+ # endif
212
+ rb_const_set(mNokogiri, rb_intern("LIBXML2_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATCHES), " "));
213
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATCHES), " "));
93
214
  #else
94
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_USE_PACKAGED_LIBRARIES"), Qfalse);
95
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATH"), Qnil);
96
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATH"), Qnil);
97
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATCHES"), Qnil);
98
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATCHES"), Qnil);
215
+ rb_const_set(mNokogiri, rb_intern("PACKAGED_LIBRARIES"), Qfalse);
216
+ rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qfalse);
217
+ rb_const_set(mNokogiri, rb_intern("LIBXML2_PATCHES"), Qnil);
218
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_PATCHES"), Qnil);
99
219
  #endif
100
220
 
101
221
  #ifdef LIBXML_ICONV_ENABLED
@@ -104,38 +224,64 @@ void Init_nokogiri()
104
224
  rb_const_set(mNokogiri, rb_intern("LIBXML_ICONV_ENABLED"), Qfalse);
105
225
  #endif
106
226
 
107
- xmlInitParser();
227
+ #ifdef NOKOGIRI_OTHER_LIBRARY_VERSIONS
228
+ rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS));
229
+ #endif
230
+
231
+ if (xsltExtModuleFunctionLookup((const xmlChar *)"date-time", EXSLT_DATE_NAMESPACE)) {
232
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qtrue);
233
+ } else {
234
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qfalse);
235
+ }
236
+
237
+ cNokogiriSyntaxError = rb_define_class_under(mNokogiri, "SyntaxError", rb_eStandardError);
238
+ noko_init_xml_syntax_error();
239
+ assert(cNokogiriXmlSyntaxError);
240
+ cNokogiriXmlXpathSyntaxError = rb_define_class_under(mNokogiriXmlXpath, "SyntaxError", cNokogiriXmlSyntaxError);
241
+
242
+ noko_init_xml_element_content();
243
+ noko_init_xml_encoding_handler();
244
+ noko_init_xml_namespace();
245
+ noko_init_xml_node_set();
246
+ noko_init_xml_reader();
247
+ noko_init_xml_sax_parser();
248
+ noko_init_xml_xpath_context();
249
+ noko_init_xslt_stylesheet();
250
+ noko_init_html_element_description();
251
+ noko_init_html_entity_lookup();
252
+
253
+ noko_init_xml_schema();
254
+ noko_init_xml_relax_ng();
255
+
256
+ noko_init_xml_sax_parser_context();
257
+ noko_init_html_sax_parser_context();
258
+
259
+ noko_init_xml_sax_push_parser();
260
+ noko_init_html_sax_push_parser();
261
+
262
+ noko_init_xml_node();
263
+ noko_init_xml_attr();
264
+ noko_init_xml_attribute_decl();
265
+ noko_init_xml_dtd();
266
+ noko_init_xml_element_decl();
267
+ noko_init_xml_entity_decl();
268
+ noko_init_xml_entity_reference();
269
+ noko_init_xml_processing_instruction();
270
+ assert(cNokogiriXmlNode);
271
+ cNokogiriXmlElement = rb_define_class_under(mNokogiriXml, "Element", cNokogiriXmlNode);
272
+ cNokogiriXmlCharacterData = rb_define_class_under(mNokogiriXml, "CharacterData", cNokogiriXmlNode);
273
+ noko_init_xml_comment();
274
+ noko_init_xml_text();
275
+ noko_init_xml_cdata();
276
+
277
+ noko_init_xml_document_fragment();
278
+ noko_init_xml_document();
279
+ noko_init_html_document();
280
+ noko_init_gumbo();
281
+
282
+ noko_init_test_global_handlers();
108
283
 
109
- init_xml_document();
110
- init_html_document();
111
- init_xml_node();
112
- init_xml_document_fragment();
113
- init_xml_text();
114
- init_xml_cdata();
115
- init_xml_processing_instruction();
116
- init_xml_attr();
117
- init_xml_entity_reference();
118
- init_xml_comment();
119
- init_xml_node_set();
120
- init_xml_xpath_context();
121
- init_xml_sax_parser_context();
122
- init_xml_sax_parser();
123
- init_xml_sax_push_parser();
124
- init_xml_reader();
125
- init_xml_dtd();
126
- init_xml_element_content();
127
- init_xml_attribute_decl();
128
- init_xml_element_decl();
129
- init_xml_entity_decl();
130
- init_xml_namespace();
131
- init_html_sax_parser_context();
132
- init_html_sax_push_parser();
133
- init_xslt_stylesheet();
134
- init_xml_syntax_error();
135
- init_html_entity_lookup();
136
- init_html_element_description();
137
- init_xml_schema();
138
- init_xml_relax_ng();
139
- init_nokogiri_io();
140
- init_xml_encoding_handler();
284
+ id_read = rb_intern("read");
285
+ id_write = rb_intern("write");
286
+ id_external_encoding = rb_intern("external_encoding");
141
287
  }