nokogiri 1.8.5 → 1.13.6

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (356) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -21
  3. data/LICENSE-DEPENDENCIES.md +1159 -868
  4. data/LICENSE.md +5 -28
  5. data/README.md +196 -90
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +13 -59
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +750 -420
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +119 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +228 -91
  18. data/ext/nokogiri/nokogiri.h +191 -89
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +41 -36
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +13 -18
  23. data/ext/nokogiri/xml_comment.c +19 -26
  24. data/ext/nokogiri/xml_document.c +291 -216
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +56 -50
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +43 -18
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +61 -52
  33. data/ext/nokogiri/xml_node.c +1044 -616
  34. data/ext/nokogiri/xml_node_set.c +174 -162
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +226 -175
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +112 -112
  39. data/ext/nokogiri/xml_sax_parser_context.c +112 -86
  40. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +13 -17
  44. data/ext/nokogiri/xml_xpath_context.c +223 -115
  45. data/ext/nokogiri/xslt_stylesheet.c +265 -173
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +218 -91
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/{html → html4}/document.rb +103 -105
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +91 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +100 -0
  118. data/lib/nokogiri/html5.rb +478 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +222 -0
  123. data/lib/nokogiri/version.rb +3 -108
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +97 -53
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +224 -86
  130. data/lib/nokogiri/xml/document_fragment.rb +57 -44
  131. data/lib/nokogiri/xml/dtd.rb +4 -2
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +10 -5
  138. data/lib/nokogiri/xml/node.rb +895 -377
  139. data/lib/nokogiri/xml/node_set.rb +92 -65
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +22 -8
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +23 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +38 -34
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +112 -72
  155. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -37
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +29 -20
  163. data/lib/nokogiri.rb +49 -65
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  169. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  170. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
  171. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  172. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  173. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
  174. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  175. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
  176. metadata +220 -266
  177. data/.autotest +0 -22
  178. data/.cross_rubies +0 -8
  179. data/.editorconfig +0 -17
  180. data/.gemtest +0 -0
  181. data/.travis.yml +0 -63
  182. data/CHANGELOG.md +0 -1368
  183. data/CONTRIBUTING.md +0 -42
  184. data/C_CODING_STYLE.rdoc +0 -33
  185. data/Gemfile-libxml-ruby +0 -3
  186. data/Manifest.txt +0 -370
  187. data/ROADMAP.md +0 -111
  188. data/Rakefile +0 -348
  189. data/SECURITY.md +0 -19
  190. data/STANDARD_RESPONSES.md +0 -47
  191. data/Y_U_NO_GEMSPEC.md +0 -155
  192. data/appveyor.yml +0 -29
  193. data/build_all +0 -44
  194. data/ext/nokogiri/html_document.c +0 -170
  195. data/ext/nokogiri/html_document.h +0 -10
  196. data/ext/nokogiri/html_element_description.c +0 -279
  197. data/ext/nokogiri/html_element_description.h +0 -10
  198. data/ext/nokogiri/html_entity_lookup.c +0 -32
  199. data/ext/nokogiri/html_entity_lookup.h +0 -8
  200. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  201. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  202. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  203. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  204. data/ext/nokogiri/xml_attr.h +0 -9
  205. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  206. data/ext/nokogiri/xml_cdata.h +0 -9
  207. data/ext/nokogiri/xml_comment.h +0 -9
  208. data/ext/nokogiri/xml_document.h +0 -23
  209. data/ext/nokogiri/xml_document_fragment.h +0 -10
  210. data/ext/nokogiri/xml_dtd.h +0 -10
  211. data/ext/nokogiri/xml_element_content.h +0 -10
  212. data/ext/nokogiri/xml_element_decl.h +0 -9
  213. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  214. data/ext/nokogiri/xml_entity_decl.h +0 -10
  215. data/ext/nokogiri/xml_entity_reference.h +0 -9
  216. data/ext/nokogiri/xml_io.c +0 -61
  217. data/ext/nokogiri/xml_io.h +0 -11
  218. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  219. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  220. data/ext/nokogiri/xml_namespace.h +0 -15
  221. data/ext/nokogiri/xml_node.h +0 -13
  222. data/ext/nokogiri/xml_node_set.h +0 -12
  223. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  224. data/ext/nokogiri/xml_reader.h +0 -10
  225. data/ext/nokogiri/xml_relax_ng.h +0 -9
  226. data/ext/nokogiri/xml_sax_parser.h +0 -39
  227. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  228. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  229. data/ext/nokogiri/xml_schema.h +0 -9
  230. data/ext/nokogiri/xml_syntax_error.h +0 -13
  231. data/ext/nokogiri/xml_text.h +0 -9
  232. data/ext/nokogiri/xml_xpath_context.h +0 -10
  233. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  234. data/lib/nokogiri/html/document_fragment.rb +0 -49
  235. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  236. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  237. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  238. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  239. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  240. data/patches/sort-patches-by-date +0 -25
  241. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  242. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
  243. data/suppressions/README.txt +0 -1
  244. data/suppressions/nokogiri_ruby-2.supp +0 -10
  245. data/tasks/test.rb +0 -100
  246. data/test/css/test_nthiness.rb +0 -226
  247. data/test/css/test_parser.rb +0 -386
  248. data/test/css/test_tokenizer.rb +0 -215
  249. data/test/css/test_xpath_visitor.rb +0 -96
  250. data/test/decorators/test_slop.rb +0 -23
  251. data/test/files/2ch.html +0 -108
  252. data/test/files/GH_1042.html +0 -18
  253. data/test/files/address_book.rlx +0 -12
  254. data/test/files/address_book.xml +0 -10
  255. data/test/files/atom.xml +0 -344
  256. data/test/files/bar/bar.xsd +0 -4
  257. data/test/files/bogus.xml +0 -0
  258. data/test/files/dont_hurt_em_why.xml +0 -422
  259. data/test/files/encoding.html +0 -82
  260. data/test/files/encoding.xhtml +0 -84
  261. data/test/files/exslt.xml +0 -8
  262. data/test/files/exslt.xslt +0 -35
  263. data/test/files/foo/foo.xsd +0 -4
  264. data/test/files/metacharset.html +0 -10
  265. data/test/files/namespace_pressure_test.xml +0 -1684
  266. data/test/files/noencoding.html +0 -47
  267. data/test/files/po.xml +0 -32
  268. data/test/files/po.xsd +0 -66
  269. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  270. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  271. data/test/files/saml/xenc_schema.xsd +0 -146
  272. data/test/files/saml/xmldsig_schema.xsd +0 -318
  273. data/test/files/shift_jis.html +0 -10
  274. data/test/files/shift_jis.xml +0 -5
  275. data/test/files/shift_jis_no_charset.html +0 -9
  276. data/test/files/slow-xpath.xml +0 -25509
  277. data/test/files/snuggles.xml +0 -3
  278. data/test/files/staff.dtd +0 -10
  279. data/test/files/staff.xml +0 -59
  280. data/test/files/staff.xslt +0 -32
  281. data/test/files/test_document_url/bar.xml +0 -2
  282. data/test/files/test_document_url/document.dtd +0 -4
  283. data/test/files/test_document_url/document.xml +0 -6
  284. data/test/files/tlm.html +0 -851
  285. data/test/files/to_be_xincluded.xml +0 -2
  286. data/test/files/valid_bar.xml +0 -2
  287. data/test/files/xinclude.xml +0 -4
  288. data/test/helper.rb +0 -271
  289. data/test/html/sax/test_parser.rb +0 -168
  290. data/test/html/sax/test_parser_context.rb +0 -46
  291. data/test/html/sax/test_parser_text.rb +0 -163
  292. data/test/html/sax/test_push_parser.rb +0 -87
  293. data/test/html/test_attributes.rb +0 -85
  294. data/test/html/test_builder.rb +0 -164
  295. data/test/html/test_document.rb +0 -712
  296. data/test/html/test_document_encoding.rb +0 -143
  297. data/test/html/test_document_fragment.rb +0 -310
  298. data/test/html/test_element_description.rb +0 -105
  299. data/test/html/test_named_characters.rb +0 -14
  300. data/test/html/test_node.rb +0 -212
  301. data/test/html/test_node_encoding.rb +0 -91
  302. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  303. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  304. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  305. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  306. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  307. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  308. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  309. data/test/test_convert_xpath.rb +0 -135
  310. data/test/test_css_cache.rb +0 -47
  311. data/test/test_encoding_handler.rb +0 -48
  312. data/test/test_memory_leak.rb +0 -156
  313. data/test/test_nokogiri.rb +0 -138
  314. data/test/test_soap4r_sax.rb +0 -52
  315. data/test/test_xslt_transforms.rb +0 -314
  316. data/test/xml/node/test_save_options.rb +0 -28
  317. data/test/xml/node/test_subclass.rb +0 -44
  318. data/test/xml/sax/test_parser.rb +0 -402
  319. data/test/xml/sax/test_parser_context.rb +0 -115
  320. data/test/xml/sax/test_parser_text.rb +0 -202
  321. data/test/xml/sax/test_push_parser.rb +0 -265
  322. data/test/xml/test_attr.rb +0 -74
  323. data/test/xml/test_attribute_decl.rb +0 -86
  324. data/test/xml/test_builder.rb +0 -341
  325. data/test/xml/test_c14n.rb +0 -180
  326. data/test/xml/test_cdata.rb +0 -54
  327. data/test/xml/test_comment.rb +0 -40
  328. data/test/xml/test_document.rb +0 -982
  329. data/test/xml/test_document_encoding.rb +0 -31
  330. data/test/xml/test_document_fragment.rb +0 -298
  331. data/test/xml/test_dtd.rb +0 -187
  332. data/test/xml/test_dtd_encoding.rb +0 -31
  333. data/test/xml/test_element_content.rb +0 -56
  334. data/test/xml/test_element_decl.rb +0 -73
  335. data/test/xml/test_entity_decl.rb +0 -122
  336. data/test/xml/test_entity_reference.rb +0 -262
  337. data/test/xml/test_namespace.rb +0 -96
  338. data/test/xml/test_node.rb +0 -1325
  339. data/test/xml/test_node_attributes.rb +0 -115
  340. data/test/xml/test_node_encoding.rb +0 -75
  341. data/test/xml/test_node_inheritance.rb +0 -32
  342. data/test/xml/test_node_reparenting.rb +0 -592
  343. data/test/xml/test_node_set.rb +0 -809
  344. data/test/xml/test_parse_options.rb +0 -64
  345. data/test/xml/test_processing_instruction.rb +0 -30
  346. data/test/xml/test_reader.rb +0 -620
  347. data/test/xml/test_reader_encoding.rb +0 -134
  348. data/test/xml/test_relax_ng.rb +0 -60
  349. data/test/xml/test_schema.rb +0 -142
  350. data/test/xml/test_syntax_error.rb +0 -36
  351. data/test/xml/test_text.rb +0 -60
  352. data/test/xml/test_unparented_node.rb +0 -483
  353. data/test/xml/test_xinclude.rb +0 -83
  354. data/test/xml/test_xpath.rb +0 -470
  355. data/test/xslt/test_custom_functions.rb +0 -133
  356. data/test/xslt/test_exception_handling.rb +0 -37
@@ -1,143 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- require "helper"
3
-
4
- module Nokogiri
5
- module HTML
6
- class TestDocumentEncoding < Nokogiri::TestCase
7
- def test_encoding
8
- doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
9
-
10
- hello = "こんにちは"
11
-
12
- assert_match doc.encoding, doc.to_html
13
- assert_match hello.encode('Shift_JIS'), doc.to_html
14
- assert_equal 'Shift_JIS', doc.to_html.encoding.name
15
-
16
- assert_match hello, doc.to_html(:encoding => 'UTF-8')
17
- assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8')
18
- assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
19
- end
20
-
21
- def test_encoding_without_charset
22
- doc = Nokogiri::HTML File.open(SHIFT_JIS_NO_CHARSET, 'r:cp932:cp932').read
23
-
24
- hello = "こんにちは"
25
-
26
- assert_match hello, doc.content
27
- assert_match hello, doc.to_html(:encoding => 'UTF-8')
28
- assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
29
- end
30
-
31
- def test_default_to_encoding_from_string
32
- bad_charset = <<-eohtml
33
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
34
- <html>
35
- <head>
36
- <meta http-equiv="Content-Type" content="text/html; charset=charset=UTF-8">
37
- </head>
38
- <body>
39
- <a href="http://tenderlovemaking.com/">blah!</a>
40
- </body>
41
- </html>
42
- eohtml
43
- doc = Nokogiri::HTML(bad_charset)
44
- assert_equal bad_charset.encoding.name, doc.encoding
45
-
46
- doc = Nokogiri.parse(bad_charset)
47
- assert_equal bad_charset.encoding.name, doc.encoding
48
- end
49
-
50
- def test_encoding_non_utf8
51
- orig = '日本語が上手です'
52
- bin = Encoding::ASCII_8BIT
53
- [Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc|
54
- html = <<-eohtml.encode(enc)
55
- <html>
56
- <meta http-equiv="Content-Type" content="text/html; charset=#{enc.name}">
57
- <title xml:lang="ja">#{orig}</title></html>
58
- eohtml
59
- text = Nokogiri::HTML.parse(html).at('title').inner_text
60
- assert_equal(
61
- orig.encode(enc).force_encoding(bin),
62
- text.encode(enc).force_encoding(bin)
63
- )
64
- end
65
- end
66
-
67
- def test_encoding_with_a_bad_name
68
- bad_charset = <<-eohtml
69
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
70
- <html>
71
- <head>
72
- <meta http-equiv="Content-Type" content="text/html; charset=charset=UTF-8">
73
- </head>
74
- <body>
75
- <a href="http://tenderlovemaking.com/">blah!</a>
76
- </body>
77
- </html>
78
- eohtml
79
- doc = Nokogiri::HTML(bad_charset, nil, 'askldjfhalsdfjhlkasdfjh')
80
- assert_equal ['http://tenderlovemaking.com/'],
81
- doc.css('a').map { |a| a['href'] }
82
- end
83
-
84
- def test_empty_doc_encoding
85
- encoding = 'US-ASCII'
86
- assert_equal encoding, Nokogiri::HTML.parse(nil, nil, encoding).encoding
87
- end
88
- end
89
-
90
- class TestDocumentEncodingDetection < Nokogiri::TestCase
91
- def binread(file)
92
- IO.binread(file)
93
- end
94
-
95
- def binopen(file)
96
- File.open(file, 'rb')
97
- end
98
-
99
- def test_document_html_noencoding
100
- from_stream = Nokogiri::HTML(binopen(NOENCODING_FILE))
101
- from_string = Nokogiri::HTML(binread(NOENCODING_FILE))
102
-
103
- assert_equal from_string.to_s.size, from_stream.to_s.size
104
- end
105
-
106
- def test_document_html_charset
107
- html = Nokogiri::HTML(binopen(METACHARSET_FILE))
108
- assert_equal 'iso-2022-jp', html.encoding
109
- assert_equal 'たこ焼き仮面', html.title
110
- end
111
-
112
- def test_document_xhtml_enc
113
- [ENCODING_XHTML_FILE, ENCODING_HTML_FILE].each { |file|
114
- doc_from_string_enc = Nokogiri::HTML(binread(file), nil, 'Shift_JIS')
115
- ary_from_string_enc = doc_from_string_enc.xpath('//p/text()').map(&:text)
116
-
117
- doc_from_string = Nokogiri::HTML(binread(file))
118
- ary_from_string = doc_from_string.xpath('//p/text()').map(&:text)
119
-
120
- doc_from_file_enc = Nokogiri::HTML(binopen(file), nil, 'Shift_JIS')
121
- ary_from_file_enc = doc_from_file_enc.xpath('//p/text()').map(&:text)
122
-
123
- doc_from_file = Nokogiri::HTML(binopen(file))
124
- ary_from_file = doc_from_file.xpath('//p/text()').map(&:text)
125
-
126
- title = 'たこ焼き仮面'
127
-
128
- assert_equal(title, doc_from_string_enc.at('//title/text()').text)
129
- assert_equal(title, doc_from_string.at('//title/text()').text)
130
- assert_equal(title, doc_from_file_enc.at('//title/text()').text)
131
- assert_equal(title, doc_from_file.at('//title/text()').text)
132
-
133
- evil = (0..72).map { |i| '超' * i + '悪い事を構想中。' }
134
-
135
- assert_equal(evil, ary_from_string_enc)
136
- assert_equal(evil, ary_from_string)
137
- assert_equal(evil, ary_from_file_enc)
138
- assert_equal(evil, ary_from_file)
139
- }
140
- end
141
- end
142
- end
143
- end
@@ -1,310 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- require "helper"
3
-
4
- module Nokogiri
5
- module HTML
6
- class TestDocumentFragment < Nokogiri::TestCase
7
- def setup
8
- super
9
- @html = Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE)
10
- end
11
-
12
- def test_ascii_8bit_encoding
13
- s = String.new 'hello'
14
- s.force_encoding ::Encoding::ASCII_8BIT
15
- assert_equal "hello", Nokogiri::HTML::DocumentFragment.parse(s).to_html
16
- end
17
-
18
- def test_inspect_encoding
19
- fragment = "<div>こんにちは!</div>".encode('EUC-JP')
20
- f = Nokogiri::HTML::DocumentFragment.parse fragment
21
- assert_equal "こんにちは!", f.content
22
- end
23
-
24
- def test_html_parse_encoding
25
- fragment = "<div>こんにちは!</div>".encode 'EUC-JP'
26
- f = Nokogiri::HTML.fragment fragment
27
- assert_equal 'EUC-JP', f.document.encoding
28
- assert_equal "こんにちは!", f.content
29
- end
30
-
31
- def test_unlink_empty_document
32
- frag = Nokogiri::HTML::DocumentFragment.parse('').unlink # must_not_raise
33
- assert_nil frag.parent
34
- end
35
-
36
- def test_colons_are_not_removed
37
- doc = Nokogiri::HTML::DocumentFragment.parse("<span>3:30pm</span>")
38
- assert_match(/3:30/, doc.to_s)
39
- end
40
-
41
- def test_parse_encoding
42
- fragment = "<div>hello world</div>"
43
- f = Nokogiri::HTML::DocumentFragment.parse fragment, 'ISO-8859-1'
44
- assert_equal 'ISO-8859-1', f.document.encoding
45
- assert_equal "hello world", f.content
46
- end
47
-
48
- def test_html_parse_with_encoding
49
- fragment = "<div>hello world</div>"
50
- f = Nokogiri::HTML.fragment fragment, 'ISO-8859-1'
51
- assert_equal 'ISO-8859-1', f.document.encoding
52
- assert_equal "hello world", f.content
53
- end
54
-
55
- def test_parse_in_context
56
- assert_equal('<br>', @html.root.parse('<br />').to_s)
57
- end
58
-
59
- def test_inner_html=
60
- fragment = Nokogiri::HTML.fragment '<hr />'
61
-
62
- fragment.inner_html = "hello"
63
- assert_equal 'hello', fragment.inner_html
64
- end
65
-
66
- def test_ancestors_search
67
- html = %q{
68
- <div>
69
- <ul>
70
- <li>foo</li>
71
- </ul>
72
- </div>
73
- }
74
- fragment = Nokogiri::HTML.fragment html
75
- li = fragment.at('li')
76
- assert li.matches?('li')
77
- end
78
-
79
- def test_fun_encoding
80
- string = %Q(<body>こんにちは</body>)
81
- html = Nokogiri::HTML::DocumentFragment.parse(
82
- string
83
- ).to_html(:encoding => 'UTF-8')
84
- assert_equal string, html
85
- end
86
-
87
- def test_new
88
- assert Nokogiri::HTML::DocumentFragment.new(@html)
89
- end
90
-
91
- def test_body_fragment_should_contain_body
92
- fragment = Nokogiri::HTML::DocumentFragment.parse(" <body><div>foo</div></body>")
93
- assert_match(/^<body>/, fragment.to_s)
94
- end
95
-
96
- def test_nonbody_fragment_should_not_contain_body
97
- fragment = Nokogiri::HTML::DocumentFragment.parse("<div>foo</div>")
98
- assert_match(/^<div>/, fragment.to_s)
99
- end
100
-
101
- def test_fragment_should_have_document
102
- fragment = Nokogiri::HTML::DocumentFragment.new(@html)
103
- assert_equal @html, fragment.document
104
- end
105
-
106
- def test_empty_fragment_should_be_searchable_by_css
107
- fragment = Nokogiri::HTML.fragment("")
108
- assert_equal 0, fragment.css("a").size
109
- end
110
-
111
- def test_empty_fragment_should_be_searchable
112
- fragment = Nokogiri::HTML.fragment("")
113
- assert_equal 0, fragment.search("//a").size
114
- end
115
-
116
- def test_name
117
- fragment = Nokogiri::HTML::DocumentFragment.new(@html)
118
- assert_equal '#document-fragment', fragment.name
119
- end
120
-
121
- def test_static_method
122
- fragment = Nokogiri::HTML::DocumentFragment.parse("<div>a</div>")
123
- assert_instance_of Nokogiri::HTML::DocumentFragment, fragment
124
- end
125
-
126
- def test_many_fragments
127
- 100.times { Nokogiri::HTML::DocumentFragment.new(@html) }
128
- end
129
-
130
- def test_subclass
131
- klass = Class.new(Nokogiri::HTML::DocumentFragment)
132
- fragment = klass.new(@html, "<div>a</div>")
133
- assert_instance_of klass, fragment
134
- end
135
-
136
- def test_subclass_parse
137
- klass = Class.new(Nokogiri::HTML::DocumentFragment)
138
- doc = klass.parse("<div>a</div>")
139
- assert_instance_of klass, doc
140
- end
141
-
142
- def test_html_fragment
143
- fragment = Nokogiri::HTML.fragment("<div>a</div>")
144
- assert_equal "<div>a</div>", fragment.to_s
145
- end
146
-
147
- def test_html_fragment_has_outer_text
148
- doc = "a<div>b</div>c"
149
- fragment = Nokogiri::HTML::Document.new.fragment(doc)
150
- if Nokogiri.uses_libxml? &&
151
- Nokogiri::VERSION_INFO['libxml']['loaded'] <= "2.6.16"
152
- assert_equal "a<div>b</div><p>c</p>", fragment.to_s
153
- else
154
- assert_equal "a<div>b</div>c", fragment.to_s
155
- end
156
- end
157
-
158
- def test_html_fragment_case_insensitivity
159
- doc = "<Div>b</Div>"
160
- fragment = Nokogiri::HTML::Document.new.fragment(doc)
161
- assert_equal "<div>b</div>", fragment.to_s
162
- end
163
-
164
- def test_html_fragment_with_leading_whitespace
165
- doc = " <div>b</div> "
166
- fragment = Nokogiri::HTML::Document.new.fragment(doc)
167
- assert_match %r% <div>b</div> *%, fragment.to_s
168
- end
169
-
170
- def test_html_fragment_with_leading_whitespace_and_newline
171
- doc = " \n<div>b</div> "
172
- fragment = Nokogiri::HTML::Document.new.fragment(doc)
173
- assert_match %r% \n<div>b</div> *%, fragment.to_s
174
- end
175
-
176
- def test_html_fragment_with_input_and_intermediate_whitespace
177
- doc = "<label>Label</label><input type=\"text\"> <span>span</span>"
178
- fragment = Nokogiri::HTML::Document.new.fragment(doc)
179
- assert_equal "<label>Label</label><input type=\"text\"> <span>span</span>", fragment.to_s
180
- end
181
-
182
- def test_html_fragment_with_leading_text_and_newline
183
- fragment = HTML::Document.new.fragment("First line\nSecond line<br>Broken line")
184
- assert_equal fragment.to_s, "First line\nSecond line<br>Broken line"
185
- end
186
-
187
- def test_html_fragment_with_leading_whitespace_and_text_and_newline
188
- fragment = HTML::Document.new.fragment(" First line\nSecond line<br>Broken line")
189
- assert_equal " First line\nSecond line<br>Broken line", fragment.to_s
190
- end
191
-
192
- def test_html_fragment_with_leading_entity
193
- failed = "&quot;test<br/>test&quot;"
194
- fragment = Nokogiri::HTML::DocumentFragment.parse(failed)
195
- assert_equal '"test<br>test"', fragment.to_html
196
- end
197
-
198
- def test_to_s
199
- doc = "<span>foo<br></span><span>bar</span>"
200
- fragment = Nokogiri::HTML::Document.new.fragment(doc)
201
- assert_equal "<span>foo<br></span><span>bar</span>", fragment.to_s
202
- end
203
-
204
- def test_to_html
205
- doc = "<span>foo<br></span><span>bar</span>"
206
- fragment = Nokogiri::HTML::Document.new.fragment(doc)
207
- assert_equal "<span>foo<br></span><span>bar</span>", fragment.to_html
208
- end
209
-
210
- def test_to_xhtml
211
- doc = "<span>foo<br></span><span>bar</span><p></p>"
212
- fragment = Nokogiri::HTML::Document.new.fragment(doc)
213
- if Nokogiri.jruby? || Nokogiri::VERSION_INFO['libxml']['loaded'] >= "2.7.0"
214
- assert_equal "<span>foo<br /></span><span>bar</span><p></p>", fragment.to_xhtml
215
- else
216
- # FIXME: why are we doing this ? this violates the spec,
217
- # see http://www.w3.org/TR/xhtml1/#C_2
218
- assert_equal "<span>foo<br></span><span>bar</span><p></p>", fragment.to_xhtml
219
- end
220
- end
221
-
222
- def test_to_xml
223
- doc = "<span>foo<br></span><span>bar</span>"
224
- fragment = Nokogiri::HTML::Document.new.fragment(doc)
225
- assert_equal "<span>foo<br/></span><span>bar</span>", fragment.to_xml
226
- end
227
-
228
- def test_fragment_script_tag_with_cdata
229
- doc = HTML::Document.new
230
- fragment = doc.fragment("<script>var foo = 'bar';</script>")
231
- assert_equal("<script>var foo = 'bar';</script>",
232
- fragment.to_s)
233
- end
234
-
235
- def test_fragment_with_comment
236
- doc = HTML::Document.new
237
- fragment = doc.fragment("<p>hello<!-- your ad here --></p>")
238
- assert_equal("<p>hello<!-- your ad here --></p>",
239
- fragment.to_s)
240
- end
241
-
242
- def test_element_children_counts
243
- if Nokogiri.uses_libxml? && Nokogiri::VERSION_INFO['libxml']['loaded'] <= "2.9.1"
244
- skip "#elements doesn't work in 2.9.1, see 1793a5a for history"
245
- end
246
- doc = Nokogiri::HTML::DocumentFragment.parse(" <div> </div>\n ")
247
- assert_equal 1, doc.element_children.count
248
- end
249
-
250
- def test_malformed_fragment_is_corrected
251
- fragment = HTML::DocumentFragment.parse("<div </div>")
252
- assert_equal "<div></div>", fragment.to_s
253
- end
254
-
255
- def test_unclosed_script_tag
256
- # see GH#315
257
- fragment = HTML::DocumentFragment.parse("foo <script>bar")
258
- assert_equal "foo <script>bar</script>", fragment.to_html
259
- end
260
-
261
- def test_error_propagation_on_fragment_parse
262
- frag = Nokogiri::HTML::DocumentFragment.parse "<hello>oh, hello there.</hello>"
263
- assert frag.errors.any?{|err| err.to_s =~ /Tag hello invalid/}, "errors should be copied to the fragment"
264
- end
265
-
266
- def test_error_propagation_on_fragment_parse_in_node_context
267
- doc = Nokogiri::HTML::Document.parse "<html><body><div></div></body></html>"
268
- context_node = doc.at_css "div"
269
- frag = Nokogiri::HTML::DocumentFragment.new doc, "<hello>oh, hello there.</hello>", context_node
270
- assert frag.errors.any?{|err| err.to_s =~ /Tag hello invalid/}, "errors should be on the context node's document"
271
- end
272
-
273
- def test_error_propagation_on_fragment_parse_in_node_context_should_not_include_preexisting_errors
274
- doc = Nokogiri::HTML::Document.parse "<html><body><div></div><jimmy></jimmy></body></html>"
275
- assert doc.errors.any?{|err| err.to_s =~ /jimmy/}, "assert on setup"
276
-
277
- context_node = doc.at_css "div"
278
- frag = Nokogiri::HTML::DocumentFragment.new doc, "<hello>oh, hello there.</hello>", context_node
279
- assert frag.errors.any?{|err| err.to_s =~ /Tag hello invalid/}, "errors should be on the context node's document"
280
- assert frag.errors.none?{|err| err.to_s =~ /jimmy/}, "errors should not include pre-existing document errors"
281
- end
282
-
283
- def test_capturing_nonparse_errors_during_fragment_clone
284
- # see https://github.com/sparklemotion/nokogiri/issues/1196 for background
285
- original = Nokogiri::HTML.fragment("<div id='unique'></div><div id='unique'></div>")
286
- original_errors = original.errors.dup
287
-
288
- copy = original.dup
289
- assert_equal original_errors, copy.errors
290
- end
291
-
292
- def test_capturing_nonparse_errors_during_node_copy_between_fragments
293
- # Errors should be emitted while parsing only, and should not change when moving nodes.
294
- frag1 = Nokogiri::HTML.fragment("<diva id='unique'>one</diva>")
295
- frag2 = Nokogiri::HTML.fragment("<dive id='unique'>two</dive>")
296
- node1 = frag1.at_css("#unique")
297
- node2 = frag2.at_css("#unique")
298
- original_errors1 = frag1.errors.dup
299
- original_errors2 = frag2.errors.dup
300
- assert original_errors1.any?{|e| e.to_s =~ /Tag diva invalid/ }, "it should complain about the tag name"
301
- assert original_errors2.any?{|e| e.to_s =~ /Tag dive invalid/ }, "it should complain about the tag name"
302
-
303
- node1.add_child node2
304
-
305
- assert_equal original_errors1, frag1.errors
306
- assert_equal original_errors2, frag2.errors
307
- end
308
- end
309
- end
310
- end
@@ -1,105 +0,0 @@
1
- require "helper"
2
-
3
- module Nokogiri
4
- module HTML
5
- class TestElementDescription < Nokogiri::TestCase
6
- def test_fetch_nonexistent
7
- assert_nil ElementDescription['foo']
8
- end
9
-
10
- def test_fetch_element_description
11
- assert desc = ElementDescription['a']
12
- assert_instance_of ElementDescription, desc
13
- end
14
-
15
- def test_name
16
- assert_equal 'a', ElementDescription['a'].name
17
- end
18
-
19
- def test_implied_start_tag?
20
- assert !ElementDescription['a'].implied_start_tag?
21
- end
22
-
23
- def test_implied_end_tag?
24
- assert !ElementDescription['a'].implied_end_tag?
25
- assert ElementDescription['p'].implied_end_tag?
26
- end
27
-
28
- def test_save_end_tag?
29
- assert !ElementDescription['a'].save_end_tag?
30
- assert ElementDescription['br'].save_end_tag?
31
- end
32
-
33
- def test_empty?
34
- assert ElementDescription['br'].empty?
35
- assert !ElementDescription['a'].empty?
36
- end
37
-
38
- def test_deprecated?
39
- assert ElementDescription['applet'].deprecated?
40
- assert !ElementDescription['br'].deprecated?
41
- end
42
-
43
- def test_inline?
44
- assert ElementDescription['a'].inline?
45
- assert !ElementDescription['div'].inline?
46
- end
47
-
48
- def test_block?
49
- element = ElementDescription['a']
50
- assert_equal(!element.inline?, element.block?)
51
- end
52
-
53
- def test_description
54
- assert ElementDescription['a'].description
55
- end
56
-
57
- def test_subelements
58
- sub_elements = ElementDescription['body'].sub_elements
59
- if Nokogiri.uses_libxml? && Nokogiri::LIBXML_VERSION >= '2.7.7'
60
- assert_equal 65, sub_elements.length
61
- elsif Nokogiri.uses_libxml?
62
- assert_equal 61, sub_elements.length
63
- else
64
- assert sub_elements.length > 0
65
- end
66
- end
67
-
68
- def test_default_sub_element
69
- assert_equal 'div', ElementDescription['body'].default_sub_element
70
- end
71
-
72
- def test_null_default_sub_element
73
- doc = Nokogiri::HTML('foo')
74
- doc.root.description.default_sub_element
75
- end
76
-
77
- def test_optional_attributes
78
- attrs = ElementDescription['table'].optional_attributes
79
- assert attrs
80
- end
81
-
82
- def test_deprecated_attributes
83
- attrs = ElementDescription['table'].deprecated_attributes
84
- assert attrs
85
- assert_equal 2, attrs.length
86
- end
87
-
88
- def test_required_attributes
89
- attrs = ElementDescription['table'].required_attributes
90
- assert attrs
91
- assert_equal 0, attrs.length
92
- end
93
-
94
- def test_inspect
95
- desc = ElementDescription['input']
96
- assert_match desc.name, desc.inspect
97
- end
98
-
99
- def test_to_s
100
- desc = ElementDescription['input']
101
- assert_match desc.name, desc.to_s
102
- end
103
- end
104
- end
105
- end
@@ -1,14 +0,0 @@
1
- require "helper"
2
-
3
- module Nokogiri
4
- module HTML
5
- class TestNamedCharacters < Nokogiri::TestCase
6
- def test_named_character
7
- copy = NamedCharacters.get('copy')
8
- assert_equal 169, NamedCharacters['copy']
9
- assert_equal copy.value, NamedCharacters['copy']
10
- assert copy.description
11
- end
12
- end
13
- end
14
- end