nokogiri 1.8.5 → 1.13.9

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (353) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -21
  3. data/LICENSE-DEPENDENCIES.md +1159 -868
  4. data/LICENSE.md +5 -28
  5. data/README.md +196 -90
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +13 -59
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +765 -420
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +119 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +228 -91
  18. data/ext/nokogiri/nokogiri.h +199 -88
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +42 -37
  21. data/ext/nokogiri/xml_attribute_decl.c +21 -21
  22. data/ext/nokogiri/xml_cdata.c +14 -19
  23. data/ext/nokogiri/xml_comment.c +19 -26
  24. data/ext/nokogiri/xml_document.c +296 -217
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +25 -25
  29. data/ext/nokogiri/xml_encoding_handler.c +43 -18
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +99 -54
  33. data/ext/nokogiri/xml_node.c +1107 -658
  34. data/ext/nokogiri/xml_node_set.c +178 -166
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +277 -175
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +112 -112
  39. data/ext/nokogiri/xml_sax_parser_context.c +112 -86
  40. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  41. data/ext/nokogiri/xml_schema.c +114 -35
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +226 -115
  45. data/ext/nokogiri/xslt_stylesheet.c +265 -173
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +218 -91
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/{html → html4}/document.rb +103 -105
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +91 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +100 -0
  118. data/lib/nokogiri/html5.rb +478 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +222 -0
  123. data/lib/nokogiri/version.rb +3 -108
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +97 -53
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +224 -86
  130. data/lib/nokogiri/xml/document_fragment.rb +57 -44
  131. data/lib/nokogiri/xml/dtd.rb +4 -2
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +10 -5
  138. data/lib/nokogiri/xml/node.rb +895 -377
  139. data/lib/nokogiri/xml/node_set.rb +92 -65
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +22 -8
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +21 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +38 -34
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +112 -72
  155. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -37
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +29 -20
  163. data/lib/nokogiri.rb +49 -65
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  169. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  170. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
  171. data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
  172. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
  173. metadata +211 -266
  174. data/.autotest +0 -22
  175. data/.cross_rubies +0 -8
  176. data/.editorconfig +0 -17
  177. data/.gemtest +0 -0
  178. data/.travis.yml +0 -63
  179. data/CHANGELOG.md +0 -1368
  180. data/CONTRIBUTING.md +0 -42
  181. data/C_CODING_STYLE.rdoc +0 -33
  182. data/Gemfile-libxml-ruby +0 -3
  183. data/Manifest.txt +0 -370
  184. data/ROADMAP.md +0 -111
  185. data/Rakefile +0 -348
  186. data/SECURITY.md +0 -19
  187. data/STANDARD_RESPONSES.md +0 -47
  188. data/Y_U_NO_GEMSPEC.md +0 -155
  189. data/appveyor.yml +0 -29
  190. data/build_all +0 -44
  191. data/ext/nokogiri/html_document.c +0 -170
  192. data/ext/nokogiri/html_document.h +0 -10
  193. data/ext/nokogiri/html_element_description.c +0 -279
  194. data/ext/nokogiri/html_element_description.h +0 -10
  195. data/ext/nokogiri/html_entity_lookup.c +0 -32
  196. data/ext/nokogiri/html_entity_lookup.h +0 -8
  197. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  198. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  199. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  200. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  201. data/ext/nokogiri/xml_attr.h +0 -9
  202. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  203. data/ext/nokogiri/xml_cdata.h +0 -9
  204. data/ext/nokogiri/xml_comment.h +0 -9
  205. data/ext/nokogiri/xml_document.h +0 -23
  206. data/ext/nokogiri/xml_document_fragment.h +0 -10
  207. data/ext/nokogiri/xml_dtd.h +0 -10
  208. data/ext/nokogiri/xml_element_content.h +0 -10
  209. data/ext/nokogiri/xml_element_decl.h +0 -9
  210. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  211. data/ext/nokogiri/xml_entity_decl.h +0 -10
  212. data/ext/nokogiri/xml_entity_reference.h +0 -9
  213. data/ext/nokogiri/xml_io.c +0 -61
  214. data/ext/nokogiri/xml_io.h +0 -11
  215. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  216. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  217. data/ext/nokogiri/xml_namespace.h +0 -15
  218. data/ext/nokogiri/xml_node.h +0 -13
  219. data/ext/nokogiri/xml_node_set.h +0 -12
  220. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  221. data/ext/nokogiri/xml_reader.h +0 -10
  222. data/ext/nokogiri/xml_relax_ng.h +0 -9
  223. data/ext/nokogiri/xml_sax_parser.h +0 -39
  224. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  225. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  226. data/ext/nokogiri/xml_schema.h +0 -9
  227. data/ext/nokogiri/xml_syntax_error.h +0 -13
  228. data/ext/nokogiri/xml_text.h +0 -9
  229. data/ext/nokogiri/xml_xpath_context.h +0 -10
  230. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  231. data/lib/nokogiri/html/document_fragment.rb +0 -49
  232. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  233. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  234. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  235. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  236. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  237. data/patches/sort-patches-by-date +0 -25
  238. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  239. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
  240. data/suppressions/README.txt +0 -1
  241. data/suppressions/nokogiri_ruby-2.supp +0 -10
  242. data/tasks/test.rb +0 -100
  243. data/test/css/test_nthiness.rb +0 -226
  244. data/test/css/test_parser.rb +0 -386
  245. data/test/css/test_tokenizer.rb +0 -215
  246. data/test/css/test_xpath_visitor.rb +0 -96
  247. data/test/decorators/test_slop.rb +0 -23
  248. data/test/files/2ch.html +0 -108
  249. data/test/files/GH_1042.html +0 -18
  250. data/test/files/address_book.rlx +0 -12
  251. data/test/files/address_book.xml +0 -10
  252. data/test/files/atom.xml +0 -344
  253. data/test/files/bar/bar.xsd +0 -4
  254. data/test/files/bogus.xml +0 -0
  255. data/test/files/dont_hurt_em_why.xml +0 -422
  256. data/test/files/encoding.html +0 -82
  257. data/test/files/encoding.xhtml +0 -84
  258. data/test/files/exslt.xml +0 -8
  259. data/test/files/exslt.xslt +0 -35
  260. data/test/files/foo/foo.xsd +0 -4
  261. data/test/files/metacharset.html +0 -10
  262. data/test/files/namespace_pressure_test.xml +0 -1684
  263. data/test/files/noencoding.html +0 -47
  264. data/test/files/po.xml +0 -32
  265. data/test/files/po.xsd +0 -66
  266. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  267. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  268. data/test/files/saml/xenc_schema.xsd +0 -146
  269. data/test/files/saml/xmldsig_schema.xsd +0 -318
  270. data/test/files/shift_jis.html +0 -10
  271. data/test/files/shift_jis.xml +0 -5
  272. data/test/files/shift_jis_no_charset.html +0 -9
  273. data/test/files/slow-xpath.xml +0 -25509
  274. data/test/files/snuggles.xml +0 -3
  275. data/test/files/staff.dtd +0 -10
  276. data/test/files/staff.xml +0 -59
  277. data/test/files/staff.xslt +0 -32
  278. data/test/files/test_document_url/bar.xml +0 -2
  279. data/test/files/test_document_url/document.dtd +0 -4
  280. data/test/files/test_document_url/document.xml +0 -6
  281. data/test/files/tlm.html +0 -851
  282. data/test/files/to_be_xincluded.xml +0 -2
  283. data/test/files/valid_bar.xml +0 -2
  284. data/test/files/xinclude.xml +0 -4
  285. data/test/helper.rb +0 -271
  286. data/test/html/sax/test_parser.rb +0 -168
  287. data/test/html/sax/test_parser_context.rb +0 -46
  288. data/test/html/sax/test_parser_text.rb +0 -163
  289. data/test/html/sax/test_push_parser.rb +0 -87
  290. data/test/html/test_attributes.rb +0 -85
  291. data/test/html/test_builder.rb +0 -164
  292. data/test/html/test_document.rb +0 -712
  293. data/test/html/test_document_encoding.rb +0 -143
  294. data/test/html/test_document_fragment.rb +0 -310
  295. data/test/html/test_element_description.rb +0 -105
  296. data/test/html/test_named_characters.rb +0 -14
  297. data/test/html/test_node.rb +0 -212
  298. data/test/html/test_node_encoding.rb +0 -91
  299. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  300. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  301. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  302. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  303. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  304. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  305. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  306. data/test/test_convert_xpath.rb +0 -135
  307. data/test/test_css_cache.rb +0 -47
  308. data/test/test_encoding_handler.rb +0 -48
  309. data/test/test_memory_leak.rb +0 -156
  310. data/test/test_nokogiri.rb +0 -138
  311. data/test/test_soap4r_sax.rb +0 -52
  312. data/test/test_xslt_transforms.rb +0 -314
  313. data/test/xml/node/test_save_options.rb +0 -28
  314. data/test/xml/node/test_subclass.rb +0 -44
  315. data/test/xml/sax/test_parser.rb +0 -402
  316. data/test/xml/sax/test_parser_context.rb +0 -115
  317. data/test/xml/sax/test_parser_text.rb +0 -202
  318. data/test/xml/sax/test_push_parser.rb +0 -265
  319. data/test/xml/test_attr.rb +0 -74
  320. data/test/xml/test_attribute_decl.rb +0 -86
  321. data/test/xml/test_builder.rb +0 -341
  322. data/test/xml/test_c14n.rb +0 -180
  323. data/test/xml/test_cdata.rb +0 -54
  324. data/test/xml/test_comment.rb +0 -40
  325. data/test/xml/test_document.rb +0 -982
  326. data/test/xml/test_document_encoding.rb +0 -31
  327. data/test/xml/test_document_fragment.rb +0 -298
  328. data/test/xml/test_dtd.rb +0 -187
  329. data/test/xml/test_dtd_encoding.rb +0 -31
  330. data/test/xml/test_element_content.rb +0 -56
  331. data/test/xml/test_element_decl.rb +0 -73
  332. data/test/xml/test_entity_decl.rb +0 -122
  333. data/test/xml/test_entity_reference.rb +0 -262
  334. data/test/xml/test_namespace.rb +0 -96
  335. data/test/xml/test_node.rb +0 -1325
  336. data/test/xml/test_node_attributes.rb +0 -115
  337. data/test/xml/test_node_encoding.rb +0 -75
  338. data/test/xml/test_node_inheritance.rb +0 -32
  339. data/test/xml/test_node_reparenting.rb +0 -592
  340. data/test/xml/test_node_set.rb +0 -809
  341. data/test/xml/test_parse_options.rb +0 -64
  342. data/test/xml/test_processing_instruction.rb +0 -30
  343. data/test/xml/test_reader.rb +0 -620
  344. data/test/xml/test_reader_encoding.rb +0 -134
  345. data/test/xml/test_relax_ng.rb +0 -60
  346. data/test/xml/test_schema.rb +0 -142
  347. data/test/xml/test_syntax_error.rb +0 -36
  348. data/test/xml/test_text.rb +0 -60
  349. data/test/xml/test_unparented_node.rb +0 -483
  350. data/test/xml/test_xinclude.rb +0 -83
  351. data/test/xml/test_xpath.rb +0 -470
  352. data/test/xslt/test_custom_functions.rb +0 -133
  353. data/test/xslt/test_exception_handling.rb +0 -37
@@ -1,712 +0,0 @@
1
- require "helper"
2
-
3
- module Nokogiri
4
- module HTML
5
- class TestDocument < Nokogiri::TestCase
6
- def setup
7
- super
8
- @html = Nokogiri::HTML.parse(File.read(HTML_FILE))
9
- end
10
-
11
- def test_nil_css
12
- # Behavior is undefined but shouldn't break
13
- assert @html.css(nil)
14
- assert @html.xpath(nil)
15
- end
16
-
17
- def test_does_not_fail_with_illformatted_html
18
- doc = Nokogiri::HTML('"</html>";'.dup.force_encoding(Encoding::BINARY))
19
- assert_not_nil doc
20
- end
21
-
22
- def test_exceptions_remove_newlines
23
- errors = @html.errors
24
- assert errors.length > 0, 'has errors'
25
- errors.each do |error|
26
- assert_equal(error.to_s.chomp, error.to_s)
27
- end
28
- end
29
-
30
- def test_fragment
31
- fragment = @html.fragment
32
- assert_equal 0, fragment.children.length
33
- end
34
-
35
- def test_document_takes_config_block
36
- options = nil
37
- Nokogiri::HTML(File.read(HTML_FILE), HTML_FILE) do |cfg|
38
- options = cfg
39
- options.nonet.nowarning.dtdattr
40
- end
41
- assert options.nonet?
42
- assert options.nowarning?
43
- assert options.dtdattr?
44
- end
45
-
46
- def test_parse_takes_config_block
47
- options = nil
48
- Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE) do |cfg|
49
- options = cfg
50
- options.nonet.nowarning.dtdattr
51
- end
52
- assert options.nonet?
53
- assert options.nowarning?
54
- assert options.dtdattr?
55
- end
56
-
57
- def test_subclass
58
- klass = Class.new(Nokogiri::HTML::Document)
59
- doc = klass.new
60
- assert_instance_of klass, doc
61
- end
62
-
63
- def test_subclass_initialize
64
- klass = Class.new(Nokogiri::HTML::Document) do
65
- attr_accessor :initialized_with
66
-
67
- def initialize(*args)
68
- @initialized_with = args
69
- end
70
- end
71
- doc = klass.new("uri", "external_id", 1)
72
- assert_equal ["uri", "external_id", 1], doc.initialized_with
73
- end
74
-
75
- def test_subclass_dup
76
- klass = Class.new(Nokogiri::HTML::Document)
77
- doc = klass.new.dup
78
- assert_instance_of klass, doc
79
- end
80
-
81
- def test_subclass_parse
82
- klass = Class.new(Nokogiri::HTML::Document)
83
- doc = klass.parse(File.read(HTML_FILE))
84
- assert_equal @html.to_s, doc.to_s
85
- assert_instance_of klass, doc
86
- end
87
-
88
- def test_document_parse_method
89
- html = Nokogiri::HTML::Document.parse(File.read(HTML_FILE))
90
- assert_equal @html.to_s, html.to_s
91
- end
92
-
93
- def test_document_parse_method_with_url
94
- require 'open-uri'
95
- begin
96
- html = open('https://www.yahoo.com').read
97
- rescue Exception => e
98
- skip("This test needs the internet. Skips if no internet available. (#{e})")
99
- end
100
- doc = Nokogiri::HTML html ,"http:/foobar.foobar/", 'UTF-8'
101
- refute_empty doc.to_s, "Document should not be empty"
102
- end
103
-
104
- ###
105
- # Nokogiri::HTML returns an empty Document when given a blank string GH#11
106
- def test_empty_string_returns_empty_doc
107
- doc = Nokogiri::HTML('')
108
- assert_instance_of Nokogiri::HTML::Document, doc
109
- assert_nil doc.root
110
- end
111
-
112
- unless Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
113
- # FIXME: this is a hack around broken libxml versions
114
- def test_to_xhtml_with_indent
115
- doc = Nokogiri::HTML('<html><body><a>foo</a></body></html>')
116
- doc = Nokogiri::HTML(doc.to_xhtml(:indent => 2))
117
- assert_indent 2, doc
118
- end
119
-
120
- def test_write_to_xhtml_with_indent
121
- io = StringIO.new
122
- doc = Nokogiri::HTML('<html><body><a>foo</a></body></html>')
123
- doc.write_xhtml_to io, :indent => 5
124
- io.rewind
125
- doc = Nokogiri::HTML(io.read)
126
- assert_indent 5, doc
127
- end
128
- end
129
-
130
- def test_swap_should_not_exist
131
- assert_raises(NoMethodError) {
132
- @html.swap
133
- }
134
- end
135
-
136
- def test_namespace_should_not_exist
137
- assert_raises(NoMethodError) {
138
- @html.namespace
139
- }
140
- end
141
-
142
- def test_meta_encoding
143
- assert_equal 'UTF-8', @html.meta_encoding
144
- end
145
-
146
- def test_meta_encoding_is_strict_about_http_equiv
147
- doc = Nokogiri::HTML(<<-eohtml)
148
- <html>
149
- <head>
150
- <meta http-equiv="X-Content-Type" content="text/html; charset=Shift_JIS">
151
- </head>
152
- <body>
153
- foo
154
- </body>
155
- </html>
156
- eohtml
157
- assert_nil doc.meta_encoding
158
- end
159
-
160
- def test_meta_encoding_handles_malformed_content_charset
161
- doc = Nokogiri::HTML(<<EOHTML)
162
- <html>
163
- <head>
164
- <meta http-equiv="Content-type" content="text/html; utf-8" />
165
- </head>
166
- <body>
167
- foo
168
- </body>
169
- </html>
170
- EOHTML
171
- assert_nil doc.meta_encoding
172
- end
173
-
174
- def test_meta_encoding_checks_charset
175
- doc = Nokogiri::HTML(<<-eohtml)
176
- <html>
177
- <head>
178
- <meta charset="UTF-8">
179
- </head>
180
- <body>
181
- foo
182
- </body>
183
- </html>
184
- eohtml
185
- assert_equal 'UTF-8', doc.meta_encoding
186
- end
187
-
188
- def test_meta_encoding=
189
- @html.meta_encoding = 'EUC-JP'
190
- assert_equal 'EUC-JP', @html.meta_encoding
191
- end
192
-
193
- def test_title
194
- assert_equal 'Tender Lovemaking ', @html.title
195
- doc = Nokogiri::HTML('<html><body>foo</body></html>')
196
- assert_nil doc.title
197
- end
198
-
199
- def test_title=()
200
- doc = Nokogiri::HTML(<<eohtml)
201
- <html>
202
- <head>
203
- <title>old</title>
204
- </head>
205
- <body>
206
- foo
207
- </body>
208
- </html>
209
- eohtml
210
- doc.title = 'new'
211
- assert_equal 1, doc.css('title').size
212
- assert_equal 'new', doc.title
213
-
214
- doc = Nokogiri::HTML(<<eohtml)
215
- <html>
216
- <head>
217
- <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
218
- </head>
219
- <body>
220
- foo
221
- </body>
222
- </html>
223
- eohtml
224
- doc.title = 'new'
225
- assert_equal 'new', doc.title
226
- title = doc.at('/html/head/title')
227
- assert_not_nil title
228
- assert_equal 'new', title.text
229
- assert_equal(-1, doc.at('meta[@http-equiv]') <=> title)
230
-
231
- doc = Nokogiri::HTML(<<eohtml)
232
- <html>
233
- <body>
234
- foo
235
- </body>
236
- </html>
237
- eohtml
238
- doc.title = 'new'
239
- assert_equal 'new', doc.title
240
- # <head> may or may not be added
241
- title = doc.at('/html//title')
242
- assert_not_nil title
243
- assert_equal 'new', title.text
244
- assert_equal(-1, title <=> doc.at('body'))
245
-
246
- doc = Nokogiri::HTML(<<eohtml)
247
- <html>
248
- <meta charset="UTF-8">
249
- <body>
250
- foo
251
- </body>
252
- </html>
253
- eohtml
254
- doc.title = 'new'
255
- assert_equal 'new', doc.title
256
- assert_equal(-1, doc.at('meta[@charset]') <=> doc.at('title'))
257
- assert_equal(-1, doc.at('title') <=> doc.at('body'))
258
-
259
- doc = Nokogiri::HTML('<!DOCTYPE html><p>hello')
260
- doc.title = 'new'
261
- assert_equal 'new', doc.title
262
- assert_instance_of Nokogiri::XML::DTD, doc.children.first
263
- assert_equal(-1, doc.at('title') <=> doc.at('p'))
264
-
265
- doc = Nokogiri::HTML('')
266
- doc.title = 'new'
267
- assert_equal 'new', doc.title
268
- assert_equal 'new', doc.at('/html/head/title/text()').to_s
269
- end
270
-
271
- def test_meta_encoding_without_head
272
- encoding = 'EUC-JP'
273
- html = Nokogiri::HTML('<html><body>foo</body></html>', nil, encoding)
274
-
275
- assert_nil html.meta_encoding
276
-
277
- html.meta_encoding = encoding
278
- assert_equal encoding, html.meta_encoding
279
-
280
- meta = html.at('/html/head/meta[@http-equiv and boolean(@content)]')
281
- assert meta, 'meta is in head'
282
-
283
- assert meta.at('./parent::head/following-sibling::body'), 'meta is before body'
284
- end
285
-
286
- def test_html5_meta_encoding_without_head
287
- encoding = 'EUC-JP'
288
- html = Nokogiri::HTML('<!DOCTYPE html><html><body>foo</body></html>', nil, encoding)
289
-
290
- assert_nil html.meta_encoding
291
-
292
- html.meta_encoding = encoding
293
- assert_equal encoding, html.meta_encoding
294
-
295
- meta = html.at('/html/head/meta[@charset]')
296
- assert meta, 'meta is in head'
297
-
298
- assert meta.at('./parent::head/following-sibling::body'), 'meta is before body'
299
- end
300
-
301
- def test_meta_encoding_with_empty_content_type
302
- html = Nokogiri::HTML(<<-eohtml)
303
- <html>
304
- <head>
305
- <meta http-equiv="Content-Type" content="">
306
- </head>
307
- <body>
308
- foo
309
- </body>
310
- </html>
311
- eohtml
312
- assert_nil html.meta_encoding
313
-
314
- html = Nokogiri::HTML(<<-eohtml)
315
- <html>
316
- <head>
317
- <meta http-equiv="Content-Type">
318
- </head>
319
- <body>
320
- foo
321
- </body>
322
- </html>
323
- eohtml
324
- assert_nil html.meta_encoding
325
- end
326
-
327
- def test_root_node_parent_is_document
328
- parent = @html.root.parent
329
- assert_equal @html, parent
330
- assert_instance_of Nokogiri::HTML::Document, parent
331
- end
332
-
333
- def test_parse_handles_nil_gracefully
334
- @doc = Nokogiri::HTML::Document.parse(nil)
335
- assert_instance_of Nokogiri::HTML::Document, @doc
336
- end
337
-
338
- def test_parse_empty_document
339
- doc = Nokogiri::HTML("\n")
340
- assert_equal 0, doc.css('a').length
341
- assert_equal 0, doc.xpath('//a').length
342
- assert_equal 0, doc.search('//a').length
343
- end
344
-
345
- def test_HTML_function
346
- html = Nokogiri::HTML(File.read(HTML_FILE))
347
- assert html.html?
348
- end
349
-
350
- def test_parse_io
351
- assert File.open(HTML_FILE, 'rb') { |f|
352
- Document.read_io(f, nil, 'UTF-8',
353
- XML::ParseOptions::NOERROR | XML::ParseOptions::NOWARNING
354
- )
355
- }
356
- end
357
-
358
- def test_parse_temp_file
359
- temp_html_file = Tempfile.new("TEMP_HTML_FILE")
360
- File.open(HTML_FILE, 'rb') { |f| temp_html_file.write f.read }
361
- temp_html_file.close
362
- temp_html_file.open
363
- assert_equal Nokogiri::HTML.parse(File.read(HTML_FILE)).xpath('//div/a').length,
364
- Nokogiri::HTML.parse(temp_html_file).xpath('//div/a').length
365
- end
366
-
367
- def test_to_xhtml
368
- assert_match 'XHTML', @html.to_xhtml
369
- assert_match 'XHTML', @html.to_xhtml(:encoding => 'UTF-8')
370
- assert_match 'UTF-8', @html.to_xhtml(:encoding => 'UTF-8')
371
- end
372
-
373
- def test_no_xml_header
374
- html = Nokogiri::HTML(<<-eohtml)
375
- <html>
376
- </html>
377
- eohtml
378
- assert html.to_html.length > 0, 'html length is too short'
379
- assert_no_match(/^<\?xml/, html.to_html)
380
- end
381
-
382
- def test_document_has_error
383
- html = Nokogiri::HTML(<<-eohtml)
384
- <html>
385
- <body>
386
- <div awesome="asdf>
387
- <p>inside div tag</p>
388
- </div>
389
- <p>outside div tag</p>
390
- </body>
391
- </html>
392
- eohtml
393
- assert html.errors.length > 0
394
- end
395
-
396
- def test_relative_css
397
- html = Nokogiri::HTML(<<-eohtml)
398
- <html>
399
- <body>
400
- <div>
401
- <p>inside div tag</p>
402
- </div>
403
- <p>outside div tag</p>
404
- </body>
405
- </html>
406
- eohtml
407
- set = html.search('div').search('p')
408
- assert_equal(1, set.length)
409
- assert_equal('inside div tag', set.first.inner_text)
410
- end
411
-
412
- def test_multi_css
413
- html = Nokogiri::HTML(<<-eohtml)
414
- <html>
415
- <body>
416
- <div>
417
- <p>p tag</p>
418
- <a>a tag</a>
419
- </div>
420
- </body>
421
- </html>
422
- eohtml
423
- set = html.css('p, a')
424
- assert_equal(2, set.length)
425
- assert_equal ['a tag', 'p tag'].sort, set.map(&:content).sort
426
- end
427
-
428
- def test_inner_text
429
- html = Nokogiri::HTML(<<-eohtml)
430
- <html>
431
- <body>
432
- <div>
433
- <p>
434
- Hello world!
435
- </p>
436
- </div>
437
- </body>
438
- </html>
439
- eohtml
440
- node = html.xpath('//div').first
441
- assert_equal('Hello world!', node.inner_text.strip)
442
- end
443
-
444
- def test_doc_type
445
- html = Nokogiri::HTML(<<-eohtml)
446
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
447
- <html xmlns="http://www.w3.org/1999/xhtml">
448
- <body>
449
- <p>Rainbow Dash</p>
450
- </body>
451
- </html>
452
- eohtml
453
- assert_equal "html", html.internal_subset.name
454
- assert_equal "-//W3C//DTD XHTML 1.1//EN", html.internal_subset.external_id
455
- assert_equal "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", html.internal_subset.system_id
456
- assert_equal "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">", html.to_s[0,97]
457
- end
458
-
459
- def test_content_size
460
- html = Nokogiri::HTML("<div>\n</div>")
461
- assert_equal 1, html.content.size
462
- assert_equal 1, html.content.split("").size
463
- assert_equal "\n", html.content
464
- end
465
-
466
- def test_find_by_xpath
467
- found = @html.xpath('//div/a')
468
- assert_equal 3, found.length
469
- end
470
-
471
- def test_find_by_css
472
- found = @html.css('div > a')
473
- assert_equal 3, found.length
474
- end
475
-
476
- def test_find_by_css_with_square_brackets
477
- found = @html.css("div[@id='header'] > h1")
478
- found = @html.css("div[@id='header'] h1") # this blows up on commit 6fa0f6d329d9dbf1cc21c0ac72f7e627bb4c05fc
479
- assert_equal 1, found.length
480
- end
481
-
482
- def test_find_by_css_with_escaped_characters
483
- found_without_escape = @html.css("div[@id='abc.123']")
484
- found_by_id = @html.css('#abc\.123')
485
- found_by_class = @html.css('.special\.character')
486
- assert_equal 1, found_without_escape.length
487
- assert_equal found_by_id, found_without_escape
488
- assert_equal found_by_class, found_without_escape
489
- end
490
-
491
- def test_find_with_function
492
- assert @html.css("div:awesome() h1", Class.new {
493
- def awesome divs
494
- [divs.first]
495
- end
496
- }.new)
497
- end
498
-
499
- def test_dup_shallow
500
- found = @html.search('//div/a').first
501
- dup = found.dup(0)
502
- assert dup
503
- assert_equal '', dup.content
504
- end
505
-
506
- def test_search_can_handle_xpath_and_css
507
- found = @html.search('//div/a', 'div > p')
508
- length = @html.xpath('//div/a').length +
509
- @html.css('div > p').length
510
- assert_equal length, found.length
511
- end
512
-
513
- def test_dup_document
514
- assert dup = @html.dup
515
- assert_not_equal dup, @html
516
- assert @html.html?
517
- assert_instance_of Nokogiri::HTML::Document, dup
518
- assert dup.html?, 'duplicate should be html'
519
- assert_equal @html.to_s, dup.to_s
520
- end
521
-
522
- def test_dup_document_shallow
523
- assert dup = @html.dup(0)
524
- assert_not_equal dup, @html
525
- end
526
-
527
- def test_dup
528
- found = @html.search('//div/a').first
529
- dup = found.dup
530
- assert dup
531
- assert_equal found.content, dup.content
532
- assert_equal found.document, dup.document
533
- end
534
-
535
- def test_inner_html
536
- html = Nokogiri::HTML <<-EOHTML
537
- <html>
538
- <body>
539
- <div>
540
- <p>
541
- Hello world!
542
- </p>
543
- </div>
544
- </body>
545
- </html>
546
- EOHTML
547
- node = html.xpath("//div").first
548
- assert_equal("<p>Helloworld!</p>", node.inner_html.gsub(%r{\s}, ""))
549
- end
550
-
551
- def test_round_trip
552
- doc = Nokogiri::HTML(@html.inner_html)
553
- assert_equal @html.root.to_html, doc.root.to_html
554
- end
555
-
556
- def test_fragment_contains_text_node
557
- fragment = Nokogiri::HTML.fragment('fooo')
558
- assert_equal 1, fragment.children.length
559
- assert_equal 'fooo', fragment.inner_text
560
- end
561
-
562
- def test_fragment_includes_two_tags
563
- assert_equal 2, Nokogiri::HTML.fragment("<br/><hr/>").children.length
564
- end
565
-
566
- def test_relative_css_finder
567
- doc = Nokogiri::HTML(<<-eohtml)
568
- <html>
569
- <body>
570
- <div class="red">
571
- <p>
572
- inside red
573
- </p>
574
- </div>
575
- <div class="green">
576
- <p>
577
- inside green
578
- </p>
579
- </div>
580
- </body>
581
- </html>
582
- eohtml
583
- red_divs = doc.css('div.red')
584
- assert_equal 1, red_divs.length
585
- p_tags = red_divs.first.css('p')
586
- assert_equal 1, p_tags.length
587
- assert_equal 'inside red', p_tags.first.text.strip
588
- end
589
-
590
- def test_find_classes
591
- doc = Nokogiri::HTML(<<-eohtml)
592
- <html>
593
- <body>
594
- <p class="red">RED</p>
595
- <p class="awesome red">RED</p>
596
- <p class="notred">GREEN</p>
597
- <p class="green notred">GREEN</p>
598
- </body>
599
- </html>
600
- eohtml
601
- list = doc.css('.red')
602
- assert_equal 2, list.length
603
- assert_equal %w{ RED RED }, list.map(&:text)
604
- end
605
-
606
- def test_parse_can_take_io
607
- html = nil
608
- File.open(HTML_FILE, 'rb') { |f|
609
- html = Nokogiri::HTML(f)
610
- }
611
- assert html.html?
612
- end
613
-
614
- def test_html?
615
- assert !@html.xml?
616
- assert @html.html?
617
- end
618
-
619
- def test_serialize
620
- assert @html.serialize
621
- assert @html.to_html
622
- end
623
-
624
- def test_empty_document
625
- # empty document should return "" #699
626
- assert_equal "", Nokogiri::HTML.parse(nil).text
627
- assert_equal "", Nokogiri::HTML.parse("").text
628
- end
629
-
630
- def test_capturing_nonparse_errors_during_document_clone
631
- # see https://github.com/sparklemotion/nokogiri/issues/1196 for background
632
- original = Nokogiri::HTML.parse("<div id='unique'></div><div id='unique'></div>")
633
- original_errors = original.errors.dup
634
-
635
- copy = original.dup
636
- assert_equal original_errors, copy.errors
637
- end
638
-
639
- def test_capturing_nonparse_errors_during_node_copy_between_docs
640
- # Errors should be emitted while parsing only, and should not change when moving nodes.
641
- doc1 = Nokogiri::HTML("<html><body><diva id='unique'>one</diva></body></html>")
642
- doc2 = Nokogiri::HTML("<html><body><dive id='unique'>two</dive></body></html>")
643
- node1 = doc1.at_css("#unique")
644
- node2 = doc2.at_css("#unique")
645
- original_errors1 = doc1.errors.dup
646
- original_errors2 = doc2.errors.dup
647
- assert original_errors1.any?{|e| e.to_s =~ /Tag diva invalid/ }, "it should complain about the tag name"
648
- assert original_errors2.any?{|e| e.to_s =~ /Tag dive invalid/ }, "it should complain about the tag name"
649
-
650
- node1.add_child node2
651
-
652
- assert_equal original_errors1, doc1.errors
653
- assert_equal original_errors2, doc2.errors
654
- end
655
-
656
- def test_silencing_nonparse_errors_during_attribute_insertion_1262
657
- # see https://github.com/sparklemotion/nokogiri/issues/1262
658
- #
659
- # libxml2 emits a warning when this happens; the JRuby
660
- # implementation does not. so rather than capture the error in
661
- # doc.errors in a platform-dependent way, I'm opting to have
662
- # the error silenced.
663
- #
664
- # So this test doesn't look meaningful, but we want to avoid
665
- # having `ID unique-issue-1262 already defined` emitted to
666
- # stderr when running the test suite.
667
- #
668
- doc = Nokogiri::HTML::Document.new
669
- Nokogiri::XML::Element.new("div", doc).set_attribute('id', 'unique-issue-1262')
670
- Nokogiri::XML::Element.new("div", doc).set_attribute('id', 'unique-issue-1262')
671
- assert_equal 0, doc.errors.length
672
- end
673
-
674
- it "skips encoding for script tags" do
675
- html = Nokogiri::HTML <<-EOHTML
676
- <html>
677
- <head>
678
- <script>var isGreater = 4 > 5;</script>
679
- </head>
680
- <body></body>
681
- </html>
682
- EOHTML
683
- node = html.xpath("//script").first
684
- assert_equal("var isGreater = 4 > 5;", node.inner_html)
685
- end
686
-
687
- it "skips encoding for style tags" do
688
- html = Nokogiri::HTML <<-EOHTML
689
- <html>
690
- <head>
691
- <style>tr > div { display:block; }</style>
692
- </head>
693
- <body></body>
694
- </html>
695
- EOHTML
696
- node = html.xpath("//style").first
697
- assert_equal("tr > div { display:block; }", node.inner_html)
698
- end
699
-
700
- it "does not fail when converting to_html using explicit encoding" do
701
- html_fragment=<<-eos
702
- <img width="16" height="16" src="images/icon.gif" border="0" alt="Inactive hide details for &quot;User&quot; ---19/05/2015 12:55:29---Provvediamo subito nell&#8217;integrare">
703
- eos
704
- doc = Nokogiri::HTML(html_fragment, nil, 'ISO-8859-1')
705
- html = doc.to_html
706
- assert html.index("src=\"images/icon.gif\"")
707
- assert_equal 'ISO-8859-1', html.encoding.name
708
- end
709
-
710
- end
711
- end
712
- end