nokogiri 1.8.5 → 1.15.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (358)
  1. checksums.yaml +4 -4
  2. data/Gemfile +40 -18
  3. data/LICENSE-DEPENDENCIES.md +1636 -1024
  4. data/LICENSE.md +5 -28
  5. data/README.md +203 -90
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -61
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +867 -417
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +215 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +42 -37
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +40 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +401 -237
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +136 -62
  33. data/ext/nokogiri/xml_node.c +1387 -678
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +347 -212
  37. data/ext/nokogiri/xml_relax_ng.c +86 -77
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +145 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +64 -36
  41. data/ext/nokogiri/xml_schema.c +138 -81
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +36 -26
  44. data/ext/nokogiri/xml_xpath_context.c +366 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +111 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +224 -95
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +392 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +223 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +98 -54
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -126
  133. data/lib/nokogiri/xml/document_fragment.rb +104 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +45 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1093 -411
  142. data/lib/nokogiri/xml/node_set.rb +173 -67
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +38 -37
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  169. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  170. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  175. data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
  176. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
  177. metadata +126 -399
  178. data/.autotest +0 -22
  179. data/.cross_rubies +0 -8
  180. data/.editorconfig +0 -17
  181. data/.gemtest +0 -0
  182. data/.travis.yml +0 -63
  183. data/CHANGELOG.md +0 -1368
  184. data/CONTRIBUTING.md +0 -42
  185. data/C_CODING_STYLE.rdoc +0 -33
  186. data/Gemfile-libxml-ruby +0 -3
  187. data/Manifest.txt +0 -370
  188. data/ROADMAP.md +0 -111
  189. data/Rakefile +0 -348
  190. data/SECURITY.md +0 -19
  191. data/STANDARD_RESPONSES.md +0 -47
  192. data/Y_U_NO_GEMSPEC.md +0 -155
  193. data/appveyor.yml +0 -29
  194. data/build_all +0 -44
  195. data/ext/nokogiri/html_document.c +0 -170
  196. data/ext/nokogiri/html_document.h +0 -10
  197. data/ext/nokogiri/html_element_description.c +0 -279
  198. data/ext/nokogiri/html_element_description.h +0 -10
  199. data/ext/nokogiri/html_entity_lookup.c +0 -32
  200. data/ext/nokogiri/html_entity_lookup.h +0 -8
  201. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  202. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  203. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  204. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  205. data/ext/nokogiri/xml_attr.h +0 -9
  206. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  207. data/ext/nokogiri/xml_cdata.h +0 -9
  208. data/ext/nokogiri/xml_comment.h +0 -9
  209. data/ext/nokogiri/xml_document.h +0 -23
  210. data/ext/nokogiri/xml_document_fragment.h +0 -10
  211. data/ext/nokogiri/xml_dtd.h +0 -10
  212. data/ext/nokogiri/xml_element_content.h +0 -10
  213. data/ext/nokogiri/xml_element_decl.h +0 -9
  214. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  215. data/ext/nokogiri/xml_entity_decl.h +0 -10
  216. data/ext/nokogiri/xml_entity_reference.h +0 -9
  217. data/ext/nokogiri/xml_io.c +0 -61
  218. data/ext/nokogiri/xml_io.h +0 -11
  219. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  220. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  221. data/ext/nokogiri/xml_namespace.h +0 -15
  222. data/ext/nokogiri/xml_node.h +0 -13
  223. data/ext/nokogiri/xml_node_set.h +0 -12
  224. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  225. data/ext/nokogiri/xml_reader.h +0 -10
  226. data/ext/nokogiri/xml_relax_ng.h +0 -9
  227. data/ext/nokogiri/xml_sax_parser.h +0 -39
  228. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  229. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  230. data/ext/nokogiri/xml_schema.h +0 -9
  231. data/ext/nokogiri/xml_syntax_error.h +0 -13
  232. data/ext/nokogiri/xml_text.h +0 -9
  233. data/ext/nokogiri/xml_xpath_context.h +0 -10
  234. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  235. data/lib/nokogiri/html/document.rb +0 -335
  236. data/lib/nokogiri/html/document_fragment.rb +0 -49
  237. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  238. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  239. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  240. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  241. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  242. data/patches/sort-patches-by-date +0 -25
  243. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  244. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
  245. data/suppressions/README.txt +0 -1
  246. data/suppressions/nokogiri_ruby-2.supp +0 -10
  247. data/tasks/test.rb +0 -100
  248. data/test/css/test_nthiness.rb +0 -226
  249. data/test/css/test_parser.rb +0 -386
  250. data/test/css/test_tokenizer.rb +0 -215
  251. data/test/css/test_xpath_visitor.rb +0 -96
  252. data/test/decorators/test_slop.rb +0 -23
  253. data/test/files/2ch.html +0 -108
  254. data/test/files/GH_1042.html +0 -18
  255. data/test/files/address_book.rlx +0 -12
  256. data/test/files/address_book.xml +0 -10
  257. data/test/files/atom.xml +0 -344
  258. data/test/files/bar/bar.xsd +0 -4
  259. data/test/files/bogus.xml +0 -0
  260. data/test/files/dont_hurt_em_why.xml +0 -422
  261. data/test/files/encoding.html +0 -82
  262. data/test/files/encoding.xhtml +0 -84
  263. data/test/files/exslt.xml +0 -8
  264. data/test/files/exslt.xslt +0 -35
  265. data/test/files/foo/foo.xsd +0 -4
  266. data/test/files/metacharset.html +0 -10
  267. data/test/files/namespace_pressure_test.xml +0 -1684
  268. data/test/files/noencoding.html +0 -47
  269. data/test/files/po.xml +0 -32
  270. data/test/files/po.xsd +0 -66
  271. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  272. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  273. data/test/files/saml/xenc_schema.xsd +0 -146
  274. data/test/files/saml/xmldsig_schema.xsd +0 -318
  275. data/test/files/shift_jis.html +0 -10
  276. data/test/files/shift_jis.xml +0 -5
  277. data/test/files/shift_jis_no_charset.html +0 -9
  278. data/test/files/slow-xpath.xml +0 -25509
  279. data/test/files/snuggles.xml +0 -3
  280. data/test/files/staff.dtd +0 -10
  281. data/test/files/staff.xml +0 -59
  282. data/test/files/staff.xslt +0 -32
  283. data/test/files/test_document_url/bar.xml +0 -2
  284. data/test/files/test_document_url/document.dtd +0 -4
  285. data/test/files/test_document_url/document.xml +0 -6
  286. data/test/files/tlm.html +0 -851
  287. data/test/files/to_be_xincluded.xml +0 -2
  288. data/test/files/valid_bar.xml +0 -2
  289. data/test/files/xinclude.xml +0 -4
  290. data/test/helper.rb +0 -271
  291. data/test/html/sax/test_parser.rb +0 -168
  292. data/test/html/sax/test_parser_context.rb +0 -46
  293. data/test/html/sax/test_parser_text.rb +0 -163
  294. data/test/html/sax/test_push_parser.rb +0 -87
  295. data/test/html/test_attributes.rb +0 -85
  296. data/test/html/test_builder.rb +0 -164
  297. data/test/html/test_document.rb +0 -712
  298. data/test/html/test_document_encoding.rb +0 -143
  299. data/test/html/test_document_fragment.rb +0 -310
  300. data/test/html/test_element_description.rb +0 -105
  301. data/test/html/test_named_characters.rb +0 -14
  302. data/test/html/test_node.rb +0 -212
  303. data/test/html/test_node_encoding.rb +0 -91
  304. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  305. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  306. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  307. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  308. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  309. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  310. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  311. data/test/test_convert_xpath.rb +0 -135
  312. data/test/test_css_cache.rb +0 -47
  313. data/test/test_encoding_handler.rb +0 -48
  314. data/test/test_memory_leak.rb +0 -156
  315. data/test/test_nokogiri.rb +0 -138
  316. data/test/test_soap4r_sax.rb +0 -52
  317. data/test/test_xslt_transforms.rb +0 -314
  318. data/test/xml/node/test_save_options.rb +0 -28
  319. data/test/xml/node/test_subclass.rb +0 -44
  320. data/test/xml/sax/test_parser.rb +0 -402
  321. data/test/xml/sax/test_parser_context.rb +0 -115
  322. data/test/xml/sax/test_parser_text.rb +0 -202
  323. data/test/xml/sax/test_push_parser.rb +0 -265
  324. data/test/xml/test_attr.rb +0 -74
  325. data/test/xml/test_attribute_decl.rb +0 -86
  326. data/test/xml/test_builder.rb +0 -341
  327. data/test/xml/test_c14n.rb +0 -180
  328. data/test/xml/test_cdata.rb +0 -54
  329. data/test/xml/test_comment.rb +0 -40
  330. data/test/xml/test_document.rb +0 -982
  331. data/test/xml/test_document_encoding.rb +0 -31
  332. data/test/xml/test_document_fragment.rb +0 -298
  333. data/test/xml/test_dtd.rb +0 -187
  334. data/test/xml/test_dtd_encoding.rb +0 -31
  335. data/test/xml/test_element_content.rb +0 -56
  336. data/test/xml/test_element_decl.rb +0 -73
  337. data/test/xml/test_entity_decl.rb +0 -122
  338. data/test/xml/test_entity_reference.rb +0 -262
  339. data/test/xml/test_namespace.rb +0 -96
  340. data/test/xml/test_node.rb +0 -1325
  341. data/test/xml/test_node_attributes.rb +0 -115
  342. data/test/xml/test_node_encoding.rb +0 -75
  343. data/test/xml/test_node_inheritance.rb +0 -32
  344. data/test/xml/test_node_reparenting.rb +0 -592
  345. data/test/xml/test_node_set.rb +0 -809
  346. data/test/xml/test_parse_options.rb +0 -64
  347. data/test/xml/test_processing_instruction.rb +0 -30
  348. data/test/xml/test_reader.rb +0 -620
  349. data/test/xml/test_reader_encoding.rb +0 -134
  350. data/test/xml/test_relax_ng.rb +0 -60
  351. data/test/xml/test_schema.rb +0 -142
  352. data/test/xml/test_syntax_error.rb +0 -36
  353. data/test/xml/test_text.rb +0 -60
  354. data/test/xml/test_unparented_node.rb +0 -483
  355. data/test/xml/test_xinclude.rb +0 -83
  356. data/test/xml/test_xpath.rb +0 -470
  357. data/test/xslt/test_custom_functions.rb +0 -133
  358. data/test/xslt/test_exception_handling.rb +0 -37
@@ -1,712 +0,0 @@
1
- require "helper"
2
-
3
- module Nokogiri
4
- module HTML
5
- class TestDocument < Nokogiri::TestCase
6
- def setup
7
- super
8
- @html = Nokogiri::HTML.parse(File.read(HTML_FILE))
9
- end
10
-
11
- def test_nil_css
12
- # Behavior is undefined but shouldn't break
13
- assert @html.css(nil)
14
- assert @html.xpath(nil)
15
- end
16
-
17
- def test_does_not_fail_with_illformatted_html
18
- doc = Nokogiri::HTML('"</html>";'.dup.force_encoding(Encoding::BINARY))
19
- assert_not_nil doc
20
- end
21
-
22
- def test_exceptions_remove_newlines
23
- errors = @html.errors
24
- assert errors.length > 0, 'has errors'
25
- errors.each do |error|
26
- assert_equal(error.to_s.chomp, error.to_s)
27
- end
28
- end
29
-
30
- def test_fragment
31
- fragment = @html.fragment
32
- assert_equal 0, fragment.children.length
33
- end
34
-
35
- def test_document_takes_config_block
36
- options = nil
37
- Nokogiri::HTML(File.read(HTML_FILE), HTML_FILE) do |cfg|
38
- options = cfg
39
- options.nonet.nowarning.dtdattr
40
- end
41
- assert options.nonet?
42
- assert options.nowarning?
43
- assert options.dtdattr?
44
- end
45
-
46
- def test_parse_takes_config_block
47
- options = nil
48
- Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE) do |cfg|
49
- options = cfg
50
- options.nonet.nowarning.dtdattr
51
- end
52
- assert options.nonet?
53
- assert options.nowarning?
54
- assert options.dtdattr?
55
- end
56
-
57
- def test_subclass
58
- klass = Class.new(Nokogiri::HTML::Document)
59
- doc = klass.new
60
- assert_instance_of klass, doc
61
- end
62
-
63
- def test_subclass_initialize
64
- klass = Class.new(Nokogiri::HTML::Document) do
65
- attr_accessor :initialized_with
66
-
67
- def initialize(*args)
68
- @initialized_with = args
69
- end
70
- end
71
- doc = klass.new("uri", "external_id", 1)
72
- assert_equal ["uri", "external_id", 1], doc.initialized_with
73
- end
74
-
75
- def test_subclass_dup
76
- klass = Class.new(Nokogiri::HTML::Document)
77
- doc = klass.new.dup
78
- assert_instance_of klass, doc
79
- end
80
-
81
- def test_subclass_parse
82
- klass = Class.new(Nokogiri::HTML::Document)
83
- doc = klass.parse(File.read(HTML_FILE))
84
- assert_equal @html.to_s, doc.to_s
85
- assert_instance_of klass, doc
86
- end
87
-
88
- def test_document_parse_method
89
- html = Nokogiri::HTML::Document.parse(File.read(HTML_FILE))
90
- assert_equal @html.to_s, html.to_s
91
- end
92
-
93
- def test_document_parse_method_with_url
94
- require 'open-uri'
95
- begin
96
- html = open('https://www.yahoo.com').read
97
- rescue Exception => e
98
- skip("This test needs the internet. Skips if no internet available. (#{e})")
99
- end
100
- doc = Nokogiri::HTML html ,"http:/foobar.foobar/", 'UTF-8'
101
- refute_empty doc.to_s, "Document should not be empty"
102
- end
103
-
104
- ###
105
- # Nokogiri::HTML returns an empty Document when given a blank string GH#11
106
- def test_empty_string_returns_empty_doc
107
- doc = Nokogiri::HTML('')
108
- assert_instance_of Nokogiri::HTML::Document, doc
109
- assert_nil doc.root
110
- end
111
-
112
- unless Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
113
- # FIXME: this is a hack around broken libxml versions
114
- def test_to_xhtml_with_indent
115
- doc = Nokogiri::HTML('<html><body><a>foo</a></body></html>')
116
- doc = Nokogiri::HTML(doc.to_xhtml(:indent => 2))
117
- assert_indent 2, doc
118
- end
119
-
120
- def test_write_to_xhtml_with_indent
121
- io = StringIO.new
122
- doc = Nokogiri::HTML('<html><body><a>foo</a></body></html>')
123
- doc.write_xhtml_to io, :indent => 5
124
- io.rewind
125
- doc = Nokogiri::HTML(io.read)
126
- assert_indent 5, doc
127
- end
128
- end
129
-
130
- def test_swap_should_not_exist
131
- assert_raises(NoMethodError) {
132
- @html.swap
133
- }
134
- end
135
-
136
- def test_namespace_should_not_exist
137
- assert_raises(NoMethodError) {
138
- @html.namespace
139
- }
140
- end
141
-
142
- def test_meta_encoding
143
- assert_equal 'UTF-8', @html.meta_encoding
144
- end
145
-
146
- def test_meta_encoding_is_strict_about_http_equiv
147
- doc = Nokogiri::HTML(<<-eohtml)
148
- <html>
149
- <head>
150
- <meta http-equiv="X-Content-Type" content="text/html; charset=Shift_JIS">
151
- </head>
152
- <body>
153
- foo
154
- </body>
155
- </html>
156
- eohtml
157
- assert_nil doc.meta_encoding
158
- end
159
-
160
- def test_meta_encoding_handles_malformed_content_charset
161
- doc = Nokogiri::HTML(<<EOHTML)
162
- <html>
163
- <head>
164
- <meta http-equiv="Content-type" content="text/html; utf-8" />
165
- </head>
166
- <body>
167
- foo
168
- </body>
169
- </html>
170
- EOHTML
171
- assert_nil doc.meta_encoding
172
- end
173
-
174
- def test_meta_encoding_checks_charset
175
- doc = Nokogiri::HTML(<<-eohtml)
176
- <html>
177
- <head>
178
- <meta charset="UTF-8">
179
- </head>
180
- <body>
181
- foo
182
- </body>
183
- </html>
184
- eohtml
185
- assert_equal 'UTF-8', doc.meta_encoding
186
- end
187
-
188
- def test_meta_encoding=
189
- @html.meta_encoding = 'EUC-JP'
190
- assert_equal 'EUC-JP', @html.meta_encoding
191
- end
192
-
193
- def test_title
194
- assert_equal 'Tender Lovemaking ', @html.title
195
- doc = Nokogiri::HTML('<html><body>foo</body></html>')
196
- assert_nil doc.title
197
- end
198
-
199
- def test_title=()
200
- doc = Nokogiri::HTML(<<eohtml)
201
- <html>
202
- <head>
203
- <title>old</title>
204
- </head>
205
- <body>
206
- foo
207
- </body>
208
- </html>
209
- eohtml
210
- doc.title = 'new'
211
- assert_equal 1, doc.css('title').size
212
- assert_equal 'new', doc.title
213
-
214
- doc = Nokogiri::HTML(<<eohtml)
215
- <html>
216
- <head>
217
- <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
218
- </head>
219
- <body>
220
- foo
221
- </body>
222
- </html>
223
- eohtml
224
- doc.title = 'new'
225
- assert_equal 'new', doc.title
226
- title = doc.at('/html/head/title')
227
- assert_not_nil title
228
- assert_equal 'new', title.text
229
- assert_equal(-1, doc.at('meta[@http-equiv]') <=> title)
230
-
231
- doc = Nokogiri::HTML(<<eohtml)
232
- <html>
233
- <body>
234
- foo
235
- </body>
236
- </html>
237
- eohtml
238
- doc.title = 'new'
239
- assert_equal 'new', doc.title
240
- # <head> may or may not be added
241
- title = doc.at('/html//title')
242
- assert_not_nil title
243
- assert_equal 'new', title.text
244
- assert_equal(-1, title <=> doc.at('body'))
245
-
246
- doc = Nokogiri::HTML(<<eohtml)
247
- <html>
248
- <meta charset="UTF-8">
249
- <body>
250
- foo
251
- </body>
252
- </html>
253
- eohtml
254
- doc.title = 'new'
255
- assert_equal 'new', doc.title
256
- assert_equal(-1, doc.at('meta[@charset]') <=> doc.at('title'))
257
- assert_equal(-1, doc.at('title') <=> doc.at('body'))
258
-
259
- doc = Nokogiri::HTML('<!DOCTYPE html><p>hello')
260
- doc.title = 'new'
261
- assert_equal 'new', doc.title
262
- assert_instance_of Nokogiri::XML::DTD, doc.children.first
263
- assert_equal(-1, doc.at('title') <=> doc.at('p'))
264
-
265
- doc = Nokogiri::HTML('')
266
- doc.title = 'new'
267
- assert_equal 'new', doc.title
268
- assert_equal 'new', doc.at('/html/head/title/text()').to_s
269
- end
270
-
271
- def test_meta_encoding_without_head
272
- encoding = 'EUC-JP'
273
- html = Nokogiri::HTML('<html><body>foo</body></html>', nil, encoding)
274
-
275
- assert_nil html.meta_encoding
276
-
277
- html.meta_encoding = encoding
278
- assert_equal encoding, html.meta_encoding
279
-
280
- meta = html.at('/html/head/meta[@http-equiv and boolean(@content)]')
281
- assert meta, 'meta is in head'
282
-
283
- assert meta.at('./parent::head/following-sibling::body'), 'meta is before body'
284
- end
285
-
286
- def test_html5_meta_encoding_without_head
287
- encoding = 'EUC-JP'
288
- html = Nokogiri::HTML('<!DOCTYPE html><html><body>foo</body></html>', nil, encoding)
289
-
290
- assert_nil html.meta_encoding
291
-
292
- html.meta_encoding = encoding
293
- assert_equal encoding, html.meta_encoding
294
-
295
- meta = html.at('/html/head/meta[@charset]')
296
- assert meta, 'meta is in head'
297
-
298
- assert meta.at('./parent::head/following-sibling::body'), 'meta is before body'
299
- end
300
-
301
- def test_meta_encoding_with_empty_content_type
302
- html = Nokogiri::HTML(<<-eohtml)
303
- <html>
304
- <head>
305
- <meta http-equiv="Content-Type" content="">
306
- </head>
307
- <body>
308
- foo
309
- </body>
310
- </html>
311
- eohtml
312
- assert_nil html.meta_encoding
313
-
314
- html = Nokogiri::HTML(<<-eohtml)
315
- <html>
316
- <head>
317
- <meta http-equiv="Content-Type">
318
- </head>
319
- <body>
320
- foo
321
- </body>
322
- </html>
323
- eohtml
324
- assert_nil html.meta_encoding
325
- end
326
-
327
- def test_root_node_parent_is_document
328
- parent = @html.root.parent
329
- assert_equal @html, parent
330
- assert_instance_of Nokogiri::HTML::Document, parent
331
- end
332
-
333
- def test_parse_handles_nil_gracefully
334
- @doc = Nokogiri::HTML::Document.parse(nil)
335
- assert_instance_of Nokogiri::HTML::Document, @doc
336
- end
337
-
338
- def test_parse_empty_document
339
- doc = Nokogiri::HTML("\n")
340
- assert_equal 0, doc.css('a').length
341
- assert_equal 0, doc.xpath('//a').length
342
- assert_equal 0, doc.search('//a').length
343
- end
344
-
345
- def test_HTML_function
346
- html = Nokogiri::HTML(File.read(HTML_FILE))
347
- assert html.html?
348
- end
349
-
350
- def test_parse_io
351
- assert File.open(HTML_FILE, 'rb') { |f|
352
- Document.read_io(f, nil, 'UTF-8',
353
- XML::ParseOptions::NOERROR | XML::ParseOptions::NOWARNING
354
- )
355
- }
356
- end
357
-
358
- def test_parse_temp_file
359
- temp_html_file = Tempfile.new("TEMP_HTML_FILE")
360
- File.open(HTML_FILE, 'rb') { |f| temp_html_file.write f.read }
361
- temp_html_file.close
362
- temp_html_file.open
363
- assert_equal Nokogiri::HTML.parse(File.read(HTML_FILE)).xpath('//div/a').length,
364
- Nokogiri::HTML.parse(temp_html_file).xpath('//div/a').length
365
- end
366
-
367
- def test_to_xhtml
368
- assert_match 'XHTML', @html.to_xhtml
369
- assert_match 'XHTML', @html.to_xhtml(:encoding => 'UTF-8')
370
- assert_match 'UTF-8', @html.to_xhtml(:encoding => 'UTF-8')
371
- end
372
-
373
- def test_no_xml_header
374
- html = Nokogiri::HTML(<<-eohtml)
375
- <html>
376
- </html>
377
- eohtml
378
- assert html.to_html.length > 0, 'html length is too short'
379
- assert_no_match(/^<\?xml/, html.to_html)
380
- end
381
-
382
- def test_document_has_error
383
- html = Nokogiri::HTML(<<-eohtml)
384
- <html>
385
- <body>
386
- <div awesome="asdf>
387
- <p>inside div tag</p>
388
- </div>
389
- <p>outside div tag</p>
390
- </body>
391
- </html>
392
- eohtml
393
- assert html.errors.length > 0
394
- end
395
-
396
- def test_relative_css
397
- html = Nokogiri::HTML(<<-eohtml)
398
- <html>
399
- <body>
400
- <div>
401
- <p>inside div tag</p>
402
- </div>
403
- <p>outside div tag</p>
404
- </body>
405
- </html>
406
- eohtml
407
- set = html.search('div').search('p')
408
- assert_equal(1, set.length)
409
- assert_equal('inside div tag', set.first.inner_text)
410
- end
411
-
412
- def test_multi_css
413
- html = Nokogiri::HTML(<<-eohtml)
414
- <html>
415
- <body>
416
- <div>
417
- <p>p tag</p>
418
- <a>a tag</a>
419
- </div>
420
- </body>
421
- </html>
422
- eohtml
423
- set = html.css('p, a')
424
- assert_equal(2, set.length)
425
- assert_equal ['a tag', 'p tag'].sort, set.map(&:content).sort
426
- end
427
-
428
- def test_inner_text
429
- html = Nokogiri::HTML(<<-eohtml)
430
- <html>
431
- <body>
432
- <div>
433
- <p>
434
- Hello world!
435
- </p>
436
- </div>
437
- </body>
438
- </html>
439
- eohtml
440
- node = html.xpath('//div').first
441
- assert_equal('Hello world!', node.inner_text.strip)
442
- end
443
-
444
- def test_doc_type
445
- html = Nokogiri::HTML(<<-eohtml)
446
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
447
- <html xmlns="http://www.w3.org/1999/xhtml">
448
- <body>
449
- <p>Rainbow Dash</p>
450
- </body>
451
- </html>
452
- eohtml
453
- assert_equal "html", html.internal_subset.name
454
- assert_equal "-//W3C//DTD XHTML 1.1//EN", html.internal_subset.external_id
455
- assert_equal "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", html.internal_subset.system_id
456
- assert_equal "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">", html.to_s[0,97]
457
- end
458
-
459
- def test_content_size
460
- html = Nokogiri::HTML("<div>\n</div>")
461
- assert_equal 1, html.content.size
462
- assert_equal 1, html.content.split("").size
463
- assert_equal "\n", html.content
464
- end
465
-
466
- def test_find_by_xpath
467
- found = @html.xpath('//div/a')
468
- assert_equal 3, found.length
469
- end
470
-
471
- def test_find_by_css
472
- found = @html.css('div > a')
473
- assert_equal 3, found.length
474
- end
475
-
476
- def test_find_by_css_with_square_brackets
477
- found = @html.css("div[@id='header'] > h1")
478
- found = @html.css("div[@id='header'] h1") # this blows up on commit 6fa0f6d329d9dbf1cc21c0ac72f7e627bb4c05fc
479
- assert_equal 1, found.length
480
- end
481
-
482
- def test_find_by_css_with_escaped_characters
483
- found_without_escape = @html.css("div[@id='abc.123']")
484
- found_by_id = @html.css('#abc\.123')
485
- found_by_class = @html.css('.special\.character')
486
- assert_equal 1, found_without_escape.length
487
- assert_equal found_by_id, found_without_escape
488
- assert_equal found_by_class, found_without_escape
489
- end
490
-
491
- def test_find_with_function
492
- assert @html.css("div:awesome() h1", Class.new {
493
- def awesome divs
494
- [divs.first]
495
- end
496
- }.new)
497
- end
498
-
499
- def test_dup_shallow
500
- found = @html.search('//div/a').first
501
- dup = found.dup(0)
502
- assert dup
503
- assert_equal '', dup.content
504
- end
505
-
506
- def test_search_can_handle_xpath_and_css
507
- found = @html.search('//div/a', 'div > p')
508
- length = @html.xpath('//div/a').length +
509
- @html.css('div > p').length
510
- assert_equal length, found.length
511
- end
512
-
513
- def test_dup_document
514
- assert dup = @html.dup
515
- assert_not_equal dup, @html
516
- assert @html.html?
517
- assert_instance_of Nokogiri::HTML::Document, dup
518
- assert dup.html?, 'duplicate should be html'
519
- assert_equal @html.to_s, dup.to_s
520
- end
521
-
522
- def test_dup_document_shallow
523
- assert dup = @html.dup(0)
524
- assert_not_equal dup, @html
525
- end
526
-
527
- def test_dup
528
- found = @html.search('//div/a').first
529
- dup = found.dup
530
- assert dup
531
- assert_equal found.content, dup.content
532
- assert_equal found.document, dup.document
533
- end
534
-
535
- def test_inner_html
536
- html = Nokogiri::HTML <<-EOHTML
537
- <html>
538
- <body>
539
- <div>
540
- <p>
541
- Hello world!
542
- </p>
543
- </div>
544
- </body>
545
- </html>
546
- EOHTML
547
- node = html.xpath("//div").first
548
- assert_equal("<p>Helloworld!</p>", node.inner_html.gsub(%r{\s}, ""))
549
- end
550
-
551
- def test_round_trip
552
- doc = Nokogiri::HTML(@html.inner_html)
553
- assert_equal @html.root.to_html, doc.root.to_html
554
- end
555
-
556
- def test_fragment_contains_text_node
557
- fragment = Nokogiri::HTML.fragment('fooo')
558
- assert_equal 1, fragment.children.length
559
- assert_equal 'fooo', fragment.inner_text
560
- end
561
-
562
- def test_fragment_includes_two_tags
563
- assert_equal 2, Nokogiri::HTML.fragment("<br/><hr/>").children.length
564
- end
565
-
566
- def test_relative_css_finder
567
- doc = Nokogiri::HTML(<<-eohtml)
568
- <html>
569
- <body>
570
- <div class="red">
571
- <p>
572
- inside red
573
- </p>
574
- </div>
575
- <div class="green">
576
- <p>
577
- inside green
578
- </p>
579
- </div>
580
- </body>
581
- </html>
582
- eohtml
583
- red_divs = doc.css('div.red')
584
- assert_equal 1, red_divs.length
585
- p_tags = red_divs.first.css('p')
586
- assert_equal 1, p_tags.length
587
- assert_equal 'inside red', p_tags.first.text.strip
588
- end
589
-
590
- def test_find_classes
591
- doc = Nokogiri::HTML(<<-eohtml)
592
- <html>
593
- <body>
594
- <p class="red">RED</p>
595
- <p class="awesome red">RED</p>
596
- <p class="notred">GREEN</p>
597
- <p class="green notred">GREEN</p>
598
- </body>
599
- </html>
600
- eohtml
601
- list = doc.css('.red')
602
- assert_equal 2, list.length
603
- assert_equal %w{ RED RED }, list.map(&:text)
604
- end
605
-
606
- def test_parse_can_take_io
607
- html = nil
608
- File.open(HTML_FILE, 'rb') { |f|
609
- html = Nokogiri::HTML(f)
610
- }
611
- assert html.html?
612
- end
613
-
614
- def test_html?
615
- assert !@html.xml?
616
- assert @html.html?
617
- end
618
-
619
- def test_serialize
620
- assert @html.serialize
621
- assert @html.to_html
622
- end
623
-
624
- def test_empty_document
625
- # empty document should return "" #699
626
- assert_equal "", Nokogiri::HTML.parse(nil).text
627
- assert_equal "", Nokogiri::HTML.parse("").text
628
- end
629
-
630
- def test_capturing_nonparse_errors_during_document_clone
631
- # see https://github.com/sparklemotion/nokogiri/issues/1196 for background
632
- original = Nokogiri::HTML.parse("<div id='unique'></div><div id='unique'></div>")
633
- original_errors = original.errors.dup
634
-
635
- copy = original.dup
636
- assert_equal original_errors, copy.errors
637
- end
638
-
639
- def test_capturing_nonparse_errors_during_node_copy_between_docs
640
- # Errors should be emitted while parsing only, and should not change when moving nodes.
641
- doc1 = Nokogiri::HTML("<html><body><diva id='unique'>one</diva></body></html>")
642
- doc2 = Nokogiri::HTML("<html><body><dive id='unique'>two</dive></body></html>")
643
- node1 = doc1.at_css("#unique")
644
- node2 = doc2.at_css("#unique")
645
- original_errors1 = doc1.errors.dup
646
- original_errors2 = doc2.errors.dup
647
- assert original_errors1.any?{|e| e.to_s =~ /Tag diva invalid/ }, "it should complain about the tag name"
648
- assert original_errors2.any?{|e| e.to_s =~ /Tag dive invalid/ }, "it should complain about the tag name"
649
-
650
- node1.add_child node2
651
-
652
- assert_equal original_errors1, doc1.errors
653
- assert_equal original_errors2, doc2.errors
654
- end
655
-
656
- def test_silencing_nonparse_errors_during_attribute_insertion_1262
657
- # see https://github.com/sparklemotion/nokogiri/issues/1262
658
- #
659
- # libxml2 emits a warning when this happens; the JRuby
660
- # implementation does not. so rather than capture the error in
661
- # doc.errors in a platform-dependent way, I'm opting to have
662
- # the error silenced.
663
- #
664
- # So this test doesn't look meaningful, but we want to avoid
665
- # having `ID unique-issue-1262 already defined` emitted to
666
- # stderr when running the test suite.
667
- #
668
- doc = Nokogiri::HTML::Document.new
669
- Nokogiri::XML::Element.new("div", doc).set_attribute('id', 'unique-issue-1262')
670
- Nokogiri::XML::Element.new("div", doc).set_attribute('id', 'unique-issue-1262')
671
- assert_equal 0, doc.errors.length
672
- end
673
-
674
- it "skips encoding for script tags" do
675
- html = Nokogiri::HTML <<-EOHTML
676
- <html>
677
- <head>
678
- <script>var isGreater = 4 > 5;</script>
679
- </head>
680
- <body></body>
681
- </html>
682
- EOHTML
683
- node = html.xpath("//script").first
684
- assert_equal("var isGreater = 4 > 5;", node.inner_html)
685
- end
686
-
687
- it "skips encoding for style tags" do
688
- html = Nokogiri::HTML <<-EOHTML
689
- <html>
690
- <head>
691
- <style>tr > div { display:block; }</style>
692
- </head>
693
- <body></body>
694
- </html>
695
- EOHTML
696
- node = html.xpath("//style").first
697
- assert_equal("tr > div { display:block; }", node.inner_html)
698
- end
699
-
700
- it "does not fail when converting to_html using explicit encoding" do
701
- html_fragment=<<-eos
702
- <img width="16" height="16" src="images/icon.gif" border="0" alt="Inactive hide details for &quot;User&quot; ---19/05/2015 12:55:29---Provvediamo subito nell&#8217;integrare">
703
- eos
704
- doc = Nokogiri::HTML(html_fragment, nil, 'ISO-8859-1')
705
- html = doc.to_html
706
- assert html.index("src=\"images/icon.gif\"")
707
- assert_equal 'ISO-8859-1', html.encoding.name
708
- end
709
-
710
- end
711
- end
712
- end