nokogiri 1.8.5 → 1.15.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (358) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +40 -18
  3. data/LICENSE-DEPENDENCIES.md +1636 -1024
  4. data/LICENSE.md +5 -28
  5. data/README.md +203 -90
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -61
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +867 -417
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +215 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +42 -37
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +40 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +401 -237
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +136 -62
  33. data/ext/nokogiri/xml_node.c +1387 -678
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +347 -212
  37. data/ext/nokogiri/xml_relax_ng.c +86 -77
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +145 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +64 -36
  41. data/ext/nokogiri/xml_schema.c +138 -81
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +36 -26
  44. data/ext/nokogiri/xml_xpath_context.c +366 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +111 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +224 -95
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +392 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +223 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +98 -54
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -126
  133. data/lib/nokogiri/xml/document_fragment.rb +104 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +45 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1093 -411
  142. data/lib/nokogiri/xml/node_set.rb +173 -67
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +38 -37
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  169. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  170. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  175. data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
  176. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
  177. metadata +126 -399
  178. data/.autotest +0 -22
  179. data/.cross_rubies +0 -8
  180. data/.editorconfig +0 -17
  181. data/.gemtest +0 -0
  182. data/.travis.yml +0 -63
  183. data/CHANGELOG.md +0 -1368
  184. data/CONTRIBUTING.md +0 -42
  185. data/C_CODING_STYLE.rdoc +0 -33
  186. data/Gemfile-libxml-ruby +0 -3
  187. data/Manifest.txt +0 -370
  188. data/ROADMAP.md +0 -111
  189. data/Rakefile +0 -348
  190. data/SECURITY.md +0 -19
  191. data/STANDARD_RESPONSES.md +0 -47
  192. data/Y_U_NO_GEMSPEC.md +0 -155
  193. data/appveyor.yml +0 -29
  194. data/build_all +0 -44
  195. data/ext/nokogiri/html_document.c +0 -170
  196. data/ext/nokogiri/html_document.h +0 -10
  197. data/ext/nokogiri/html_element_description.c +0 -279
  198. data/ext/nokogiri/html_element_description.h +0 -10
  199. data/ext/nokogiri/html_entity_lookup.c +0 -32
  200. data/ext/nokogiri/html_entity_lookup.h +0 -8
  201. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  202. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  203. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  204. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  205. data/ext/nokogiri/xml_attr.h +0 -9
  206. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  207. data/ext/nokogiri/xml_cdata.h +0 -9
  208. data/ext/nokogiri/xml_comment.h +0 -9
  209. data/ext/nokogiri/xml_document.h +0 -23
  210. data/ext/nokogiri/xml_document_fragment.h +0 -10
  211. data/ext/nokogiri/xml_dtd.h +0 -10
  212. data/ext/nokogiri/xml_element_content.h +0 -10
  213. data/ext/nokogiri/xml_element_decl.h +0 -9
  214. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  215. data/ext/nokogiri/xml_entity_decl.h +0 -10
  216. data/ext/nokogiri/xml_entity_reference.h +0 -9
  217. data/ext/nokogiri/xml_io.c +0 -61
  218. data/ext/nokogiri/xml_io.h +0 -11
  219. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  220. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  221. data/ext/nokogiri/xml_namespace.h +0 -15
  222. data/ext/nokogiri/xml_node.h +0 -13
  223. data/ext/nokogiri/xml_node_set.h +0 -12
  224. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  225. data/ext/nokogiri/xml_reader.h +0 -10
  226. data/ext/nokogiri/xml_relax_ng.h +0 -9
  227. data/ext/nokogiri/xml_sax_parser.h +0 -39
  228. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  229. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  230. data/ext/nokogiri/xml_schema.h +0 -9
  231. data/ext/nokogiri/xml_syntax_error.h +0 -13
  232. data/ext/nokogiri/xml_text.h +0 -9
  233. data/ext/nokogiri/xml_xpath_context.h +0 -10
  234. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  235. data/lib/nokogiri/html/document.rb +0 -335
  236. data/lib/nokogiri/html/document_fragment.rb +0 -49
  237. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  238. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  239. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  240. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  241. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  242. data/patches/sort-patches-by-date +0 -25
  243. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  244. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
  245. data/suppressions/README.txt +0 -1
  246. data/suppressions/nokogiri_ruby-2.supp +0 -10
  247. data/tasks/test.rb +0 -100
  248. data/test/css/test_nthiness.rb +0 -226
  249. data/test/css/test_parser.rb +0 -386
  250. data/test/css/test_tokenizer.rb +0 -215
  251. data/test/css/test_xpath_visitor.rb +0 -96
  252. data/test/decorators/test_slop.rb +0 -23
  253. data/test/files/2ch.html +0 -108
  254. data/test/files/GH_1042.html +0 -18
  255. data/test/files/address_book.rlx +0 -12
  256. data/test/files/address_book.xml +0 -10
  257. data/test/files/atom.xml +0 -344
  258. data/test/files/bar/bar.xsd +0 -4
  259. data/test/files/bogus.xml +0 -0
  260. data/test/files/dont_hurt_em_why.xml +0 -422
  261. data/test/files/encoding.html +0 -82
  262. data/test/files/encoding.xhtml +0 -84
  263. data/test/files/exslt.xml +0 -8
  264. data/test/files/exslt.xslt +0 -35
  265. data/test/files/foo/foo.xsd +0 -4
  266. data/test/files/metacharset.html +0 -10
  267. data/test/files/namespace_pressure_test.xml +0 -1684
  268. data/test/files/noencoding.html +0 -47
  269. data/test/files/po.xml +0 -32
  270. data/test/files/po.xsd +0 -66
  271. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  272. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  273. data/test/files/saml/xenc_schema.xsd +0 -146
  274. data/test/files/saml/xmldsig_schema.xsd +0 -318
  275. data/test/files/shift_jis.html +0 -10
  276. data/test/files/shift_jis.xml +0 -5
  277. data/test/files/shift_jis_no_charset.html +0 -9
  278. data/test/files/slow-xpath.xml +0 -25509
  279. data/test/files/snuggles.xml +0 -3
  280. data/test/files/staff.dtd +0 -10
  281. data/test/files/staff.xml +0 -59
  282. data/test/files/staff.xslt +0 -32
  283. data/test/files/test_document_url/bar.xml +0 -2
  284. data/test/files/test_document_url/document.dtd +0 -4
  285. data/test/files/test_document_url/document.xml +0 -6
  286. data/test/files/tlm.html +0 -851
  287. data/test/files/to_be_xincluded.xml +0 -2
  288. data/test/files/valid_bar.xml +0 -2
  289. data/test/files/xinclude.xml +0 -4
  290. data/test/helper.rb +0 -271
  291. data/test/html/sax/test_parser.rb +0 -168
  292. data/test/html/sax/test_parser_context.rb +0 -46
  293. data/test/html/sax/test_parser_text.rb +0 -163
  294. data/test/html/sax/test_push_parser.rb +0 -87
  295. data/test/html/test_attributes.rb +0 -85
  296. data/test/html/test_builder.rb +0 -164
  297. data/test/html/test_document.rb +0 -712
  298. data/test/html/test_document_encoding.rb +0 -143
  299. data/test/html/test_document_fragment.rb +0 -310
  300. data/test/html/test_element_description.rb +0 -105
  301. data/test/html/test_named_characters.rb +0 -14
  302. data/test/html/test_node.rb +0 -212
  303. data/test/html/test_node_encoding.rb +0 -91
  304. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  305. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  306. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  307. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  308. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  309. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  310. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  311. data/test/test_convert_xpath.rb +0 -135
  312. data/test/test_css_cache.rb +0 -47
  313. data/test/test_encoding_handler.rb +0 -48
  314. data/test/test_memory_leak.rb +0 -156
  315. data/test/test_nokogiri.rb +0 -138
  316. data/test/test_soap4r_sax.rb +0 -52
  317. data/test/test_xslt_transforms.rb +0 -314
  318. data/test/xml/node/test_save_options.rb +0 -28
  319. data/test/xml/node/test_subclass.rb +0 -44
  320. data/test/xml/sax/test_parser.rb +0 -402
  321. data/test/xml/sax/test_parser_context.rb +0 -115
  322. data/test/xml/sax/test_parser_text.rb +0 -202
  323. data/test/xml/sax/test_push_parser.rb +0 -265
  324. data/test/xml/test_attr.rb +0 -74
  325. data/test/xml/test_attribute_decl.rb +0 -86
  326. data/test/xml/test_builder.rb +0 -341
  327. data/test/xml/test_c14n.rb +0 -180
  328. data/test/xml/test_cdata.rb +0 -54
  329. data/test/xml/test_comment.rb +0 -40
  330. data/test/xml/test_document.rb +0 -982
  331. data/test/xml/test_document_encoding.rb +0 -31
  332. data/test/xml/test_document_fragment.rb +0 -298
  333. data/test/xml/test_dtd.rb +0 -187
  334. data/test/xml/test_dtd_encoding.rb +0 -31
  335. data/test/xml/test_element_content.rb +0 -56
  336. data/test/xml/test_element_decl.rb +0 -73
  337. data/test/xml/test_entity_decl.rb +0 -122
  338. data/test/xml/test_entity_reference.rb +0 -262
  339. data/test/xml/test_namespace.rb +0 -96
  340. data/test/xml/test_node.rb +0 -1325
  341. data/test/xml/test_node_attributes.rb +0 -115
  342. data/test/xml/test_node_encoding.rb +0 -75
  343. data/test/xml/test_node_inheritance.rb +0 -32
  344. data/test/xml/test_node_reparenting.rb +0 -592
  345. data/test/xml/test_node_set.rb +0 -809
  346. data/test/xml/test_parse_options.rb +0 -64
  347. data/test/xml/test_processing_instruction.rb +0 -30
  348. data/test/xml/test_reader.rb +0 -620
  349. data/test/xml/test_reader_encoding.rb +0 -134
  350. data/test/xml/test_relax_ng.rb +0 -60
  351. data/test/xml/test_schema.rb +0 -142
  352. data/test/xml/test_syntax_error.rb +0 -36
  353. data/test/xml/test_text.rb +0 -60
  354. data/test/xml/test_unparented_node.rb +0 -483
  355. data/test/xml/test_xinclude.rb +0 -83
  356. data/test/xml/test_xpath.rb +0 -470
  357. data/test/xslt/test_custom_functions.rb +0 -133
  358. data/test/xslt/test_exception_handling.rb +0 -37
@@ -1,87 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- require "helper"
4
-
5
- module Nokogiri
6
- module HTML
7
- module SAX
8
- class TestPushParser < Nokogiri::SAX::TestCase
9
- def setup
10
- super
11
- @parser = HTML::SAX::PushParser.new(Doc.new)
12
- end
13
-
14
- def test_end_document_called
15
- @parser.<<(<<-eoxml)
16
- <p id="asdfasdf">
17
- <!-- This is a comment -->
18
- Paragraph 1
19
- </p>
20
- eoxml
21
- assert ! @parser.document.end_document_called
22
- @parser.finish
23
- assert @parser.document.end_document_called
24
- end
25
-
26
- def test_start_element
27
- @parser.<<(<<-eoxml)
28
- <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
29
- <html><head><body><p id="asdfasdf">
30
- eoxml
31
-
32
- assert_equal [["html", []], ["head", []], ["body", []], ["p", [["id", "asdfasdf"]]]],
33
- @parser.document.start_elements
34
-
35
- @parser.<<(<<-eoxml)
36
- <!-- This is a comment -->
37
- Paragraph 1
38
- </p></body></html>
39
- eoxml
40
- assert_equal [' This is a comment '], @parser.document.comments
41
- @parser.finish
42
- end
43
-
44
-
45
- def test_chevron_partial_html
46
- @parser.<<(<<-eoxml)
47
- <p id="asdfasdf">
48
- eoxml
49
-
50
- @parser.<<(<<-eoxml)
51
- <!-- This is a comment -->
52
- Paragraph 1
53
- </p>
54
- eoxml
55
- assert_equal [' This is a comment '], @parser.document.comments
56
- @parser.finish
57
- end
58
-
59
- def test_chevron
60
- @parser.<<(<<-eoxml)
61
- <p id="asdfasdf">
62
- <!-- This is a comment -->
63
- Paragraph 1
64
- </p>
65
- eoxml
66
- @parser.finish
67
- assert_equal [' This is a comment '], @parser.document.comments
68
- end
69
-
70
- def test_default_options
71
- assert_equal 0, @parser.options
72
- end
73
-
74
- def test_broken_encoding
75
- skip("ultra hard to fix for pure Java version") if Nokogiri.jruby?
76
- @parser.options |= XML::ParseOptions::RECOVER
77
- # This is ISO_8859-1:
78
- @parser.<< "<?xml version='1.0' encoding='UTF-8'?><r>Gau\337</r>"
79
- @parser.finish
80
- assert(@parser.document.errors.size >= 1)
81
- assert_equal "Gau\337", @parser.document.data.join
82
- assert_equal [["r"], ["body"], ["html"]], @parser.document.end_elements
83
- end
84
- end
85
- end
86
- end
87
- end
@@ -1,85 +0,0 @@
1
- require "helper"
2
-
3
- module Nokogiri
4
- module HTML
5
- class TestAttr < Nokogiri::TestCase
6
- unless Nokogiri::VersionInfo.instance.libxml2? && Nokogiri::VersionInfo.instance.libxml2_using_system?
7
- #
8
- # libxml2 >= 2.9.2 fails to escape comments within some attributes. It
9
- # wants to ensure these comments can be treated as "server-side includes",
10
- # but as a result fails to ensure that serialization is well-formed,
11
- # resulting in an opportunity for XSS injection of code into a final
12
- # re-parsed document (presumably in a browser).
13
- #
14
- # the offending commit is:
15
- #
16
- # https://github.com/GNOME/libxml2/commit/960f0e2
17
- #
18
- # we'll test this by parsing the HTML, serializing it, then
19
- # re-parsing it to ensure there isn't any ambiguity in the output
20
- # that might allow code injection into a browser consuming
21
- # "sanitized" output.
22
- #
23
- # complaints have been made upstream about this behavior, notably at
24
- #
25
- # https://bugzilla.gnome.org/show_bug.cgi?id=769760
26
- #
27
- # and multiple CVEs have been declared and fixed in downstream
28
- # libraries as a result, a list is being kept up to date here:
29
- #
30
- # https://github.com/flavorjones/loofah/issues/144
31
- #
32
- [
33
- #
34
- # these tags and attributes are determined by the code at:
35
- #
36
- # https://git.gnome.org/browse/libxml2/tree/HTMLtree.c?h=v2.9.2#n714
37
- #
38
- {tag: "a", attr: "href"},
39
- {tag: "div", attr: "href"},
40
- {tag: "a", attr: "action"},
41
- {tag: "div", attr: "action"},
42
- {tag: "a", attr: "src"},
43
- {tag: "div", attr: "src"},
44
- {tag: "a", attr: "name"},
45
- #
46
- # note that div+name is _not_ affected by the libxml2 issue.
47
- # but we test it anyway to ensure our logic isn't modifying
48
- # attributes that don't need modifying.
49
- #
50
- {tag: "div", attr: "name", unescaped: true},
51
- ].each do |config|
52
-
53
- define_method "test_uri_escaping_of_#{config[:attr]}_attr_in_#{config[:tag]}_tag" do
54
- html = %{<#{config[:tag]} #{config[:attr]}='examp<!--" unsafeattr=unsafevalue()>-->le.com'>test</#{config[:tag]}>}
55
-
56
- reparsed = HTML.fragment(HTML.fragment(html).to_html)
57
- attributes = reparsed.at_css(config[:tag]).attribute_nodes
58
-
59
- assert_equal [config[:attr]], attributes.collect(&:name)
60
- if Nokogiri::VersionInfo.instance.libxml2?
61
- if config[:unescaped]
62
- #
63
- # this attribute was emitted wrapped in single-quotes, so a double quote is A-OK.
64
- # assert that this attribute's serialization is unaffected.
65
- #
66
- assert_equal %{examp<!--" unsafeattr=unsafevalue()>-->le.com}, attributes.first.value
67
- else
68
- #
69
- # let's match the behavior in libxml < 2.9.2.
70
- # test that this attribute's serialization is well-formed and sanitized.
71
- #
72
- assert_equal %{examp<!--%22%20unsafeattr=unsafevalue()>-->le.com}, attributes.first.value
73
- end
74
- else
75
- #
76
- # yay for consistency in javaland. move along, nothing to see here.
77
- #
78
- assert_equal %{examp<!--%22 unsafeattr=unsafevalue()>-->le.com}, attributes.first.value
79
- end
80
- end
81
- end
82
- end
83
- end
84
- end
85
- end
@@ -1,164 +0,0 @@
1
- require "helper"
2
-
3
- module Nokogiri
4
- module HTML
5
- class TestBuilder < Nokogiri::TestCase
6
- def test_top_level_function_builds
7
- foo = nil
8
- Nokogiri() { |xml| foo = xml }
9
- assert_instance_of Nokogiri::HTML::Builder, foo
10
- end
11
-
12
- def test_builder_with_explicit_tags
13
- html_doc = Nokogiri::HTML::Builder.new {
14
- div.slide(:class => 'another_class') {
15
- node = Nokogiri::XML::Node.new("id", doc)
16
- node.content = "hello"
17
- insert(node)
18
- }
19
- }.doc
20
- assert_equal 1, html_doc.css('div.slide > id').length
21
- assert_equal 'hello', html_doc.at('div.slide > id').content
22
- end
23
-
24
- def test_hash_as_attributes_for_attribute_method
25
- html = Nokogiri::HTML::Builder.new { ||
26
- div.slide(:class => 'another_class') {
27
- span 'Slide 1'
28
- }
29
- }.to_html
30
- assert_match 'class="slide another_class"', html
31
- end
32
-
33
- def test_hash_as_attributes
34
- builder = Nokogiri::HTML::Builder.new do
35
- div(:id => 'awesome') {
36
- h1 "america"
37
- }
38
- end
39
- assert_equal('<div id="awesome"><h1>america</h1></div>',
40
- builder.doc.root.to_html.gsub(/\n/, '').gsub(/>\s*</, '><'))
41
- end
42
-
43
- def test_href_with_attributes
44
- uri = 'http://tenderlovemaking.com/'
45
- built = Nokogiri::XML::Builder.new {
46
- div {
47
- a('King Khan & The Shrines', :href => uri)
48
- }
49
- }
50
- assert_equal 'http://tenderlovemaking.com/',
51
- built.doc.at('a')[:href]
52
- end
53
-
54
- def test_tag_nesting
55
- builder = Nokogiri::HTML::Builder.new do
56
- body {
57
- span.left ''
58
- span.middle {
59
- div.icon ''
60
- }
61
- span.right ''
62
- }
63
- end
64
- assert node = builder.doc.css('span.right').first
65
- assert_equal 'middle', node.previous_sibling['class']
66
- end
67
-
68
- def test_has_ampersand
69
- builder = Nokogiri::HTML::Builder.new do
70
- div.rad.thing! {
71
- text "<awe&some>"
72
- b "hello & world"
73
- }
74
- end
75
- assert_equal(
76
- '<div class="rad" id="thing">&lt;awe&amp;some&gt;<b>hello &amp; world</b></div>',
77
- builder.doc.root.to_html.gsub(/\n/, ''))
78
- end
79
-
80
- def test_multi_tags
81
- builder = Nokogiri::HTML::Builder.new do
82
- div.rad.thing! {
83
- text "<awesome>"
84
- b "hello"
85
- }
86
- end
87
- assert_equal(
88
- '<div class="rad" id="thing">&lt;awesome&gt;<b>hello</b></div>',
89
- builder.doc.root.to_html.gsub(/\n/, ''))
90
- end
91
-
92
- def test_attributes_plus_block
93
- builder = Nokogiri::HTML::Builder.new do
94
- div.rad.thing! {
95
- text "<awesome>"
96
- }
97
- end
98
- assert_equal('<div class="rad" id="thing">&lt;awesome&gt;</div>',
99
- builder.doc.root.to_html.chomp)
100
- end
101
-
102
- def test_builder_adds_attributes
103
- builder = Nokogiri::HTML::Builder.new do
104
- div.rad.thing! "tender div"
105
- end
106
- assert_equal('<div class="rad" id="thing">tender div</div>',
107
- builder.doc.root.to_html.chomp)
108
- end
109
-
110
- def test_bold_tag
111
- builder = Nokogiri::HTML::Builder.new do
112
- b "bold tag"
113
- end
114
- assert_equal('<b>bold tag</b>', builder.doc.root.to_html.chomp)
115
- end
116
-
117
- def test_html_then_body_tag
118
- builder = Nokogiri::HTML::Builder.new do
119
- html {
120
- body {
121
- b "bold tag"
122
- }
123
- }
124
- end
125
- assert_equal('<html><body><b>bold tag</b></body></html>',
126
- builder.doc.root.to_html.chomp.gsub(/>\s*</, '><'))
127
- end
128
-
129
- def test_instance_eval_with_delegation_to_block_context
130
- class << self
131
- def foo
132
- "foo!"
133
- end
134
- end
135
-
136
- builder = Nokogiri::HTML::Builder.new { text foo }
137
- assert builder.to_html.include?("foo!")
138
- end
139
-
140
- def test_builder_with_param
141
- doc = Nokogiri::HTML::Builder.new { |html|
142
- html.body {
143
- html.p "hello world"
144
- }
145
- }.doc
146
-
147
- assert node = doc.xpath('//body/p').first
148
- assert_equal 'hello world', node.content
149
- end
150
-
151
- def test_builder_with_id
152
- text = "hello world"
153
- doc = Nokogiri::HTML::Builder.new { |html|
154
- html.body {
155
- html.id_ text
156
- }
157
- }.doc
158
-
159
- assert node = doc.xpath('//body/id').first
160
- assert_equal text, node.content
161
- end
162
- end
163
- end
164
- end