nokogiri 1.0.0 → 1.6.8.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (309) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +26 -0
  3. data/.cross_rubies +9 -0
  4. data/.editorconfig +17 -0
  5. data/.gemtest +0 -0
  6. data/.travis.yml +51 -0
  7. data/CHANGELOG.rdoc +1160 -0
  8. data/CONTRIBUTING.md +42 -0
  9. data/C_CODING_STYLE.rdoc +33 -0
  10. data/Gemfile +22 -0
  11. data/LICENSE.txt +31 -0
  12. data/Manifest.txt +284 -40
  13. data/README.md +166 -0
  14. data/ROADMAP.md +111 -0
  15. data/Rakefile +310 -199
  16. data/STANDARD_RESPONSES.md +47 -0
  17. data/Y_U_NO_GEMSPEC.md +155 -0
  18. data/appveyor.yml +22 -0
  19. data/bin/nokogiri +118 -0
  20. data/build_all +45 -0
  21. data/dependencies.yml +29 -0
  22. data/ext/nokogiri/depend +358 -0
  23. data/ext/nokogiri/extconf.rb +664 -34
  24. data/ext/nokogiri/html_document.c +120 -33
  25. data/ext/nokogiri/html_document.h +1 -1
  26. data/ext/nokogiri/html_element_description.c +279 -0
  27. data/ext/nokogiri/html_element_description.h +10 -0
  28. data/ext/nokogiri/html_entity_lookup.c +32 -0
  29. data/ext/nokogiri/html_entity_lookup.h +8 -0
  30. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  31. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  32. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  33. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  34. data/ext/nokogiri/nokogiri.c +145 -0
  35. data/ext/nokogiri/nokogiri.h +131 -0
  36. data/ext/nokogiri/xml_attr.c +94 -0
  37. data/ext/nokogiri/xml_attr.h +9 -0
  38. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  39. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  40. data/ext/nokogiri/xml_cdata.c +23 -19
  41. data/ext/nokogiri/xml_cdata.h +1 -1
  42. data/ext/nokogiri/xml_comment.c +69 -0
  43. data/ext/nokogiri/xml_comment.h +9 -0
  44. data/ext/nokogiri/xml_document.c +501 -54
  45. data/ext/nokogiri/xml_document.h +14 -1
  46. data/ext/nokogiri/xml_document_fragment.c +48 -0
  47. data/ext/nokogiri/xml_document_fragment.h +10 -0
  48. data/ext/nokogiri/xml_dtd.c +109 -24
  49. data/ext/nokogiri/xml_dtd.h +3 -1
  50. data/ext/nokogiri/xml_element_content.c +123 -0
  51. data/ext/nokogiri/xml_element_content.h +10 -0
  52. data/ext/nokogiri/xml_element_decl.c +69 -0
  53. data/ext/nokogiri/xml_element_decl.h +9 -0
  54. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  55. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  56. data/ext/nokogiri/xml_entity_decl.c +110 -0
  57. data/ext/nokogiri/xml_entity_decl.h +10 -0
  58. data/ext/nokogiri/xml_entity_reference.c +52 -0
  59. data/ext/nokogiri/xml_entity_reference.h +9 -0
  60. data/ext/nokogiri/xml_io.c +60 -0
  61. data/ext/nokogiri/xml_io.h +11 -0
  62. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  63. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  64. data/ext/nokogiri/xml_namespace.c +117 -0
  65. data/ext/nokogiri/xml_namespace.h +13 -0
  66. data/ext/nokogiri/xml_node.c +1285 -315
  67. data/ext/nokogiri/xml_node.h +4 -6
  68. data/ext/nokogiri/xml_node_set.c +415 -54
  69. data/ext/nokogiri/xml_node_set.h +6 -2
  70. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  71. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  72. data/ext/nokogiri/xml_reader.c +316 -77
  73. data/ext/nokogiri/xml_reader.h +1 -1
  74. data/ext/nokogiri/xml_relax_ng.c +161 -0
  75. data/ext/nokogiri/xml_relax_ng.h +9 -0
  76. data/ext/nokogiri/xml_sax_parser.c +215 -80
  77. data/ext/nokogiri/xml_sax_parser.h +30 -1
  78. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  79. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  80. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  81. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  82. data/ext/nokogiri/xml_schema.c +205 -0
  83. data/ext/nokogiri/xml_schema.h +9 -0
  84. data/ext/nokogiri/xml_syntax_error.c +45 -175
  85. data/ext/nokogiri/xml_syntax_error.h +4 -2
  86. data/ext/nokogiri/xml_text.c +37 -14
  87. data/ext/nokogiri/xml_text.h +1 -1
  88. data/ext/nokogiri/xml_xpath_context.c +230 -13
  89. data/ext/nokogiri/xml_xpath_context.h +2 -1
  90. data/ext/nokogiri/xslt_stylesheet.c +196 -34
  91. data/ext/nokogiri/xslt_stylesheet.h +6 -1
  92. data/lib/nokogiri/css/node.rb +18 -61
  93. data/lib/nokogiri/css/parser.rb +725 -17
  94. data/lib/nokogiri/css/parser.y +126 -63
  95. data/lib/nokogiri/css/parser_extras.rb +91 -0
  96. data/lib/nokogiri/css/syntax_error.rb +7 -0
  97. data/lib/nokogiri/css/tokenizer.rb +148 -5
  98. data/lib/nokogiri/css/tokenizer.rex +31 -39
  99. data/lib/nokogiri/css/xpath_visitor.rb +109 -51
  100. data/lib/nokogiri/css.rb +24 -3
  101. data/lib/nokogiri/decorators/slop.rb +42 -0
  102. data/lib/nokogiri/html/builder.rb +27 -1
  103. data/lib/nokogiri/html/document.rb +329 -3
  104. data/lib/nokogiri/html/document_fragment.rb +39 -0
  105. data/lib/nokogiri/html/element_description.rb +23 -0
  106. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  107. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  108. data/lib/nokogiri/html/sax/parser.rb +35 -4
  109. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  110. data/lib/nokogiri/html/sax/push_parser.rb +36 -0
  111. data/lib/nokogiri/html.rb +18 -76
  112. data/lib/nokogiri/syntax_error.rb +4 -0
  113. data/lib/nokogiri/version.rb +106 -1
  114. data/lib/nokogiri/xml/attr.rb +14 -0
  115. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  116. data/lib/nokogiri/xml/builder.rb +395 -31
  117. data/lib/nokogiri/xml/cdata.rb +4 -2
  118. data/lib/nokogiri/xml/character_data.rb +7 -0
  119. data/lib/nokogiri/xml/document.rb +267 -12
  120. data/lib/nokogiri/xml/document_fragment.rb +149 -0
  121. data/lib/nokogiri/xml/dtd.rb +27 -1
  122. data/lib/nokogiri/xml/element_content.rb +36 -0
  123. data/lib/nokogiri/xml/element_decl.rb +13 -0
  124. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  125. data/lib/nokogiri/xml/namespace.rb +13 -0
  126. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  127. data/lib/nokogiri/xml/node.rb +748 -109
  128. data/lib/nokogiri/xml/node_set.rb +200 -72
  129. data/lib/nokogiri/xml/parse_options.rb +120 -0
  130. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  131. data/lib/nokogiri/xml/pp/node.rb +56 -0
  132. data/lib/nokogiri/xml/pp.rb +2 -0
  133. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  134. data/lib/nokogiri/xml/reader.rb +102 -4
  135. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  136. data/lib/nokogiri/xml/sax/document.rb +114 -2
  137. data/lib/nokogiri/xml/sax/parser.rb +97 -7
  138. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  139. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  140. data/lib/nokogiri/xml/sax.rb +2 -7
  141. data/lib/nokogiri/xml/schema.rb +63 -0
  142. data/lib/nokogiri/xml/searchable.rb +221 -0
  143. data/lib/nokogiri/xml/syntax_error.rb +27 -1
  144. data/lib/nokogiri/xml/text.rb +4 -1
  145. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  146. data/lib/nokogiri/xml/xpath.rb +4 -0
  147. data/lib/nokogiri/xml/xpath_context.rb +3 -1
  148. data/lib/nokogiri/xml.rb +45 -38
  149. data/lib/nokogiri/xslt/stylesheet.rb +19 -0
  150. data/lib/nokogiri/xslt.rb +47 -2
  151. data/lib/nokogiri.rb +117 -24
  152. data/lib/xsd/xmlparser/nokogiri.rb +102 -0
  153. data/patches/sort-patches-by-date +25 -0
  154. data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
  155. data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
  156. data/suppressions/README.txt +1 -0
  157. data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
  158. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  159. data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
  160. data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
  161. data/tasks/test.rb +100 -0
  162. data/test/css/test_nthiness.rb +73 -6
  163. data/test/css/test_parser.rb +184 -39
  164. data/test/css/test_tokenizer.rb +72 -19
  165. data/test/css/test_xpath_visitor.rb +44 -2
  166. data/test/decorators/test_slop.rb +20 -0
  167. data/test/files/2ch.html +108 -0
  168. data/test/files/GH_1042.html +18 -0
  169. data/test/files/address_book.rlx +12 -0
  170. data/test/files/address_book.xml +10 -0
  171. data/test/files/atom.xml +344 -0
  172. data/test/files/bar/bar.xsd +4 -0
  173. data/test/files/bogus.xml +0 -0
  174. data/test/files/dont_hurt_em_why.xml +422 -0
  175. data/test/files/encoding.html +82 -0
  176. data/test/files/encoding.xhtml +84 -0
  177. data/test/files/exslt.xml +8 -0
  178. data/test/files/exslt.xslt +35 -0
  179. data/test/files/foo/foo.xsd +4 -0
  180. data/test/files/metacharset.html +10 -0
  181. data/test/files/namespace_pressure_test.xml +1684 -0
  182. data/test/files/noencoding.html +47 -0
  183. data/test/files/po.xml +32 -0
  184. data/test/files/po.xsd +66 -0
  185. data/test/files/saml/saml20assertion_schema.xsd +283 -0
  186. data/test/files/saml/saml20protocol_schema.xsd +302 -0
  187. data/test/files/saml/xenc_schema.xsd +146 -0
  188. data/test/files/saml/xmldsig_schema.xsd +318 -0
  189. data/test/files/shift_jis.html +10 -0
  190. data/test/files/shift_jis.xml +5 -0
  191. data/test/files/shift_jis_no_charset.html +9 -0
  192. data/test/files/slow-xpath.xml +25509 -0
  193. data/test/files/snuggles.xml +3 -0
  194. data/test/files/staff.dtd +10 -0
  195. data/test/files/test_document_url/bar.xml +2 -0
  196. data/test/files/test_document_url/document.dtd +4 -0
  197. data/test/files/test_document_url/document.xml +6 -0
  198. data/test/files/tlm.html +2 -1
  199. data/test/files/to_be_xincluded.xml +2 -0
  200. data/test/files/valid_bar.xml +2 -0
  201. data/test/files/xinclude.xml +4 -0
  202. data/test/helper.rb +124 -13
  203. data/test/html/sax/test_parser.rb +118 -4
  204. data/test/html/sax/test_parser_context.rb +46 -0
  205. data/test/html/sax/test_push_parser.rb +87 -0
  206. data/test/html/test_builder.rb +94 -8
  207. data/test/html/test_document.rb +626 -11
  208. data/test/html/test_document_encoding.rb +145 -0
  209. data/test/html/test_document_fragment.rb +301 -0
  210. data/test/html/test_element_description.rb +105 -0
  211. data/test/html/test_named_characters.rb +14 -0
  212. data/test/html/test_node.rb +212 -0
  213. data/test/html/test_node_encoding.rb +85 -0
  214. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
  215. data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
  216. data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
  217. data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
  218. data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
  219. data/test/namespaces/test_namespaces_in_parsed_doc.rb +80 -0
  220. data/test/namespaces/test_namespaces_preservation.rb +31 -0
  221. data/test/test_convert_xpath.rb +2 -47
  222. data/test/test_css_cache.rb +45 -0
  223. data/test/test_encoding_handler.rb +48 -0
  224. data/test/test_memory_leak.rb +156 -0
  225. data/test/test_nokogiri.rb +103 -1
  226. data/test/test_soap4r_sax.rb +52 -0
  227. data/test/test_xslt_transforms.rb +293 -8
  228. data/test/xml/node/test_save_options.rb +28 -0
  229. data/test/xml/node/test_subclass.rb +44 -0
  230. data/test/xml/sax/test_parser.rb +309 -8
  231. data/test/xml/sax/test_parser_context.rb +115 -0
  232. data/test/xml/sax/test_push_parser.rb +157 -0
  233. data/test/xml/test_attr.rb +67 -0
  234. data/test/xml/test_attribute_decl.rb +86 -0
  235. data/test/xml/test_builder.rb +327 -2
  236. data/test/xml/test_c14n.rb +180 -0
  237. data/test/xml/test_cdata.rb +32 -2
  238. data/test/xml/test_comment.rb +40 -0
  239. data/test/xml/test_document.rb +846 -35
  240. data/test/xml/test_document_encoding.rb +31 -0
  241. data/test/xml/test_document_fragment.rb +271 -0
  242. data/test/xml/test_dtd.rb +153 -9
  243. data/test/xml/test_dtd_encoding.rb +31 -0
  244. data/test/xml/test_element_content.rb +56 -0
  245. data/test/xml/test_element_decl.rb +73 -0
  246. data/test/xml/test_entity_decl.rb +122 -0
  247. data/test/xml/test_entity_reference.rb +251 -0
  248. data/test/xml/test_namespace.rb +96 -0
  249. data/test/xml/test_node.rb +1126 -105
  250. data/test/xml/test_node_attributes.rb +115 -0
  251. data/test/xml/test_node_encoding.rb +69 -0
  252. data/test/xml/test_node_inheritance.rb +32 -0
  253. data/test/xml/test_node_reparenting.rb +549 -0
  254. data/test/xml/test_node_set.rb +668 -9
  255. data/test/xml/test_parse_options.rb +64 -0
  256. data/test/xml/test_processing_instruction.rb +30 -0
  257. data/test/xml/test_reader.rb +589 -0
  258. data/test/xml/test_reader_encoding.rb +134 -0
  259. data/test/xml/test_relax_ng.rb +60 -0
  260. data/test/xml/test_schema.rb +142 -0
  261. data/test/xml/test_syntax_error.rb +30 -0
  262. data/test/xml/test_text.rb +49 -2
  263. data/test/xml/test_unparented_node.rb +440 -0
  264. data/test/xml/test_xinclude.rb +83 -0
  265. data/test/xml/test_xpath.rb +445 -0
  266. data/test/xslt/test_custom_functions.rb +133 -0
  267. data/test/xslt/test_exception_handling.rb +37 -0
  268. data/test_all +107 -0
  269. metadata +459 -115
  270. data/History.txt +0 -6
  271. data/README.ja.txt +0 -86
  272. data/README.txt +0 -87
  273. data/ext/nokogiri/html_sax_parser.c +0 -32
  274. data/ext/nokogiri/html_sax_parser.h +0 -11
  275. data/ext/nokogiri/native.c +0 -40
  276. data/ext/nokogiri/native.h +0 -51
  277. data/ext/nokogiri/xml_xpath.c +0 -46
  278. data/ext/nokogiri/xml_xpath.h +0 -11
  279. data/lib/nokogiri/css/generated_parser.rb +0 -653
  280. data/lib/nokogiri/css/generated_tokenizer.rb +0 -159
  281. data/lib/nokogiri/decorators/hpricot/node.rb +0 -58
  282. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -14
  283. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -17
  284. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  285. data/lib/nokogiri/decorators.rb +0 -1
  286. data/lib/nokogiri/hpricot.rb +0 -47
  287. data/lib/nokogiri/xml/after_handler.rb +0 -18
  288. data/lib/nokogiri/xml/before_handler.rb +0 -32
  289. data/lib/nokogiri/xml/element.rb +0 -6
  290. data/lib/nokogiri/xml/entity_declaration.rb +0 -9
  291. data/nokogiri.gemspec +0 -34
  292. data/test/hpricot/files/basic.xhtml +0 -17
  293. data/test/hpricot/files/boingboing.html +0 -2266
  294. data/test/hpricot/files/cy0.html +0 -3653
  295. data/test/hpricot/files/immob.html +0 -400
  296. data/test/hpricot/files/pace_application.html +0 -1320
  297. data/test/hpricot/files/tenderlove.html +0 -16
  298. data/test/hpricot/files/uswebgen.html +0 -220
  299. data/test/hpricot/files/utf8.html +0 -1054
  300. data/test/hpricot/files/week9.html +0 -1723
  301. data/test/hpricot/files/why.xml +0 -19
  302. data/test/hpricot/load_files.rb +0 -7
  303. data/test/hpricot/test_alter.rb +0 -67
  304. data/test/hpricot/test_builder.rb +0 -27
  305. data/test/hpricot/test_parser.rb +0 -423
  306. data/test/hpricot/test_paths.rb +0 -15
  307. data/test/hpricot/test_preserved.rb +0 -78
  308. data/test/hpricot/test_xml.rb +0 -30
  309. data/test/test_reader.rb +0 -222
@@ -0,0 +1,145 @@
1
+ # -*- coding: utf-8 -*-
2
+ require "helper"
3
+
4
module Nokogiri
  module HTML
    # Checks the encoding reported by parsed HTML documents and the encoding
    # of the strings they serialize to.
    class TestDocumentEncoding < Nokogiri::TestCase
      def test_encoding
        # Block form closes the file handle as soon as parsing has finished.
        doc = File.open(SHIFT_JIS_HTML, 'rb') { |io| Nokogiri::HTML(io) }

        hello = "こんにちは"

        assert_match doc.encoding, doc.to_html
        assert_match hello.encode('Shift_JIS'), doc.to_html
        assert_equal 'Shift_JIS', doc.to_html.encoding.name

        assert_match hello, doc.to_html(:encoding => 'UTF-8')
        assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8')
        # The encoding name is compared for equality, not as a pattern.
        assert_equal 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
      end

      def test_encoding_without_charset
        # Read through a block so the handle does not stay open.
        html = File.open(SHIFT_JIS_NO_CHARSET, 'r:cp932:cp932') { |io| io.read }
        doc = Nokogiri::HTML html

        hello = "こんにちは"

        assert_match hello, doc.content
        assert_match hello, doc.to_html(:encoding => 'UTF-8')
        assert_equal 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
      end

      def test_default_to_encoding_from_string
        bad_charset = bad_charset_html

        doc = Nokogiri::HTML(bad_charset)
        assert_equal bad_charset.encoding.name, doc.encoding

        doc = Nokogiri.parse(bad_charset)
        assert_equal bad_charset.encoding.name, doc.encoding
      end

      def test_encoding_non_utf8
        orig = '日本語が上手です'
        bin = Encoding::ASCII_8BIT
        [Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc|
          html = <<-eohtml.encode(enc)
<html>
<meta http-equiv="Content-Type" content="text/html; charset=#{enc.name}">
<title xml:lang="ja">#{orig}</title></html>
          eohtml
          text = Nokogiri::HTML.parse(html).at('title').inner_text
          # Compare raw bytes so the assertion does not depend on how the two
          # strings are tagged.
          assert_equal(
            orig.encode(enc).force_encoding(bin),
            text.encode(enc).force_encoding(bin)
          )
        end
      end

      def test_encoding_with_a_bad_name
        doc = Nokogiri::HTML(bad_charset_html, nil, 'askldjfhalsdfjhlkasdfjh')
        assert_equal ['http://tenderlovemaking.com/'],
                     doc.css('a').map { |a| a['href'] }
      end

      def test_empty_doc_encoding
        encoding = 'US-ASCII'
        assert_equal encoding, Nokogiri::HTML.parse(nil, nil, encoding).encoding
      end

      private

      # Returns a fresh HTML string whose meta tag carries a malformed charset
      # declaration ("charset=charset=UTF-8"); shared by the tests above.
      def bad_charset_html
        <<-eohtml
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=charset=UTF-8">
</head>
<body>
<a href="http://tenderlovemaking.com/">blah!</a>
</body>
</html>
        eohtml
      end
    end

    # Checks that parsing from a binary string and from a binary stream give
    # the same results, with and without an explicit encoding argument.
    class TestDocumentEncodingDetection < Nokogiri::TestCase
      # Returns the whole file as a binary string.
      def binread(file)
        IO.binread(file)
      end

      # Opens +file+ in binary mode. With a block, yields the handle, closes it
      # afterwards and returns the block's value; without a block, returns the
      # open handle as before.
      def binopen(file, &block)
        File.open(file, 'rb', &block)
      end

      def test_document_html_noencoding
        from_stream = binopen(NOENCODING_FILE) { |io| Nokogiri::HTML(io) }
        from_string = Nokogiri::HTML(binread(NOENCODING_FILE))

        assert_equal from_string.to_s.size, from_stream.to_s.size
      end

      def test_document_html_charset
        html = binopen(METACHARSET_FILE) { |io| Nokogiri::HTML(io) }
        assert_equal 'iso-2022-jp', html.encoding
        assert_equal 'たこ焼き仮面', html.title
      end

      def test_document_xhtml_enc
        [ENCODING_XHTML_FILE, ENCODING_HTML_FILE].each do |file|
          doc_from_string_enc = Nokogiri::HTML(binread(file), nil, 'Shift_JIS')
          ary_from_string_enc = doc_from_string_enc.xpath('//p/text()').map(&:text)

          doc_from_string = Nokogiri::HTML(binread(file))
          ary_from_string = doc_from_string.xpath('//p/text()').map(&:text)

          doc_from_file_enc = binopen(file) { |io| Nokogiri::HTML(io, nil, 'Shift_JIS') }
          ary_from_file_enc = doc_from_file_enc.xpath('//p/text()').map(&:text)

          doc_from_file = binopen(file) { |io| Nokogiri::HTML(io) }
          ary_from_file = doc_from_file.xpath('//p/text()').map(&:text)

          title = 'たこ焼き仮面'

          assert_equal(title, doc_from_string_enc.at('//title/text()').text)
          assert_equal(title, doc_from_string.at('//title/text()').text)
          assert_equal(title, doc_from_file_enc.at('//title/text()').text)
          # This one combination is deliberately not asserted on JRuby.
          unless Nokogiri.jruby? && file == ENCODING_HTML_FILE
            assert_equal(title, doc_from_file.at('//title/text()').text)
          end

          # 73 expected paragraphs, each with one more leading '超' than the last.
          evil = (0..72).map { |i| '超' * i + '悪い事を構想中。' }

          assert_equal(evil, ary_from_string_enc)
          assert_equal(evil, ary_from_string)
          assert_equal(evil, ary_from_file_enc)
          assert_equal(evil, ary_from_file)
        end
      end
    end
  end
end
@@ -0,0 +1,301 @@
1
+ # -*- coding: utf-8 -*-
2
+ require "helper"
3
+
4
module Nokogiri
  module HTML
    # Covers parsing, searching, serializing and error reporting for
    # Nokogiri::HTML::DocumentFragment.
    class TestDocumentFragment < Nokogiri::TestCase
      def setup
        super
        @html = Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE)
      end

      def test_inspect_encoding
        fragment = "<div>こんにちは!</div>".encode('EUC-JP')
        f = Nokogiri::HTML::DocumentFragment.parse fragment
        assert_equal "こんにちは!", f.content
      end

      def test_html_parse_encoding
        fragment = "<div>こんにちは!</div>".encode 'EUC-JP'
        f = Nokogiri::HTML.fragment fragment
        assert_equal 'EUC-JP', f.document.encoding
        assert_equal "こんにちは!", f.content
      end

      def test_unlink_empty_document
        frag = Nokogiri::HTML::DocumentFragment.parse('').unlink # must_not_raise
        assert_nil frag.parent
      end

      def test_colons_are_not_removed
        doc = Nokogiri::HTML::DocumentFragment.parse("<span>3:30pm</span>")
        assert_match(/3:30/, doc.to_s)
      end

      def test_parse_encoding
        fragment = "<div>hello world</div>"
        f = Nokogiri::HTML::DocumentFragment.parse fragment, 'ISO-8859-1'
        assert_equal 'ISO-8859-1', f.document.encoding
        assert_equal "hello world", f.content
      end

      def test_html_parse_with_encoding
        fragment = "<div>hello world</div>"
        f = Nokogiri::HTML.fragment fragment, 'ISO-8859-1'
        assert_equal 'ISO-8859-1', f.document.encoding
        assert_equal "hello world", f.content
      end

      def test_parse_in_context
        assert_equal('<br>', @html.root.parse('<br />').to_s)
      end

      def test_inner_html=
        fragment = Nokogiri::HTML.fragment '<hr />'

        fragment.inner_html = "hello"
        assert_equal 'hello', fragment.inner_html
      end

      def test_ancestors_search
        html = %q{
<div>
<ul>
<li>foo</li>
</ul>
</div>
}
        fragment = Nokogiri::HTML.fragment html
        li = fragment.at('li')
        assert li.matches?('li')
      end

      def test_fun_encoding
        string = %Q(<body>こんにちは</body>)
        html = Nokogiri::HTML::DocumentFragment.parse(
          string
        ).to_html(:encoding => 'UTF-8')
        assert_equal string, html
      end

      def test_new
        assert Nokogiri::HTML::DocumentFragment.new(@html)
      end

      def test_body_fragment_should_contain_body
        fragment = Nokogiri::HTML::DocumentFragment.parse(" <body><div>foo</div></body>")
        assert_match(/^<body>/, fragment.to_s)
      end

      def test_nonbody_fragment_should_not_contain_body
        fragment = Nokogiri::HTML::DocumentFragment.parse("<div>foo</div>")
        assert_match(/^<div>/, fragment.to_s)
      end

      def test_fragment_should_have_document
        fragment = Nokogiri::HTML::DocumentFragment.new(@html)
        assert_equal @html, fragment.document
      end

      def test_empty_fragment_should_be_searchable_by_css
        fragment = Nokogiri::HTML.fragment("")
        assert_equal 0, fragment.css("a").size
      end

      def test_empty_fragment_should_be_searchable
        fragment = Nokogiri::HTML.fragment("")
        assert_equal 0, fragment.search("//a").size
      end

      def test_name
        fragment = Nokogiri::HTML::DocumentFragment.new(@html)
        assert_equal '#document-fragment', fragment.name
      end

      def test_static_method
        fragment = Nokogiri::HTML::DocumentFragment.parse("<div>a</div>")
        assert_instance_of Nokogiri::HTML::DocumentFragment, fragment
      end

      # Smoke test: building many fragments on one document must not raise.
      def test_many_fragments
        100.times { Nokogiri::HTML::DocumentFragment.new(@html) }
      end

      def test_subclass
        klass = Class.new(Nokogiri::HTML::DocumentFragment)
        fragment = klass.new(@html, "<div>a</div>")
        assert_instance_of klass, fragment
      end

      def test_subclass_parse
        klass = Class.new(Nokogiri::HTML::DocumentFragment)
        doc = klass.parse("<div>a</div>")
        assert_instance_of klass, doc
      end

      def test_html_fragment
        fragment = Nokogiri::HTML.fragment("<div>a</div>")
        assert_equal "<div>a</div>", fragment.to_s
      end

      def test_html_fragment_has_outer_text
        doc = "a<div>b</div>c"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        # Versions are compared segment by segment; comparing the raw strings
        # would sort "2.10.x" before "2.6.16".
        if Nokogiri.uses_libxml? &&
           loaded_libxml_version <= Gem::Version.new("2.6.16")
          assert_equal "a<div>b</div><p>c</p>", fragment.to_s
        else
          assert_equal "a<div>b</div>c", fragment.to_s
        end
      end

      def test_html_fragment_case_insensitivity
        doc = "<Div>b</Div>"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_equal "<div>b</div>", fragment.to_s
      end

      def test_html_fragment_with_leading_whitespace
        doc = " <div>b</div> "
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_match %r% <div>b</div> *%, fragment.to_s
      end

      def test_html_fragment_with_leading_whitespace_and_newline
        doc = " \n<div>b</div> "
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_match %r% \n<div>b</div> *%, fragment.to_s
      end

      def test_html_fragment_with_input_and_intermediate_whitespace
        doc = "<label>Label</label><input type=\"text\"> <span>span</span>"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_equal "<label>Label</label><input type=\"text\"> <span>span</span>", fragment.to_s
      end

      def test_html_fragment_with_leading_text_and_newline
        fragment = HTML::Document.new.fragment("First line\nSecond line<br>Broken line")
        # Expected value goes first so a failure message labels the sides correctly.
        assert_equal "First line\nSecond line<br>Broken line", fragment.to_s
      end

      def test_html_fragment_with_leading_whitespace_and_text_and_newline
        fragment = HTML::Document.new.fragment(" First line\nSecond line<br>Broken line")
        assert_equal " First line\nSecond line<br>Broken line", fragment.to_s
      end

      def test_html_fragment_with_leading_entity
        failed = "&quot;test<br/>test&quot;"
        fragment = Nokogiri::HTML::DocumentFragment.parse(failed)
        assert_equal '"test<br>test"', fragment.to_html
      end

      def test_to_s
        doc = "<span>foo<br></span><span>bar</span>"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_equal "<span>foo<br></span><span>bar</span>", fragment.to_s
      end

      def test_to_html
        doc = "<span>foo<br></span><span>bar</span>"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_equal "<span>foo<br></span><span>bar</span>", fragment.to_html
      end

      def test_to_xhtml
        doc = "<span>foo<br></span><span>bar</span><p></p>"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        # jruby? is checked first, so the libxml version is only read when the
        # first condition is false.
        if Nokogiri.jruby? || loaded_libxml_version >= Gem::Version.new("2.7.0")
          assert_equal "<span>foo<br /></span><span>bar</span><p></p>", fragment.to_xhtml
        else
          # FIXME: why are we doing this ? this violates the spec,
          # see http://www.w3.org/TR/xhtml1/#C_2
          assert_equal "<span>foo<br></span><span>bar</span><p></p>", fragment.to_xhtml
        end
      end

      def test_to_xml
        doc = "<span>foo<br></span><span>bar</span>"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_equal "<span>foo<br/></span><span>bar</span>", fragment.to_xml
      end

      def test_fragment_script_tag_with_cdata
        doc = HTML::Document.new
        fragment = doc.fragment("<script>var foo = 'bar';</script>")
        assert_equal("<script>var foo = 'bar';</script>",
                     fragment.to_s)
      end

      def test_fragment_with_comment
        doc = HTML::Document.new
        fragment = doc.fragment("<p>hello<!-- your ad here --></p>")
        assert_equal("<p>hello<!-- your ad here --></p>",
                     fragment.to_s)
      end

      def test_element_children_counts
        doc = Nokogiri::HTML::DocumentFragment.parse(" <div> </div>\n ")
        assert_equal 1, doc.element_children.count
      end

      def test_malformed_fragment_is_corrected
        fragment = HTML::DocumentFragment.parse("<div </div>")
        assert_equal "<div></div>", fragment.to_s
      end

      def test_unclosed_script_tag
        # see GH#315
        fragment = HTML::DocumentFragment.parse("foo <script>bar")
        assert_equal "foo <script>bar</script>", fragment.to_html
      end

      def test_error_propagation_on_fragment_parse
        frag = Nokogiri::HTML::DocumentFragment.parse "<hello>oh, hello there.</hello>"
        assert frag.errors.any? { |err| err.to_s =~ /Tag hello invalid/ }, "errors should be copied to the fragment"
      end

      def test_error_propagation_on_fragment_parse_in_node_context
        doc = Nokogiri::HTML::Document.parse "<html><body><div></div></body></html>"
        context_node = doc.at_css "div"
        frag = Nokogiri::HTML::DocumentFragment.new doc, "<hello>oh, hello there.</hello>", context_node
        assert frag.errors.any? { |err| err.to_s =~ /Tag hello invalid/ }, "errors should be on the context node's document"
      end

      def test_error_propagation_on_fragment_parse_in_node_context_should_not_include_preexisting_errors
        doc = Nokogiri::HTML::Document.parse "<html><body><div></div><jimmy></jimmy></body></html>"
        assert doc.errors.any? { |err| err.to_s =~ /jimmy/ }, "assert on setup"

        context_node = doc.at_css "div"
        frag = Nokogiri::HTML::DocumentFragment.new doc, "<hello>oh, hello there.</hello>", context_node
        assert frag.errors.any? { |err| err.to_s =~ /Tag hello invalid/ }, "errors should be on the context node's document"
        assert frag.errors.none? { |err| err.to_s =~ /jimmy/ }, "errors should not include pre-existing document errors"
      end

      def test_capturing_nonparse_errors_during_fragment_clone
        # see https://github.com/sparklemotion/nokogiri/issues/1196 for background
        original = Nokogiri::HTML.fragment("<div id='unique'></div><div id='unique'></div>")
        original_errors = original.errors.dup

        copy = original.dup
        assert_equal original_errors, copy.errors
      end

      def test_capturing_nonparse_errors_during_node_copy_between_fragments
        # Errors should be emitted while parsing only, and should not change when moving nodes.
        frag1 = Nokogiri::HTML.fragment("<diva id='unique'>one</diva>")
        frag2 = Nokogiri::HTML.fragment("<dive id='unique'>two</dive>")
        node1 = frag1.at_css("#unique")
        node2 = frag2.at_css("#unique")
        original_errors1 = frag1.errors.dup
        original_errors2 = frag2.errors.dup
        assert original_errors1.any? { |e| e.to_s =~ /Tag diva invalid/ }, "it should complain about the tag name"
        assert original_errors2.any? { |e| e.to_s =~ /Tag dive invalid/ }, "it should complain about the tag name"

        node1.add_child node2

        assert_equal original_errors1, frag1.errors
        assert_equal original_errors2, frag2.errors
      end

      private

      # Returns Nokogiri::VERSION_INFO['libxml']['loaded'] wrapped in a
      # Gem::Version, so callers compare versions numerically per segment
      # instead of lexicographically as Strings.
      def loaded_libxml_version
        Gem::Version.new(Nokogiri::VERSION_INFO['libxml']['loaded'])
      end
    end
  end
end
@@ -0,0 +1,105 @@
1
+ require "helper"
2
+
3
module Nokogiri
  module HTML
    # Covers the lookup and the predicate/attribute readers of
    # Nokogiri::HTML::ElementDescription.
    class TestElementDescription < Nokogiri::TestCase
      def test_fetch_nonexistent
        assert_nil ElementDescription['foo']
      end

      def test_fetch_element_description
        desc = ElementDescription['a']
        assert desc
        assert_instance_of ElementDescription, desc
      end

      def test_name
        assert_equal 'a', ElementDescription['a'].name
      end

      def test_implied_start_tag?
        assert !ElementDescription['a'].implied_start_tag?
      end

      def test_implied_end_tag?
        assert !ElementDescription['a'].implied_end_tag?
        assert ElementDescription['p'].implied_end_tag?
      end

      def test_save_end_tag?
        assert !ElementDescription['a'].save_end_tag?
        assert ElementDescription['br'].save_end_tag?
      end

      def test_empty?
        assert ElementDescription['br'].empty?
        assert !ElementDescription['a'].empty?
      end

      def test_deprecated?
        assert ElementDescription['applet'].deprecated?
        assert !ElementDescription['br'].deprecated?
      end

      def test_inline?
        assert ElementDescription['a'].inline?
        assert !ElementDescription['div'].inline?
      end

      def test_block?
        element = ElementDescription['a']
        assert_equal(!element.inline?, element.block?)
      end

      def test_description
        assert ElementDescription['a'].description
      end

      def test_subelements
        sub_elements = ElementDescription['body'].sub_elements
        # uses_libxml? is checked first so LIBXML_VERSION is only read when
        # libxml is in use. The version is compared as a Gem::Version because
        # a plain String comparison would sort "2.10.x" before "2.7.7".
        if Nokogiri.uses_libxml? &&
           Gem::Version.new(Nokogiri::LIBXML_VERSION) >= Gem::Version.new('2.7.7')
          assert_equal 65, sub_elements.length
        elsif Nokogiri.uses_libxml?
          assert_equal 61, sub_elements.length
        else
          assert sub_elements.length > 0
        end
      end

      def test_default_sub_element
        assert_equal 'div', ElementDescription['body'].default_sub_element
      end

      # Smoke test with no assertion: the call only has to complete without
      # raising.
      # NOTE(review): the test name suggests the result is expected to be nil
      # for the root element - confirm before asserting on it.
      def test_null_default_sub_element
        doc = Nokogiri::HTML('foo')
        doc.root.description.default_sub_element
      end

      def test_optional_attributes
        attrs = ElementDescription['table'].optional_attributes
        assert attrs
      end

      def test_deprecated_attributes
        attrs = ElementDescription['table'].deprecated_attributes
        assert attrs
        assert_equal 2, attrs.length
      end

      def test_required_attributes
        attrs = ElementDescription['table'].required_attributes
        assert attrs
        assert_equal 0, attrs.length
      end

      def test_inspect
        desc = ElementDescription['input']
        assert_match desc.name, desc.inspect
      end

      def test_to_s
        desc = ElementDescription['input']
        assert_match desc.name, desc.to_s
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require "helper"
2
+
3
module Nokogiri
  module HTML
    # Checks the two lookup styles on NamedCharacters: +get+, which returns an
    # object exposing +value+ and +description+, and +[]+, which is compared
    # directly against an integer.
    class TestNamedCharacters < Nokogiri::TestCase
      def test_named_character
        entity = NamedCharacters.get('copy')

        assert_equal(169, NamedCharacters['copy'])
        assert_equal(entity.value, NamedCharacters['copy'])
        assert(entity.description)
      end
    end
  end
end