nokogiri 1.2.3-x86-mswin32-60 → 1.4.5-x86-mswin32-60

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (319) hide show
  1. data/.autotest +18 -7
  2. data/.gemtest +0 -0
  3. data/CHANGELOG.ja.rdoc +297 -3
  4. data/CHANGELOG.rdoc +289 -0
  5. data/Manifest.txt +148 -37
  6. data/README.ja.rdoc +20 -20
  7. data/README.rdoc +53 -22
  8. data/Rakefile +127 -211
  9. data/bin/nokogiri +54 -0
  10. data/ext/nokogiri/depend +358 -0
  11. data/ext/nokogiri/extconf.rb +89 -54
  12. data/ext/nokogiri/html_document.c +34 -27
  13. data/ext/nokogiri/html_document.h +1 -1
  14. data/ext/nokogiri/html_element_description.c +276 -0
  15. data/ext/nokogiri/html_element_description.h +10 -0
  16. data/ext/nokogiri/html_entity_lookup.c +7 -5
  17. data/ext/nokogiri/html_entity_lookup.h +1 -1
  18. data/ext/nokogiri/html_sax_parser_context.c +94 -0
  19. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  20. data/ext/nokogiri/{native.c → nokogiri.c} +31 -7
  21. data/ext/nokogiri/{native.h → nokogiri.h} +68 -41
  22. data/ext/nokogiri/xml_attr.c +20 -9
  23. data/ext/nokogiri/xml_attr.h +1 -1
  24. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  25. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  26. data/ext/nokogiri/xml_cdata.c +21 -9
  27. data/ext/nokogiri/xml_cdata.h +1 -1
  28. data/ext/nokogiri/xml_comment.c +18 -6
  29. data/ext/nokogiri/xml_comment.h +1 -1
  30. data/ext/nokogiri/xml_document.c +247 -68
  31. data/ext/nokogiri/xml_document.h +5 -3
  32. data/ext/nokogiri/xml_document_fragment.c +15 -7
  33. data/ext/nokogiri/xml_document_fragment.h +1 -1
  34. data/ext/nokogiri/xml_dtd.c +110 -10
  35. data/ext/nokogiri/xml_dtd.h +3 -1
  36. data/ext/nokogiri/xml_element_content.c +123 -0
  37. data/ext/nokogiri/xml_element_content.h +10 -0
  38. data/ext/nokogiri/xml_element_decl.c +69 -0
  39. data/ext/nokogiri/xml_element_decl.h +9 -0
  40. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  41. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  42. data/ext/nokogiri/xml_entity_decl.c +110 -0
  43. data/ext/nokogiri/xml_entity_decl.h +10 -0
  44. data/ext/nokogiri/xml_entity_reference.c +16 -5
  45. data/ext/nokogiri/xml_entity_reference.h +1 -1
  46. data/ext/nokogiri/xml_io.c +40 -8
  47. data/ext/nokogiri/xml_io.h +2 -1
  48. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  49. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  50. data/ext/nokogiri/xml_namespace.c +84 -0
  51. data/ext/nokogiri/xml_namespace.h +13 -0
  52. data/ext/nokogiri/xml_node.c +782 -225
  53. data/ext/nokogiri/xml_node.h +2 -4
  54. data/ext/nokogiri/xml_node_set.c +253 -34
  55. data/ext/nokogiri/xml_node_set.h +2 -2
  56. data/ext/nokogiri/xml_processing_instruction.c +17 -5
  57. data/ext/nokogiri/xml_processing_instruction.h +1 -1
  58. data/ext/nokogiri/xml_reader.c +277 -85
  59. data/ext/nokogiri/xml_reader.h +1 -1
  60. data/ext/nokogiri/xml_relax_ng.c +168 -0
  61. data/ext/nokogiri/xml_relax_ng.h +9 -0
  62. data/ext/nokogiri/xml_sax_parser.c +183 -111
  63. data/ext/nokogiri/xml_sax_parser.h +30 -1
  64. data/ext/nokogiri/xml_sax_parser_context.c +199 -0
  65. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  66. data/ext/nokogiri/xml_sax_push_parser.c +42 -12
  67. data/ext/nokogiri/xml_sax_push_parser.h +1 -1
  68. data/ext/nokogiri/xml_schema.c +205 -0
  69. data/ext/nokogiri/xml_schema.h +9 -0
  70. data/ext/nokogiri/xml_syntax_error.c +28 -173
  71. data/ext/nokogiri/xml_syntax_error.h +2 -1
  72. data/ext/nokogiri/xml_text.c +16 -6
  73. data/ext/nokogiri/xml_text.h +1 -1
  74. data/ext/nokogiri/xml_xpath_context.c +104 -47
  75. data/ext/nokogiri/xml_xpath_context.h +1 -1
  76. data/ext/nokogiri/xslt_stylesheet.c +161 -19
  77. data/ext/nokogiri/xslt_stylesheet.h +1 -1
  78. data/lib/nokogiri.rb +47 -8
  79. data/lib/nokogiri/1.8/nokogiri.so +0 -0
  80. data/lib/nokogiri/1.9/nokogiri.so +0 -0
  81. data/lib/nokogiri/css.rb +6 -3
  82. data/lib/nokogiri/css/node.rb +14 -12
  83. data/lib/nokogiri/css/parser.rb +665 -62
  84. data/lib/nokogiri/css/parser.y +20 -10
  85. data/lib/nokogiri/css/parser_extras.rb +91 -0
  86. data/lib/nokogiri/css/tokenizer.rb +148 -5
  87. data/lib/nokogiri/css/tokenizer.rex +10 -9
  88. data/lib/nokogiri/css/xpath_visitor.rb +47 -44
  89. data/lib/nokogiri/decorators/slop.rb +8 -4
  90. data/lib/nokogiri/ffi/encoding_handler.rb +42 -0
  91. data/lib/nokogiri/ffi/html/document.rb +28 -0
  92. data/lib/nokogiri/ffi/html/element_description.rb +81 -0
  93. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  94. data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
  95. data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
  96. data/lib/nokogiri/ffi/libxml.rb +420 -0
  97. data/lib/nokogiri/ffi/structs/common_node.rb +38 -0
  98. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  99. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  100. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  101. data/lib/nokogiri/ffi/structs/xml_attr.rb +20 -0
  102. data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
  103. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  104. data/lib/nokogiri/ffi/structs/xml_char_encoding_handler.rb +11 -0
  105. data/lib/nokogiri/ffi/structs/xml_document.rb +117 -0
  106. data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
  107. data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
  108. data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
  109. data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
  110. data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
  111. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  112. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  113. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  114. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  115. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +20 -0
  116. data/lib/nokogiri/ffi/structs/xml_parser_input.rb +19 -0
  117. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  118. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  119. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +124 -0
  120. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  121. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  122. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  123. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +38 -0
  124. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  125. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  126. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  127. data/lib/nokogiri/ffi/weak_bucket.rb +40 -0
  128. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  129. data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
  130. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  131. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  132. data/lib/nokogiri/ffi/xml/document.rb +174 -0
  133. data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
  134. data/lib/nokogiri/ffi/xml/dtd.rb +67 -0
  135. data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
  136. data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
  137. data/lib/nokogiri/ffi/xml/entity_decl.rb +36 -0
  138. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  139. data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
  140. data/lib/nokogiri/ffi/xml/node.rb +559 -0
  141. data/lib/nokogiri/ffi/xml/node_set.rb +150 -0
  142. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  143. data/lib/nokogiri/ffi/xml/reader.rb +236 -0
  144. data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
  145. data/lib/nokogiri/ffi/xml/sax/parser.rb +143 -0
  146. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +79 -0
  147. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +51 -0
  148. data/lib/nokogiri/ffi/xml/schema.rb +109 -0
  149. data/lib/nokogiri/ffi/xml/syntax_error.rb +98 -0
  150. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  151. data/lib/nokogiri/ffi/xml/xpath.rb +9 -0
  152. data/lib/nokogiri/ffi/xml/xpath_context.rb +153 -0
  153. data/lib/nokogiri/ffi/xslt/stylesheet.rb +77 -0
  154. data/lib/nokogiri/html.rb +13 -47
  155. data/lib/nokogiri/html/builder.rb +27 -1
  156. data/lib/nokogiri/html/document.rb +201 -7
  157. data/lib/nokogiri/html/document_fragment.rb +41 -0
  158. data/lib/nokogiri/html/element_description.rb +23 -0
  159. data/lib/nokogiri/html/entity_lookup.rb +2 -0
  160. data/lib/nokogiri/html/sax/parser.rb +34 -3
  161. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  162. data/lib/nokogiri/nokogiri.rb +1 -0
  163. data/lib/nokogiri/version.rb +40 -1
  164. data/lib/nokogiri/version_warning.rb +14 -0
  165. data/lib/nokogiri/xml.rb +32 -53
  166. data/lib/nokogiri/xml/attr.rb +5 -0
  167. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  168. data/lib/nokogiri/xml/builder.rb +349 -29
  169. data/lib/nokogiri/xml/cdata.rb +3 -1
  170. data/lib/nokogiri/xml/character_data.rb +7 -0
  171. data/lib/nokogiri/xml/document.rb +166 -14
  172. data/lib/nokogiri/xml/document_fragment.rb +76 -1
  173. data/lib/nokogiri/xml/dtd.rb +16 -3
  174. data/lib/nokogiri/xml/element_content.rb +36 -0
  175. data/lib/nokogiri/xml/element_decl.rb +13 -0
  176. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  177. data/lib/nokogiri/xml/namespace.rb +13 -0
  178. data/lib/nokogiri/xml/node.rb +561 -166
  179. data/lib/nokogiri/xml/node/save_options.rb +22 -2
  180. data/lib/nokogiri/xml/node_set.rb +202 -40
  181. data/lib/nokogiri/xml/parse_options.rb +93 -0
  182. data/lib/nokogiri/xml/pp.rb +2 -0
  183. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  184. data/lib/nokogiri/xml/pp/node.rb +56 -0
  185. data/lib/nokogiri/xml/processing_instruction.rb +2 -0
  186. data/lib/nokogiri/xml/reader.rb +93 -8
  187. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  188. data/lib/nokogiri/xml/sax.rb +1 -7
  189. data/lib/nokogiri/xml/sax/document.rb +107 -2
  190. data/lib/nokogiri/xml/sax/parser.rb +57 -7
  191. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  192. data/lib/nokogiri/xml/sax/push_parser.rb +13 -1
  193. data/lib/nokogiri/xml/schema.rb +63 -0
  194. data/lib/nokogiri/xml/syntax_error.rb +25 -1
  195. data/lib/nokogiri/xml/text.rb +4 -1
  196. data/lib/nokogiri/xml/xpath.rb +1 -1
  197. data/lib/nokogiri/xml/xpath/syntax_error.rb +3 -0
  198. data/lib/nokogiri/xml/xpath_context.rb +2 -0
  199. data/lib/nokogiri/xslt.rb +26 -2
  200. data/lib/nokogiri/xslt/stylesheet.rb +19 -0
  201. data/lib/xsd/xmlparser/nokogiri.rb +45 -9
  202. data/tasks/cross_compile.rb +173 -0
  203. data/tasks/test.rb +25 -69
  204. data/test/css/test_nthiness.rb +3 -4
  205. data/test/css/test_parser.rb +75 -20
  206. data/test/css/test_tokenizer.rb +23 -1
  207. data/test/css/test_xpath_visitor.rb +10 -1
  208. data/test/decorators/test_slop.rb +16 -0
  209. data/test/ffi/test_document.rb +35 -0
  210. data/test/files/2ch.html +108 -0
  211. data/test/files/address_book.rlx +12 -0
  212. data/test/files/address_book.xml +10 -0
  213. data/test/files/bar/bar.xsd +4 -0
  214. data/test/files/encoding.html +82 -0
  215. data/test/files/encoding.xhtml +84 -0
  216. data/test/files/foo/foo.xsd +4 -0
  217. data/test/files/po.xml +32 -0
  218. data/test/files/po.xsd +66 -0
  219. data/test/files/shift_jis.html +10 -0
  220. data/test/files/shift_jis.xml +5 -0
  221. data/test/files/snuggles.xml +3 -0
  222. data/test/files/staff.dtd +10 -0
  223. data/test/files/valid_bar.xml +2 -0
  224. data/test/helper.rb +101 -23
  225. data/test/html/sax/test_parser.rb +81 -2
  226. data/test/html/sax/test_parser_context.rb +48 -0
  227. data/test/html/test_builder.rb +39 -8
  228. data/test/html/test_document.rb +186 -23
  229. data/test/html/test_document_encoding.rb +78 -1
  230. data/test/html/test_document_fragment.rb +253 -0
  231. data/test/html/test_element_description.rb +98 -0
  232. data/test/html/test_named_characters.rb +1 -1
  233. data/test/html/test_node.rb +124 -36
  234. data/test/html/test_node_encoding.rb +27 -0
  235. data/test/test_convert_xpath.rb +1 -52
  236. data/test/test_css_cache.rb +2 -13
  237. data/test/test_encoding_handler.rb +46 -0
  238. data/test/test_memory_leak.rb +88 -19
  239. data/test/test_nokogiri.rb +38 -5
  240. data/test/test_reader.rb +188 -6
  241. data/test/test_soap4r_sax.rb +52 -0
  242. data/test/test_xslt_transforms.rb +183 -83
  243. data/test/xml/node/test_save_options.rb +1 -1
  244. data/test/xml/node/test_subclass.rb +44 -0
  245. data/test/xml/sax/test_parser.rb +175 -4
  246. data/test/xml/sax/test_parser_context.rb +113 -0
  247. data/test/xml/sax/test_push_parser.rb +90 -2
  248. data/test/xml/test_attr.rb +35 -1
  249. data/test/xml/test_attribute_decl.rb +82 -0
  250. data/test/xml/test_builder.rb +186 -1
  251. data/test/xml/test_cdata.rb +32 -1
  252. data/test/xml/test_comment.rb +13 -1
  253. data/test/xml/test_document.rb +415 -43
  254. data/test/xml/test_document_encoding.rb +1 -1
  255. data/test/xml/test_document_fragment.rb +173 -5
  256. data/test/xml/test_dtd.rb +61 -6
  257. data/test/xml/test_dtd_encoding.rb +3 -1
  258. data/test/xml/test_element_content.rb +56 -0
  259. data/test/xml/test_element_decl.rb +73 -0
  260. data/test/xml/test_entity_decl.rb +120 -0
  261. data/test/xml/test_entity_reference.rb +5 -1
  262. data/test/xml/test_namespace.rb +68 -0
  263. data/test/xml/test_node.rb +546 -201
  264. data/test/xml/test_node_attributes.rb +34 -0
  265. data/test/xml/test_node_encoding.rb +33 -3
  266. data/test/xml/test_node_reparenting.rb +321 -0
  267. data/test/xml/test_node_set.rb +538 -2
  268. data/test/xml/test_parse_options.rb +52 -0
  269. data/test/xml/test_processing_instruction.rb +6 -1
  270. data/test/xml/test_reader_encoding.rb +1 -1
  271. data/test/xml/test_relax_ng.rb +60 -0
  272. data/test/xml/test_schema.rb +94 -0
  273. data/test/xml/test_syntax_error.rb +12 -0
  274. data/test/xml/test_text.rb +35 -1
  275. data/test/xml/test_unparented_node.rb +5 -5
  276. data/test/xml/test_xpath.rb +142 -11
  277. data/test/xslt/test_custom_functions.rb +94 -0
  278. metadata +328 -92
  279. data/ext/nokogiri/html_sax_parser.c +0 -57
  280. data/ext/nokogiri/html_sax_parser.h +0 -11
  281. data/ext/nokogiri/iconv.dll +0 -0
  282. data/ext/nokogiri/libexslt.dll +0 -0
  283. data/ext/nokogiri/libxml2.dll +0 -0
  284. data/ext/nokogiri/libxslt.dll +0 -0
  285. data/ext/nokogiri/native.so +0 -0
  286. data/ext/nokogiri/xml_xpath.c +0 -53
  287. data/ext/nokogiri/xml_xpath.h +0 -11
  288. data/ext/nokogiri/zlib1.dll +0 -0
  289. data/lib/action-nokogiri.rb +0 -30
  290. data/lib/nokogiri/css/generated_parser.rb +0 -713
  291. data/lib/nokogiri/css/generated_tokenizer.rb +0 -144
  292. data/lib/nokogiri/decorators.rb +0 -2
  293. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  294. data/lib/nokogiri/decorators/hpricot/node.rb +0 -56
  295. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -54
  296. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -28
  297. data/lib/nokogiri/hpricot.rb +0 -51
  298. data/lib/nokogiri/xml/comment.rb +0 -6
  299. data/lib/nokogiri/xml/element.rb +0 -6
  300. data/lib/nokogiri/xml/entity_declaration.rb +0 -9
  301. data/lib/nokogiri/xml/fragment_handler.rb +0 -34
  302. data/test/hpricot/files/basic.xhtml +0 -17
  303. data/test/hpricot/files/boingboing.html +0 -2266
  304. data/test/hpricot/files/cy0.html +0 -3653
  305. data/test/hpricot/files/immob.html +0 -400
  306. data/test/hpricot/files/pace_application.html +0 -1320
  307. data/test/hpricot/files/tenderlove.html +0 -16
  308. data/test/hpricot/files/uswebgen.html +0 -220
  309. data/test/hpricot/files/utf8.html +0 -1054
  310. data/test/hpricot/files/week9.html +0 -1723
  311. data/test/hpricot/files/why.xml +0 -19
  312. data/test/hpricot/load_files.rb +0 -11
  313. data/test/hpricot/test_alter.rb +0 -68
  314. data/test/hpricot/test_builder.rb +0 -20
  315. data/test/hpricot/test_parser.rb +0 -426
  316. data/test/hpricot/test_paths.rb +0 -15
  317. data/test/hpricot/test_preserved.rb +0 -77
  318. data/test/hpricot/test_xml.rb +0 -30
  319. data/test/test_gc.rb +0 -15
@@ -1,10 +1,24 @@
1
1
  # -*- coding: utf-8 -*-
2
- require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+ require "helper"
3
3
 
4
4
  module Nokogiri
5
5
  module HTML
6
6
  if RUBY_VERSION =~ /^1\.9/
7
7
  class TestDocumentEncoding < Nokogiri::TestCase
8
+ def test_encoding
9
+ doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
10
+
11
+ hello = "こんにちは"
12
+
13
+ assert_match doc.encoding, doc.to_html
14
+ assert_match hello.encode('Shift_JIS'), doc.to_html
15
+ assert_equal 'Shift_JIS', doc.to_html.encoding.name
16
+
17
+ assert_match hello, doc.to_html(:encoding => 'UTF-8')
18
+ assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8')
19
+ assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
20
+ end
21
+
8
22
  def test_default_to_encoding_from_string
9
23
  bad_charset = <<-eohtml
10
24
  <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
@@ -24,6 +38,23 @@ module Nokogiri
24
38
  assert_equal bad_charset.encoding.name, doc.encoding
25
39
  end
26
40
 
41
+ def test_encoding_non_utf8
42
+ orig = '日本語が上手です'
43
+ bin = Encoding::ASCII_8BIT
44
+ [Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc|
45
+ html = <<-eohtml.encode(enc)
46
+ <html>
47
+ <meta http-equiv="Content-Type" content="text/html; charset=#{enc.name}">
48
+ <title xml:lang="ja">#{orig}</title></html>
49
+ eohtml
50
+ text = Nokogiri::HTML.parse(html).at('title').inner_text
51
+ assert_equal(
52
+ orig.encode(enc).force_encoding(bin),
53
+ text.encode(enc).force_encoding(bin)
54
+ )
55
+ end
56
+ end
57
+
27
58
  def test_encoding_with_a_bad_name
28
59
  bad_charset = <<-eohtml
29
60
  <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
@@ -42,5 +73,51 @@ module Nokogiri
42
73
  end
43
74
  end
44
75
  end
76
+
77
+ class TestDocumentEncodingDetection < Nokogiri::TestCase
78
+ if IO.respond_to?(:binread)
79
+ def binread(file)
80
+ IO.binread(file)
81
+ end
82
+ else
83
+ def binread(file)
84
+ IO.read(file)
85
+ end
86
+ end
87
+
88
+ def binopen(file)
89
+ File.open(file, 'rb')
90
+ end
91
+
92
+ def test_document_xhtml_enc
93
+ [ENCODING_XHTML_FILE, ENCODING_HTML_FILE].each { |file|
94
+ doc_from_string_enc = Nokogiri::HTML(binread(file), nil, 'Shift_JIS')
95
+ ary_from_string_enc = doc_from_string_enc.xpath('//p/text()').map { |text| text.text }
96
+
97
+ doc_from_string = Nokogiri::HTML(binread(file))
98
+ ary_from_string = doc_from_string.xpath('//p/text()').map { |text| text.text }
99
+
100
+ doc_from_file_enc = Nokogiri::HTML(binopen(file), nil, 'Shift_JIS')
101
+ ary_from_file_enc = doc_from_file_enc.xpath('//p/text()').map { |text| text.text }
102
+
103
+ doc_from_file = Nokogiri::HTML(binopen(file))
104
+ ary_from_file = doc_from_file.xpath('//p/text()').map { |text| text.text }
105
+
106
+ title = 'たこ焼き仮面'
107
+
108
+ assert_equal(title, doc_from_string_enc.at('//title/text()').text)
109
+ assert_equal(title, doc_from_string.at('//title/text()').text)
110
+ assert_equal(title, doc_from_file_enc.at('//title/text()').text)
111
+ assert_equal(title, doc_from_file.at('//title/text()').text)
112
+
113
+ evil = (0..72).map { |i| '超' * i + '悪い事を構想中。' }
114
+
115
+ assert_equal(evil, ary_from_string_enc)
116
+ assert_equal(evil, ary_from_string)
117
+ assert_equal(evil, ary_from_file_enc)
118
+ assert_equal(evil, ary_from_file)
119
+ }
120
+ end
121
+ end
45
122
  end
46
123
  end
@@ -0,0 +1,253 @@
1
+ # -*- coding: utf-8 -*-
2
+ require "helper"
3
+
4
+ module Nokogiri
5
+ module HTML
6
+ class TestDocumentFragment < Nokogiri::TestCase
7
+ def setup
8
+ super
9
+ @html = Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE)
10
+ end
11
+
12
+ if RUBY_VERSION >= '1.9'
13
+ def test_inspect_encoding
14
+ fragment = "<div>こんにちは!</div>".encode('EUC-JP')
15
+ f = Nokogiri::HTML::DocumentFragment.parse fragment
16
+ assert_equal "こんにちは!", f.content
17
+ end
18
+
19
+ def test_html_parse_encoding
20
+ fragment = "<div>こんにちは!</div>".encode 'EUC-JP'
21
+ f = Nokogiri::HTML.fragment fragment
22
+ assert_equal 'EUC-JP', f.document.encoding
23
+ assert_equal "こんにちは!", f.content
24
+ end
25
+ end
26
+
27
+ def test_parse_encoding
28
+ fragment = "<div>hello world</div>"
29
+ f = Nokogiri::HTML::DocumentFragment.parse fragment, 'ISO-8859-1'
30
+ assert_equal 'ISO-8859-1', f.document.encoding
31
+ assert_equal "hello world", f.content
32
+ end
33
+
34
+ def test_html_parse_with_encoding
35
+ fragment = "<div>hello world</div>"
36
+ f = Nokogiri::HTML.fragment fragment, 'ISO-8859-1'
37
+ assert_equal 'ISO-8859-1', f.document.encoding
38
+ assert_equal "hello world", f.content
39
+ end
40
+
41
+ def test_parse_in_context
42
+ assert_equal('<br>', @html.root.parse('<br />').to_s)
43
+ end
44
+
45
+ def test_inner_html=
46
+ fragment = Nokogiri::HTML.fragment '<hr />'
47
+
48
+ fragment.inner_html = "hello"
49
+ assert_equal 'hello', fragment.inner_html
50
+ end
51
+
52
+ def test_ancestors_search
53
+ html = %q{
54
+ <div>
55
+ <ul>
56
+ <li>foo</li>
57
+ </ul>
58
+ </div>
59
+ }
60
+ fragment = Nokogiri::HTML.fragment html
61
+ li = fragment.at('li')
62
+ assert li.matches?('li')
63
+ end
64
+
65
+ def test_fun_encoding
66
+ string = %Q(<body>こんにちは</body>)
67
+ html = Nokogiri::HTML::DocumentFragment.parse(
68
+ string
69
+ ).to_html(:encoding => 'UTF-8')
70
+ assert_equal string, html
71
+ end
72
+
73
+ def test_new
74
+ assert Nokogiri::HTML::DocumentFragment.new(@html)
75
+ end
76
+
77
+ def test_body_fragment_should_contain_body
78
+ fragment = Nokogiri::HTML::DocumentFragment.parse(" <body><div>foo</div></body>")
79
+ assert_match(/^<body>/, fragment.to_s)
80
+ end
81
+
82
+ def test_nonbody_fragment_should_not_contain_body
83
+ fragment = Nokogiri::HTML::DocumentFragment.parse("<div>foo</div>")
84
+ assert_match(/^<div>/, fragment.to_s)
85
+ end
86
+
87
+ def test_fragment_should_have_document
88
+ fragment = Nokogiri::HTML::DocumentFragment.new(@html)
89
+ assert_equal @html, fragment.document
90
+ end
91
+
92
+ def test_empty_fragment_should_be_searchable_by_css
93
+ fragment = Nokogiri::HTML.fragment("")
94
+ assert_equal 0, fragment.css("a").size
95
+ end
96
+
97
+ def test_empty_fragment_should_be_searchable
98
+ fragment = Nokogiri::HTML.fragment("")
99
+ assert_equal 0, fragment.search("//a").size
100
+ end
101
+
102
+ def test_name
103
+ fragment = Nokogiri::HTML::DocumentFragment.new(@html)
104
+ assert_equal '#document-fragment', fragment.name
105
+ end
106
+
107
+ def test_static_method
108
+ fragment = Nokogiri::HTML::DocumentFragment.parse("<div>a</div>")
109
+ assert_instance_of Nokogiri::HTML::DocumentFragment, fragment
110
+ end
111
+
112
+ def test_many_fragments
113
+ 100.times { Nokogiri::HTML::DocumentFragment.new(@html) }
114
+ end
115
+
116
+ def test_subclass
117
+ klass = Class.new(Nokogiri::HTML::DocumentFragment)
118
+ fragment = klass.new(@html, "<div>a</div>")
119
+ assert_instance_of klass, fragment
120
+ end
121
+
122
+ def test_subclass_parse
123
+ klass = Class.new(Nokogiri::HTML::DocumentFragment)
124
+ doc = klass.parse("<div>a</div>")
125
+ assert_instance_of klass, doc
126
+ end
127
+
128
+ def test_html_fragment
129
+ fragment = Nokogiri::HTML.fragment("<div>a</div>")
130
+ assert_equal "<div>a</div>", fragment.to_s
131
+ end
132
+
133
+ def test_html_fragment_has_outer_text
134
+ doc = "a<div>b</div>c"
135
+ fragment = Nokogiri::HTML::Document.new.fragment(doc)
136
+ if Nokogiri::VERSION_INFO['libxml']['loaded'] <= "2.6.16"
137
+ assert_equal "a<div>b</div><p>c</p>", fragment.to_s
138
+ else
139
+ assert_equal "a<div>b</div>c", fragment.to_s
140
+ end
141
+ end
142
+
143
+ def test_html_fragment_case_insensitivity
144
+ doc = "<Div>b</Div>"
145
+ fragment = Nokogiri::HTML::Document.new.fragment(doc)
146
+ assert_equal "<div>b</div>", fragment.to_s
147
+ end
148
+
149
+ def test_html_fragment_with_leading_whitespace
150
+ doc = " <div>b</div> "
151
+ fragment = Nokogiri::HTML::Document.new.fragment(doc)
152
+ assert_match %r% <div>b</div> *%, fragment.to_s
153
+ end
154
+
155
+ def test_html_fragment_with_leading_whitespace_and_newline
156
+ doc = " \n<div>b</div> "
157
+ fragment = Nokogiri::HTML::Document.new.fragment(doc)
158
+ assert_match %r% \n<div>b</div> *%, fragment.to_s
159
+ end
160
+
161
+ def test_html_fragment_with_leading_text_and_newline
162
+ fragment = HTML::Document.new.fragment("First line\nSecond line<br>Broken line")
163
+ assert_equal fragment.to_s, "First line\nSecond line<br>Broken line"
164
+ end
165
+
166
+ def test_html_fragment_with_leading_whitespace_and_text_and_newline
167
+ fragment = HTML::Document.new.fragment(" First line\nSecond line<br>Broken line")
168
+ assert_equal " First line\nSecond line<br>Broken line", fragment.to_s
169
+ end
170
+
171
+ def test_html_fragment_with_leading_entity
172
+ failed = "&quot;test<br/>test&quot;"
173
+ fragment = Nokogiri::HTML::DocumentFragment.parse(failed)
174
+ assert_equal '"test<br>test"', fragment.to_html
175
+ end
176
+
177
+ def test_to_s
178
+ doc = "<span>foo<br></span><span>bar</span>"
179
+ fragment = Nokogiri::HTML::Document.new.fragment(doc)
180
+ assert_equal "<span>foo<br></span><span>bar</span>", fragment.to_s
181
+ end
182
+
183
+ def test_to_html
184
+ doc = "<span>foo<br></span><span>bar</span>"
185
+ fragment = Nokogiri::HTML::Document.new.fragment(doc)
186
+ assert_equal "<span>foo<br></span><span>bar</span>", fragment.to_html
187
+ end
188
+
189
+ def test_to_xhtml
190
+ doc = "<span>foo<br></span><span>bar</span>"
191
+ fragment = Nokogiri::HTML::Document.new.fragment(doc)
192
+ if Nokogiri::VERSION_INFO['libxml']['loaded'] >= "2.7.0"
193
+ assert_equal "<span>foo<br /></span><span>bar</span>", fragment.to_xhtml
194
+ else
195
+ assert_equal "<span>foo<br></span><span>bar</span>", fragment.to_xhtml
196
+ end
197
+ end
198
+
199
+ def test_to_xml
200
+ doc = "<span>foo<br></span><span>bar</span>"
201
+ fragment = Nokogiri::HTML::Document.new.fragment(doc)
202
+ assert_equal "<span>foo<br/></span><span>bar</span>", fragment.to_xml
203
+ end
204
+
205
+ def test_fragment_script_tag_with_cdata
206
+ doc = HTML::Document.new
207
+ fragment = doc.fragment("<script>var foo = 'bar';</script>")
208
+ assert_equal("<script>var foo = 'bar';</script>",
209
+ fragment.to_s)
210
+ end
211
+
212
+ def test_fragment_with_comment
213
+ doc = HTML::Document.new
214
+ fragment = doc.fragment("<p>hello<!-- your ad here --></p>")
215
+ assert_equal("<p>hello<!-- your ad here --></p>",
216
+ fragment.to_s)
217
+ end
218
+
219
+ def test_malformed_fragment_is_corrected
220
+ fragment = HTML::DocumentFragment.parse("<div </div>")
221
+ assert_equal "<div></div>", fragment.to_s
222
+ end
223
+
224
+ def test_unclosed_script_tag
225
+ # see GH#315
226
+ fragment = HTML::DocumentFragment.parse("foo <script>bar")
227
+ assert_equal "foo <script>bar</script>", fragment.to_html
228
+ end
229
+
230
+ def test_error_propagation_on_fragment_parse
231
+ frag = Nokogiri::HTML::DocumentFragment.parse "<hello>oh, hello there.</hello>"
232
+ assert frag.errors.any?{|err| err.to_s =~ /Tag hello invalid/}, "errors should be copied to the fragment"
233
+ end
234
+
235
+ def test_error_propagation_on_fragment_parse_in_node_context
236
+ doc = Nokogiri::HTML::Document.parse "<html><body><div></div></body></html>"
237
+ context_node = doc.at_css "div"
238
+ frag = Nokogiri::HTML::DocumentFragment.new doc, "<hello>oh, hello there.</hello>", context_node
239
+ assert frag.errors.any?{|err| err.to_s =~ /Tag hello invalid/}, "errors should be on the context node's document"
240
+ end
241
+
242
+ def test_error_propagation_on_fragment_parse_in_node_context_should_not_include_preexisting_errors
243
+ doc = Nokogiri::HTML::Document.parse "<html><body><div></div><jimmy></jimmy></body></html>"
244
+ assert doc.errors.any?{|err| err.to_s =~ /jimmy/}, "assert on setup"
245
+
246
+ context_node = doc.at_css "div"
247
+ frag = Nokogiri::HTML::DocumentFragment.new doc, "<hello>oh, hello there.</hello>", context_node
248
+ assert frag.errors.any?{|err| err.to_s =~ /Tag hello invalid/}, "errors should be on the context node's document"
249
+ assert frag.errors.none?{|err| err.to_s =~ /jimmy/}, "errors should not include pre-existing document errors"
250
+ end
251
+ end
252
+ end
253
+ end
@@ -0,0 +1,98 @@
1
+ require "helper"
2
+
3
+ module Nokogiri
4
+ module HTML
5
+ class TestElementDescription < Nokogiri::TestCase
6
+ def test_fetch_nonexistent
7
+ assert_nil ElementDescription['foo']
8
+ end
9
+
10
+ def test_fetch_element_description
11
+ assert desc = ElementDescription['a']
12
+ assert_instance_of ElementDescription, desc
13
+ end
14
+
15
+ def test_name
16
+ assert_equal 'a', ElementDescription['a'].name
17
+ end
18
+
19
+ def test_implied_start_tag?
20
+ assert !ElementDescription['a'].implied_start_tag?
21
+ end
22
+
23
+ def test_implied_end_tag?
24
+ assert !ElementDescription['a'].implied_end_tag?
25
+ assert ElementDescription['p'].implied_end_tag?
26
+ end
27
+
28
+ def test_save_end_tag?
29
+ assert !ElementDescription['a'].save_end_tag?
30
+ assert ElementDescription['br'].save_end_tag?
31
+ end
32
+
33
+ def test_empty?
34
+ assert ElementDescription['br'].empty?
35
+ assert !ElementDescription['a'].empty?
36
+ end
37
+
38
+ def test_deprecated?
39
+ assert ElementDescription['applet'].deprecated?
40
+ assert !ElementDescription['br'].deprecated?
41
+ end
42
+
43
+ def test_inline?
44
+ assert ElementDescription['a'].inline?
45
+ assert !ElementDescription['div'].inline?
46
+ end
47
+
48
+ def test_block?
49
+ element = ElementDescription['a']
50
+ assert_equal(!element.inline?, element.block?)
51
+ end
52
+
53
+ def test_description
54
+ assert ElementDescription['a'].description
55
+ end
56
+
57
+ def test_subelements
58
+ sub_elements = ElementDescription['body'].sub_elements
59
+ if Nokogiri::LIBXML_VERSION >= '2.7.7'
60
+ assert_equal 65, sub_elements.length
61
+ else
62
+ assert_equal 61, sub_elements.length
63
+ end
64
+ end
65
+
66
+ def test_default_sub_element
67
+ assert_equal 'div', ElementDescription['body'].default_sub_element
68
+ end
69
+
70
+ def test_optional_attributes
71
+ attrs = ElementDescription['table'].optional_attributes
72
+ assert attrs
73
+ end
74
+
75
+ def test_deprecated_attributes
76
+ attrs = ElementDescription['table'].deprecated_attributes
77
+ assert attrs
78
+ assert_equal 2, attrs.length
79
+ end
80
+
81
+ def test_required_attributes
82
+ attrs = ElementDescription['table'].required_attributes
83
+ assert attrs
84
+ assert_equal 0, attrs.length
85
+ end
86
+
87
+ def test_inspect
88
+ desc = ElementDescription['input']
89
+ assert_match desc.name, desc.inspect
90
+ end
91
+
92
+ def test_to_s
93
+ desc = ElementDescription['input']
94
+ assert_match desc.name, desc.to_s
95
+ end
96
+ end
97
+ end
98
+ end