docdiff 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. data/.gitignore +6 -0
  2. data/.travis.yml +7 -0
  3. data/Gemfile +17 -0
  4. data/Guardfile +8 -0
  5. data/Makefile +108 -0
  6. data/Rakefile +17 -0
  7. data/bin/docdiff +179 -0
  8. data/devutil/JIS0208.TXT +6952 -0
  9. data/devutil/char_by_charclass.rb +23 -0
  10. data/devutil/charclass_by_char.rb +21 -0
  11. data/devutil/jis0208.rb +343 -0
  12. data/devutil/testjis0208.rb +38 -0
  13. data/docdiff.conf.example +22 -0
  14. data/docdiff.gemspec +23 -0
  15. data/docdiffwebui.cgi +176 -0
  16. data/docdiffwebui.html +123 -0
  17. data/img/docdiff-screenshot-format-html-digest-firefox.png +0 -0
  18. data/img/docdiff-screenshot-format-html-firefox.png +0 -0
  19. data/img/docdiff-screenshot-format-tty-cmdexe-en.png +0 -0
  20. data/img/docdiff-screenshot-format-tty-cmdexe-ja.png +0 -0
  21. data/img/docdiff-screenshot-format-tty-rxvtunicode-en.png +0 -0
  22. data/img/docdiff-screenshot-format-tty-rxvtunicode-ja.png +0 -0
  23. data/img/docdiff-screenshot-format-tty-xterm-en.png +0 -0
  24. data/img/docdiff-screenshot-format-tty-xterm-ja.png +0 -0
  25. data/img/docdiff-screenshot-resolution-linewordchar-xterm.png +0 -0
  26. data/index.html +181 -0
  27. data/langfilter.rb +14 -0
  28. data/lib/doc_diff.rb +170 -0
  29. data/lib/docdiff.rb +7 -0
  30. data/lib/docdiff/charstring.rb +579 -0
  31. data/lib/docdiff/diff.rb +217 -0
  32. data/lib/docdiff/diff/contours.rb +382 -0
  33. data/lib/docdiff/diff/editscript.rb +148 -0
  34. data/lib/docdiff/diff/rcsdiff.rb +107 -0
  35. data/lib/docdiff/diff/shortestpath.rb +93 -0
  36. data/lib/docdiff/diff/speculative.rb +40 -0
  37. data/lib/docdiff/diff/subsequence.rb +39 -0
  38. data/lib/docdiff/diff/unidiff.rb +124 -0
  39. data/lib/docdiff/difference.rb +92 -0
  40. data/lib/docdiff/document.rb +127 -0
  41. data/lib/docdiff/encoding/en_ascii.rb +97 -0
  42. data/lib/docdiff/encoding/ja_eucjp.rb +269 -0
  43. data/lib/docdiff/encoding/ja_sjis.rb +260 -0
  44. data/lib/docdiff/encoding/ja_utf8.rb +6974 -0
  45. data/lib/docdiff/version.rb +3 -0
  46. data/lib/docdiff/view.rb +476 -0
  47. data/lib/viewdiff.rb +375 -0
  48. data/readme.html +713 -0
  49. data/sample/01.en.ascii.cr +1 -0
  50. data/sample/01.en.ascii.crlf +2 -0
  51. data/sample/01.en.ascii.lf +2 -0
  52. data/sample/01.ja.eucjp.lf +2 -0
  53. data/sample/01.ja.sjis.cr +1 -0
  54. data/sample/01.ja.sjis.crlf +2 -0
  55. data/sample/01.ja.utf8.crlf +2 -0
  56. data/sample/02.en.ascii.cr +1 -0
  57. data/sample/02.en.ascii.crlf +2 -0
  58. data/sample/02.en.ascii.lf +2 -0
  59. data/sample/02.ja.eucjp.lf +2 -0
  60. data/sample/02.ja.sjis.cr +1 -0
  61. data/sample/02.ja.sjis.crlf +2 -0
  62. data/sample/02.ja.utf8.crlf +2 -0
  63. data/sample/humpty_dumpty01.ascii.lf +4 -0
  64. data/sample/humpty_dumpty02.ascii.lf +4 -0
  65. data/test/charstring_test.rb +1008 -0
  66. data/test/diff_test.rb +36 -0
  67. data/test/difference_test.rb +64 -0
  68. data/test/docdiff_test.rb +193 -0
  69. data/test/document_test.rb +626 -0
  70. data/test/test_helper.rb +7 -0
  71. data/test/view_test.rb +570 -0
  72. data/test/viewdiff_test.rb +908 -0
  73. metadata +129 -0
@@ -0,0 +1 @@
1
+ Hello, my name is Watanabe.
@@ -0,0 +1,2 @@
1
+ Hello, my name is Watanabe.
2
+ I am just another Ruby porter.
@@ -0,0 +1,2 @@
1
+ Hello, my name is Watanabe.
2
+ I am just another Ruby porter.
@@ -0,0 +1,2 @@
1
+ こんにちは、私の名前はわたなべです。
2
+ 私はJust Another Ruby Porterです。
@@ -0,0 +1 @@
1
+ こんにちは、私の名前はわたなべです。
@@ -0,0 +1,2 @@
1
+ こんにちは、私の名前はわたなべです。
2
+ 私はJust Another Ruby Porterです。
@@ -0,0 +1,2 @@
1
+ こんにちは、私の名前はわたなべです。
2
+ 私はJust Another Ruby Porterです。
@@ -0,0 +1 @@
1
+ Hello, my name is matz.
@@ -0,0 +1,2 @@
1
+ Hello, my name is matz.
2
+ It's me who has created Ruby. I am a Ruby hacker.
@@ -0,0 +1,2 @@
1
+ Hello, my name is matz.
2
+ It's me who has created Ruby. I am a Ruby hacker.
@@ -0,0 +1,2 @@
1
+ こんばんは、私の名前はまつもとです。
2
+ Rubyを作ったのは私です。私はRuby Hackerです。
@@ -0,0 +1 @@
1
+ こんばんは、私の名前はまつもとです。
@@ -0,0 +1,2 @@
1
+ こんばんは、私の名前はまつもとです。
2
+ Rubyを作ったのは私です。私はRuby Hackerです。
@@ -0,0 +1,2 @@
1
+ こんばんは、私の名前はまつもとです。
2
+ Rubyを作ったのは私です。私はRuby Hackerです。
@@ -0,0 +1,4 @@
1
+ Humpty Dumpty sat on a wall.
2
+ Humpty Dumpty had a great fall.
3
+ All the king's horses and all the king's men
4
+ Couldn't put Humpty together again!
@@ -0,0 +1,4 @@
1
+ Humpty Dumpty was sitting on a Humpty Dumpty wall.
2
+ Humpty had a great fall by mistake.
3
+ All the king's men and all the king's horses
4
+ Couldn't put Humpty Dumpty together!
@@ -0,0 +1,1008 @@
1
+ #!/usr/bin/ruby
2
+ # -*- coding: euc-jp; -*-
3
+ require 'test/unit'
4
+ require 'docdiff/charstring'
5
+ require 'nkf'
6
+
7
+ class TC_CharString < Test::Unit::TestCase
8
+
9
+ def setup()
10
+ #
11
+ end
12
+
13
+ # test encoding module registration
14
+ def test_encoding_ascii()
15
+ str = "foo".extend CharString
16
+ str.encoding = "US-ASCII"
17
+ expected = CharString::ASCII
18
+ assert_equal(expected, CharString::Encodings[str.encoding])
19
+ end
20
+ def test_encoding_eucjp()
21
+ str = "foo".extend CharString
22
+ str.encoding = "EUC-JP"
23
+ expected = CharString::EUC_JP
24
+ assert_equal(expected, CharString::Encodings[str.encoding])
25
+ end
26
+ def test_encoding_sjis()
27
+ str = "foo".extend CharString
28
+ str.encoding = "Shift_JIS"
29
+ expected = CharString::Shift_JIS
30
+ assert_equal(expected, CharString::Encodings[str.encoding])
31
+ end
32
+ def test_encoding_utf8()
33
+ str = "foo".extend CharString
34
+ str.encoding = "UTF-8"
35
+ expected = CharString::UTF8
36
+ assert_equal(expected, CharString::Encodings[str.encoding])
37
+ end
38
+
39
+ # test eol module registration
40
+ def test_eol_cr()
41
+ str = "foo".extend CharString
42
+ str.eol = "CR"
43
+ expected = CharString::CR
44
+ assert_equal(expected, CharString::EOLChars[str.eol])
45
+ end
46
+ def test_eol_lf()
47
+ str = "foo".extend CharString
48
+ str.eol = "LF"
49
+ expected = CharString::LF
50
+ assert_equal(expected, CharString::EOLChars[str.eol])
51
+ end
52
+ def test_eol_crlf()
53
+ str = "foo".extend CharString
54
+ str.eol = "CRLF"
55
+ expected = CharString::CRLF
56
+ assert_equal(expected, CharString::EOLChars[str.eol])
57
+ end
58
+
59
+ # test eol eol_char method
60
+ def test_eol_char_cr()
61
+ str = "foo\rbar\r".extend CharString
62
+ str.eol = "CR"
63
+ expected = "\r"
64
+ assert_equal(expected, str.eol_char)
65
+ end
66
+ def test_eol_char_lf()
67
+ str = "foo\nbar\n".extend CharString
68
+ str.eol = "LF"
69
+ expected = "\n"
70
+ assert_equal(expected, str.eol_char)
71
+ end
72
+ def test_eol_char_crlf()
73
+ str = "foo\r\nbar\r\n".extend CharString
74
+ str.eol = "CRLF"
75
+ expected = "\r\n"
76
+ assert_equal(expected, str.eol_char)
77
+ end
78
+ def test_eol_char_none()
79
+ str = "foobar".extend CharString
80
+ expected = nil
81
+ assert_equal(expected, str.eol_char)
82
+ end
83
+ def test_eol_char_none_for_0length_string()
84
+ str = "".extend CharString
85
+ expected = nil
86
+ assert_equal(expected, str.eol_char)
87
+ end
88
+ def test_eol_char_none_eucjp()
89
+ str = NKF.nkf("-e", "日本語a b").extend CharString
90
+ expected = nil
91
+ assert_equal(expected, str.eol_char)
92
+ end
93
+ def test_eol_char_none_sjis()
94
+ str = NKF.nkf("-s", "日本語a b").extend CharString
95
+ expected = nil
96
+ assert_equal(expected, str.eol_char)
97
+ end
98
+
99
+ # test eol split_to_line() method
100
+ def test_cr_split_to_line()
101
+ str = "foo\rbar\r".extend CharString
102
+ encoding, eol = "US-ASCII", "CR"
103
+ str.encoding, str.eol = encoding, eol
104
+ expected = ["foo\r", "bar\r"]
105
+ assert_equal(expected, str.split_to_line)
106
+ end
107
+ def test_cr_split_to_line_chomped_lastline()
108
+ str = "foo\rbar".extend CharString
109
+ str.encoding = "US-ASCII"
110
+ str.eol = "CR"
111
+ expected = ["foo\r", "bar"]
112
+ assert_equal(expected, str.split_to_line)
113
+ end
114
+ def test_cr_split_to_line_empty_line()
115
+ str = "foo\r\rbar\r".extend CharString
116
+ str.encoding = "US-ASCII"
117
+ str.eol = "CR"
118
+ expected = ["foo\r", "\r", "bar\r"]
119
+ assert_equal(expected, str.split_to_line)
120
+ end
121
+ def test_lf_split_to_line()
122
+ str = "foo\nbar\n".extend CharString
123
+ str.encoding = "US-ASCII"
124
+ str.eol = "LF"
125
+ expected = ["foo\n", "bar\n"]
126
+ assert_equal(expected, str.split_to_line)
127
+ end
128
+ def test_lf_split_to_line_chomped_lastline()
129
+ str = "foo\nbar".extend CharString
130
+ str.encoding = "US-ASCII"
131
+ str.eol = "LF"
132
+ expected = ["foo\n", "bar"]
133
+ assert_equal(expected, str.split_to_line)
134
+ end
135
+ def test_lf_split_to_line_empty_line()
136
+ str = "foo\n\nbar\n".extend CharString
137
+ str.encoding = "US-ASCII"
138
+ str.eol = "LF"
139
+ expected = ["foo\n", "\n", "bar\n"]
140
+ assert_equal(expected, str.split_to_line)
141
+ end
142
+ def test_crlf_split_to_line()
143
+ str = "foo\r\nbar\r\n".extend CharString
144
+ str.encoding = "US-ASCII"
145
+ str.eol = "CRLF"
146
+ expected = ["foo\r\n", "bar\r\n"]
147
+ assert_equal(expected, str.split_to_line)
148
+ end
149
+ def test_crlf_split_to_line_chomped_lastline()
150
+ str = "foo\r\nbar".extend CharString
151
+ str.encoding = "US-ASCII"
152
+ str.eol = "CRLF"
153
+ expected = ["foo\r\n", "bar"]
154
+ assert_equal(expected, str.split_to_line)
155
+ end
156
+ def test_crlf_split_to_line_empty_line()
157
+ str = "foo\r\n\r\nbar\r\n".extend CharString
158
+ str.encoding = "US-ASCII"
159
+ str.eol = "CRLF"
160
+ expected = ["foo\r\n", "\r\n", "bar\r\n"]
161
+ assert_equal(expected, str.split_to_line)
162
+ end
163
+
164
+ # test ASCII module
165
+ def test_ascii_split_to_word()
166
+ str = "foo bar".extend CharString
167
+ str.encoding = "US-ASCII"
168
+ expected = ["foo ", "bar"]
169
+ assert_equal(expected, str.split_to_word)
170
+ end
171
+ def test_ascii_split_to_word_withsymbol()
172
+ str = "foo (bar) baz-baz".extend CharString
173
+ str.encoding = "US-ASCII"
174
+ expected = ["foo ", "(bar) ", "baz-baz"]
175
+ assert_equal(expected, str.split_to_word)
176
+ end
177
+ def test_ascii_split_to_word_withquote()
178
+ str = "foo's 'foo' \"bar\" 'baz.'".extend CharString
179
+ str.encoding = "US-ASCII"
180
+ expected = ["foo's ", "'foo' ", "\"bar\" ", "'baz.'"]
181
+ assert_equal(expected, str.split_to_word)
182
+ end
183
+ def test_ascii_split_to_word_withlongspace()
184
+ str = " foo bar".extend CharString
185
+ str.encoding = "US-ASCII"
186
+ expected = [" ", "foo ", " ", "bar"]
187
+ assert_equal(expected, str.split_to_word)
188
+ end
189
+ def test_ascii_split_to_word_withdash()
190
+ str = "foo -- bar, baz - quux".extend CharString
191
+ str.encoding = "US-ASCII"
192
+ expected = ["foo ", "-- ", "bar, ", "baz ", "- ", "quux"]
193
+ assert_equal(expected, str.split_to_word)
194
+ end
195
+ def test_ascii_split_to_char()
196
+ str = "foo bar".extend CharString
197
+ str.encoding = "US-ASCII"
198
+ str.eol = "LF"
199
+ expected = ["f","o","o"," ","b","a","r"]
200
+ assert_equal(expected, str.split_to_char)
201
+ end
202
+ def test_ascii_split_to_char_with_eol_cr()
203
+ str = "foo bar\r".extend CharString
204
+ str.encoding = "US-ASCII"
205
+ str.eol = "CR"
206
+ expected = ["f","o","o"," ","b","a","r","\r"]
207
+ assert_equal(expected, str.split_to_char)
208
+ end
209
+ def test_ascii_split_to_char_with_eol_lf()
210
+ str = "foo bar\n".extend CharString
211
+ str.encoding = "US-ASCII"
212
+ str.eol = "LF"
213
+ expected = ["f","o","o"," ","b","a","r","\n"]
214
+ assert_equal(expected, str.split_to_char)
215
+ end
216
+ def test_ascii_split_to_char_with_eol_crlf()
217
+ str = "foo bar\r\n".extend CharString
218
+ str.encoding = "US-ASCII"
219
+ str.eol = "CRLF"
220
+ expected = ["f","o","o"," ","b","a","r","\r\n"]
221
+ assert_equal(expected, str.split_to_char)
222
+ end
223
+ def test_ascii_split_to_byte()
224
+ str = "foo bar\r\n".extend CharString
225
+ str.encoding = "US-ASCII"
226
+ str.eol = "CRLF"
227
+ expected = ["f","o","o"," ","b","a","r","\r","\n"]
228
+ assert_equal(expected, str.split_to_byte)
229
+ end
230
+ def test_ascii_count_byte()
231
+ str = "foo bar\r\n".extend CharString
232
+ str.encoding = "US-ASCII"
233
+ str.eol = "CRLF"
234
+ expected = 9
235
+ assert_equal(expected, str.count_byte)
236
+ end
237
+ def test_ascii_count_char()
238
+ str = "foo bar\r\nbaz quux\r\n".extend CharString
239
+ str.encoding = "US-ASCII"
240
+ str.eol = "CRLF"
241
+ expected = 17
242
+ assert_equal(expected, str.count_char)
243
+ end
244
+ def test_ascii_count_latin_graph_char()
245
+ str = "foo bar\r\nbaz quux\r\n".extend CharString
246
+ str.encoding = "US-ASCII"
247
+ str.eol = "CRLF"
248
+ expected = 13
249
+ assert_equal(expected, str.count_latin_graph_char)
250
+ end
251
+ def test_ascii_count_graph_char()
252
+ str = "foo bar\r\nbaz quux\r\n".extend CharString
253
+ str.encoding = "US-ASCII"
254
+ str.eol = "CRLF"
255
+ expected = 13
256
+ assert_equal(expected, str.count_graph_char)
257
+ end
258
+ def test_ascii_count_latin_blank_char()
259
+ str = "foo bar\r\nbaz\tquux\r\n".extend CharString
260
+ str.encoding = "US-ASCII"
261
+ str.eol = "CRLF"
262
+ expected = 2
263
+ assert_equal(expected, str.count_latin_blank_char)
264
+ end
265
+ def test_ascii_count_blank_char()
266
+ str = "foo bar\r\nbaz\tquux\r\n".extend CharString
267
+ str.encoding = "US-ASCII"
268
+ str.eol = "CRLF"
269
+ expected = 2
270
+ assert_equal(expected, str.count_blank_char)
271
+ end
272
+ def test_ascii_count_word()
273
+ str = "foo bar \r\nbaz quux\r\n".extend CharString
274
+ str.encoding = "US-ASCII"
275
+ str.eol = "CRLF"
276
+ expected = 6
277
+ assert_equal(expected, str.count_word)
278
+ end
279
+ def test_ascii_count_latin_word()
280
+ str = "foo bar \r\nbaz quux\r\n".extend CharString
281
+ str.encoding = "US-ASCII"
282
+ str.eol = "CRLF"
283
+ expected = 5 # " " is also counted as a word
284
+ assert_equal(expected, str.count_latin_word)
285
+ end
286
+ def test_ascii_count_latin_valid_word()
287
+ str = "1 foo \r\n%%% ()\r\n".extend CharString
288
+ str.encoding = "US-ASCII"
289
+ str.eol = "CRLF"
290
+ expected = 2
291
+ assert_equal(expected, str.count_latin_valid_word)
292
+ end
293
+ def test_ascii_count_line()
294
+ str = "foo\r\nbar".extend CharString
295
+ str.encoding = "US-ASCII"
296
+ str.eol = "CRLF"
297
+ expected = 2
298
+ assert_equal(expected, str.count_line)
299
+ end
300
+ def test_ascii_count_graph_line()
301
+ str = "foo\r\n ".extend CharString
302
+ str.encoding = "US-ASCII"
303
+ str.eol = "CRLF"
304
+ expected = 1
305
+ assert_equal(expected, str.count_graph_line)
306
+ end
307
+ def test_ascii_count_empty_line()
308
+ str = "foo\r\n \r\n\t\r\n\r\n".extend CharString
309
+ str.encoding = "US-ASCII"
310
+ str.eol = "CRLF"
311
+ expected = 1
312
+ assert_equal(expected, str.count_empty_line)
313
+ end
314
+ def test_ascii_count_blank_line()
315
+ str = "\r\n \r\n\t\r\n ".extend CharString
316
+ str.encoding = "US-ASCII"
317
+ str.eol = "CRLF"
318
+ expected = 3
319
+ assert_equal(expected, str.count_blank_line)
320
+ end
321
+
322
+ # test EUCJP module
323
+ def test_eucjp_split_to_word()
324
+ str = NKF.nkf("-e", "日本語の文字foo bar").extend CharString
325
+ str.encoding = "EUC-JP"
326
+ expected = ["日本語の","文字","foo ","bar"].collect{|c| NKF.nkf("-e", c)}
327
+ assert_equal(expected, str.split_to_word)
328
+ end
329
+ def test_eucjp_split_to_word_kanhira()
330
+ str = NKF.nkf("-e", "日本語の文字").extend CharString
331
+ str.encoding = "EUC-JP"
332
+ expected = ["日本語の", "文字"].collect{|c| NKF.nkf("-e", c)}
333
+ assert_equal(expected, str.split_to_word)
334
+ end
335
+ def test_eucjp_split_to_word_katahira()
336
+ str = NKF.nkf("-e", "カタカナの文字").extend CharString
337
+ str.encoding = "EUC-JP"
338
+ expected = ["カタカナの", "文字"].collect{|c| NKF.nkf("-e", c)}
339
+ assert_equal(expected, str.split_to_word)
340
+ end
341
+ def test_eucjp_split_to_word_kataonbiki()
342
+ str = NKF.nkf("-e", "ルビー色の石").extend CharString
343
+ str.encoding = "EUC-JP" #<= needed to pass the test
344
+ expected = ["ルビー", "色の", "石"].collect{|c| NKF.nkf("-e", c)}
345
+ assert_equal(expected, str.split_to_word)
346
+ end
347
+ def test_eucjp_split_to_word_hiraonbiki()
348
+ str = NKF.nkf("-e", "わールビーだ").extend CharString
349
+ str.encoding = "EUC-JP" #<= needed to pass the test
350
+ expected = ["わー", "ルビーだ"].collect{|c| NKF.nkf("-e", c)}
351
+ assert_equal(expected, str.split_to_word)
352
+ end
353
+ def test_eucjp_split_to_word_latinmix()
354
+ str = NKF.nkf("-e", "日本語とLatinの文字").extend CharString
355
+ str.encoding = "EUC-JP"
356
+ expected = ["日本語と", "Latin", "の", "文字"].collect{|c| NKF.nkf("-e", c)}
357
+ assert_equal(expected, str.split_to_word)
358
+ end
359
+ def test_eucjp_split_to_char()
360
+ str = NKF.nkf("-e", "日本語a b").extend CharString
361
+ str.encoding = "EUC-JP"
362
+ str.eol = "LF" #<= needed to pass the test
363
+ expected = ["日","本","語","a"," ","b"].collect{|c|NKF.nkf("-e",c)}
364
+ assert_equal(expected, str.split_to_char)
365
+ end
366
+ def test_eucjp_split_to_char_with_cr()
367
+ str = NKF.nkf("-e", "日本語a b\r").extend CharString
368
+ str.encoding = "EUC-JP"
369
+ str.eol = "CR"
370
+ expected = ["日","本","語","a"," ","b","\r"].collect{|c|NKF.nkf("-e",c)}
371
+ assert_equal(expected, str.split_to_char)
372
+ end
373
+ def test_eucjp_split_to_char_with_lf()
374
+ str = NKF.nkf("-e", "日本語a b\n").extend CharString
375
+ str.encoding = "EUC-JP"
376
+ str.eol = "LF"
377
+ expected = ["日","本","語","a"," ","b","\n"].collect{|c|NKF.nkf("-e",c)}
378
+ assert_equal(expected, str.split_to_char)
379
+ end
380
+ def test_eucjp_split_to_char_with_crlf()
381
+ str = NKF.nkf("-e", "日本語a b\r\n").extend CharString
382
+ str.encoding = "EUC-JP"
383
+ str.eol = "CRLF"
384
+ expected = ["日","本","語","a"," ","b","\r\n"].collect{|c|NKF.nkf("-e",c)}
385
+ assert_equal(expected, str.split_to_char)
386
+ end
387
+ def test_eucjp_count_char()
388
+ str = NKF.nkf("-e", "日本語a b\r\n").extend CharString
389
+ str.encoding = "EUC-JP"
390
+ str.eol = "CRLF"
391
+ expected = 7
392
+ assert_equal(expected, str.count_char)
393
+ end
394
+ def test_eucjp_count_latin_graph_char()
395
+ str = NKF.nkf("-e", "日本語a b\r\n").extend CharString
396
+ str.encoding = "EUC-JP"
397
+ str.eol = "CRLF"
398
+ expected = 2
399
+ assert_equal(expected, str.count_latin_graph_char)
400
+ end
401
+ def test_eucjp_count_ja_graph_char()
402
+ str = NKF.nkf("-e", "日本語a b\r\n").extend CharString
403
+ str.encoding = "EUC-JP"
404
+ str.eol = "CRLF"
405
+ expected = 3
406
+ assert_equal(expected, str.count_ja_graph_char)
407
+ end
408
+ def test_eucjp_count_graph_char()
409
+ str = NKF.nkf("-e", "日本語a b\r\n").extend CharString
410
+ str.encoding = "EUC-JP"
411
+ str.eol = "CRLF"
412
+ expected = 5
413
+ assert_equal(expected, str.count_graph_char)
414
+ end
415
+ def test_eucjp_count_latin_blank_char()
416
+ str = NKF.nkf("-e", "日本語\ta b\r\n").extend CharString
417
+ str.encoding = "EUC-JP"
418
+ str.eol = "CRLF"
419
+ expected = 2
420
+ assert_equal(expected, str.count_latin_blank_char)
421
+ end
422
+ def test_eucjp_count_ja_blank_char()
423
+ str = NKF.nkf("-e", "日本　語\ta b\r\n").extend CharString
424
+ str.encoding = "EUC-JP"
425
+ str.eol = "CRLF"
426
+ expected = 1
427
+ assert_equal(expected, str.count_ja_blank_char)
428
+ end
429
+ def test_eucjp_count_blank_char()
430
+ str = NKF.nkf("-e", "日本　語\ta b\r\n").extend CharString
431
+ str.encoding = "EUC-JP"
432
+ str.eol = "CRLF"
433
+ expected = 3
434
+ assert_equal(expected, str.count_blank_char)
435
+ end
436
+ def test_eucjp_count_word()
437
+ str = NKF.nkf("-e", "日本　語a b --\r\n").extend CharString
438
+ str.encoding = "EUC-JP"
439
+ str.eol = "CRLF"
440
+ expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
441
+ assert_equal(expected, str.count_word)
442
+ end
443
+ def test_eucjp_count_ja_word()
444
+ str = NKF.nkf("-e", "日本　語a b --\r\n").extend CharString
445
+ str.encoding = "EUC-JP"
446
+ str.eol = "CRLF"
447
+ expected = 3
448
+ assert_equal(expected, str.count_ja_word)
449
+ end
450
+ def test_eucjp_count_latin_valid_word()
451
+ str = NKF.nkf("-e", "日本　語a b --\r\n").extend CharString
452
+ str.encoding = "EUC-JP"
453
+ str.eol = "CRLF"
454
+ expected = 2
455
+ assert_equal(expected, str.count_latin_valid_word)
456
+ end
457
+ def test_eucjp_count_ja_valid_word()
458
+ str = NKF.nkf("-e", "日本　語a b --\r\n").extend CharString
459
+ str.encoding = "EUC-JP"
460
+ str.eol = "CRLF"
461
+ expected = 2
462
+ assert_equal(expected, str.count_ja_valid_word)
463
+ end
464
+ def test_eucjp_count_valid_word()
465
+ str = NKF.nkf("-e", "日本　語a b --\r\n").extend CharString
466
+ str.encoding = "EUC-JP"
467
+ str.eol = "CRLF"
468
+ expected = 4
469
+ assert_equal(expected, str.count_valid_word)
470
+ end
471
+ def test_eucjp_count_line()
472
+ str = NKF.nkf("-e", "日本語\r\n　\r\n \r\n\r\nfoo\r\nbar").extend CharString
473
+ str.encoding = "EUC-JP"
474
+ str.eol = "CRLF"
475
+ expected = 6
476
+ assert_equal(expected, str.count_line)
477
+ end
478
+ def test_eucjp_count_graph_line()
479
+ str = NKF.nkf("-e", "日本語\r\n　\r\n \r\n\r\nfoo\r\nbar").extend CharString
480
+ str.encoding = "EUC-JP"
481
+ str.eol = "CRLF"
482
+ expected = 3
483
+ assert_equal(expected, str.count_graph_line)
484
+ end
485
+ def test_eucjp_count_empty_line()
486
+ str = NKF.nkf("-e", "日本語\r\n　\r\n \r\n\r\nfoo\r\nbar").extend CharString
487
+ str.encoding = "EUC-JP"
488
+ str.eol = "CRLF"
489
+ expected = 1
490
+ assert_equal(expected, str.count_empty_line)
491
+ end
492
+ def test_eucjp_count_blank_line()
493
+ str = NKF.nkf("-e", "日本語\r\n　\r\n \r\n\r\nfoo\r\nbar").extend CharString
494
+ str.encoding = "EUC-JP"
495
+ str.eol = "CRLF"
496
+ expected = 2
497
+ assert_equal(expected, str.count_blank_line)
498
+ end
499
+
500
+ # test SJIS module
501
+ def test_sjis_split_to_word()
502
+ str = NKF.nkf("-s", "日本語の文字foo bar").extend CharString
503
+ str.encoding = "Shift_JIS"
504
+ expected = ["日本語の", "文字", "foo ", "bar"].collect{|c|NKF.nkf("-s",c)}
505
+ assert_equal(expected, str.split_to_word)
506
+ end
507
+ def test_sjisplit_s_to_word_kanhira()
508
+ str = NKF.nkf("-s", "日本語の文字").extend CharString
509
+ str.encoding = "Shift_JIS"
510
+ expected = ["日本語の", "文字"].collect{|c| NKF.nkf("-s", c)}
511
+ assert_equal(expected, str.split_to_word)
512
+ end
513
+ def test_sjis_split_to_word_katahira()
514
+ str = NKF.nkf("-s", "カタカナの文字").extend CharString
515
+ str.encoding = "Shift_JIS"
516
+ expected = ["カタカナの", "文字"].collect{|c| NKF.nkf("-s", c)}
517
+ assert_equal(expected, str.split_to_word)
518
+ end
519
+ def test_sjis_split_to_word_kataonbiki()
520
+ str = NKF.nkf("-s", "ルビーの指輪").extend CharString
521
+ str.encoding = "Shift_JIS"
522
+ expected = ["ルビーの", "指輪"].collect{|c| NKF.nkf("-s", c)}
523
+ assert_equal(expected, str.split_to_word)
524
+ end
525
+ def test_sjis_split_to_word_hiraonbiki()
526
+ str = NKF.nkf("-s", "わールビーだ").extend CharString
527
+ str.encoding = "Shift_JIS"
528
+ expected = ["わー", "ルビーだ"].collect{|c| NKF.nkf("-s", c)}
529
+ assert_equal(expected, str.split_to_word)
530
+ end
531
+ def test_sjis_split_to_word_latinmix()
532
+ str = NKF.nkf("-s", "日本語とLatinの文字").extend CharString
533
+ str.encoding = "Shift_JIS"
534
+ expected = ["日本語と","Latin","の","文字"].collect{|c| NKF.nkf("-s", c)}
535
+ assert_equal(expected, str.split_to_word)
536
+ end
537
+ def test_sjis_split_to_char()
538
+ str = NKF.nkf("-s", "表計算a b").extend CharString
539
+ str.encoding = "Shift_JIS"
540
+ str.eol = "LF" #<= needed to pass the test
541
+ expected = ["表","計","算","a"," ","b"].collect{|c|NKF.nkf("-s",c)}
542
+ assert_equal(expected, str.split_to_char)
543
+ end
544
+ def test_sjis_split_to_char_with_cr()
545
+ str = NKF.nkf("-s", "表計算a b\r").extend CharString
546
+ str.encoding = "Shift_JIS"
547
+ str.eol = "CR"
548
+ expected = ["表","計","算","a"," ","b","\r"].collect{|c|NKF.nkf("-s",c)}
549
+ assert_equal(expected, str.split_to_char)
550
+ end
551
+ def test_sjis_split_to_char_with_lf()
552
+ str = NKF.nkf("-s", "表計算a b\n").extend CharString
553
+ str.encoding = "Shift_JIS"
554
+ str.eol = "LF"
555
+ expected = ["表","計","算","a"," ","b","\n"].collect{|c|NKF.nkf("-s",c)}
556
+ assert_equal(expected, str.split_to_char)
557
+ end
558
+ def test_sjis_split_to_char_with_crlf()
559
+ str = NKF.nkf("-s", "表計算a b\r\n").extend CharString
560
+ str.encoding = "Shift_JIS"
561
+ str.eol = "CRLF"
562
+ expected = ["表","計","算","a"," ","b","\r\n"].collect{|c|NKF.nkf("-s",c)}
563
+ assert_equal(expected, str.split_to_char)
564
+ end
565
+ def test_sjis_count_char()
566
+ str = NKF.nkf("-s", "日本語a b\r\n").extend CharString
567
+ str.encoding = "Shift_JIS"
568
+ str.eol = "CRLF"
569
+ expected = 7
570
+ assert_equal(expected, str.count_char)
571
+ end
572
+ def test_sjis_count_latin_graph_char()
573
+ str = NKF.nkf("-s", "日本語a b\r\n").extend CharString
574
+ str.encoding = "Shift_JIS"
575
+ str.eol = "CRLF"
576
+ expected = 2
577
+ assert_equal(expected, str.count_latin_graph_char)
578
+ end
579
+ def test_sjis_count_ja_graph_char()
580
+ str = NKF.nkf("-s", "日本語a b\r\n").extend CharString
581
+ str.encoding = "Shift_JIS"
582
+ str.eol = "CRLF"
583
+ expected = 3
584
+ assert_equal(expected, str.count_ja_graph_char)
585
+ end
586
+ def test_sjis_count_graph_char()
587
+ str = NKF.nkf("-s", "日本語a b\r\n").extend CharString
588
+ str.encoding = "Shift_JIS"
589
+ str.eol = "CRLF"
590
+ expected = 5
591
+ assert_equal(expected, str.count_graph_char)
592
+ end
593
+ def test_sjis_count_latin_blank_char()
594
+ str = NKF.nkf("-s", "日本語\ta b\r\n").extend CharString
595
+ str.encoding = "Shift_JIS"
596
+ str.eol = "CRLF"
597
+ expected = 2
598
+ assert_equal(expected, str.count_latin_blank_char)
599
+ end
600
+ def test_sjis_count_ja_blank_char()
601
+ str = NKF.nkf("-s", "日本　語\ta b\r\n").extend CharString
602
+ str.encoding = "Shift_JIS"
603
+ str.eol = "CRLF"
604
+ expected = 1
605
+ assert_equal(expected, str.count_ja_blank_char)
606
+ end
607
+ def test_sjis_count_blank_char()
608
+ str = NKF.nkf("-s", "日本　語\ta b\r\n").extend CharString
609
+ str.encoding = "Shift_JIS"
610
+ str.eol = "CRLF"
611
+ expected = 3
612
+ assert_equal(expected, str.count_blank_char)
613
+ end
614
+ def test_sjis_count_word()
615
+ str = NKF.nkf("-s", "日本　語a b --\r\n").extend CharString
616
+ str.encoding = "Shift_JIS"
617
+ str.eol = "CRLF"
618
+ expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
619
+ assert_equal(expected, str.count_word)
620
+ end
621
+ def test_sjis_count_ja_word()
622
+ str = NKF.nkf("-s", "日本　語a b --\r\n").extend CharString
623
+ str.encoding = "Shift_JIS"
624
+ str.eol = "CRLF"
625
+ expected = 3
626
+ assert_equal(expected, str.count_ja_word)
627
+ end
628
+ def test_sjis_count_latin_valid_word()
629
+ str = NKF.nkf("-s", "日本　語a b --\r\n").extend CharString
630
+ str.encoding = "Shift_JIS"
631
+ str.eol = "CRLF"
632
+ expected = 2
633
+ assert_equal(expected, str.count_latin_valid_word)
634
+ end
635
+ def test_sjis_count_ja_valid_word()
636
+ str = NKF.nkf("-s", "日本　語a b --\r\n").extend CharString
637
+ str.encoding = "Shift_JIS"
638
+ str.eol = "CRLF"
639
+ expected = 2
640
+ assert_equal(expected, str.count_ja_valid_word)
641
+ end
642
+ def test_sjis_count_valid_word()
643
+ str = NKF.nkf("-s", "日本　語a b --\r\n").extend CharString
644
+ str.encoding = "Shift_JIS"
645
+ str.eol = "CRLF"
646
+ expected = 4
647
+ assert_equal(expected, str.count_valid_word)
648
+ end
649
+ def test_sjis_count_line()
650
+ str = NKF.nkf("-s", "日本語\r\n　\r\n \r\n\r\nfoo\r\nbar").extend CharString
651
+ str.encoding = "Shift_JIS"
652
+ str.eol = "CRLF"
653
+ expected = 6
654
+ assert_equal(expected, str.count_line)
655
+ end
656
+ def test_sjis_count_graph_line()
657
+ str = NKF.nkf("-s", "日本語\r\n　\r\n \r\n\r\nfoo\r\nbar").extend CharString
658
+ str.encoding = "Shift_JIS"
659
+ str.eol = "CRLF"
660
+ expected = 3
661
+ assert_equal(expected, str.count_graph_line)
662
+ end
663
+ def test_sjis_count_empty_line()
664
+ str = NKF.nkf("-s", "日本語\r\n　\r\n \r\n\r\nfoo\r\nbar").extend CharString
665
+ str.encoding = "Shift_JIS"
666
+ str.eol = "CRLF"
667
+ expected = 1
668
+ assert_equal(expected, str.count_empty_line)
669
+ end
670
+ def test_sjis_count_blank_line()
671
+ str = NKF.nkf("-s", "日本語\r\n　\r\n \r\n\r\nfoo\r\nbar").extend CharString
672
+ str.encoding = "Shift_JIS"
673
+ str.eol = "CRLF"
674
+ expected = 2
675
+ assert_equal(expected, str.count_blank_line)
676
+ end
677
+
678
+ # test UTF8 module
679
+ def test_utf8_split_to_word()
680
+ str = NKF.nkf("-E -w", "日本語の文字foo bar").extend CharString
681
+ str.encoding = "UTF-8"
682
+ expected = ["日本語の", "文字", "foo ", "bar"].collect{|c| NKF.nkf("-E -w", c)}
683
+ assert_equal(expected, str.split_to_word)
684
+ end
685
+ def test_utf8_split_to_word_kanhira()
686
+ str = NKF.nkf("-E -w", "日本語の文字").extend CharString
687
+ str.encoding = "UTF-8"
688
+ expected = ["日本語の", "文字"].collect{|c| NKF.nkf("-E -w", c)}
689
+ assert_equal(expected, str.split_to_word)
690
+ end
691
+ def test_utf8_split_to_word_katahira()
692
+ str = NKF.nkf("-E -w", "カタカナの文字").extend CharString
693
+ str.encoding = "UTF-8"
694
+ expected = ["カタカナの", "文字"].collect{|c| NKF.nkf("-E -w", c)}
695
+ assert_equal(expected, str.split_to_word)
696
+ end
697
+ def test_utf8_split_to_word_kataonbiki()
698
+ str = NKF.nkf("-E -w", "ルビーの指輪").extend CharString
699
+ str.encoding = "UTF-8"
700
+ expected = ["ルビーの", "指輪"].collect{|c| NKF.nkf("-E -w", c)}
701
+ assert_equal(expected, str.split_to_word)
702
+ end
703
+ def test_utf8_split_to_word_hiraonbiki()
704
+ str = NKF.nkf("-E -w", "わールビーだ").extend CharString
705
+ str.encoding = "UTF-8"
706
+ expected = ["わー", "ルビーだ"].collect{|c| NKF.nkf("-E -w", c)}
707
+ assert_equal(expected, str.split_to_word)
708
+ end
709
+ def test_utf8_split_to_word_latinmix()
710
+ str = NKF.nkf("-E -w", "日本語とLatinの文字").extend CharString
711
+ str.encoding = "UTF-8"
712
+ expected = ["日本語と", "Latin", "の", "文字"].collect{|c| NKF.nkf("-E -w", c)}
713
+ assert_equal(expected, str.split_to_word)
714
+ end
715
+ def test_utf8_split_to_char()
716
+ str = NKF.nkf("-E -w", "日本語a b").extend CharString
717
+ str.encoding = "UTF-8" #<= needed to pass the test
718
+ str.eol = "LF" #<= needed to pass the test
719
+ expected = ["日", "本", "語", "a", " ", "b"].collect{|c| NKF.nkf("-E -w", c)}
720
+ assert_equal(expected, str.split_to_char)
721
+ end
722
+ def test_utf8_split_to_char_with_cr()
723
+ str = NKF.nkf("-E -w", "日本語a b\r").extend CharString
724
+ str.encoding = "UTF-8" #<= needed to pass the test
725
+ str.eol = "CR"
726
+ expected = ["日","本","語","a"," ","b","\r"].collect{|c| NKF.nkf("-E -w", c)}
727
+ assert_equal(expected, str.split_to_char)
728
+ end
729
+ def test_utf8_split_to_char_with_lf()
730
+ str = NKF.nkf("-E -w", "日本語a b\n").extend CharString
731
+ str.encoding = "UTF-8" #<= needed to pass the test
732
+ str.eol = "LF"
733
+ expected = ["日","本","語","a"," ","b","\n"].collect{|c| NKF.nkf("-E -w", c)}
734
+ assert_equal(expected, str.split_to_char)
735
+ end
736
+ def test_utf8_split_to_char_with_crlf()
737
+ str = NKF.nkf("-E -w", "日本語a b\r\n").extend CharString
738
+ str.encoding = "UTF-8"#<= needed to pass the test
739
+ str.eol = "CRLF"
740
+ expected = ["日","本","語","a"," ","b","\r\n"].collect{|c| NKF.nkf("-E -w", c)}
741
+ assert_equal(expected, str.split_to_char)
742
+ end
743
+ def test_utf8_count_char()
744
+ str = NKF.nkf("-E -w", "日本語a b\r\n").extend CharString
745
+ str.encoding = "UTF-8" #<= needed to pass the test
746
+ str.eol = "CRLF"
747
+ expected = 7
748
+ assert_equal(expected, str.count_char)
749
+ end
750
+ def test_utf8_count_latin_graph_char()
751
+ str = NKF.nkf("-E -w", "日本語a b\r\n").extend CharString
752
+ str.encoding = "UTF-8" #<= needed to pass the test
753
+ str.eol = "CRLF"
754
+ expected = 2
755
+ assert_equal(expected, str.count_latin_graph_char)
756
+ end
757
+ def test_utf8_count_ja_graph_char()
758
+ str = NKF.nkf("-E -w", "���ܸ�a b\r\n").extend CharString
759
+ str.encoding = "UTF-8" #<= needed to pass the test
760
+ str.eol = "CRLF"
761
+ expected = 3
762
+ assert_equal(expected, str.count_ja_graph_char)
763
+ end
764
+ def test_utf8_count_graph_char()
765
+ str = NKF.nkf("-E -w", "���ܸ�a b\r\n").extend CharString
766
+ str.encoding = "UTF-8" #<= needed to pass the test
767
+ str.eol = "CRLF"
768
+ expected = 5
769
+ assert_equal(expected, str.count_graph_char)
770
+ end
771
+ def test_utf8_count_latin_blank_char()
772
+ str = NKF.nkf("-E -w", "���ܸ�\ta b\r\n").extend CharString
773
+ str.encoding = "UTF-8"
774
+ str.eol = "CRLF"
775
+ expected = 2
776
+ assert_equal(expected, str.count_latin_blank_char)
777
+ end
778
+ def test_utf8_count_ja_blank_char()
779
+ str = NKF.nkf("-E -w", "���ܡ���\ta b\r\n").extend CharString
780
+ str.encoding = "UTF-8"
781
+ str.eol = "CRLF"
782
+ expected = 1
783
+ assert_equal(expected, str.count_ja_blank_char)
784
+ end
785
+ def test_utf8_count_blank_char()
786
+ str = NKF.nkf("-E -w", "���ܡ���\ta b\r\n").extend CharString
787
+ str.encoding = "UTF-8"
788
+ str.eol = "CRLF"
789
+ expected = 3
790
+ assert_equal(expected, str.count_blank_char)
791
+ end
792
+ def test_utf8_count_word()
793
+ str = NKF.nkf("-E -w", "���ܡ���a b --\r\n").extend CharString
794
+ str.encoding = "UTF-8"
795
+ str.eol = "CRLF"
796
+ expected = 7 # "--" and "\r\n" are each counted as a word here (though neither is a "valid" word)
797
+ assert_equal(expected, str.count_word)
798
+ end
799
+ def test_utf8_count_ja_word()
800
+ str = NKF.nkf("-E -w", "���ܡ���a b --\r\n").extend CharString
801
+ str.encoding = "UTF-8"
802
+ str.eol = "CRLF"
803
+ expected = 3
804
+ assert_equal(expected, str.count_ja_word)
805
+ end
806
+ def test_utf8_count_latin_valid_word()
807
+ str = NKF.nkf("-E -w", "���ܡ���a b --\r\n").extend CharString
808
+ str.encoding = "UTF-8"
809
+ str.eol = "CRLF"
810
+ expected = 2
811
+ assert_equal(expected, str.count_latin_valid_word)
812
+ end
813
+ def test_utf8_count_ja_valid_word()
814
+ str = NKF.nkf("-E -w", "���ܡ���a b --\r\n").extend CharString
815
+ str.encoding = "UTF-8"
816
+ str.eol = "CRLF"
817
+ expected = 2
818
+ assert_equal(expected, str.count_ja_valid_word)
819
+ end
820
+ def test_utf8_count_valid_word()
821
+ str = NKF.nkf("-E -w", "���ܡ���a b --\r\n").extend CharString
822
+ str.encoding = "UTF-8"
823
+ str.eol = "CRLF"
824
+ expected = 4
825
+ assert_equal(expected, str.count_valid_word)
826
+ end
827
+ def test_utf8_count_line()
828
+ str = NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar").extend CharString
829
+ str.encoding = "UTF-8"
830
+ str.eol = "CRLF"
831
+ expected = 6
832
+ assert_equal(expected, str.count_line)
833
+ end
834
+ def test_utf8_count_graph_line()
835
+ str = NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar").extend CharString
836
+ str.encoding = "UTF-8"
837
+ str.eol = "CRLF"
838
+ expected = 3
839
+ assert_equal(expected, str.count_graph_line)
840
+ end
841
+ def test_utf8_count_empty_line()
842
+ str = NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar").extend CharString
843
+ str.encoding = "UTF-8"
844
+ str.eol = "CRLF"
845
+ expected = 1
846
+ assert_equal(expected, str.count_empty_line)
847
+ end
848
+ def test_utf8_count_blank_line()
849
+ str = NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar").extend CharString
850
+ str.encoding = "UTF-8"
851
+ str.eol = "CRLF"
852
+ expected = 2
853
+ assert_equal(expected, str.count_blank_line)
854
+ end
855
+
856
+ # test module functions
857
+
858
+ def assert_guess_encoding(expected, str)
859
+ unless CharString.ruby_m17n?
860
+ assert_equal(expected, CharString.guess_encoding_using_pureruby(str))
861
+ assert_equal(expected, CharString.guess_encoding_using_iconv(str))
862
+ end
863
+ assert_equal(expected, CharString.guess_encoding(str))
864
+ end
865
+
866
+ def test_guess_encoding_nil()
867
+ str = nil
868
+ expected = nil
869
+ assert_guess_encoding(expected, str)
870
+ end
871
+ # def test_guess_encoding_binary()
872
+ # str = "\xFF\xFF"
873
+ # expected = "BINARY"
874
+ # assert_equal(expected, CharString.guess_encoding(str))
875
+ # end
876
+ def test_guess_encoding_unknown()
877
+ if CharString.ruby_m17n?
878
+ str = "".encode("BINARY") # an invalid byte sequence cannot be written as a string literal here, so an empty binary string is used
879
+ expected = "ASCII-8BIT"
880
+ else
881
+ str = "\xff\xff\xff\xff" # "\xDE\xAD\xBE\xEF"
882
+ expected = "UNKNOWN"
883
+ end
884
+ assert_guess_encoding(expected, str)
885
+ end
886
+ def test_guess_encoding_ascii_1()
887
+ if CharString.ruby_m17n?
888
+ str = "ASCII string".encode("US-ASCII")
889
+ expected = "US-ASCII"
890
+ else
891
+ str = "ASCII string"
892
+ expected = "US-ASCII"
893
+ end
894
+ assert_guess_encoding(expected, str)
895
+ end
896
+ def test_guess_encoding_ascii_2()
897
+ if CharString.ruby_m17n?
898
+ str = "abc\ndef\n".encode("US-ASCII")
899
+ expected = "US-ASCII"
900
+ else
901
+ str = "abc\ndef\n"
902
+ expected = "US-ASCII"
903
+ end
904
+ assert_guess_encoding(expected, str)
905
+ end
906
+ # CharString.guess_encoding sometimes mistakes JIS for ASCII because of Iconv, so the JIS tests below are disabled.
907
+ # def test_guess_encoding_jis_1()
908
+ # str = NKF.nkf("-j", "�����ȥ������ʤȤҤ餬��\n")
909
+ # expected = "JIS"
910
+ # assert_guess_encoding(expected, str)
911
+ # end
912
+ # def test_guess_encoding_jis_2()
913
+ # str = NKF.nkf("-j", "�����ȥ������ʤȤҤ餬�ʤ�Latin��ʸ���ȶ���( )�ȵ���@\n" * 100)
914
+ # expected = "JIS"
915
+ # assert_guess_encoding(expected, str)
916
+ # end
917
+ def test_guess_encoding_eucjp_1()
918
+ str = NKF.nkf("-e", "���ܸ��Latin��ʸ��")
919
+ expected = "EUC-JP"
920
+ assert_guess_encoding(expected, str)
921
+ end
922
+ def test_guess_encoding_eucjp_2()
923
+ str = NKF.nkf('-e', "�����ȥ������ʤȤҤ餬�ʤ�Latin��ʸ���ȶ���( )\n" * 10)
924
+ expected = "EUC-JP"
925
+ assert_guess_encoding(expected, str)
926
+ end
927
+ def test_guess_encoding_eucjp_3()
928
+ str = NKF.nkf('-e', "����Ф�ϡ����̾���ϤޤĤ�ȤǤ���\nRuby���ä��Τϻ�Ǥ������Ruby Hacker�Ǥ���\n")
929
+ expected = "EUC-JP"
930
+ assert_guess_encoding(expected, str)
931
+ end
932
+ def test_guess_encoding_sjis_1()
933
+ str = NKF.nkf("-s", "���ܸ��Latin��ʸ��")
934
+ expected = "Shift_JIS"
935
+ assert_guess_encoding(expected, str)
936
+ end
937
+ def test_guess_encoding_sjis_2()
938
+ str = NKF.nkf('-s', "������\n�������ʤ�\n�Ҥ餬�ʤ�\nLatin")
939
+ expected = "Shift_JIS"
940
+ assert_guess_encoding(expected, str)
941
+ end
942
+ def test_guess_encoding_utf8_1()
943
+ str = NKF.nkf("-E -w", "���ܸ��Latin��ʸ��")
944
+ expected = "UTF-8"
945
+ assert_guess_encoding(expected, str)
946
+ end
947
+ def test_guess_encoding_utf8_2()
948
+ str = NKF.nkf("-E -w", "������\n�ˤۤؤ�\n")
949
+ expected = "UTF-8"
950
+ assert_guess_encoding(expected, str)
951
+ end
952
+
953
+ def test_guess_eol_nil()
954
+ str = nil
955
+ expected = nil
956
+ assert_equal(expected, CharString.guess_eol(str))
957
+ end
958
+ def test_guess_eol_empty()
959
+ str = ""
960
+ expected = "NONE"
961
+ assert_equal(expected, CharString.guess_eol(str))
962
+ end
963
+ def test_guess_eol_none()
964
+ str = "foo bar"
965
+ expected = "NONE"
966
+ assert_equal(expected, CharString.guess_eol(str))
967
+ end
968
+ def test_guess_eol_cr()
969
+ str = "foo bar\r"
970
+ expected = "CR"
971
+ assert_equal(expected, CharString.guess_eol(str))
972
+ end
973
+ def test_guess_eol_lf()
974
+ str = "foo bar\n"
975
+ expected = "LF"
976
+ assert_equal(expected, CharString.guess_eol(str))
977
+ end
978
+ def test_guess_eol_crlf()
979
+ str = "foo bar\r\n"
980
+ expected = "CRLF"
981
+ assert_equal(expected, CharString.guess_eol(str))
982
+ end
983
+ def test_guess_eol_mixed()
984
+ str = "foo\rbar\nbaz\r\n"
985
+ expected = "UNKNOWN"
986
+ assert_equal(expected, CharString.guess_eol(str))
987
+ end
988
+ def test_guess_eol_cr2()
989
+ str = "foo\rbar\rbaz\r".extend CharString
990
+ expected = "CR"
991
+ assert_equal(expected, CharString.guess_eol(str))
992
+ end
993
+ def test_guess_eol_lf2()
994
+ str = "foo\nbar\nbaz\n".extend CharString
995
+ expected = "LF"
996
+ assert_equal(expected, CharString.guess_eol(str))
997
+ end
998
+ def test_guess_eol_crlf2()
999
+ str = "foo\r\nbar\r\nbaz\r\n".extend CharString
1000
+ expected = "CRLF"
1001
+ assert_equal(expected, CharString.guess_eol(str))
1002
+ end
1003
+
1004
+ def teardown()
1005
+ #
1006
+ end
1007
+
1008
+ end