docdiff 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. data/.gitignore +6 -0
  2. data/.travis.yml +7 -0
  3. data/Gemfile +17 -0
  4. data/Guardfile +8 -0
  5. data/Makefile +108 -0
  6. data/Rakefile +17 -0
  7. data/bin/docdiff +179 -0
  8. data/devutil/JIS0208.TXT +6952 -0
  9. data/devutil/char_by_charclass.rb +23 -0
  10. data/devutil/charclass_by_char.rb +21 -0
  11. data/devutil/jis0208.rb +343 -0
  12. data/devutil/testjis0208.rb +38 -0
  13. data/docdiff.conf.example +22 -0
  14. data/docdiff.gemspec +23 -0
  15. data/docdiffwebui.cgi +176 -0
  16. data/docdiffwebui.html +123 -0
  17. data/img/docdiff-screenshot-format-html-digest-firefox.png +0 -0
  18. data/img/docdiff-screenshot-format-html-firefox.png +0 -0
  19. data/img/docdiff-screenshot-format-tty-cmdexe-en.png +0 -0
  20. data/img/docdiff-screenshot-format-tty-cmdexe-ja.png +0 -0
  21. data/img/docdiff-screenshot-format-tty-rxvtunicode-en.png +0 -0
  22. data/img/docdiff-screenshot-format-tty-rxvtunicode-ja.png +0 -0
  23. data/img/docdiff-screenshot-format-tty-xterm-en.png +0 -0
  24. data/img/docdiff-screenshot-format-tty-xterm-ja.png +0 -0
  25. data/img/docdiff-screenshot-resolution-linewordchar-xterm.png +0 -0
  26. data/index.html +181 -0
  27. data/langfilter.rb +14 -0
  28. data/lib/doc_diff.rb +170 -0
  29. data/lib/docdiff.rb +7 -0
  30. data/lib/docdiff/charstring.rb +579 -0
  31. data/lib/docdiff/diff.rb +217 -0
  32. data/lib/docdiff/diff/contours.rb +382 -0
  33. data/lib/docdiff/diff/editscript.rb +148 -0
  34. data/lib/docdiff/diff/rcsdiff.rb +107 -0
  35. data/lib/docdiff/diff/shortestpath.rb +93 -0
  36. data/lib/docdiff/diff/speculative.rb +40 -0
  37. data/lib/docdiff/diff/subsequence.rb +39 -0
  38. data/lib/docdiff/diff/unidiff.rb +124 -0
  39. data/lib/docdiff/difference.rb +92 -0
  40. data/lib/docdiff/document.rb +127 -0
  41. data/lib/docdiff/encoding/en_ascii.rb +97 -0
  42. data/lib/docdiff/encoding/ja_eucjp.rb +269 -0
  43. data/lib/docdiff/encoding/ja_sjis.rb +260 -0
  44. data/lib/docdiff/encoding/ja_utf8.rb +6974 -0
  45. data/lib/docdiff/version.rb +3 -0
  46. data/lib/docdiff/view.rb +476 -0
  47. data/lib/viewdiff.rb +375 -0
  48. data/readme.html +713 -0
  49. data/sample/01.en.ascii.cr +1 -0
  50. data/sample/01.en.ascii.crlf +2 -0
  51. data/sample/01.en.ascii.lf +2 -0
  52. data/sample/01.ja.eucjp.lf +2 -0
  53. data/sample/01.ja.sjis.cr +1 -0
  54. data/sample/01.ja.sjis.crlf +2 -0
  55. data/sample/01.ja.utf8.crlf +2 -0
  56. data/sample/02.en.ascii.cr +1 -0
  57. data/sample/02.en.ascii.crlf +2 -0
  58. data/sample/02.en.ascii.lf +2 -0
  59. data/sample/02.ja.eucjp.lf +2 -0
  60. data/sample/02.ja.sjis.cr +1 -0
  61. data/sample/02.ja.sjis.crlf +2 -0
  62. data/sample/02.ja.utf8.crlf +2 -0
  63. data/sample/humpty_dumpty01.ascii.lf +4 -0
  64. data/sample/humpty_dumpty02.ascii.lf +4 -0
  65. data/test/charstring_test.rb +1008 -0
  66. data/test/diff_test.rb +36 -0
  67. data/test/difference_test.rb +64 -0
  68. data/test/docdiff_test.rb +193 -0
  69. data/test/document_test.rb +626 -0
  70. data/test/test_helper.rb +7 -0
  71. data/test/view_test.rb +570 -0
  72. data/test/viewdiff_test.rb +908 -0
  73. metadata +129 -0
@@ -0,0 +1 @@
1
+ Hello, my name is Watanabe.
@@ -0,0 +1,2 @@
1
+ Hello, my name is Watanabe.
2
+ I am just another Ruby porter.
@@ -0,0 +1,2 @@
1
+ Hello, my name is Watanabe.
2
+ I am just another Ruby porter.
@@ -0,0 +1,2 @@
1
+ こんにちは、私の名前はわたなべです。
2
+ 私はJust Another Ruby Porterです。
@@ -0,0 +1 @@
1
+ こんにちは、私の名前はわたなべです。
@@ -0,0 +1,2 @@
1
+ こんにちは、私の名前はわたなべです。
2
+ 私はJust Another Ruby Porterです。
@@ -0,0 +1,2 @@
1
+ こんにちは、私の名前はわたなべです。
2
+ 私はJust Another Ruby Porterです。
@@ -0,0 +1 @@
1
+ Hello, my name is matz.
@@ -0,0 +1,2 @@
1
+ Hello, my name is matz.
2
+ It's me who has created Ruby. I am a Ruby hacker.
@@ -0,0 +1,2 @@
1
+ Hello, my name is matz.
2
+ It's me who has created Ruby. I am a Ruby hacker.
@@ -0,0 +1,2 @@
1
+ こんばんは、私の名前はまつもとです。
2
+ Rubyを作ったのは私です。私はRuby Hackerです。
@@ -0,0 +1 @@
1
+ こんばんは、私の名前はまつもとです。
@@ -0,0 +1,2 @@
1
+ こんばんは、私の名前はまつもとです。
2
+ Rubyを作ったのは私です。私はRuby Hackerです。
@@ -0,0 +1,2 @@
1
+ こんばんは、私の名前はまつもとです。
2
+ Rubyを作ったのは私です。私はRuby Hackerです。
@@ -0,0 +1,4 @@
1
+ Humpty Dumpty sat on a wall.
2
+ Humpty Dumpty had a great fall.
3
+ All the king's horses and all the king's men
4
+ Couldn't put Humpty together again!
@@ -0,0 +1,4 @@
1
+ Humpty Dumpty was sitting on a Humpty Dumpty wall.
2
+ Humpty had a great fall by mistake.
3
+ All the king's men and all the king's horses
4
+ Couldn't put Humpty Dumpty together!
@@ -0,0 +1,1008 @@
1
+ #!/usr/bin/ruby
2
+ # -*- coding: euc-jp; -*-
3
+ require 'test/unit'
4
+ require 'docdiff/charstring'
5
+ require 'nkf'
6
+
7
+ class TC_CharString < Test::Unit::TestCase
8
+
9
+ def setup()
10
+ #
11
+ end
12
+
13
+ # test encoding module registration
14
+ def test_encoding_ascii()
15
+ str = "foo".extend CharString
16
+ str.encoding = "US-ASCII"
17
+ expected = CharString::ASCII
18
+ assert_equal(expected, CharString::Encodings[str.encoding])
19
+ end
20
+ def test_encoding_eucjp()
21
+ str = "foo".extend CharString
22
+ str.encoding = "EUC-JP"
23
+ expected = CharString::EUC_JP
24
+ assert_equal(expected, CharString::Encodings[str.encoding])
25
+ end
26
+ def test_encoding_sjis()
27
+ str = "foo".extend CharString
28
+ str.encoding = "Shift_JIS"
29
+ expected = CharString::Shift_JIS
30
+ assert_equal(expected, CharString::Encodings[str.encoding])
31
+ end
32
+ def test_encoding_utf8()
33
+ str = "foo".extend CharString
34
+ str.encoding = "UTF-8"
35
+ expected = CharString::UTF8
36
+ assert_equal(expected, CharString::Encodings[str.encoding])
37
+ end
38
+
39
+ # test eol module registration
40
+ def test_eol_cr()
41
+ str = "foo".extend CharString
42
+ str.eol = "CR"
43
+ expected = CharString::CR
44
+ assert_equal(expected, CharString::EOLChars[str.eol])
45
+ end
46
+ def test_eol_lf()
47
+ str = "foo".extend CharString
48
+ str.eol = "LF"
49
+ expected = CharString::LF
50
+ assert_equal(expected, CharString::EOLChars[str.eol])
51
+ end
52
+ def test_eol_crlf()
53
+ str = "foo".extend CharString
54
+ str.eol = "CRLF"
55
+ expected = CharString::CRLF
56
+ assert_equal(expected, CharString::EOLChars[str.eol])
57
+ end
58
+
59
+ # test eol eol_char method
60
+ def test_eol_char_cr()
61
+ str = "foo\rbar\r".extend CharString
62
+ str.eol = "CR"
63
+ expected = "\r"
64
+ assert_equal(expected, str.eol_char)
65
+ end
66
+ def test_eol_char_lf()
67
+ str = "foo\nbar\n".extend CharString
68
+ str.eol = "LF"
69
+ expected = "\n"
70
+ assert_equal(expected, str.eol_char)
71
+ end
72
+ def test_eol_char_crlf()
73
+ str = "foo\r\nbar\r\n".extend CharString
74
+ str.eol = "CRLF"
75
+ expected = "\r\n"
76
+ assert_equal(expected, str.eol_char)
77
+ end
78
+ def test_eol_char_none()
79
+ str = "foobar".extend CharString
80
+ expected = nil
81
+ assert_equal(expected, str.eol_char)
82
+ end
83
+ def test_eol_char_none_for_0length_string()
84
+ str = "".extend CharString
85
+ expected = nil
86
+ assert_equal(expected, str.eol_char)
87
+ end
88
+ def test_eol_char_none_eucjp()
89
+ str = NKF.nkf("-e", "���ܸ�a b").extend CharString
90
+ expected = nil
91
+ assert_equal(expected, str.eol_char)
92
+ end
93
+ def test_eol_char_none_sjis()
94
+ str = NKF.nkf("-s", "���ܸ�a b").extend CharString
95
+ expected = nil
96
+ assert_equal(expected, str.eol_char)
97
+ end
98
+
99
+ # test eol split_to_line() method
100
+ def test_cr_split_to_line()
101
+ str = "foo\rbar\r".extend CharString
102
+ encoding, eol = "US-ASCII", "CR"
103
+ str.encoding, str.eol = encoding, eol
104
+ expected = ["foo\r", "bar\r"]
105
+ assert_equal(expected, str.split_to_line)
106
+ end
107
+ def test_cr_split_to_line_chomped_lastline()
108
+ str = "foo\rbar".extend CharString
109
+ str.encoding = "US-ASCII"
110
+ str.eol = "CR"
111
+ expected = ["foo\r", "bar"]
112
+ assert_equal(expected, str.split_to_line)
113
+ end
114
+ def test_cr_split_to_line_empty_line()
115
+ str = "foo\r\rbar\r".extend CharString
116
+ str.encoding = "US-ASCII"
117
+ str.eol = "CR"
118
+ expected = ["foo\r", "\r", "bar\r"]
119
+ assert_equal(expected, str.split_to_line)
120
+ end
121
+ def test_lf_split_to_line()
122
+ str = "foo\nbar\n".extend CharString
123
+ str.encoding = "US-ASCII"
124
+ str.eol = "LF"
125
+ expected = ["foo\n", "bar\n"]
126
+ assert_equal(expected, str.split_to_line)
127
+ end
128
+ def test_lf_split_to_line_chomped_lastline()
129
+ str = "foo\nbar".extend CharString
130
+ str.encoding = "US-ASCII"
131
+ str.eol = "LF"
132
+ expected = ["foo\n", "bar"]
133
+ assert_equal(expected, str.split_to_line)
134
+ end
135
+ def test_lf_split_to_line_empty_line()
136
+ str = "foo\n\nbar\n".extend CharString
137
+ str.encoding = "US-ASCII"
138
+ str.eol = "LF"
139
+ expected = ["foo\n", "\n", "bar\n"]
140
+ assert_equal(expected, str.split_to_line)
141
+ end
142
+ def test_crlf_split_to_line()
143
+ str = "foo\r\nbar\r\n".extend CharString
144
+ str.encoding = "US-ASCII"
145
+ str.eol = "CRLF"
146
+ expected = ["foo\r\n", "bar\r\n"]
147
+ assert_equal(expected, str.split_to_line)
148
+ end
149
+ def test_crlf_split_to_line_chomped_lastline()
150
+ str = "foo\r\nbar".extend CharString
151
+ str.encoding = "US-ASCII"
152
+ str.eol = "CRLF"
153
+ expected = ["foo\r\n", "bar"]
154
+ assert_equal(expected, str.split_to_line)
155
+ end
156
+ def test_crlf_split_to_line_empty_line()
157
+ str = "foo\r\n\r\nbar\r\n".extend CharString
158
+ str.encoding = "US-ASCII"
159
+ str.eol = "CRLF"
160
+ expected = ["foo\r\n", "\r\n", "bar\r\n"]
161
+ assert_equal(expected, str.split_to_line)
162
+ end
163
+
164
+ # test ASCII module
165
+ def test_ascii_split_to_word()
166
+ str = "foo bar".extend CharString
167
+ str.encoding = "US-ASCII"
168
+ expected = ["foo ", "bar"]
169
+ assert_equal(expected, str.split_to_word)
170
+ end
171
+ def test_ascii_split_to_word_withsymbol()
172
+ str = "foo (bar) baz-baz".extend CharString
173
+ str.encoding = "US-ASCII"
174
+ expected = ["foo ", "(bar) ", "baz-baz"]
175
+ assert_equal(expected, str.split_to_word)
176
+ end
177
+ def test_ascii_split_to_word_withquote()
178
+ str = "foo's 'foo' \"bar\" 'baz.'".extend CharString
179
+ str.encoding = "US-ASCII"
180
+ expected = ["foo's ", "'foo' ", "\"bar\" ", "'baz.'"]
181
+ assert_equal(expected, str.split_to_word)
182
+ end
183
+ def test_ascii_split_to_word_withlongspace()
184
+ str = " foo bar".extend CharString
185
+ str.encoding = "US-ASCII"
186
+ expected = [" ", "foo ", " ", "bar"]
187
+ assert_equal(expected, str.split_to_word)
188
+ end
189
+ def test_ascii_split_to_word_withdash()
190
+ str = "foo -- bar, baz - quux".extend CharString
191
+ str.encoding = "US-ASCII"
192
+ expected = ["foo ", "-- ", "bar, ", "baz ", "- ", "quux"]
193
+ assert_equal(expected, str.split_to_word)
194
+ end
195
+ def test_ascii_split_to_char()
196
+ str = "foo bar".extend CharString
197
+ str.encoding = "US-ASCII"
198
+ str.eol = "LF"
199
+ expected = ["f","o","o"," ","b","a","r"]
200
+ assert_equal(expected, str.split_to_char)
201
+ end
202
+ def test_ascii_split_to_char_with_eol_cr()
203
+ str = "foo bar\r".extend CharString
204
+ str.encoding = "US-ASCII"
205
+ str.eol = "CR"
206
+ expected = ["f","o","o"," ","b","a","r","\r"]
207
+ assert_equal(expected, str.split_to_char)
208
+ end
209
+ def test_ascii_split_to_char_with_eol_lf()
210
+ str = "foo bar\n".extend CharString
211
+ str.encoding = "US-ASCII"
212
+ str.eol = "LF"
213
+ expected = ["f","o","o"," ","b","a","r","\n"]
214
+ assert_equal(expected, str.split_to_char)
215
+ end
216
+ def test_ascii_split_to_char_with_eol_crlf()
217
+ str = "foo bar\r\n".extend CharString
218
+ str.encoding = "US-ASCII"
219
+ str.eol = "CRLF"
220
+ expected = ["f","o","o"," ","b","a","r","\r\n"]
221
+ assert_equal(expected, str.split_to_char)
222
+ end
223
+ def test_ascii_split_to_byte()
224
+ str = "foo bar\r\n".extend CharString
225
+ str.encoding = "US-ASCII"
226
+ str.eol = "CRLF"
227
+ expected = ["f","o","o"," ","b","a","r","\r","\n"]
228
+ assert_equal(expected, str.split_to_byte)
229
+ end
230
+ def test_ascii_count_byte()
231
+ str = "foo bar\r\n".extend CharString
232
+ str.encoding = "US-ASCII"
233
+ str.eol = "CRLF"
234
+ expected = 9
235
+ assert_equal(expected, str.count_byte)
236
+ end
237
+ def test_ascii_count_char()
238
+ str = "foo bar\r\nbaz quux\r\n".extend CharString
239
+ str.encoding = "US-ASCII"
240
+ str.eol = "CRLF"
241
+ expected = 17
242
+ assert_equal(expected, str.count_char)
243
+ end
244
+ def test_ascii_count_latin_graph_char()
245
+ str = "foo bar\r\nbaz quux\r\n".extend CharString
246
+ str.encoding = "US-ASCII"
247
+ str.eol = "CRLF"
248
+ expected = 13
249
+ assert_equal(expected, str.count_latin_graph_char)
250
+ end
251
+ def test_ascii_count_graph_char()
252
+ str = "foo bar\r\nbaz quux\r\n".extend CharString
253
+ str.encoding = "US-ASCII"
254
+ str.eol = "CRLF"
255
+ expected = 13
256
+ assert_equal(expected, str.count_graph_char)
257
+ end
258
+ def test_ascii_count_latin_blank_char()
259
+ str = "foo bar\r\nbaz\tquux\r\n".extend CharString
260
+ str.encoding = "US-ASCII"
261
+ str.eol = "CRLF"
262
+ expected = 2
263
+ assert_equal(expected, str.count_latin_blank_char)
264
+ end
265
+ def test_ascii_count_blank_char()
266
+ str = "foo bar\r\nbaz\tquux\r\n".extend CharString
267
+ str.encoding = "US-ASCII"
268
+ str.eol = "CRLF"
269
+ expected = 2
270
+ assert_equal(expected, str.count_blank_char)
271
+ end
272
+ def test_ascii_count_word()
273
+ str = "foo bar \r\nbaz quux\r\n".extend CharString
274
+ str.encoding = "US-ASCII"
275
+ str.eol = "CRLF"
276
+ expected = 6
277
+ assert_equal(expected, str.count_word)
278
+ end
279
+ def test_ascii_count_latin_word()
280
+ str = "foo bar \r\nbaz quux\r\n".extend CharString
281
+ str.encoding = "US-ASCII"
282
+ str.eol = "CRLF"
283
+ expected = 5 # " " is also counted as a word
284
+ assert_equal(expected, str.count_latin_word)
285
+ end
286
+ def test_ascii_count_latin_valid_word()
287
+ str = "1 foo \r\n%%% ()\r\n".extend CharString
288
+ str.encoding = "US-ASCII"
289
+ str.eol = "CRLF"
290
+ expected = 2
291
+ assert_equal(expected, str.count_latin_valid_word)
292
+ end
293
+ def test_ascii_count_line()
294
+ str = "foo\r\nbar".extend CharString
295
+ str.encoding = "US-ASCII"
296
+ str.eol = "CRLF"
297
+ expected = 2
298
+ assert_equal(expected, str.count_line)
299
+ end
300
+ def test_ascii_count_graph_line()
301
+ str = "foo\r\n ".extend CharString
302
+ str.encoding = "US-ASCII"
303
+ str.eol = "CRLF"
304
+ expected = 1
305
+ assert_equal(expected, str.count_graph_line)
306
+ end
307
+ def test_ascii_count_empty_line()
308
+ str = "foo\r\n \r\n\t\r\n\r\n".extend CharString
309
+ str.encoding = "US-ASCII"
310
+ str.eol = "CRLF"
311
+ expected = 1
312
+ assert_equal(expected, str.count_empty_line)
313
+ end
314
+ def test_ascii_count_blank_line()
315
+ str = "\r\n \r\n\t\r\n ".extend CharString
316
+ str.encoding = "US-ASCII"
317
+ str.eol = "CRLF"
318
+ expected = 3
319
+ assert_equal(expected, str.count_blank_line)
320
+ end
321
+
322
+ # test EUCJP module
323
+ def test_eucjp_split_to_word()
324
+ str = NKF.nkf("-e", "���ܸ��ʸ��foo bar").extend CharString
325
+ str.encoding = "EUC-JP"
326
+ expected = ["���ܸ��","ʸ��","foo ","bar"].collect{|c| NKF.nkf("-e", c)}
327
+ assert_equal(expected, str.split_to_word)
328
+ end
329
+ def test_eucjp_split_to_word_kanhira()
330
+ str = NKF.nkf("-e", "���ܸ��ʸ��").extend CharString
331
+ str.encoding = "EUC-JP"
332
+ expected = ["���ܸ��", "ʸ��"].collect{|c| NKF.nkf("-e", c)}
333
+ assert_equal(expected, str.split_to_word)
334
+ end
335
+ def test_eucjp_split_to_word_katahira()
336
+ str = NKF.nkf("-e", "�������ʤ�ʸ��").extend CharString
337
+ str.encoding = "EUC-JP"
338
+ expected = ["�������ʤ�", "ʸ��"].collect{|c| NKF.nkf("-e", c)}
339
+ assert_equal(expected, str.split_to_word)
340
+ end
341
+ def test_eucjp_split_to_word_kataonbiki()
342
+ str = NKF.nkf("-e", "��ӡ�������").extend CharString
343
+ str.encoding = "EUC-JP" #<= needed to pass the test
344
+ expected = ["��ӡ�", "����", "��"].collect{|c| NKF.nkf("-e", c)}
345
+ assert_equal(expected, str.split_to_word)
346
+ end
347
+ def test_eucjp_split_to_word_hiraonbiki()
348
+ str = NKF.nkf("-e", "���ӡ���").extend CharString
349
+ str.encoding = "EUC-JP" #<= needed to pass the test
350
+ expected = ["�", "��ӡ���"].collect{|c| NKF.nkf("-e", c)}
351
+ assert_equal(expected, str.split_to_word)
352
+ end
353
+ def test_eucjp_split_to_word_latinmix()
354
+ str = NKF.nkf("-e", "���ܸ��Latin��ʸ��").extend CharString
355
+ str.encoding = "EUC-JP"
356
+ expected = ["���ܸ��", "Latin", "��", "ʸ��"].collect{|c| NKF.nkf("-e", c)}
357
+ assert_equal(expected, str.split_to_word)
358
+ end
359
+ def test_eucjp_split_to_char()
360
+ str = NKF.nkf("-e", "���ܸ�a b").extend CharString
361
+ str.encoding = "EUC-JP"
362
+ str.eol = "LF" #<= needed to pass the test
363
+ expected = ["��","��","��","a"," ","b"].collect{|c|NKF.nkf("-e",c)}
364
+ assert_equal(expected, str.split_to_char)
365
+ end
366
+ def test_eucjp_split_to_char_with_cr()
367
+ str = NKF.nkf("-e", "���ܸ�a b\r").extend CharString
368
+ str.encoding = "EUC-JP"
369
+ str.eol = "CR"
370
+ expected = ["��","��","��","a"," ","b","\r"].collect{|c|NKF.nkf("-e",c)}
371
+ assert_equal(expected, str.split_to_char)
372
+ end
373
+ def test_eucjp_split_to_char_with_lf()
374
+ str = NKF.nkf("-e", "���ܸ�a b\n").extend CharString
375
+ str.encoding = "EUC-JP"
376
+ str.eol = "LF"
377
+ expected = ["��","��","��","a"," ","b","\n"].collect{|c|NKF.nkf("-e",c)}
378
+ assert_equal(expected, str.split_to_char)
379
+ end
380
+ def test_eucjp_split_to_char_with_crlf()
381
+ str = NKF.nkf("-e", "���ܸ�a b\r\n").extend CharString
382
+ str.encoding = "EUC-JP"
383
+ str.eol = "CRLF"
384
+ expected = ["��","��","��","a"," ","b","\r\n"].collect{|c|NKF.nkf("-e",c)}
385
+ assert_equal(expected, str.split_to_char)
386
+ end
387
+ def test_eucjp_count_char()
388
+ str = NKF.nkf("-e", "���ܸ�a b\r\n").extend CharString
389
+ str.encoding = "EUC-JP"
390
+ str.eol = "CRLF"
391
+ expected = 7
392
+ assert_equal(expected, str.count_char)
393
+ end
394
+ def test_eucjp_count_latin_graph_char()
395
+ str = NKF.nkf("-e", "���ܸ�a b\r\n").extend CharString
396
+ str.encoding = "EUC-JP"
397
+ str.eol = "CRLF"
398
+ expected = 2
399
+ assert_equal(expected, str.count_latin_graph_char)
400
+ end
401
+ def test_eucjp_count_ja_graph_char()
402
+ str = NKF.nkf("-e", "���ܸ�a b\r\n").extend CharString
403
+ str.encoding = "EUC-JP"
404
+ str.eol = "CRLF"
405
+ expected = 3
406
+ assert_equal(expected, str.count_ja_graph_char)
407
+ end
408
+ def test_eucjp_count_graph_char()
409
+ str = NKF.nkf("-e", "���ܸ�a b\r\n").extend CharString
410
+ str.encoding = "EUC-JP"
411
+ str.eol = "CRLF"
412
+ expected = 5
413
+ assert_equal(expected, str.count_graph_char)
414
+ end
415
+ def test_eucjp_count_latin_blank_char()
416
+ str = NKF.nkf("-e", "���ܸ�\ta b\r\n").extend CharString
417
+ str.encoding = "EUC-JP"
418
+ str.eol = "CRLF"
419
+ expected = 2
420
+ assert_equal(expected, str.count_latin_blank_char)
421
+ end
422
+ def test_eucjp_count_ja_blank_char()
423
+ str = NKF.nkf("-e", "���ܡ���\ta b\r\n").extend CharString
424
+ str.encoding = "EUC-JP"
425
+ str.eol = "CRLF"
426
+ expected = 1
427
+ assert_equal(expected, str.count_ja_blank_char)
428
+ end
429
+ def test_eucjp_count_blank_char()
430
+ str = NKF.nkf("-e", "���ܡ���\ta b\r\n").extend CharString
431
+ str.encoding = "EUC-JP"
432
+ str.eol = "CRLF"
433
+ expected = 3
434
+ assert_equal(expected, str.count_blank_char)
435
+ end
436
+ def test_eucjp_count_word()
437
+ str = NKF.nkf("-e", "���ܡ���a b --\r\n").extend CharString
438
+ str.encoding = "EUC-JP"
439
+ str.eol = "CRLF"
440
+ expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
441
+ assert_equal(expected, str.count_word)
442
+ end
443
+ def test_eucjp_count_ja_word()
444
+ str = NKF.nkf("-e", "���ܡ���a b --\r\n").extend CharString
445
+ str.encoding = "EUC-JP"
446
+ str.eol = "CRLF"
447
+ expected = 3
448
+ assert_equal(expected, str.count_ja_word)
449
+ end
450
+ def test_eucjp_count_latin_valid_word()
451
+ str = NKF.nkf("-e", "���ܡ���a b --\r\n").extend CharString
452
+ str.encoding = "EUC-JP"
453
+ str.eol = "CRLF"
454
+ expected = 2
455
+ assert_equal(expected, str.count_latin_valid_word)
456
+ end
457
+ def test_eucjp_count_ja_valid_word()
458
+ str = NKF.nkf("-e", "���ܡ���a b --\r\n").extend CharString
459
+ str.encoding = "EUC-JP"
460
+ str.eol = "CRLF"
461
+ expected = 2
462
+ assert_equal(expected, str.count_ja_valid_word)
463
+ end
464
+ def test_eucjp_count_valid_word()
465
+ str = NKF.nkf("-e", "���ܡ���a b --\r\n").extend CharString
466
+ str.encoding = "EUC-JP"
467
+ str.eol = "CRLF"
468
+ expected = 4
469
+ assert_equal(expected, str.count_valid_word)
470
+ end
471
+ def test_eucjp_count_line()
472
+ str = NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar").extend CharString
473
+ str.encoding = "EUC-JP"
474
+ str.eol = "CRLF"
475
+ expected = 6
476
+ assert_equal(expected, str.count_line)
477
+ end
478
+ def test_eucjp_count_graph_line()
479
+ str = NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar").extend CharString
480
+ str.encoding = "EUC-JP"
481
+ str.eol = "CRLF"
482
+ expected = 3
483
+ assert_equal(expected, str.count_graph_line)
484
+ end
485
+ def test_eucjp_count_empty_line()
486
+ str = NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar").extend CharString
487
+ str.encoding = "EUC-JP"
488
+ str.eol = "CRLF"
489
+ expected = 1
490
+ assert_equal(expected, str.count_empty_line)
491
+ end
492
+ def test_eucjp_count_blank_line()
493
+ str = NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar").extend CharString
494
+ str.encoding = "EUC-JP"
495
+ str.eol = "CRLF"
496
+ expected = 2
497
+ assert_equal(expected, str.count_blank_line)
498
+ end
499
+
500
+ # test SJIS module
501
+ def test_sjis_split_to_word()
502
+ str = NKF.nkf("-s", "���ܸ��ʸ��foo bar").extend CharString
503
+ str.encoding = "Shift_JIS"
504
+ expected = ["���ܸ��", "ʸ��", "foo ", "bar"].collect{|c|NKF.nkf("-s",c)}
505
+ assert_equal(expected, str.split_to_word)
506
+ end
507
+ def test_sjisplit_s_to_word_kanhira()
508
+ str = NKF.nkf("-s", "���ܸ��ʸ��").extend CharString
509
+ str.encoding = "Shift_JIS"
510
+ expected = ["���ܸ��", "ʸ��"].collect{|c| NKF.nkf("-s", c)}
511
+ assert_equal(expected, str.split_to_word)
512
+ end
513
+ def test_sjis_split_to_word_katahira()
514
+ str = NKF.nkf("-s", "�������ʤ�ʸ��").extend CharString
515
+ str.encoding = "Shift_JIS"
516
+ expected = ["�������ʤ�", "ʸ��"].collect{|c| NKF.nkf("-s", c)}
517
+ assert_equal(expected, str.split_to_word)
518
+ end
519
+ def test_sjis_split_to_word_kataonbiki()
520
+ str = NKF.nkf("-s", "��ӡ��λ���").extend CharString
521
+ str.encoding = "Shift_JIS"
522
+ expected = ["��ӡ���", "����"].collect{|c| NKF.nkf("-s", c)}
523
+ assert_equal(expected, str.split_to_word)
524
+ end
525
+ def test_sjis_split_to_word_hiraonbiki()
526
+ str = NKF.nkf("-s", "���ӡ���").extend CharString
527
+ str.encoding = "Shift_JIS"
528
+ expected = ["�", "��ӡ���"].collect{|c| NKF.nkf("-s", c)}
529
+ assert_equal(expected, str.split_to_word)
530
+ end
531
+ def test_sjis_split_to_word_latinmix()
532
+ str = NKF.nkf("-s", "���ܸ��Latin��ʸ��").extend CharString
533
+ str.encoding = "Shift_JIS"
534
+ expected = ["���ܸ��","Latin","��","ʸ��"].collect{|c| NKF.nkf("-s", c)}
535
+ assert_equal(expected, str.split_to_word)
536
+ end
537
+ def test_sjis_split_to_char()
538
+ str = NKF.nkf("-s", "ɽ�׻�a b").extend CharString
539
+ str.encoding = "Shift_JIS"
540
+ str.eol = "LF" #<= needed to pass the test
541
+ expected = ["ɽ","��","��","a"," ","b"].collect{|c|NKF.nkf("-s",c)}
542
+ assert_equal(expected, str.split_to_char)
543
+ end
544
+ def test_sjis_split_to_char_with_cr()
545
+ str = NKF.nkf("-s", "ɽ�׻�a b\r").extend CharString
546
+ str.encoding = "Shift_JIS"
547
+ str.eol = "CR"
548
+ expected = ["ɽ","��","��","a"," ","b","\r"].collect{|c|NKF.nkf("-s",c)}
549
+ assert_equal(expected, str.split_to_char)
550
+ end
551
+ def test_sjis_split_to_char_with_lf()
552
+ str = NKF.nkf("-s", "ɽ�׻�a b\n").extend CharString
553
+ str.encoding = "Shift_JIS"
554
+ str.eol = "LF"
555
+ expected = ["ɽ","��","��","a"," ","b","\n"].collect{|c|NKF.nkf("-s",c)}
556
+ assert_equal(expected, str.split_to_char)
557
+ end
558
+ def test_sjis_split_to_char_with_crlf()
559
+ str = NKF.nkf("-s", "ɽ�׻�a b\r\n").extend CharString
560
+ str.encoding = "Shift_JIS"
561
+ str.eol = "CRLF"
562
+ expected = ["ɽ","��","��","a"," ","b","\r\n"].collect{|c|NKF.nkf("-s",c)}
563
+ assert_equal(expected, str.split_to_char)
564
+ end
565
+ def test_sjis_count_char()
566
+ str = NKF.nkf("-s", "���ܸ�a b\r\n").extend CharString
567
+ str.encoding = "Shift_JIS"
568
+ str.eol = "CRLF"
569
+ expected = 7
570
+ assert_equal(expected, str.count_char)
571
+ end
572
+ def test_sjis_count_latin_graph_char()
573
+ str = NKF.nkf("-s", "���ܸ�a b\r\n").extend CharString
574
+ str.encoding = "Shift_JIS"
575
+ str.eol = "CRLF"
576
+ expected = 2
577
+ assert_equal(expected, str.count_latin_graph_char)
578
+ end
579
+ def test_sjis_count_ja_graph_char()
580
+ str = NKF.nkf("-s", "���ܸ�a b\r\n").extend CharString
581
+ str.encoding = "Shift_JIS"
582
+ str.eol = "CRLF"
583
+ expected = 3
584
+ assert_equal(expected, str.count_ja_graph_char)
585
+ end
586
+ def test_sjis_count_graph_char()
587
+ str = NKF.nkf("-s", "���ܸ�a b\r\n").extend CharString
588
+ str.encoding = "Shift_JIS"
589
+ str.eol = "CRLF"
590
+ expected = 5
591
+ assert_equal(expected, str.count_graph_char)
592
+ end
593
+ def test_sjis_count_latin_blank_char()
594
+ str = NKF.nkf("-s", "���ܸ�\ta b\r\n").extend CharString
595
+ str.encoding = "Shift_JIS"
596
+ str.eol = "CRLF"
597
+ expected = 2
598
+ assert_equal(expected, str.count_latin_blank_char)
599
+ end
600
+ def test_sjis_count_ja_blank_char()
601
+ str = NKF.nkf("-s", "���ܡ���\ta b\r\n").extend CharString
602
+ str.encoding = "Shift_JIS"
603
+ str.eol = "CRLF"
604
+ expected = 1
605
+ assert_equal(expected, str.count_ja_blank_char)
606
+ end
607
+ def test_sjis_count_blank_char()
608
+ str = NKF.nkf("-s", "���ܡ���\ta b\r\n").extend CharString
609
+ str.encoding = "Shift_JIS"
610
+ str.eol = "CRLF"
611
+ expected = 3
612
+ assert_equal(expected, str.count_blank_char)
613
+ end
614
+ def test_sjis_count_word()
615
+ str = NKF.nkf("-s", "���ܡ���a b --\r\n").extend CharString
616
+ str.encoding = "Shift_JIS"
617
+ str.eol = "CRLF"
618
+ expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
619
+ assert_equal(expected, str.count_word)
620
+ end
621
+ def test_sjis_count_ja_word()
622
+ str = NKF.nkf("-s", "���ܡ���a b --\r\n").extend CharString
623
+ str.encoding = "Shift_JIS"
624
+ str.eol = "CRLF"
625
+ expected = 3
626
+ assert_equal(expected, str.count_ja_word)
627
+ end
628
+ def test_sjis_count_latin_valid_word()
629
+ str = NKF.nkf("-s", "���ܡ���a b --\r\n").extend CharString
630
+ str.encoding = "Shift_JIS"
631
+ str.eol = "CRLF"
632
+ expected = 2
633
+ assert_equal(expected, str.count_latin_valid_word)
634
+ end
635
+ def test_sjis_count_ja_valid_word()
636
+ str = NKF.nkf("-s", "���ܡ���a b --\r\n").extend CharString
637
+ str.encoding = "Shift_JIS"
638
+ str.eol = "CRLF"
639
+ expected = 2
640
+ assert_equal(expected, str.count_ja_valid_word)
641
+ end
642
+ def test_sjis_count_valid_word()
643
+ str = NKF.nkf("-s", "���ܡ���a b --\r\n").extend CharString
644
+ str.encoding = "Shift_JIS"
645
+ str.eol = "CRLF"
646
+ expected = 4
647
+ assert_equal(expected, str.count_valid_word)
648
+ end
649
+ def test_sjis_count_line()
650
+ str = NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar").extend CharString
651
+ str.encoding = "Shift_JIS"
652
+ str.eol = "CRLF"
653
+ expected = 6
654
+ assert_equal(expected, str.count_line)
655
+ end
656
+ def test_sjis_count_graph_line()
657
+ str = NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar").extend CharString
658
+ str.encoding = "Shift_JIS"
659
+ str.eol = "CRLF"
660
+ expected = 3
661
+ assert_equal(expected, str.count_graph_line)
662
+ end
663
+ def test_sjis_count_empty_line()
664
+ str = NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar").extend CharString
665
+ str.encoding = "Shift_JIS"
666
+ str.eol = "CRLF"
667
+ expected = 1
668
+ assert_equal(expected, str.count_empty_line)
669
+ end
670
+ def test_sjis_count_blank_line()
671
+ str = NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar").extend CharString
672
+ str.encoding = "Shift_JIS"
673
+ str.eol = "CRLF"
674
+ expected = 2
675
+ assert_equal(expected, str.count_blank_line)
676
+ end
677
+
678
+ # test UTF8 module
679
+ def test_utf8_split_to_word()
680
+ str = NKF.nkf("-E -w", "���ܸ��ʸ��foo bar").extend CharString
681
+ str.encoding = "UTF-8"
682
+ expected = ["���ܸ��", "ʸ��", "foo ", "bar"].collect{|c| NKF.nkf("-E -w", c)}
683
+ assert_equal(expected, str.split_to_word)
684
+ end
685
+ def test_utf8_split_to_word_kanhira()
686
+ str = NKF.nkf("-E -w", "���ܸ��ʸ��").extend CharString
687
+ str.encoding = "UTF-8"
688
+ expected = ["���ܸ��", "ʸ��"].collect{|c| NKF.nkf("-E -w", c)}
689
+ assert_equal(expected, str.split_to_word)
690
+ end
691
+ def test_utf8_split_to_word_katahira()
692
+ str = NKF.nkf("-E -w", "�������ʤ�ʸ��").extend CharString
693
+ str.encoding = "UTF-8"
694
+ expected = ["�������ʤ�", "ʸ��"].collect{|c| NKF.nkf("-E -w", c)}
695
+ assert_equal(expected, str.split_to_word)
696
+ end
697
+ def test_utf8_split_to_word_kataonbiki()
698
+ str = NKF.nkf("-E -w", "��ӡ��λ���").extend CharString
699
+ str.encoding = "UTF-8"
700
+ expected = ["��ӡ���", "����"].collect{|c| NKF.nkf("-E -w", c)}
701
+ assert_equal(expected, str.split_to_word)
702
+ end
703
+ def test_utf8_split_to_word_hiraonbiki()
704
+ str = NKF.nkf("-E -w", "���ӡ���").extend CharString
705
+ str.encoding = "UTF-8"
706
+ expected = ["�", "��ӡ���"].collect{|c| NKF.nkf("-E -w", c)}
707
+ assert_equal(expected, str.split_to_word)
708
+ end
709
+ def test_utf8_split_to_word_latinmix()
710
+ str = NKF.nkf("-E -w", "���ܸ��Latin��ʸ��").extend CharString
711
+ str.encoding = "UTF-8"
712
+ expected = ["���ܸ��", "Latin", "��", "ʸ��"].collect{|c| NKF.nkf("-E -w", c)}
713
+ assert_equal(expected, str.split_to_word)
714
+ end
715
+ def test_utf8_split_to_char()
716
+ str = NKF.nkf("-E -w", "���ܸ�a b").extend CharString
717
+ str.encoding = "UTF-8" #<= needed to pass the test
718
+ str.eol = "LF" #<= needed to pass the test
719
+ expected = ["��", "��", "��", "a", " ", "b"].collect{|c| NKF.nkf("-E -w", c)}
720
+ assert_equal(expected, str.split_to_char)
721
+ end
722
+ def test_utf8_split_to_char_with_cr()
723
+ str = NKF.nkf("-E -w", "���ܸ�a b\r").extend CharString
724
+ str.encoding = "UTF-8" #<= needed to pass the test
725
+ str.eol = "CR"
726
+ expected = ["��","��","��","a"," ","b","\r"].collect{|c| NKF.nkf("-E -w", c)}
727
+ assert_equal(expected, str.split_to_char)
728
+ end
729
+ def test_utf8_split_to_char_with_lf()
730
+ str = NKF.nkf("-E -w", "���ܸ�a b\n").extend CharString
731
+ str.encoding = "UTF-8" #<= needed to pass the test
732
+ str.eol = "LF"
733
+ expected = ["��","��","��","a"," ","b","\n"].collect{|c| NKF.nkf("-E -w", c)}
734
+ assert_equal(expected, str.split_to_char)
735
+ end
736
+ def test_utf8_split_to_char_with_crlf()
737
+ str = NKF.nkf("-E -w", "���ܸ�a b\r\n").extend CharString
738
+ str.encoding = "UTF-8"#<= needed to pass the test
739
+ str.eol = "CRLF"
740
+ expected = ["��","��","��","a"," ","b","\r\n"].collect{|c| NKF.nkf("-E -w", c)}
741
+ assert_equal(expected, str.split_to_char)
742
+ end
743
# count_char on the CRLF-terminated UTF-8 sample must report 7.
def test_utf8_count_char()
  sample = NKF.nkf("-E -w", "���ܸ�a b\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8" # required for the test to pass
  sample.eol = "CRLF"
  assert_equal(7, sample.count_char)
end
750
# count_latin_graph_char on the CRLF-terminated UTF-8 sample must report 2.
def test_utf8_count_latin_graph_char()
  sample = NKF.nkf("-E -w", "���ܸ�a b\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8" # required for the test to pass
  sample.eol = "CRLF"
  assert_equal(2, sample.count_latin_graph_char)
end
757
# count_ja_graph_char on the CRLF-terminated UTF-8 sample must report 3.
def test_utf8_count_ja_graph_char()
  sample = NKF.nkf("-E -w", "���ܸ�a b\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8" # required for the test to pass
  sample.eol = "CRLF"
  assert_equal(3, sample.count_ja_graph_char)
end
764
# count_graph_char on the CRLF-terminated UTF-8 sample must report 5.
def test_utf8_count_graph_char()
  sample = NKF.nkf("-E -w", "���ܸ�a b\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8" # required for the test to pass
  sample.eol = "CRLF"
  assert_equal(5, sample.count_graph_char)
end
771
# count_latin_blank_char on a UTF-8 sample containing a tab and a space
# must report 2.
def test_utf8_count_latin_blank_char()
  sample = NKF.nkf("-E -w", "���ܸ�\ta b\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(2, sample.count_latin_blank_char)
end
778
# count_ja_blank_char on the UTF-8 sample must report 1.
def test_utf8_count_ja_blank_char()
  sample = NKF.nkf("-E -w", "���ܡ���\ta b\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(1, sample.count_ja_blank_char)
end
785
# count_blank_char on the UTF-8 sample must report 3.
def test_utf8_count_blank_char()
  sample = NKF.nkf("-E -w", "���ܡ���\ta b\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(3, sample.count_blank_char)
end
792
# count_word on the UTF-8 sample must report 7.
def test_utf8_count_word()
  sample = NKF.nkf("-E -w", "���ܡ���a b --\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  # "--" and "\r\n" are included in this total even though they are not
  # "valid" words.
  assert_equal(7, sample.count_word)
end
799
# count_ja_word on the UTF-8 sample must report 3.
def test_utf8_count_ja_word()
  sample = NKF.nkf("-E -w", "���ܡ���a b --\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(3, sample.count_ja_word)
end
806
# count_latin_valid_word on the UTF-8 sample must report 2.
def test_utf8_count_latin_valid_word()
  sample = NKF.nkf("-E -w", "���ܡ���a b --\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(2, sample.count_latin_valid_word)
end
813
# count_ja_valid_word on the UTF-8 sample must report 2.
def test_utf8_count_ja_valid_word()
  sample = NKF.nkf("-E -w", "���ܡ���a b --\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(2, sample.count_ja_valid_word)
end
820
# count_valid_word on the UTF-8 sample must report 4.
def test_utf8_count_valid_word()
  sample = NKF.nkf("-E -w", "���ܡ���a b --\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(4, sample.count_valid_word)
end
827
# count_line on a CRLF-separated UTF-8 sample whose last line has no
# terminator must report 6.
def test_utf8_count_line()
  sample = NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(6, sample.count_line)
end
834
# count_graph_line on the CRLF-separated UTF-8 sample must report 3.
def test_utf8_count_graph_line()
  sample = NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(3, sample.count_graph_line)
end
841
# count_empty_line on the CRLF-separated UTF-8 sample must report 1.
def test_utf8_count_empty_line()
  sample = NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(1, sample.count_empty_line)
end
848
# count_blank_line on the CRLF-separated UTF-8 sample must report 2.
def test_utf8_count_blank_line()
  sample = NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(2, sample.count_blank_line)
end
855
+
856
+ # test module functions
857
+
858
# Shared assertion helper for the encoding-guessing tests.
#
# expected - encoding name (or nil) that every guesser must return
# str      - input handed to the guessers
#
# When CharString.ruby_m17n? is false, the pure-Ruby and Iconv-based
# guessers are checked first; CharString.guess_encoding is always checked.
def assert_guess_encoding(expected, str)
  if CharString.ruby_m17n?
    # Only the generic entry point is exercised in this case.
  else
    assert_equal(expected, CharString.guess_encoding_using_pureruby(str))
    assert_equal(expected, CharString.guess_encoding_using_iconv(str))
  end
  assert_equal(expected, CharString.guess_encoding(str))
end
865
+
866
# A nil input must yield a nil guess from every guesser.
def test_guess_encoding_nil()
  assert_guess_encoding(nil, nil)
end
871
+ # def test_guess_encoding_binary()
872
+ # str = "\xFF\xFF"
873
+ # expected = "BINARY"
874
+ # assert_equal(expected, CharString.guess_encoding(str))
875
+ # end
876
# Input that carries no recognizable text encoding.
# When CharString.ruby_m17n? is true, an empty BINARY-encoded string must
# be reported as "ASCII-8BIT"; otherwise four 0xFF bytes must be reported
# as "UNKNOWN".
def test_guess_encoding_unknown()
  sample, verdict =
    if CharString.ruby_m17n?
      # cannot put invalid string literal
      ["".encode("BINARY"), "ASCII-8BIT"]
    else
      ["\xff\xff\xff\xff", "UNKNOWN"] # "\xDE\xAD\xBE\xEF"
    end
  assert_guess_encoding(verdict, sample)
end
886
# A plain ASCII sentence must be detected as "US-ASCII". When
# CharString.ruby_m17n? is true the sample is first transcoded to US-ASCII.
def test_guess_encoding_ascii_1()
  sample = "ASCII string"
  sample = sample.encode("US-ASCII") if CharString.ruby_m17n?
  assert_guess_encoding("US-ASCII", sample)
end
896
# Multi-line ASCII text must be detected as "US-ASCII". When
# CharString.ruby_m17n? is true the sample is first transcoded to US-ASCII.
def test_guess_encoding_ascii_2()
  sample = "abc\ndef\n"
  sample = sample.encode("US-ASCII") if CharString.ruby_m17n?
  assert_guess_encoding("US-ASCII", sample)
end
906
+ # CharString.guess_encoding mistakes JIS for ASCII sometimes, due to Iconv.
907
+ # def test_guess_encoding_jis_1()
908
+ # str = NKF.nkf("-j", "�����ȥ������ʤȤҤ餬��\n")
909
+ # expected = "JIS"
910
+ # assert_guess_encoding(expected, str)
911
+ # end
912
+ # def test_guess_encoding_jis_2()
913
+ # str = NKF.nkf("-j", "�����ȥ������ʤȤҤ餬�ʤ�Latin��ʸ���ȶ���( )�ȵ���@\n" * 100)
914
+ # expected = "JIS"
915
+ # assert_guess_encoding(expected, str)
916
+ # end
917
# A sample emitted by NKF with "-e" (EUC-JP output) must be detected as
# "EUC-JP".
def test_guess_encoding_eucjp_1()
  sample = NKF.nkf("-e", "���ܸ��Latin��ʸ��")
  assert_guess_encoding("EUC-JP", sample)
end
922
# A longer EUC-JP sample (one line repeated ten times) must be detected as
# "EUC-JP".
def test_guess_encoding_eucjp_2()
  repeated = "�����ȥ������ʤȤҤ餬�ʤ�Latin��ʸ���ȶ���( )\n" * 10
  sample = NKF.nkf('-e', repeated)
  assert_guess_encoding("EUC-JP", sample)
end
927
# A two-line EUC-JP sample that mixes in the Latin words "Ruby" and
# "Ruby Hacker" must be detected as "EUC-JP".
def test_guess_encoding_eucjp_3()
  sample = NKF.nkf('-e', "����Ф�ϡ����̾���ϤޤĤ�ȤǤ���\nRuby���ä��Τϻ�Ǥ������Ruby Hacker�Ǥ���\n")
  assert_guess_encoding("EUC-JP", sample)
end
932
# A sample emitted by NKF with "-s" (Shift_JIS output) must be detected as
# "Shift_JIS".
def test_guess_encoding_sjis_1()
  sample = NKF.nkf("-s", "���ܸ��Latin��ʸ��")
  assert_guess_encoding("Shift_JIS", sample)
end
937
# A multi-line Shift_JIS sample ending in the Latin word "Latin" must be
# detected as "Shift_JIS".
def test_guess_encoding_sjis_2()
  sample = NKF.nkf('-s', "������\n�������ʤ�\n�Ҥ餬�ʤ�\nLatin")
  assert_guess_encoding("Shift_JIS", sample)
end
942
# A sample converted by NKF with "-E -w" (EUC-JP input, UTF-8 output) must
# be detected as "UTF-8".
def test_guess_encoding_utf8_1()
  sample = NKF.nkf("-E -w", "���ܸ��Latin��ʸ��")
  assert_guess_encoding("UTF-8", sample)
end
947
# A two-line sample converted to UTF-8 by NKF must be detected as "UTF-8".
def test_guess_encoding_utf8_2()
  sample = NKF.nkf("-E -w", "������\n�ˤۤؤ�\n")
  assert_guess_encoding("UTF-8", sample)
end
952
+
953
# guess_eol must return nil when given nil.
def test_guess_eol_nil()
  assert_equal(nil, CharString.guess_eol(nil))
end
958
# guess_eol must report "NONE" for an empty string.
def test_guess_eol_empty()
  assert_equal("NONE", CharString.guess_eol(""))
end
963
# guess_eol must report "NONE" for text that contains no line terminator.
def test_guess_eol_none()
  assert_equal("NONE", CharString.guess_eol("foo bar"))
end
968
# guess_eol must report "CR" for text ending in a lone carriage return.
def test_guess_eol_cr()
  assert_equal("CR", CharString.guess_eol("foo bar\r"))
end
973
# guess_eol must report "LF" for text ending in a lone line feed.
def test_guess_eol_lf()
  assert_equal("LF", CharString.guess_eol("foo bar\n"))
end
978
# guess_eol must report "CRLF" for text ending in "\r\n".
def test_guess_eol_crlf()
  assert_equal("CRLF", CharString.guess_eol("foo bar\r\n"))
end
983
# guess_eol must report "UNKNOWN" when CR, LF and CRLF terminators are
# mixed within one string.
def test_guess_eol_mixed()
  assert_equal("UNKNOWN", CharString.guess_eol("foo\rbar\nbaz\r\n"))
end
988
# guess_eol must report "CR" for a three-line, CR-terminated string that
# has been extended with CharString.
def test_guess_eol_cr2()
  sample = "foo\rbar\rbaz\r"
  sample.extend(CharString)
  assert_equal("CR", CharString.guess_eol(sample))
end
993
# guess_eol must report "LF" for a three-line, LF-terminated string that
# has been extended with CharString.
def test_guess_eol_lf2()
  sample = "foo\nbar\nbaz\n"
  sample.extend(CharString)
  assert_equal("LF", CharString.guess_eol(sample))
end
998
# guess_eol must report "CRLF" for a three-line, CRLF-terminated string
# that has been extended with CharString.
def test_guess_eol_crlf2()
  sample = "foo\r\nbar\r\nbaz\r\n"
  sample.extend(CharString)
  assert_equal("CRLF", CharString.guess_eol(sample))
end
1003
+
1004
# Intentionally empty: the body holds only a placeholder comment, so this
# method performs no cleanup.
# NOTE(review): presumably the test framework's per-test teardown hook --
# confirm against the enclosing class, whose header is outside this view.
+ def teardown()
1005
+ #
1006
+ end
1007
+
1008
+ end