docdiff 0.6.1 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/Makefile +27 -49
  3. data/README.md +351 -0
  4. data/README_ja.md +351 -0
  5. data/Rakefile +2 -42
  6. data/bin/docdiff +66 -29
  7. data/{docdiff.conf.example → doc/example/docdiff.conf.example} +4 -3
  8. data/doc/man/docdiff.adoc +146 -0
  9. data/doc/news.md +180 -0
  10. data/doc/shell_completion/_docdiff.zsh +51 -0
  11. data/doc/shell_completion/docdiff.bash +68 -0
  12. data/docdiff.gemspec +2 -1
  13. data/lib/doc_diff.rb +13 -0
  14. data/lib/docdiff/charstring.rb +4 -3
  15. data/lib/docdiff/diff/unidiff.rb +0 -1
  16. data/lib/docdiff/encoding/en_ascii.rb +12 -39
  17. data/lib/docdiff/encoding/ja_eucjp.rb +12 -39
  18. data/lib/docdiff/encoding/ja_sjis.rb +12 -39
  19. data/lib/docdiff/encoding/ja_utf8.rb +12 -39
  20. data/lib/docdiff/version.rb +1 -1
  21. data/lib/docdiff/view.rb +16 -8
  22. data/test/charstring_test.rb +124 -121
  23. data/test/docdiff_test.rb +5 -3
  24. data/test/document_test.rb +112 -109
  25. data/test/fixture/01_ja_utf8_lf.txt +2 -0
  26. data/test/fixture/02_ja_utf8_lf.txt +2 -0
  27. data/test/view_test.rb +135 -111
  28. metadata +41 -43
  29. data/devutil/changelog.sh +0 -40
  30. data/index.html +0 -181
  31. data/langfilter.rb +0 -10
  32. data/lib/viewdiff.rb +0 -379
  33. data/readme.html +0 -733
  34. data/readme.md +0 -184
  35. data/test/viewdiff_test.rb +0 -911
  36. /data/{docdiffwebui.cgi → doc/example/docdiffwebui.cgi} +0 -0
  37. /data/{docdiffwebui.html → doc/example/docdiffwebui.html} +0 -0
  38. /data/{img/docdiff-screenshot-format-html-digest-firefox.png → doc/img/screenshot-format-html-digest-firefox.png} +0 -0
  39. /data/{img/docdiff-screenshot-format-html-firefox.png → doc/img/screenshot-format-html-firefox.png} +0 -0
  40. /data/{img/docdiff-screenshot-format-tty-cmdexe-en.png → doc/img/screenshot-format-tty-cmdexe-en.png} +0 -0
  41. /data/{img/docdiff-screenshot-format-tty-cmdexe-ja.png → doc/img/screenshot-format-tty-cmdexe-ja.png} +0 -0
  42. /data/{img/docdiff-screenshot-format-tty-rxvtunicode-en.png → doc/img/screenshot-format-tty-rxvtunicode-en.png} +0 -0
  43. /data/{img/docdiff-screenshot-format-tty-rxvtunicode-ja.png → doc/img/screenshot-format-tty-rxvtunicode-ja.png} +0 -0
  44. /data/{img/docdiff-screenshot-format-tty-xterm-en.png → doc/img/screenshot-format-tty-xterm-en.png} +0 -0
  45. /data/{img/docdiff-screenshot-format-tty-xterm-ja.png → doc/img/screenshot-format-tty-xterm-ja.png} +0 -0
  46. /data/{img/docdiff-screenshot-resolution-linewordchar-xterm.png → doc/img/screenshot-resolution-linewordchar-xterm.png} +0 -0
  47. /data/{sample/01.en.ascii.cr → test/fixture/01_en_ascii_cr.txt} +0 -0
  48. /data/{sample/01.en.ascii.crlf → test/fixture/01_en_ascii_crlf.txt} +0 -0
  49. /data/{sample/01.en.ascii.lf → test/fixture/01_en_ascii_lf.txt} +0 -0
  50. /data/{sample/01.ja.eucjp.lf → test/fixture/01_ja_eucjp_lf.txt} +0 -0
  51. /data/{sample/01.ja.sjis.cr → test/fixture/01_ja_sjis_cr.txt} +0 -0
  52. /data/{sample/01.ja.sjis.crlf → test/fixture/01_ja_sjis_crlf.txt} +0 -0
  53. /data/{sample/01.ja.utf8.crlf → test/fixture/01_ja_utf8_crlf.txt} +0 -0
  54. /data/{sample/02.en.ascii.cr → test/fixture/02_en_ascii_cr.txt} +0 -0
  55. /data/{sample/02.en.ascii.crlf → test/fixture/02_en_ascii_crlf.txt} +0 -0
  56. /data/{sample/02.en.ascii.lf → test/fixture/02_en_ascii_lf.txt} +0 -0
  57. /data/{sample/02.ja.eucjp.lf → test/fixture/02_ja_eucjp_lf.txt} +0 -0
  58. /data/{sample/02.ja.sjis.cr → test/fixture/02_ja_sjis_cr.txt} +0 -0
  59. /data/{sample/02.ja.sjis.crlf → test/fixture/02_ja_sjis_crlf.txt} +0 -0
  60. /data/{sample/02.ja.utf8.crlf → test/fixture/02_ja_utf8_crlf.txt} +0 -0
  61. /data/{sample/humpty_dumpty01.ascii.lf → test/fixture/humpty_dumpty01_ascii_lf.txt} +0 -0
  62. /data/{sample/humpty_dumpty02.ascii.lf → test/fixture/humpty_dumpty02_ascii_lf.txt} +0 -0
data/test/docdiff_test.rb CHANGED
@@ -1,10 +1,13 @@
1
1
  #!/usr/bin/ruby
2
2
  # -*- coding: us-ascii; -*-
3
+
4
+ # frozen_string_literal: false
5
+
3
6
  require 'test/unit'
4
7
  require 'docdiff'
5
8
  require 'nkf'
6
9
 
7
- class TC_DocDiff_Document < Test::Unit::TestCase
10
+ class TC_DocDiff < Test::Unit::TestCase
8
11
  Document = DocDiff::Document
9
12
 
10
13
  def setup()
@@ -126,7 +129,6 @@ class TC_DocDiff_Document < Test::Unit::TestCase
126
129
  "",
127
130
  nil].join
128
131
  expected = {:foo1=>true, :foo2=>"bar baz", :foo3=>123, :foo4=>false}
129
- docdiff = DocDiff.new
130
132
  assert_equal(expected,
131
133
  DocDiff.parse_config_file_content(content))
132
134
  end
@@ -188,7 +190,7 @@ class TC_DocDiff_Document < Test::Unit::TestCase
188
190
  def test_cli()
189
191
  expected = "Hello, my name is [-Watanabe.-]{+matz.+}\n"
190
192
  cmd = "ruby -I lib bin/docdiff --wdiff" +
191
- " sample/01.en.ascii.lf sample/02.en.ascii.lf"
193
+ " test/fixture/01_en_ascii_lf.txt test/fixture/02_en_ascii_lf.txt"
192
194
  actual = `#{cmd}`.scan(/^.*?$\n/m).first
193
195
  assert_equal(expected, actual)
194
196
  end
@@ -1,5 +1,8 @@
1
1
  #!/usr/bin/ruby
2
- # -*- coding: euc-jp; -*-
2
+ # -*- coding: utf-8; -*-
3
+
4
+ # frozen_string_literal: false
5
+
3
6
  require 'test/unit'
4
7
  require 'docdiff/document'
5
8
  require 'nkf'
@@ -220,396 +223,396 @@ class TC_DocDiff_Document < Test::Unit::TestCase
220
223
 
221
224
  # test EUCJP module
222
225
  def test_eucjp_split_to_word()
223
- doc = Document.new(NKF.nkf("-e", "���ܸ��ʸ��foo bar"))
224
- expected = ["���ܸ��","ʸ��","foo ","bar"].collect{|c| NKF.nkf("-e", c)}
226
+ doc = Document.new(NKF.nkf("--euc", "日本語の文字foo bar"))
227
+ expected = ["日本語の","文字","foo ","bar"].map{|c| NKF.nkf("--euc", c)}
225
228
  assert_equal(expected, doc.split_to_word)
226
229
  end
227
230
  def test_eucjp_split_to_word_kanhira()
228
- doc = Document.new(NKF.nkf("-e", "���ܸ��ʸ��"))
229
- expected = ["���ܸ��", "ʸ��"].collect{|c| NKF.nkf("-e", c)}
231
+ doc = Document.new(NKF.nkf("--euc", "日本語の文字"))
232
+ expected = ["日本語の", "文字"].map{|c| NKF.nkf("--euc", c)}
230
233
  assert_equal(expected, doc.split_to_word)
231
234
  end
232
235
  def test_eucjp_split_to_word_katahira()
233
- doc = Document.new(NKF.nkf("-e", "�������ʤ�ʸ��"))
234
- expected = ["�������ʤ�", "ʸ��"].collect{|c| NKF.nkf("-e", c)}
236
+ doc = Document.new(NKF.nkf("--euc", "カタカナの文字"))
237
+ expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--euc", c)}
235
238
  assert_equal(expected, doc.split_to_word)
236
239
  end
237
240
  def test_eucjp_split_to_word_kataonbiki()
238
- doc = Document.new(NKF.nkf("-e", "��ӡ�������"), "EUC-JP")
239
- expected = ["��ӡ�", "����", "��"].collect{|c| NKF.nkf("-e", c)}
241
+ doc = Document.new(NKF.nkf("--euc", "ルビー色の石"), "EUC-JP")
242
+ expected = ["ルビー", "色の", "石"].map{|c| NKF.nkf("--euc", c)}
240
243
  assert_equal(expected, doc.split_to_word)
241
244
  end
242
245
  def test_eucjp_split_to_word_hiraonbiki()
243
- doc = Document.new(NKF.nkf("-e", "���ӡ���"), "EUC-JP")
244
- expected = (["�", "��ӡ���"]).collect{|c| NKF.nkf("-e", c)}
246
+ doc = Document.new(NKF.nkf("--euc", "わールビーだ"), "EUC-JP")
247
+ expected = (["わー", "ルビーだ"]).map{|c| NKF.nkf("--euc", c)}
245
248
  assert_equal(expected, doc.split_to_word)
246
249
  end
247
250
  def test_eucjp_split_to_word_latinmix()
248
- doc = Document.new(NKF.nkf("-e", "���ܸ��Latin��ʸ��"))
249
- expected = ["���ܸ��", "Latin", "��", "ʸ��"].collect{|c| NKF.nkf("-e", c)}
251
+ doc = Document.new(NKF.nkf("--euc", "日本語とLatinの文字"))
252
+ expected = ["日本語と", "Latin", "の", "文字"].map{|c| NKF.nkf("--euc", c)}
250
253
  assert_equal(expected, doc.split_to_word)
251
254
  end
252
255
  def test_eucjp_split_to_char()
253
- doc = Document.new(NKF.nkf("-e", "���ܸ�a b"))
254
- expected = ["��","��","��","a"," ","b"].collect{|c|NKF.nkf("-e",c)}
256
+ doc = Document.new(NKF.nkf("--euc", "日本語a b"))
257
+ expected = ["日","本","語","a"," ","b"].map{|c|NKF.nkf("--euc",c)}
255
258
  assert_equal(expected, doc.split_to_char)
256
259
  end
257
260
  def test_eucjp_split_to_char_with_cr()
258
- doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r"))
259
- expected = ["��","��","��","a"," ","b","\r"].collect{|c|NKF.nkf("-e",c)}
261
+ doc = Document.new(NKF.nkf("--euc", "日本語a b\r"))
262
+ expected = ["日","本","語","a"," ","b","\r"].map{|c|NKF.nkf("--euc",c)}
260
263
  assert_equal(expected, doc.split_to_char)
261
264
  end
262
265
  def test_eucjp_split_to_char_with_lf()
263
- doc = Document.new(NKF.nkf("-e", "���ܸ�a b\n"))
264
- expected = ["��","��","��","a"," ","b","\n"].collect{|c|NKF.nkf("-e",c)}
266
+ doc = Document.new(NKF.nkf("--euc", "日本語a b\n"))
267
+ expected = ["日","本","語","a"," ","b","\n"].map{|c|NKF.nkf("--euc",c)}
265
268
  assert_equal(expected, doc.split_to_char)
266
269
  end
267
270
  def test_eucjp_split_to_char_with_crlf()
268
- doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
269
- expected = ["��","��","��","a"," ","b","\r\n"].collect{|c|NKF.nkf("-e",c)}
271
+ doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
272
+ expected = ["日","本","語","a"," ","b","\r\n"].map{|c|NKF.nkf("--euc",c)}
270
273
  assert_equal(expected, doc.split_to_char)
271
274
  end
272
275
  def test_eucjp_count_char()
273
- doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
276
+ doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
274
277
  expected = 7
275
278
  assert_equal(expected, doc.count_char)
276
279
  end
277
280
  def test_eucjp_count_latin_graph_char()
278
- doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
281
+ doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
279
282
  expected = 2
280
283
  assert_equal(expected, doc.count_latin_graph_char)
281
284
  end
282
285
  def test_eucjp_count_ja_graph_char()
283
- doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
286
+ doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
284
287
  expected = 3
285
288
  assert_equal(expected, doc.count_ja_graph_char)
286
289
  end
287
290
  def test_eucjp_count_graph_char()
288
- doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
291
+ doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
289
292
  expected = 5
290
293
  assert_equal(expected, doc.count_graph_char)
291
294
  end
292
295
  def test_eucjp_count_latin_blank_char()
293
- doc = Document.new(NKF.nkf("-e", "���ܸ�\ta b\r\n"))
296
+ doc = Document.new(NKF.nkf("--euc", "日本語\ta b\r\n"))
294
297
  expected = 2
295
298
  assert_equal(expected, doc.count_latin_blank_char)
296
299
  end
297
300
  def test_eucjp_count_ja_blank_char()
298
- doc = Document.new(NKF.nkf("-e", "���ܡ���\ta b\r\n"))
301
+ doc = Document.new(NKF.nkf("--euc", "日本 語\ta b\r\n"))
299
302
  expected = 1
300
303
  assert_equal(expected, doc.count_ja_blank_char)
301
304
  end
302
305
  def test_eucjp_count_blank_char()
303
- doc = Document.new(NKF.nkf("-e", "���ܡ���\ta b\r\n"))
306
+ doc = Document.new(NKF.nkf("--euc", "日本 語\ta b\r\n"))
304
307
  expected = 3
305
308
  assert_equal(expected, doc.count_blank_char)
306
309
  end
307
310
  def test_eucjp_count_word()
308
- doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
311
+ doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
309
312
  expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
310
313
  assert_equal(expected, doc.count_word)
311
314
  end
312
315
  def test_eucjp_count_ja_word()
313
- doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
316
+ doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
314
317
  expected = 3
315
318
  assert_equal(expected, doc.count_ja_word)
316
319
  end
317
320
  def test_eucjp_count_latin_valid_word()
318
- doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
321
+ doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
319
322
  expected = 2
320
323
  assert_equal(expected, doc.count_latin_valid_word)
321
324
  end
322
325
  def test_eucjp_count_ja_valid_word()
323
- doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
326
+ doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
324
327
  expected = 2
325
328
  assert_equal(expected, doc.count_ja_valid_word)
326
329
  end
327
330
  def test_eucjp_count_valid_word()
328
- doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
331
+ doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
329
332
  expected = 4
330
333
  assert_equal(expected, doc.count_valid_word)
331
334
  end
332
335
  def test_eucjp_count_line()
333
- doc = Document.new(NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
336
+ doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
334
337
  expected = 6
335
338
  assert_equal(expected, doc.count_line)
336
339
  end
337
340
  def test_eucjp_count_graph_line()
338
- doc = Document.new(NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
341
+ doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
339
342
  expected = 3
340
343
  assert_equal(expected, doc.count_graph_line)
341
344
  end
342
345
  def test_eucjp_count_empty_line()
343
- doc = Document.new(NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
346
+ doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
344
347
  expected = 1
345
348
  assert_equal(expected, doc.count_empty_line)
346
349
  end
347
350
  def test_eucjp_count_blank_line()
348
- doc = Document.new(NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
351
+ doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
349
352
  expected = 2
350
353
  assert_equal(expected, doc.count_blank_line)
351
354
  end
352
355
 
353
356
  # test SJIS module
354
357
  def test_sjis_split_to_word()
355
- doc = Document.new(NKF.nkf("-s", "���ܸ��ʸ��foo bar"))
356
- expected = ["���ܸ��", "ʸ��", "foo ", "bar"].collect{|c|NKF.nkf("-s",c)}
358
+ doc = Document.new(NKF.nkf("--sjis", "日本語の文字foo bar"))
359
+ expected = ["日本語の", "文字", "foo ", "bar"].map{|c|NKF.nkf("--sjis",c)}
357
360
  assert_equal(expected, doc.split_to_word)
358
361
  end
359
362
  def test_sjisplit_s_to_word_kanhira()
360
- doc = Document.new(NKF.nkf("-s", "���ܸ��ʸ��"))
361
- expected = ["���ܸ��", "ʸ��"].collect{|c| NKF.nkf("-s", c)}
363
+ doc = Document.new(NKF.nkf("--sjis", "日本語の文字"))
364
+ expected = ["日本語の", "文字"].map{|c| NKF.nkf("--sjis", c)}
362
365
  assert_equal(expected, doc.split_to_word)
363
366
  end
364
367
  def test_sjis_split_to_word_katahira()
365
- doc = Document.new(NKF.nkf("-s", "�������ʤ�ʸ��"))
366
- expected = ["�������ʤ�", "ʸ��"].collect{|c| NKF.nkf("-s", c)}
368
+ doc = Document.new(NKF.nkf("--sjis", "カタカナの文字"))
369
+ expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--sjis", c)}
367
370
  assert_equal(expected, doc.split_to_word)
368
371
  end
369
372
  def test_sjis_split_to_word_kataonbiki()
370
- doc = Document.new(NKF.nkf("-s", "��ӡ��λ���"))
371
- expected = ["��ӡ���", "����"].collect{|c| NKF.nkf("-s", c)}
373
+ doc = Document.new(NKF.nkf("--sjis", "ルビーの指輪"))
374
+ expected = ["ルビーの", "指輪"].map{|c| NKF.nkf("--sjis", c)}
372
375
  assert_equal(expected, doc.split_to_word)
373
376
  end
374
377
  def test_sjis_split_to_word_hiraonbiki()
375
- doc = Document.new(NKF.nkf("-s", "���ӡ���"))
376
- expected = ["�", "��ӡ���"].collect{|c| NKF.nkf("-s", c)}
378
+ doc = Document.new(NKF.nkf("--sjis", "わールビーだ"))
379
+ expected = ["わー", "ルビーだ"].map{|c| NKF.nkf("--sjis", c)}
377
380
  assert_equal(expected, doc.split_to_word)
378
381
  end
379
382
  def test_sjis_split_to_word_latinmix()
380
- doc = Document.new(NKF.nkf("-s", "���ܸ��Latin��ʸ��"))
381
- expected = ["���ܸ��","Latin","��","ʸ��"].collect{|c| NKF.nkf("-s", c)}
383
+ doc = Document.new(NKF.nkf("--sjis", "日本語とLatinの文字"))
384
+ expected = ["日本語と","Latin","の","文字"].map{|c| NKF.nkf("--sjis", c)}
382
385
  assert_equal(expected, doc.split_to_word)
383
386
  end
384
387
  def test_sjis_split_to_char()
385
- doc = Document.new(NKF.nkf("-s", "ɽ�׻�a b"))
386
- expected = ["ɽ","��","��","a"," ","b"].collect{|c|NKF.nkf("-s",c)}
388
+ doc = Document.new(NKF.nkf("--sjis", "表計算a b"))
389
+ expected = ["表","計","算","a"," ","b"].map{|c|NKF.nkf("--sjis",c)}
387
390
  assert_equal(expected, doc.split_to_char)
388
391
  end
389
392
  def test_sjis_split_to_char_with_cr()
390
- doc = Document.new(NKF.nkf("-s", "ɽ�׻�a b\r"))
391
- expected = ["ɽ","��","��","a"," ","b","\r"].collect{|c|NKF.nkf("-s",c)}
393
+ doc = Document.new(NKF.nkf("--sjis", "表計算a b\r"))
394
+ expected = ["表","計","算","a"," ","b","\r"].map{|c|NKF.nkf("--sjis",c)}
392
395
  assert_equal(expected, doc.split_to_char)
393
396
  end
394
397
  def test_sjis_split_to_char_with_lf()
395
- doc = Document.new(NKF.nkf("-s", "ɽ�׻�a b\n"))
396
- expected = ["ɽ","��","��","a"," ","b","\n"].collect{|c|NKF.nkf("-s",c)}
398
+ doc = Document.new(NKF.nkf("--sjis", "表計算a b\n"))
399
+ expected = ["表","計","算","a"," ","b","\n"].map{|c|NKF.nkf("--sjis",c)}
397
400
  assert_equal(expected, doc.split_to_char)
398
401
  end
399
402
  def test_sjis_split_to_char_with_crlf()
400
- doc = Document.new(NKF.nkf("-s", "ɽ�׻�a b\r\n"))
401
- expected = ["ɽ","��","��","a"," ","b","\r\n"].collect{|c|NKF.nkf("-s",c)}
403
+ doc = Document.new(NKF.nkf("--sjis", "表計算a b\r\n"))
404
+ expected = ["表","計","算","a"," ","b","\r\n"].map{|c|NKF.nkf("--sjis",c)}
402
405
  assert_equal(expected, doc.split_to_char)
403
406
  end
404
407
  def test_sjis_count_char()
405
- doc = Document.new(NKF.nkf("-s", "���ܸ�a b\r\n"))
408
+ doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
406
409
  expected = 7
407
410
  assert_equal(expected, doc.count_char)
408
411
  end
409
412
  def test_sjis_count_latin_graph_char()
410
- doc = Document.new(NKF.nkf("-s", "���ܸ�a b\r\n"))
413
+ doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
411
414
  expected = 2
412
415
  assert_equal(expected, doc.count_latin_graph_char)
413
416
  end
414
417
  def test_sjis_count_ja_graph_char()
415
- doc = Document.new(NKF.nkf("-s", "���ܸ�a b\r\n"))
418
+ doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
416
419
  expected = 3
417
420
  assert_equal(expected, doc.count_ja_graph_char)
418
421
  end
419
422
  def test_sjis_count_graph_char()
420
- doc = Document.new(NKF.nkf("-s", "���ܸ�a b\r\n"))
423
+ doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
421
424
  expected = 5
422
425
  assert_equal(expected, doc.count_graph_char)
423
426
  end
424
427
  def test_sjis_count_latin_blank_char()
425
- doc = Document.new(NKF.nkf("-s", "���ܸ�\ta b\r\n"))
428
+ doc = Document.new(NKF.nkf("--sjis", "日本語\ta b\r\n"))
426
429
  expected = 2
427
430
  assert_equal(expected, doc.count_latin_blank_char)
428
431
  end
429
432
  def test_sjis_count_ja_blank_char()
430
- doc = Document.new(NKF.nkf("-s", "���ܡ���\ta b\r\n"))
433
+ doc = Document.new(NKF.nkf("--sjis", "日本 語\ta b\r\n"))
431
434
  expected = 1
432
435
  assert_equal(expected, doc.count_ja_blank_char)
433
436
  end
434
437
  def test_sjis_count_blank_char()
435
- doc = Document.new(NKF.nkf("-s", "���ܡ���\ta b\r\n"))
438
+ doc = Document.new(NKF.nkf("--sjis", "日本 語\ta b\r\n"))
436
439
  expected = 3
437
440
  assert_equal(expected, doc.count_blank_char)
438
441
  end
439
442
  def test_sjis_count_word()
440
- doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
443
+ doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
441
444
  expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
442
445
  assert_equal(expected, doc.count_word)
443
446
  end
444
447
  def test_sjis_count_ja_word()
445
- doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
448
+ doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
446
449
  expected = 3
447
450
  assert_equal(expected, doc.count_ja_word)
448
451
  end
449
452
  def test_sjis_count_latin_valid_word()
450
- doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
453
+ doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
451
454
  expected = 2
452
455
  assert_equal(expected, doc.count_latin_valid_word)
453
456
  end
454
457
  def test_sjis_count_ja_valid_word()
455
- doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
458
+ doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
456
459
  expected = 2
457
460
  assert_equal(expected, doc.count_ja_valid_word)
458
461
  end
459
462
  def test_sjis_count_valid_word()
460
- doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
463
+ doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
461
464
  expected = 4
462
465
  assert_equal(expected, doc.count_valid_word)
463
466
  end
464
467
  def test_sjis_count_line()
465
- doc = Document.new(NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
468
+ doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
466
469
  expected = 6
467
470
  assert_equal(expected, doc.count_line)
468
471
  end
469
472
  def test_sjis_count_graph_line()
470
- doc = Document.new(NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
473
+ doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
471
474
  expected = 3
472
475
  assert_equal(expected, doc.count_graph_line)
473
476
  end
474
477
  def test_sjis_count_empty_line()
475
- doc = Document.new(NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
478
+ doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
476
479
  expected = 1
477
480
  assert_equal(expected, doc.count_empty_line)
478
481
  end
479
482
  def test_sjis_count_blank_line()
480
- doc = Document.new(NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
483
+ doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
481
484
  expected = 2
482
485
  assert_equal(expected, doc.count_blank_line)
483
486
  end
484
487
 
485
488
  # test UTF8 module
486
489
  def test_utf8_split_to_word()
487
- doc = Document.new(NKF.nkf("-E -w", "���ܸ��ʸ��foo bar"))
488
- expected = ["���ܸ��", "ʸ��", "foo ", "bar"].collect{|c| NKF.nkf("-E -w", c)}
490
+ doc = Document.new(NKF.nkf("--utf8", "日本語の文字foo bar"))
491
+ expected = ["日本語の", "文字", "foo ", "bar"].map{|c| NKF.nkf("--utf8", c)}
489
492
  assert_equal(expected, doc.split_to_word)
490
493
  end
491
494
  def test_utf8_split_to_word_kanhira()
492
- doc = Document.new(NKF.nkf("-E -w", "���ܸ��ʸ��"))
493
- expected = ["���ܸ��", "ʸ��"].collect{|c| NKF.nkf("-E -w", c)}
495
+ doc = Document.new(NKF.nkf("--utf8", "日本語の文字"))
496
+ expected = ["日本語の", "文字"].map{|c| NKF.nkf("--utf8", c)}
494
497
  assert_equal(expected, doc.split_to_word)
495
498
  end
496
499
  def test_utf8_split_to_word_katahira()
497
- doc = Document.new(NKF.nkf("-E -w", "�������ʤ�ʸ��"))
498
- expected = ["�������ʤ�", "ʸ��"].collect{|c| NKF.nkf("-E -w", c)}
500
+ doc = Document.new(NKF.nkf("--utf8", "カタカナの文字"))
501
+ expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--utf8", c)}
499
502
  assert_equal(expected, doc.split_to_word)
500
503
  end
501
504
  def test_utf8_split_to_word_kataonbiki()
502
- doc = Document.new(NKF.nkf("-E -w", "��ӡ��λ���"))
503
- expected = ["��ӡ���", "����"].collect{|c| NKF.nkf("-E -w", c)}
505
+ doc = Document.new(NKF.nkf("--utf8", "ルビーの指輪"))
506
+ expected = ["ルビーの", "指輪"].map{|c| NKF.nkf("--utf8", c)}
504
507
  assert_equal(expected, doc.split_to_word)
505
508
  end
506
509
  def test_utf8_split_to_word_hiraonbiki()
507
- doc = Document.new(NKF.nkf("-E -w", "���ӡ���"))
508
- expected = ["�", "��ӡ���"].collect{|c| NKF.nkf("-E -w", c)}
510
+ doc = Document.new(NKF.nkf("--utf8", "わールビーだ"))
511
+ expected = ["わー", "ルビーだ"].map{|c| NKF.nkf("--utf8", c)}
509
512
  assert_equal(expected, doc.split_to_word)
510
513
  end
511
514
  def test_utf8_split_to_word_latinmix()
512
- doc = Document.new(NKF.nkf("-E -w", "���ܸ��Latin��ʸ��"))
513
- expected = ["���ܸ��", "Latin", "��", "ʸ��"].collect{|c| NKF.nkf("-E -w", c)}
515
+ doc = Document.new(NKF.nkf("--utf8", "日本語とLatinの文字"))
516
+ expected = ["日本語と", "Latin", "の", "文字"].map{|c| NKF.nkf("--utf8", c)}
514
517
  assert_equal(expected, doc.split_to_word)
515
518
  end
516
519
  def test_utf8_split_to_char()
517
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b"), "UTF-8")
518
- expected = ["��", "��", "��", "a", " ", "b"].collect{|c| NKF.nkf("-E -w", c)}
520
+ doc = Document.new(NKF.nkf("--utf8", "日本語a b"), "UTF-8")
521
+ expected = ["日", "本", "語", "a", " ", "b"].map{|c| NKF.nkf("--utf8", c)}
519
522
  assert_equal(expected, doc.split_to_char)
520
523
  end
521
524
  def test_utf8_split_to_char_with_cr()
522
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r"), "UTF-8")
523
- expected = ["��","��","��","a"," ","b","\r"].collect{|c| NKF.nkf("-E -w", c)}
525
+ doc = Document.new(NKF.nkf("--utf8", "日本語a b\r"), "UTF-8")
526
+ expected = ["日","本","語","a"," ","b","\r"].map{|c| NKF.nkf("--utf8", c)}
524
527
  assert_equal(expected, doc.split_to_char)
525
528
  end
526
529
  def test_utf8_split_to_char_with_lf()
527
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\n"), "UTF-8")
528
- expected = ["��","��","��","a"," ","b","\n"].collect{|c| NKF.nkf("-E -w", c)}
530
+ doc = Document.new(NKF.nkf("--utf8", "日本語a b\n"), "UTF-8")
531
+ expected = ["日","本","語","a"," ","b","\n"].map{|c| NKF.nkf("--utf8", c)}
529
532
  assert_equal(expected, doc.split_to_char)
530
533
  end
531
534
  def test_utf8_split_to_char_with_crlf()
532
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
533
- expected = ["��","��","��","a"," ","b","\r\n"].collect{|c| NKF.nkf("-E -w", c)}
535
+ doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
536
+ expected = ["日","本","語","a"," ","b","\r\n"].map{|c| NKF.nkf("--utf8", c)}
534
537
  assert_equal(expected, doc.split_to_char)
535
538
  end
536
539
  def test_utf8_count_char()
537
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
540
+ doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
538
541
  expected = 7
539
542
  assert_equal(expected, doc.count_char)
540
543
  end
541
544
  def test_utf8_count_latin_graph_char()
542
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
545
+ doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
543
546
  expected = 2
544
547
  assert_equal(expected, doc.count_latin_graph_char)
545
548
  end
546
549
  def test_utf8_count_ja_graph_char()
547
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
550
+ doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
548
551
  expected = 3
549
552
  assert_equal(expected, doc.count_ja_graph_char)
550
553
  end
551
554
  def test_utf8_count_graph_char()
552
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
555
+ doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
553
556
  expected = 5
554
557
  assert_equal(expected, doc.count_graph_char)
555
558
  end
556
559
  def test_utf8_count_latin_blank_char()
557
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�\ta b\r\n"))
560
+ doc = Document.new(NKF.nkf("--utf8", "日本語\ta b\r\n"))
558
561
  expected = 2
559
562
  assert_equal(expected, doc.count_latin_blank_char)
560
563
  end
561
564
  def test_utf8_count_ja_blank_char()
562
- doc = Document.new(NKF.nkf("-E -w", "���ܡ���\ta b\r\n"))
565
+ doc = Document.new(NKF.nkf("--utf8", "日本 語\ta b\r\n"))
563
566
  expected = 1
564
567
  assert_equal(expected, doc.count_ja_blank_char)
565
568
  end
566
569
  def test_utf8_count_blank_char()
567
- doc = Document.new(NKF.nkf("-E -w", "���ܡ���\ta b\r\n"))
570
+ doc = Document.new(NKF.nkf("--utf8", "日本 語\ta b\r\n"))
568
571
  expected = 3
569
572
  assert_equal(expected, doc.count_blank_char)
570
573
  end
571
574
  def test_utf8_count_word()
572
- doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
575
+ doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
573
576
  expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
574
577
  assert_equal(expected, doc.count_word)
575
578
  end
576
579
  def test_utf8_count_ja_word()
577
- doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
580
+ doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
578
581
  expected = 3
579
582
  assert_equal(expected, doc.count_ja_word)
580
583
  end
581
584
  def test_utf8_count_latin_valid_word()
582
- doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
585
+ doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
583
586
  expected = 2
584
587
  assert_equal(expected, doc.count_latin_valid_word)
585
588
  end
586
589
  def test_utf8_count_ja_valid_word()
587
- doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
590
+ doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
588
591
  expected = 2
589
592
  assert_equal(expected, doc.count_ja_valid_word)
590
593
  end
591
594
  def test_utf8_count_valid_word()
592
- doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
595
+ doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
593
596
  expected = 4
594
597
  assert_equal(expected, doc.count_valid_word)
595
598
  end
596
599
  def test_utf8_count_line()
597
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
600
+ doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
598
601
  expected = 6
599
602
  assert_equal(expected, doc.count_line)
600
603
  end
601
604
  def test_utf8_count_graph_line()
602
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
605
+ doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
603
606
  expected = 3
604
607
  assert_equal(expected, doc.count_graph_line)
605
608
  end
606
609
  def test_utf8_count_empty_line()
607
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
610
+ doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
608
611
  expected = 1
609
612
  assert_equal(expected, doc.count_empty_line)
610
613
  end
611
614
  def test_utf8_count_blank_line()
612
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
615
+ doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
613
616
  expected = 2
614
617
  assert_equal(expected, doc.count_blank_line)
615
618
  end
@@ -0,0 +1,2 @@
1
+ こんにちは、私の名前はわたなべです。
2
+ 私はJust Another Ruby Porterです。
@@ -0,0 +1,2 @@
1
+ こんばんは、私の名前はまつもとです。
2
+ Rubyを作ったのは私です。私はRuby Hackerです。