docdiff 0.6.2 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/Makefile +27 -49
  3. data/README.md +351 -0
  4. data/README_ja.md +351 -0
  5. data/Rakefile +2 -42
  6. data/bin/docdiff +53 -30
  7. data/{docdiff.conf.example → doc/example/docdiff.conf.example} +4 -3
  8. data/doc/man/docdiff.adoc +146 -0
  9. data/doc/news.md +180 -0
  10. data/doc/shell_completion/_docdiff.zsh +51 -0
  11. data/doc/shell_completion/docdiff.bash +68 -0
  12. data/docdiff.gemspec +1 -0
  13. data/lib/doc_diff.rb +13 -0
  14. data/lib/docdiff/version.rb +1 -1
  15. data/lib/docdiff/view.rb +4 -4
  16. data/test/charstring_test.rb +121 -121
  17. data/test/docdiff_test.rb +1 -1
  18. data/test/document_test.rb +109 -109
  19. data/test/fixture/01_ja_utf8_lf.txt +2 -0
  20. data/test/fixture/02_ja_utf8_lf.txt +2 -0
  21. data/test/view_test.rb +135 -111
  22. metadata +39 -36
  23. data/devutil/changelog.sh +0 -40
  24. data/index.html +0 -181
  25. data/langfilter.rb +0 -10
  26. data/readme.html +0 -750
  27. data/readme.md +0 -185
  28. /data/{docdiffwebui.cgi → doc/example/docdiffwebui.cgi} +0 -0
  29. /data/{docdiffwebui.html → doc/example/docdiffwebui.html} +0 -0
  30. /data/{img/docdiff-screenshot-format-html-digest-firefox.png → doc/img/screenshot-format-html-digest-firefox.png} +0 -0
  31. /data/{img/docdiff-screenshot-format-html-firefox.png → doc/img/screenshot-format-html-firefox.png} +0 -0
  32. /data/{img/docdiff-screenshot-format-tty-cmdexe-en.png → doc/img/screenshot-format-tty-cmdexe-en.png} +0 -0
  33. /data/{img/docdiff-screenshot-format-tty-cmdexe-ja.png → doc/img/screenshot-format-tty-cmdexe-ja.png} +0 -0
  34. /data/{img/docdiff-screenshot-format-tty-rxvtunicode-en.png → doc/img/screenshot-format-tty-rxvtunicode-en.png} +0 -0
  35. /data/{img/docdiff-screenshot-format-tty-rxvtunicode-ja.png → doc/img/screenshot-format-tty-rxvtunicode-ja.png} +0 -0
  36. /data/{img/docdiff-screenshot-format-tty-xterm-en.png → doc/img/screenshot-format-tty-xterm-en.png} +0 -0
  37. /data/{img/docdiff-screenshot-format-tty-xterm-ja.png → doc/img/screenshot-format-tty-xterm-ja.png} +0 -0
  38. /data/{img/docdiff-screenshot-resolution-linewordchar-xterm.png → doc/img/screenshot-resolution-linewordchar-xterm.png} +0 -0
  39. /data/{sample/01.en.ascii.cr → test/fixture/01_en_ascii_cr.txt} +0 -0
  40. /data/{sample/01.en.ascii.crlf → test/fixture/01_en_ascii_crlf.txt} +0 -0
  41. /data/{sample/01.en.ascii.lf → test/fixture/01_en_ascii_lf.txt} +0 -0
  42. /data/{sample/01.ja.eucjp.lf → test/fixture/01_ja_eucjp_lf.txt} +0 -0
  43. /data/{sample/01.ja.sjis.cr → test/fixture/01_ja_sjis_cr.txt} +0 -0
  44. /data/{sample/01.ja.sjis.crlf → test/fixture/01_ja_sjis_crlf.txt} +0 -0
  45. /data/{sample/01.ja.utf8.crlf → test/fixture/01_ja_utf8_crlf.txt} +0 -0
  46. /data/{sample/02.en.ascii.cr → test/fixture/02_en_ascii_cr.txt} +0 -0
  47. /data/{sample/02.en.ascii.crlf → test/fixture/02_en_ascii_crlf.txt} +0 -0
  48. /data/{sample/02.en.ascii.lf → test/fixture/02_en_ascii_lf.txt} +0 -0
  49. /data/{sample/02.ja.eucjp.lf → test/fixture/02_ja_eucjp_lf.txt} +0 -0
  50. /data/{sample/02.ja.sjis.cr → test/fixture/02_ja_sjis_cr.txt} +0 -0
  51. /data/{sample/02.ja.sjis.crlf → test/fixture/02_ja_sjis_crlf.txt} +0 -0
  52. /data/{sample/02.ja.utf8.crlf → test/fixture/02_ja_utf8_crlf.txt} +0 -0
  53. /data/{sample/humpty_dumpty01.ascii.lf → test/fixture/humpty_dumpty01_ascii_lf.txt} +0 -0
  54. /data/{sample/humpty_dumpty02.ascii.lf → test/fixture/humpty_dumpty02_ascii_lf.txt} +0 -0
data/test/docdiff_test.rb CHANGED
@@ -190,7 +190,7 @@ class TC_DocDiff < Test::Unit::TestCase
190
190
  def test_cli()
191
191
  expected = "Hello, my name is [-Watanabe.-]{+matz.+}\n"
192
192
  cmd = "ruby -I lib bin/docdiff --wdiff" +
193
- " sample/01.en.ascii.lf sample/02.en.ascii.lf"
193
+ " test/fixture/01_en_ascii_lf.txt test/fixture/02_en_ascii_lf.txt"
194
194
  actual = `#{cmd}`.scan(/^.*?$\n/m).first
195
195
  assert_equal(expected, actual)
196
196
  end
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/ruby
2
- # -*- coding: euc-jp; -*-
2
+ # -*- coding: utf-8; -*-
3
3
 
4
4
  # frozen_string_literal: false
5
5
 
@@ -223,396 +223,396 @@ class TC_DocDiff_Document < Test::Unit::TestCase
223
223
 
224
224
  # test EUCJP module
225
225
  def test_eucjp_split_to_word()
226
- doc = Document.new(NKF.nkf("-e", "���ܸ��ʸ��foo bar"))
227
- expected = ["���ܸ��","ʸ��","foo ","bar"].collect{|c| NKF.nkf("-e", c)}
226
+ doc = Document.new(NKF.nkf("--euc", "日本語の文字foo bar"))
227
+ expected = ["日本語の","文字","foo ","bar"].map{|c| NKF.nkf("--euc", c)}
228
228
  assert_equal(expected, doc.split_to_word)
229
229
  end
230
230
  def test_eucjp_split_to_word_kanhira()
231
- doc = Document.new(NKF.nkf("-e", "���ܸ��ʸ��"))
232
- expected = ["���ܸ��", "ʸ��"].collect{|c| NKF.nkf("-e", c)}
231
+ doc = Document.new(NKF.nkf("--euc", "日本語の文字"))
232
+ expected = ["日本語の", "文字"].map{|c| NKF.nkf("--euc", c)}
233
233
  assert_equal(expected, doc.split_to_word)
234
234
  end
235
235
  def test_eucjp_split_to_word_katahira()
236
- doc = Document.new(NKF.nkf("-e", "�������ʤ�ʸ��"))
237
- expected = ["�������ʤ�", "ʸ��"].collect{|c| NKF.nkf("-e", c)}
236
+ doc = Document.new(NKF.nkf("--euc", "カタカナの文字"))
237
+ expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--euc", c)}
238
238
  assert_equal(expected, doc.split_to_word)
239
239
  end
240
240
  def test_eucjp_split_to_word_kataonbiki()
241
- doc = Document.new(NKF.nkf("-e", "��ӡ�������"), "EUC-JP")
242
- expected = ["��ӡ�", "����", "��"].collect{|c| NKF.nkf("-e", c)}
241
+ doc = Document.new(NKF.nkf("--euc", "ルビー色の石"), "EUC-JP")
242
+ expected = ["ルビー", "色の", "石"].map{|c| NKF.nkf("--euc", c)}
243
243
  assert_equal(expected, doc.split_to_word)
244
244
  end
245
245
  def test_eucjp_split_to_word_hiraonbiki()
246
- doc = Document.new(NKF.nkf("-e", "���ӡ���"), "EUC-JP")
247
- expected = (["�", "��ӡ���"]).collect{|c| NKF.nkf("-e", c)}
246
+ doc = Document.new(NKF.nkf("--euc", "わールビーだ"), "EUC-JP")
247
+ expected = (["わー", "ルビーだ"]).map{|c| NKF.nkf("--euc", c)}
248
248
  assert_equal(expected, doc.split_to_word)
249
249
  end
250
250
  def test_eucjp_split_to_word_latinmix()
251
- doc = Document.new(NKF.nkf("-e", "���ܸ��Latin��ʸ��"))
252
- expected = ["���ܸ��", "Latin", "��", "ʸ��"].collect{|c| NKF.nkf("-e", c)}
251
+ doc = Document.new(NKF.nkf("--euc", "日本語とLatinの文字"))
252
+ expected = ["日本語と", "Latin", "の", "文字"].map{|c| NKF.nkf("--euc", c)}
253
253
  assert_equal(expected, doc.split_to_word)
254
254
  end
255
255
  def test_eucjp_split_to_char()
256
- doc = Document.new(NKF.nkf("-e", "���ܸ�a b"))
257
- expected = ["��","��","��","a"," ","b"].collect{|c|NKF.nkf("-e",c)}
256
+ doc = Document.new(NKF.nkf("--euc", "日本語a b"))
257
+ expected = ["日","本","語","a"," ","b"].map{|c|NKF.nkf("--euc",c)}
258
258
  assert_equal(expected, doc.split_to_char)
259
259
  end
260
260
  def test_eucjp_split_to_char_with_cr()
261
- doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r"))
262
- expected = ["��","��","��","a"," ","b","\r"].collect{|c|NKF.nkf("-e",c)}
261
+ doc = Document.new(NKF.nkf("--euc", "日本語a b\r"))
262
+ expected = ["日","本","語","a"," ","b","\r"].map{|c|NKF.nkf("--euc",c)}
263
263
  assert_equal(expected, doc.split_to_char)
264
264
  end
265
265
  def test_eucjp_split_to_char_with_lf()
266
- doc = Document.new(NKF.nkf("-e", "���ܸ�a b\n"))
267
- expected = ["��","��","��","a"," ","b","\n"].collect{|c|NKF.nkf("-e",c)}
266
+ doc = Document.new(NKF.nkf("--euc", "日本語a b\n"))
267
+ expected = ["日","本","語","a"," ","b","\n"].map{|c|NKF.nkf("--euc",c)}
268
268
  assert_equal(expected, doc.split_to_char)
269
269
  end
270
270
  def test_eucjp_split_to_char_with_crlf()
271
- doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
272
- expected = ["��","��","��","a"," ","b","\r\n"].collect{|c|NKF.nkf("-e",c)}
271
+ doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
272
+ expected = ["日","本","語","a"," ","b","\r\n"].map{|c|NKF.nkf("--euc",c)}
273
273
  assert_equal(expected, doc.split_to_char)
274
274
  end
275
275
  def test_eucjp_count_char()
276
- doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
276
+ doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
277
277
  expected = 7
278
278
  assert_equal(expected, doc.count_char)
279
279
  end
280
280
  def test_eucjp_count_latin_graph_char()
281
- doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
281
+ doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
282
282
  expected = 2
283
283
  assert_equal(expected, doc.count_latin_graph_char)
284
284
  end
285
285
  def test_eucjp_count_ja_graph_char()
286
- doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
286
+ doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
287
287
  expected = 3
288
288
  assert_equal(expected, doc.count_ja_graph_char)
289
289
  end
290
290
  def test_eucjp_count_graph_char()
291
- doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
291
+ doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
292
292
  expected = 5
293
293
  assert_equal(expected, doc.count_graph_char)
294
294
  end
295
295
  def test_eucjp_count_latin_blank_char()
296
- doc = Document.new(NKF.nkf("-e", "���ܸ�\ta b\r\n"))
296
+ doc = Document.new(NKF.nkf("--euc", "日本語\ta b\r\n"))
297
297
  expected = 2
298
298
  assert_equal(expected, doc.count_latin_blank_char)
299
299
  end
300
300
  def test_eucjp_count_ja_blank_char()
301
- doc = Document.new(NKF.nkf("-e", "���ܡ���\ta b\r\n"))
301
+ doc = Document.new(NKF.nkf("--euc", "日本 語\ta b\r\n"))
302
302
  expected = 1
303
303
  assert_equal(expected, doc.count_ja_blank_char)
304
304
  end
305
305
  def test_eucjp_count_blank_char()
306
- doc = Document.new(NKF.nkf("-e", "���ܡ���\ta b\r\n"))
306
+ doc = Document.new(NKF.nkf("--euc", "日本 語\ta b\r\n"))
307
307
  expected = 3
308
308
  assert_equal(expected, doc.count_blank_char)
309
309
  end
310
310
  def test_eucjp_count_word()
311
- doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
311
+ doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
312
312
  expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
313
313
  assert_equal(expected, doc.count_word)
314
314
  end
315
315
  def test_eucjp_count_ja_word()
316
- doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
316
+ doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
317
317
  expected = 3
318
318
  assert_equal(expected, doc.count_ja_word)
319
319
  end
320
320
  def test_eucjp_count_latin_valid_word()
321
- doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
321
+ doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
322
322
  expected = 2
323
323
  assert_equal(expected, doc.count_latin_valid_word)
324
324
  end
325
325
  def test_eucjp_count_ja_valid_word()
326
- doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
326
+ doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
327
327
  expected = 2
328
328
  assert_equal(expected, doc.count_ja_valid_word)
329
329
  end
330
330
  def test_eucjp_count_valid_word()
331
- doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
331
+ doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
332
332
  expected = 4
333
333
  assert_equal(expected, doc.count_valid_word)
334
334
  end
335
335
  def test_eucjp_count_line()
336
- doc = Document.new(NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
336
+ doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
337
337
  expected = 6
338
338
  assert_equal(expected, doc.count_line)
339
339
  end
340
340
  def test_eucjp_count_graph_line()
341
- doc = Document.new(NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
341
+ doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
342
342
  expected = 3
343
343
  assert_equal(expected, doc.count_graph_line)
344
344
  end
345
345
  def test_eucjp_count_empty_line()
346
- doc = Document.new(NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
346
+ doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
347
347
  expected = 1
348
348
  assert_equal(expected, doc.count_empty_line)
349
349
  end
350
350
  def test_eucjp_count_blank_line()
351
- doc = Document.new(NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
351
+ doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
352
352
  expected = 2
353
353
  assert_equal(expected, doc.count_blank_line)
354
354
  end
355
355
 
356
356
  # test SJIS module
357
357
  def test_sjis_split_to_word()
358
- doc = Document.new(NKF.nkf("-s", "���ܸ��ʸ��foo bar"))
359
- expected = ["���ܸ��", "ʸ��", "foo ", "bar"].collect{|c|NKF.nkf("-s",c)}
358
+ doc = Document.new(NKF.nkf("--sjis", "日本語の文字foo bar"))
359
+ expected = ["日本語の", "文字", "foo ", "bar"].map{|c|NKF.nkf("--sjis",c)}
360
360
  assert_equal(expected, doc.split_to_word)
361
361
  end
362
362
  def test_sjisplit_s_to_word_kanhira()
363
- doc = Document.new(NKF.nkf("-s", "���ܸ��ʸ��"))
364
- expected = ["���ܸ��", "ʸ��"].collect{|c| NKF.nkf("-s", c)}
363
+ doc = Document.new(NKF.nkf("--sjis", "日本語の文字"))
364
+ expected = ["日本語の", "文字"].map{|c| NKF.nkf("--sjis", c)}
365
365
  assert_equal(expected, doc.split_to_word)
366
366
  end
367
367
  def test_sjis_split_to_word_katahira()
368
- doc = Document.new(NKF.nkf("-s", "�������ʤ�ʸ��"))
369
- expected = ["�������ʤ�", "ʸ��"].collect{|c| NKF.nkf("-s", c)}
368
+ doc = Document.new(NKF.nkf("--sjis", "カタカナの文字"))
369
+ expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--sjis", c)}
370
370
  assert_equal(expected, doc.split_to_word)
371
371
  end
372
372
  def test_sjis_split_to_word_kataonbiki()
373
- doc = Document.new(NKF.nkf("-s", "��ӡ��λ���"))
374
- expected = ["��ӡ���", "����"].collect{|c| NKF.nkf("-s", c)}
373
+ doc = Document.new(NKF.nkf("--sjis", "ルビーの指輪"))
374
+ expected = ["ルビーの", "指輪"].map{|c| NKF.nkf("--sjis", c)}
375
375
  assert_equal(expected, doc.split_to_word)
376
376
  end
377
377
  def test_sjis_split_to_word_hiraonbiki()
378
- doc = Document.new(NKF.nkf("-s", "���ӡ���"))
379
- expected = ["�", "��ӡ���"].collect{|c| NKF.nkf("-s", c)}
378
+ doc = Document.new(NKF.nkf("--sjis", "わールビーだ"))
379
+ expected = ["わー", "ルビーだ"].map{|c| NKF.nkf("--sjis", c)}
380
380
  assert_equal(expected, doc.split_to_word)
381
381
  end
382
382
  def test_sjis_split_to_word_latinmix()
383
- doc = Document.new(NKF.nkf("-s", "���ܸ��Latin��ʸ��"))
384
- expected = ["���ܸ��","Latin","��","ʸ��"].collect{|c| NKF.nkf("-s", c)}
383
+ doc = Document.new(NKF.nkf("--sjis", "日本語とLatinの文字"))
384
+ expected = ["日本語と","Latin","の","文字"].map{|c| NKF.nkf("--sjis", c)}
385
385
  assert_equal(expected, doc.split_to_word)
386
386
  end
387
387
  def test_sjis_split_to_char()
388
- doc = Document.new(NKF.nkf("-s", "ɽ�׻�a b"))
389
- expected = ["ɽ","��","��","a"," ","b"].collect{|c|NKF.nkf("-s",c)}
388
+ doc = Document.new(NKF.nkf("--sjis", "表計算a b"))
389
+ expected = ["表","計","算","a"," ","b"].map{|c|NKF.nkf("--sjis",c)}
390
390
  assert_equal(expected, doc.split_to_char)
391
391
  end
392
392
  def test_sjis_split_to_char_with_cr()
393
- doc = Document.new(NKF.nkf("-s", "ɽ�׻�a b\r"))
394
- expected = ["ɽ","��","��","a"," ","b","\r"].collect{|c|NKF.nkf("-s",c)}
393
+ doc = Document.new(NKF.nkf("--sjis", "表計算a b\r"))
394
+ expected = ["表","計","算","a"," ","b","\r"].map{|c|NKF.nkf("--sjis",c)}
395
395
  assert_equal(expected, doc.split_to_char)
396
396
  end
397
397
  def test_sjis_split_to_char_with_lf()
398
- doc = Document.new(NKF.nkf("-s", "ɽ�׻�a b\n"))
399
- expected = ["ɽ","��","��","a"," ","b","\n"].collect{|c|NKF.nkf("-s",c)}
398
+ doc = Document.new(NKF.nkf("--sjis", "表計算a b\n"))
399
+ expected = ["表","計","算","a"," ","b","\n"].map{|c|NKF.nkf("--sjis",c)}
400
400
  assert_equal(expected, doc.split_to_char)
401
401
  end
402
402
  def test_sjis_split_to_char_with_crlf()
403
- doc = Document.new(NKF.nkf("-s", "ɽ�׻�a b\r\n"))
404
- expected = ["ɽ","��","��","a"," ","b","\r\n"].collect{|c|NKF.nkf("-s",c)}
403
+ doc = Document.new(NKF.nkf("--sjis", "表計算a b\r\n"))
404
+ expected = ["表","計","算","a"," ","b","\r\n"].map{|c|NKF.nkf("--sjis",c)}
405
405
  assert_equal(expected, doc.split_to_char)
406
406
  end
407
407
  def test_sjis_count_char()
408
- doc = Document.new(NKF.nkf("-s", "���ܸ�a b\r\n"))
408
+ doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
409
409
  expected = 7
410
410
  assert_equal(expected, doc.count_char)
411
411
  end
412
412
  def test_sjis_count_latin_graph_char()
413
- doc = Document.new(NKF.nkf("-s", "���ܸ�a b\r\n"))
413
+ doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
414
414
  expected = 2
415
415
  assert_equal(expected, doc.count_latin_graph_char)
416
416
  end
417
417
  def test_sjis_count_ja_graph_char()
418
- doc = Document.new(NKF.nkf("-s", "���ܸ�a b\r\n"))
418
+ doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
419
419
  expected = 3
420
420
  assert_equal(expected, doc.count_ja_graph_char)
421
421
  end
422
422
  def test_sjis_count_graph_char()
423
- doc = Document.new(NKF.nkf("-s", "���ܸ�a b\r\n"))
423
+ doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
424
424
  expected = 5
425
425
  assert_equal(expected, doc.count_graph_char)
426
426
  end
427
427
  def test_sjis_count_latin_blank_char()
428
- doc = Document.new(NKF.nkf("-s", "���ܸ�\ta b\r\n"))
428
+ doc = Document.new(NKF.nkf("--sjis", "日本語\ta b\r\n"))
429
429
  expected = 2
430
430
  assert_equal(expected, doc.count_latin_blank_char)
431
431
  end
432
432
  def test_sjis_count_ja_blank_char()
433
- doc = Document.new(NKF.nkf("-s", "���ܡ���\ta b\r\n"))
433
+ doc = Document.new(NKF.nkf("--sjis", "日本 語\ta b\r\n"))
434
434
  expected = 1
435
435
  assert_equal(expected, doc.count_ja_blank_char)
436
436
  end
437
437
  def test_sjis_count_blank_char()
438
- doc = Document.new(NKF.nkf("-s", "���ܡ���\ta b\r\n"))
438
+ doc = Document.new(NKF.nkf("--sjis", "日本 語\ta b\r\n"))
439
439
  expected = 3
440
440
  assert_equal(expected, doc.count_blank_char)
441
441
  end
442
442
  def test_sjis_count_word()
443
- doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
443
+ doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
444
444
  expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
445
445
  assert_equal(expected, doc.count_word)
446
446
  end
447
447
  def test_sjis_count_ja_word()
448
- doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
448
+ doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
449
449
  expected = 3
450
450
  assert_equal(expected, doc.count_ja_word)
451
451
  end
452
452
  def test_sjis_count_latin_valid_word()
453
- doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
453
+ doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
454
454
  expected = 2
455
455
  assert_equal(expected, doc.count_latin_valid_word)
456
456
  end
457
457
  def test_sjis_count_ja_valid_word()
458
- doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
458
+ doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
459
459
  expected = 2
460
460
  assert_equal(expected, doc.count_ja_valid_word)
461
461
  end
462
462
  def test_sjis_count_valid_word()
463
- doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
463
+ doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
464
464
  expected = 4
465
465
  assert_equal(expected, doc.count_valid_word)
466
466
  end
467
467
  def test_sjis_count_line()
468
- doc = Document.new(NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
468
+ doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
469
469
  expected = 6
470
470
  assert_equal(expected, doc.count_line)
471
471
  end
472
472
  def test_sjis_count_graph_line()
473
- doc = Document.new(NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
473
+ doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
474
474
  expected = 3
475
475
  assert_equal(expected, doc.count_graph_line)
476
476
  end
477
477
  def test_sjis_count_empty_line()
478
- doc = Document.new(NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
478
+ doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
479
479
  expected = 1
480
480
  assert_equal(expected, doc.count_empty_line)
481
481
  end
482
482
  def test_sjis_count_blank_line()
483
- doc = Document.new(NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
483
+ doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
484
484
  expected = 2
485
485
  assert_equal(expected, doc.count_blank_line)
486
486
  end
487
487
 
488
488
  # test UTF8 module
489
489
  def test_utf8_split_to_word()
490
- doc = Document.new(NKF.nkf("-E -w", "���ܸ��ʸ��foo bar"))
491
- expected = ["���ܸ��", "ʸ��", "foo ", "bar"].collect{|c| NKF.nkf("-E -w", c)}
490
+ doc = Document.new(NKF.nkf("--utf8", "日本語の文字foo bar"))
491
+ expected = ["日本語の", "文字", "foo ", "bar"].map{|c| NKF.nkf("--utf8", c)}
492
492
  assert_equal(expected, doc.split_to_word)
493
493
  end
494
494
  def test_utf8_split_to_word_kanhira()
495
- doc = Document.new(NKF.nkf("-E -w", "���ܸ��ʸ��"))
496
- expected = ["���ܸ��", "ʸ��"].collect{|c| NKF.nkf("-E -w", c)}
495
+ doc = Document.new(NKF.nkf("--utf8", "日本語の文字"))
496
+ expected = ["日本語の", "文字"].map{|c| NKF.nkf("--utf8", c)}
497
497
  assert_equal(expected, doc.split_to_word)
498
498
  end
499
499
  def test_utf8_split_to_word_katahira()
500
- doc = Document.new(NKF.nkf("-E -w", "�������ʤ�ʸ��"))
501
- expected = ["�������ʤ�", "ʸ��"].collect{|c| NKF.nkf("-E -w", c)}
500
+ doc = Document.new(NKF.nkf("--utf8", "カタカナの文字"))
501
+ expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--utf8", c)}
502
502
  assert_equal(expected, doc.split_to_word)
503
503
  end
504
504
  def test_utf8_split_to_word_kataonbiki()
505
- doc = Document.new(NKF.nkf("-E -w", "��ӡ��λ���"))
506
- expected = ["��ӡ���", "����"].collect{|c| NKF.nkf("-E -w", c)}
505
+ doc = Document.new(NKF.nkf("--utf8", "ルビーの指輪"))
506
+ expected = ["ルビーの", "指輪"].map{|c| NKF.nkf("--utf8", c)}
507
507
  assert_equal(expected, doc.split_to_word)
508
508
  end
509
509
  def test_utf8_split_to_word_hiraonbiki()
510
- doc = Document.new(NKF.nkf("-E -w", "���ӡ���"))
511
- expected = ["�", "��ӡ���"].collect{|c| NKF.nkf("-E -w", c)}
510
+ doc = Document.new(NKF.nkf("--utf8", "わールビーだ"))
511
+ expected = ["わー", "ルビーだ"].map{|c| NKF.nkf("--utf8", c)}
512
512
  assert_equal(expected, doc.split_to_word)
513
513
  end
514
514
  def test_utf8_split_to_word_latinmix()
515
- doc = Document.new(NKF.nkf("-E -w", "���ܸ��Latin��ʸ��"))
516
- expected = ["���ܸ��", "Latin", "��", "ʸ��"].collect{|c| NKF.nkf("-E -w", c)}
515
+ doc = Document.new(NKF.nkf("--utf8", "日本語とLatinの文字"))
516
+ expected = ["日本語と", "Latin", "の", "文字"].map{|c| NKF.nkf("--utf8", c)}
517
517
  assert_equal(expected, doc.split_to_word)
518
518
  end
519
519
  def test_utf8_split_to_char()
520
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b"), "UTF-8")
521
- expected = ["��", "��", "��", "a", " ", "b"].collect{|c| NKF.nkf("-E -w", c)}
520
+ doc = Document.new(NKF.nkf("--utf8", "日本語a b"), "UTF-8")
521
+ expected = ["日", "本", "語", "a", " ", "b"].map{|c| NKF.nkf("--utf8", c)}
522
522
  assert_equal(expected, doc.split_to_char)
523
523
  end
524
524
  def test_utf8_split_to_char_with_cr()
525
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r"), "UTF-8")
526
- expected = ["��","��","��","a"," ","b","\r"].collect{|c| NKF.nkf("-E -w", c)}
525
+ doc = Document.new(NKF.nkf("--utf8", "日本語a b\r"), "UTF-8")
526
+ expected = ["日","本","語","a"," ","b","\r"].map{|c| NKF.nkf("--utf8", c)}
527
527
  assert_equal(expected, doc.split_to_char)
528
528
  end
529
529
  def test_utf8_split_to_char_with_lf()
530
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\n"), "UTF-8")
531
- expected = ["��","��","��","a"," ","b","\n"].collect{|c| NKF.nkf("-E -w", c)}
530
+ doc = Document.new(NKF.nkf("--utf8", "日本語a b\n"), "UTF-8")
531
+ expected = ["日","本","語","a"," ","b","\n"].map{|c| NKF.nkf("--utf8", c)}
532
532
  assert_equal(expected, doc.split_to_char)
533
533
  end
534
534
  def test_utf8_split_to_char_with_crlf()
535
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
536
- expected = ["��","��","��","a"," ","b","\r\n"].collect{|c| NKF.nkf("-E -w", c)}
535
+ doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
536
+ expected = ["日","本","語","a"," ","b","\r\n"].map{|c| NKF.nkf("--utf8", c)}
537
537
  assert_equal(expected, doc.split_to_char)
538
538
  end
539
539
  def test_utf8_count_char()
540
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
540
+ doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
541
541
  expected = 7
542
542
  assert_equal(expected, doc.count_char)
543
543
  end
544
544
  def test_utf8_count_latin_graph_char()
545
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
545
+ doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
546
546
  expected = 2
547
547
  assert_equal(expected, doc.count_latin_graph_char)
548
548
  end
549
549
  def test_utf8_count_ja_graph_char()
550
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
550
+ doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
551
551
  expected = 3
552
552
  assert_equal(expected, doc.count_ja_graph_char)
553
553
  end
554
554
  def test_utf8_count_graph_char()
555
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
555
+ doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
556
556
  expected = 5
557
557
  assert_equal(expected, doc.count_graph_char)
558
558
  end
559
559
  def test_utf8_count_latin_blank_char()
560
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�\ta b\r\n"))
560
+ doc = Document.new(NKF.nkf("--utf8", "日本語\ta b\r\n"))
561
561
  expected = 2
562
562
  assert_equal(expected, doc.count_latin_blank_char)
563
563
  end
564
564
  def test_utf8_count_ja_blank_char()
565
- doc = Document.new(NKF.nkf("-E -w", "���ܡ���\ta b\r\n"))
565
+ doc = Document.new(NKF.nkf("--utf8", "日本 語\ta b\r\n"))
566
566
  expected = 1
567
567
  assert_equal(expected, doc.count_ja_blank_char)
568
568
  end
569
569
  def test_utf8_count_blank_char()
570
- doc = Document.new(NKF.nkf("-E -w", "���ܡ���\ta b\r\n"))
570
+ doc = Document.new(NKF.nkf("--utf8", "日本 語\ta b\r\n"))
571
571
  expected = 3
572
572
  assert_equal(expected, doc.count_blank_char)
573
573
  end
574
574
  def test_utf8_count_word()
575
- doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
575
+ doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
576
576
  expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
577
577
  assert_equal(expected, doc.count_word)
578
578
  end
579
579
  def test_utf8_count_ja_word()
580
- doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
580
+ doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
581
581
  expected = 3
582
582
  assert_equal(expected, doc.count_ja_word)
583
583
  end
584
584
  def test_utf8_count_latin_valid_word()
585
- doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
585
+ doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
586
586
  expected = 2
587
587
  assert_equal(expected, doc.count_latin_valid_word)
588
588
  end
589
589
  def test_utf8_count_ja_valid_word()
590
- doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
590
+ doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
591
591
  expected = 2
592
592
  assert_equal(expected, doc.count_ja_valid_word)
593
593
  end
594
594
  def test_utf8_count_valid_word()
595
- doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
595
+ doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
596
596
  expected = 4
597
597
  assert_equal(expected, doc.count_valid_word)
598
598
  end
599
599
  def test_utf8_count_line()
600
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
600
+ doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
601
601
  expected = 6
602
602
  assert_equal(expected, doc.count_line)
603
603
  end
604
604
  def test_utf8_count_graph_line()
605
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
605
+ doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
606
606
  expected = 3
607
607
  assert_equal(expected, doc.count_graph_line)
608
608
  end
609
609
  def test_utf8_count_empty_line()
610
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
610
+ doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
611
611
  expected = 1
612
612
  assert_equal(expected, doc.count_empty_line)
613
613
  end
614
614
  def test_utf8_count_blank_line()
615
- doc = Document.new(NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
615
+ doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
616
616
  expected = 2
617
617
  assert_equal(expected, doc.count_blank_line)
618
618
  end
@@ -0,0 +1,2 @@
1
+ こんにちは、私の名前はわたなべです。
2
+ 私はJust Another Ruby Porterです。
@@ -0,0 +1,2 @@
1
+ こんばんは、私の名前はまつもとです。
2
+ Rubyを作ったのは私です。私はRuby Hackerです。