docdiff 0.6.1 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Makefile +27 -49
- data/README.md +351 -0
- data/README_ja.md +351 -0
- data/Rakefile +2 -42
- data/bin/docdiff +66 -29
- data/{docdiff.conf.example → doc/example/docdiff.conf.example} +4 -3
- data/doc/man/docdiff.adoc +146 -0
- data/doc/news.md +180 -0
- data/doc/shell_completion/_docdiff.zsh +51 -0
- data/doc/shell_completion/docdiff.bash +68 -0
- data/docdiff.gemspec +2 -1
- data/lib/doc_diff.rb +13 -0
- data/lib/docdiff/charstring.rb +4 -3
- data/lib/docdiff/diff/unidiff.rb +0 -1
- data/lib/docdiff/encoding/en_ascii.rb +12 -39
- data/lib/docdiff/encoding/ja_eucjp.rb +12 -39
- data/lib/docdiff/encoding/ja_sjis.rb +12 -39
- data/lib/docdiff/encoding/ja_utf8.rb +12 -39
- data/lib/docdiff/version.rb +1 -1
- data/lib/docdiff/view.rb +16 -8
- data/test/charstring_test.rb +124 -121
- data/test/docdiff_test.rb +5 -3
- data/test/document_test.rb +112 -109
- data/test/fixture/01_ja_utf8_lf.txt +2 -0
- data/test/fixture/02_ja_utf8_lf.txt +2 -0
- data/test/view_test.rb +135 -111
- metadata +41 -43
- data/devutil/changelog.sh +0 -40
- data/index.html +0 -181
- data/langfilter.rb +0 -10
- data/lib/viewdiff.rb +0 -379
- data/readme.html +0 -733
- data/readme.md +0 -184
- data/test/viewdiff_test.rb +0 -911
- /data/{docdiffwebui.cgi → doc/example/docdiffwebui.cgi} +0 -0
- /data/{docdiffwebui.html → doc/example/docdiffwebui.html} +0 -0
- /data/{img/docdiff-screenshot-format-html-digest-firefox.png → doc/img/screenshot-format-html-digest-firefox.png} +0 -0
- /data/{img/docdiff-screenshot-format-html-firefox.png → doc/img/screenshot-format-html-firefox.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-cmdexe-en.png → doc/img/screenshot-format-tty-cmdexe-en.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-cmdexe-ja.png → doc/img/screenshot-format-tty-cmdexe-ja.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-rxvtunicode-en.png → doc/img/screenshot-format-tty-rxvtunicode-en.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-rxvtunicode-ja.png → doc/img/screenshot-format-tty-rxvtunicode-ja.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-xterm-en.png → doc/img/screenshot-format-tty-xterm-en.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-xterm-ja.png → doc/img/screenshot-format-tty-xterm-ja.png} +0 -0
- /data/{img/docdiff-screenshot-resolution-linewordchar-xterm.png → doc/img/screenshot-resolution-linewordchar-xterm.png} +0 -0
- /data/{sample/01.en.ascii.cr → test/fixture/01_en_ascii_cr.txt} +0 -0
- /data/{sample/01.en.ascii.crlf → test/fixture/01_en_ascii_crlf.txt} +0 -0
- /data/{sample/01.en.ascii.lf → test/fixture/01_en_ascii_lf.txt} +0 -0
- /data/{sample/01.ja.eucjp.lf → test/fixture/01_ja_eucjp_lf.txt} +0 -0
- /data/{sample/01.ja.sjis.cr → test/fixture/01_ja_sjis_cr.txt} +0 -0
- /data/{sample/01.ja.sjis.crlf → test/fixture/01_ja_sjis_crlf.txt} +0 -0
- /data/{sample/01.ja.utf8.crlf → test/fixture/01_ja_utf8_crlf.txt} +0 -0
- /data/{sample/02.en.ascii.cr → test/fixture/02_en_ascii_cr.txt} +0 -0
- /data/{sample/02.en.ascii.crlf → test/fixture/02_en_ascii_crlf.txt} +0 -0
- /data/{sample/02.en.ascii.lf → test/fixture/02_en_ascii_lf.txt} +0 -0
- /data/{sample/02.ja.eucjp.lf → test/fixture/02_ja_eucjp_lf.txt} +0 -0
- /data/{sample/02.ja.sjis.cr → test/fixture/02_ja_sjis_cr.txt} +0 -0
- /data/{sample/02.ja.sjis.crlf → test/fixture/02_ja_sjis_crlf.txt} +0 -0
- /data/{sample/02.ja.utf8.crlf → test/fixture/02_ja_utf8_crlf.txt} +0 -0
- /data/{sample/humpty_dumpty01.ascii.lf → test/fixture/humpty_dumpty01_ascii_lf.txt} +0 -0
- /data/{sample/humpty_dumpty02.ascii.lf → test/fixture/humpty_dumpty02_ascii_lf.txt} +0 -0
data/test/charstring_test.rb
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
#!/usr/bin/ruby
|
|
2
|
-
# -*- coding:
|
|
2
|
+
# -*- coding: utf-8; -*-
|
|
3
|
+
|
|
4
|
+
# frozen_string_literal: false
|
|
5
|
+
|
|
3
6
|
require 'test/unit'
|
|
4
7
|
require 'docdiff/charstring'
|
|
5
8
|
require 'nkf'
|
|
@@ -87,12 +90,12 @@ class TC_DocDiff_CharString < Test::Unit::TestCase
|
|
|
87
90
|
assert_equal(expected, str.eol_char)
|
|
88
91
|
end
|
|
89
92
|
def test_eol_char_none_eucjp()
|
|
90
|
-
str = NKF.nkf("
|
|
93
|
+
str = NKF.nkf("--euc", "日本語a b").extend CharString
|
|
91
94
|
expected = nil
|
|
92
95
|
assert_equal(expected, str.eol_char)
|
|
93
96
|
end
|
|
94
97
|
def test_eol_char_none_sjis()
|
|
95
|
-
str = NKF.nkf("
|
|
98
|
+
str = NKF.nkf("--sjis", "日本語a b").extend CharString
|
|
96
99
|
expected = nil
|
|
97
100
|
assert_equal(expected, str.eol_char)
|
|
98
101
|
end
|
|
@@ -322,176 +325,176 @@ class TC_DocDiff_CharString < Test::Unit::TestCase
|
|
|
322
325
|
|
|
323
326
|
# test EUCJP module
|
|
324
327
|
def test_eucjp_split_to_word()
|
|
325
|
-
str = NKF.nkf("
|
|
328
|
+
str = NKF.nkf("--euc", "日本語の文字foo bar").extend CharString
|
|
326
329
|
str.encoding = "EUC-JP"
|
|
327
|
-
expected = ["
|
|
330
|
+
expected = ["日本語の","文字","foo ","bar"].map{|c| NKF.nkf("--euc", c)}
|
|
328
331
|
assert_equal(expected, str.split_to_word)
|
|
329
332
|
end
|
|
330
333
|
def test_eucjp_split_to_word_kanhira()
|
|
331
|
-
str = NKF.nkf("
|
|
334
|
+
str = NKF.nkf("--euc", "日本語の文字").extend CharString
|
|
332
335
|
str.encoding = "EUC-JP"
|
|
333
|
-
expected = ["
|
|
336
|
+
expected = ["日本語の", "文字"].map{|c| NKF.nkf("--euc", c)}
|
|
334
337
|
assert_equal(expected, str.split_to_word)
|
|
335
338
|
end
|
|
336
339
|
def test_eucjp_split_to_word_katahira()
|
|
337
|
-
str = NKF.nkf("
|
|
340
|
+
str = NKF.nkf("--euc", "カタカナの文字").extend CharString
|
|
338
341
|
str.encoding = "EUC-JP"
|
|
339
|
-
expected = ["
|
|
342
|
+
expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--euc", c)}
|
|
340
343
|
assert_equal(expected, str.split_to_word)
|
|
341
344
|
end
|
|
342
345
|
def test_eucjp_split_to_word_kataonbiki()
|
|
343
|
-
str = NKF.nkf("
|
|
346
|
+
str = NKF.nkf("--euc", "ルビー色の石").extend CharString
|
|
344
347
|
str.encoding = "EUC-JP" #<= needed to pass the test
|
|
345
|
-
expected = ["
|
|
348
|
+
expected = ["ルビー", "色の", "石"].map{|c| NKF.nkf("--euc", c)}
|
|
346
349
|
assert_equal(expected, str.split_to_word)
|
|
347
350
|
end
|
|
348
351
|
def test_eucjp_split_to_word_hiraonbiki()
|
|
349
|
-
str = NKF.nkf("
|
|
352
|
+
str = NKF.nkf("--euc", "わールビーだ").extend CharString
|
|
350
353
|
str.encoding = "EUC-JP" #<= needed to pass the test
|
|
351
|
-
expected = ["
|
|
354
|
+
expected = ["わー", "ルビーだ"].map{|c| NKF.nkf("--euc", c)}
|
|
352
355
|
assert_equal(expected, str.split_to_word)
|
|
353
356
|
end
|
|
354
357
|
def test_eucjp_split_to_word_latinmix()
|
|
355
|
-
str = NKF.nkf("
|
|
358
|
+
str = NKF.nkf("--euc", "日本語とLatinの文字").extend CharString
|
|
356
359
|
str.encoding = "EUC-JP"
|
|
357
|
-
expected = ["
|
|
360
|
+
expected = ["日本語と", "Latin", "の", "文字"].map{|c| NKF.nkf("--euc", c)}
|
|
358
361
|
assert_equal(expected, str.split_to_word)
|
|
359
362
|
end
|
|
360
363
|
def test_eucjp_split_to_char()
|
|
361
|
-
str = NKF.nkf("
|
|
364
|
+
str = NKF.nkf("--euc", "日本語a b").extend CharString
|
|
362
365
|
str.encoding = "EUC-JP"
|
|
363
366
|
str.eol = "LF" #<= needed to pass the test
|
|
364
|
-
expected = ["
|
|
367
|
+
expected = ["日","本","語","a"," ","b"].map{|c|NKF.nkf("--euc",c)}
|
|
365
368
|
assert_equal(expected, str.split_to_char)
|
|
366
369
|
end
|
|
367
370
|
def test_eucjp_split_to_char_with_cr()
|
|
368
|
-
str = NKF.nkf("
|
|
371
|
+
str = NKF.nkf("--euc", "日本語a b\r").extend CharString
|
|
369
372
|
str.encoding = "EUC-JP"
|
|
370
373
|
str.eol = "CR"
|
|
371
|
-
expected = ["
|
|
374
|
+
expected = ["日","本","語","a"," ","b","\r"].map{|c|NKF.nkf("--euc",c)}
|
|
372
375
|
assert_equal(expected, str.split_to_char)
|
|
373
376
|
end
|
|
374
377
|
def test_eucjp_split_to_char_with_lf()
|
|
375
|
-
str = NKF.nkf("
|
|
378
|
+
str = NKF.nkf("--euc", "日本語a b\n").extend CharString
|
|
376
379
|
str.encoding = "EUC-JP"
|
|
377
380
|
str.eol = "LF"
|
|
378
|
-
expected = ["
|
|
381
|
+
expected = ["日","本","語","a"," ","b","\n"].map{|c|NKF.nkf("--euc",c)}
|
|
379
382
|
assert_equal(expected, str.split_to_char)
|
|
380
383
|
end
|
|
381
384
|
def test_eucjp_split_to_char_with_crlf()
|
|
382
|
-
str = NKF.nkf("
|
|
385
|
+
str = NKF.nkf("--euc", "日本語a b\r\n").extend CharString
|
|
383
386
|
str.encoding = "EUC-JP"
|
|
384
387
|
str.eol = "CRLF"
|
|
385
|
-
expected = ["
|
|
388
|
+
expected = ["日","本","語","a"," ","b","\r\n"].map{|c|NKF.nkf("--euc",c)}
|
|
386
389
|
assert_equal(expected, str.split_to_char)
|
|
387
390
|
end
|
|
388
391
|
def test_eucjp_count_char()
|
|
389
|
-
str = NKF.nkf("
|
|
392
|
+
str = NKF.nkf("--euc", "日本語a b\r\n").extend CharString
|
|
390
393
|
str.encoding = "EUC-JP"
|
|
391
394
|
str.eol = "CRLF"
|
|
392
395
|
expected = 7
|
|
393
396
|
assert_equal(expected, str.count_char)
|
|
394
397
|
end
|
|
395
398
|
def test_eucjp_count_latin_graph_char()
|
|
396
|
-
str = NKF.nkf("
|
|
399
|
+
str = NKF.nkf("--euc", "日本語a b\r\n").extend CharString
|
|
397
400
|
str.encoding = "EUC-JP"
|
|
398
401
|
str.eol = "CRLF"
|
|
399
402
|
expected = 2
|
|
400
403
|
assert_equal(expected, str.count_latin_graph_char)
|
|
401
404
|
end
|
|
402
405
|
def test_eucjp_count_ja_graph_char()
|
|
403
|
-
str = NKF.nkf("
|
|
406
|
+
str = NKF.nkf("--euc", "日本語a b\r\n").extend CharString
|
|
404
407
|
str.encoding = "EUC-JP"
|
|
405
408
|
str.eol = "CRLF"
|
|
406
409
|
expected = 3
|
|
407
410
|
assert_equal(expected, str.count_ja_graph_char)
|
|
408
411
|
end
|
|
409
412
|
def test_eucjp_count_graph_char()
|
|
410
|
-
str = NKF.nkf("
|
|
413
|
+
str = NKF.nkf("--euc", "日本語a b\r\n").extend CharString
|
|
411
414
|
str.encoding = "EUC-JP"
|
|
412
415
|
str.eol = "CRLF"
|
|
413
416
|
expected = 5
|
|
414
417
|
assert_equal(expected, str.count_graph_char)
|
|
415
418
|
end
|
|
416
419
|
def test_eucjp_count_latin_blank_char()
|
|
417
|
-
str = NKF.nkf("
|
|
420
|
+
str = NKF.nkf("--euc", "日本語\ta b\r\n").extend CharString
|
|
418
421
|
str.encoding = "EUC-JP"
|
|
419
422
|
str.eol = "CRLF"
|
|
420
423
|
expected = 2
|
|
421
424
|
assert_equal(expected, str.count_latin_blank_char)
|
|
422
425
|
end
|
|
423
426
|
def test_eucjp_count_ja_blank_char()
|
|
424
|
-
str = NKF.nkf("
|
|
427
|
+
str = NKF.nkf("--euc", "日本 語\ta b\r\n").extend CharString
|
|
425
428
|
str.encoding = "EUC-JP"
|
|
426
429
|
str.eol = "CRLF"
|
|
427
430
|
expected = 1
|
|
428
431
|
assert_equal(expected, str.count_ja_blank_char)
|
|
429
432
|
end
|
|
430
433
|
def test_eucjp_count_blank_char()
|
|
431
|
-
str = NKF.nkf("
|
|
434
|
+
str = NKF.nkf("--euc", "日本 語\ta b\r\n").extend CharString
|
|
432
435
|
str.encoding = "EUC-JP"
|
|
433
436
|
str.eol = "CRLF"
|
|
434
437
|
expected = 3
|
|
435
438
|
assert_equal(expected, str.count_blank_char)
|
|
436
439
|
end
|
|
437
440
|
def test_eucjp_count_word()
|
|
438
|
-
str = NKF.nkf("
|
|
441
|
+
str = NKF.nkf("--euc", "日本 語a b --\r\n").extend CharString
|
|
439
442
|
str.encoding = "EUC-JP"
|
|
440
443
|
str.eol = "CRLF"
|
|
441
444
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
442
445
|
assert_equal(expected, str.count_word)
|
|
443
446
|
end
|
|
444
447
|
def test_eucjp_count_ja_word()
|
|
445
|
-
str = NKF.nkf("
|
|
448
|
+
str = NKF.nkf("--euc", "日本 語a b --\r\n").extend CharString
|
|
446
449
|
str.encoding = "EUC-JP"
|
|
447
450
|
str.eol = "CRLF"
|
|
448
451
|
expected = 3
|
|
449
452
|
assert_equal(expected, str.count_ja_word)
|
|
450
453
|
end
|
|
451
454
|
def test_eucjp_count_latin_valid_word()
|
|
452
|
-
str = NKF.nkf("
|
|
455
|
+
str = NKF.nkf("--euc", "日本 語a b --\r\n").extend CharString
|
|
453
456
|
str.encoding = "EUC-JP"
|
|
454
457
|
str.eol = "CRLF"
|
|
455
458
|
expected = 2
|
|
456
459
|
assert_equal(expected, str.count_latin_valid_word)
|
|
457
460
|
end
|
|
458
461
|
def test_eucjp_count_ja_valid_word()
|
|
459
|
-
str = NKF.nkf("
|
|
462
|
+
str = NKF.nkf("--euc", "日本 語a b --\r\n").extend CharString
|
|
460
463
|
str.encoding = "EUC-JP"
|
|
461
464
|
str.eol = "CRLF"
|
|
462
465
|
expected = 2
|
|
463
466
|
assert_equal(expected, str.count_ja_valid_word)
|
|
464
467
|
end
|
|
465
468
|
def test_eucjp_count_valid_word()
|
|
466
|
-
str = NKF.nkf("
|
|
469
|
+
str = NKF.nkf("--euc", "日本 語a b --\r\n").extend CharString
|
|
467
470
|
str.encoding = "EUC-JP"
|
|
468
471
|
str.eol = "CRLF"
|
|
469
472
|
expected = 4
|
|
470
473
|
assert_equal(expected, str.count_valid_word)
|
|
471
474
|
end
|
|
472
475
|
def test_eucjp_count_line()
|
|
473
|
-
str = NKF.nkf("
|
|
476
|
+
str = NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
474
477
|
str.encoding = "EUC-JP"
|
|
475
478
|
str.eol = "CRLF"
|
|
476
479
|
expected = 6
|
|
477
480
|
assert_equal(expected, str.count_line)
|
|
478
481
|
end
|
|
479
482
|
def test_eucjp_count_graph_line()
|
|
480
|
-
str = NKF.nkf("
|
|
483
|
+
str = NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
481
484
|
str.encoding = "EUC-JP"
|
|
482
485
|
str.eol = "CRLF"
|
|
483
486
|
expected = 3
|
|
484
487
|
assert_equal(expected, str.count_graph_line)
|
|
485
488
|
end
|
|
486
489
|
def test_eucjp_count_empty_line()
|
|
487
|
-
str = NKF.nkf("
|
|
490
|
+
str = NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
488
491
|
str.encoding = "EUC-JP"
|
|
489
492
|
str.eol = "CRLF"
|
|
490
493
|
expected = 1
|
|
491
494
|
assert_equal(expected, str.count_empty_line)
|
|
492
495
|
end
|
|
493
496
|
def test_eucjp_count_blank_line()
|
|
494
|
-
str = NKF.nkf("
|
|
497
|
+
str = NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
495
498
|
str.encoding = "EUC-JP"
|
|
496
499
|
str.eol = "CRLF"
|
|
497
500
|
expected = 2
|
|
@@ -500,176 +503,176 @@ class TC_DocDiff_CharString < Test::Unit::TestCase
|
|
|
500
503
|
|
|
501
504
|
# test SJIS module
|
|
502
505
|
def test_sjis_split_to_word()
|
|
503
|
-
str = NKF.nkf("
|
|
506
|
+
str = NKF.nkf("--sjis", "日本語の文字foo bar").extend CharString
|
|
504
507
|
str.encoding = "Shift_JIS"
|
|
505
|
-
expected = ["
|
|
508
|
+
expected = ["日本語の", "文字", "foo ", "bar"].map{|c|NKF.nkf("--sjis",c)}
|
|
506
509
|
assert_equal(expected, str.split_to_word)
|
|
507
510
|
end
|
|
508
511
|
def test_sjisplit_s_to_word_kanhira()
|
|
509
|
-
str = NKF.nkf("
|
|
512
|
+
str = NKF.nkf("--sjis", "日本語の文字").extend CharString
|
|
510
513
|
str.encoding = "Shift_JIS"
|
|
511
|
-
expected = ["
|
|
514
|
+
expected = ["日本語の", "文字"].map{|c| NKF.nkf("--sjis", c)}
|
|
512
515
|
assert_equal(expected, str.split_to_word)
|
|
513
516
|
end
|
|
514
517
|
def test_sjis_split_to_word_katahira()
|
|
515
|
-
str = NKF.nkf("
|
|
518
|
+
str = NKF.nkf("--sjis", "カタカナの文字").extend CharString
|
|
516
519
|
str.encoding = "Shift_JIS"
|
|
517
|
-
expected = ["
|
|
520
|
+
expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--sjis", c)}
|
|
518
521
|
assert_equal(expected, str.split_to_word)
|
|
519
522
|
end
|
|
520
523
|
def test_sjis_split_to_word_kataonbiki()
|
|
521
|
-
str = NKF.nkf("
|
|
524
|
+
str = NKF.nkf("--sjis", "ルビーの指輪").extend CharString
|
|
522
525
|
str.encoding = "Shift_JIS"
|
|
523
|
-
expected = ["
|
|
526
|
+
expected = ["ルビーの", "指輪"].map{|c| NKF.nkf("--sjis", c)}
|
|
524
527
|
assert_equal(expected, str.split_to_word)
|
|
525
528
|
end
|
|
526
529
|
def test_sjis_split_to_word_hiraonbiki()
|
|
527
|
-
str = NKF.nkf("
|
|
530
|
+
str = NKF.nkf("--sjis", "わールビーだ").extend CharString
|
|
528
531
|
str.encoding = "Shift_JIS"
|
|
529
|
-
expected = ["
|
|
532
|
+
expected = ["わー", "ルビーだ"].map{|c| NKF.nkf("--sjis", c)}
|
|
530
533
|
assert_equal(expected, str.split_to_word)
|
|
531
534
|
end
|
|
532
535
|
def test_sjis_split_to_word_latinmix()
|
|
533
|
-
str = NKF.nkf("
|
|
536
|
+
str = NKF.nkf("--sjis", "日本語とLatinの文字").extend CharString
|
|
534
537
|
str.encoding = "Shift_JIS"
|
|
535
|
-
expected = ["
|
|
538
|
+
expected = ["日本語と","Latin","の","文字"].map{|c| NKF.nkf("--sjis", c)}
|
|
536
539
|
assert_equal(expected, str.split_to_word)
|
|
537
540
|
end
|
|
538
541
|
def test_sjis_split_to_char()
|
|
539
|
-
str = NKF.nkf("
|
|
542
|
+
str = NKF.nkf("--sjis", "表計算a b").extend CharString
|
|
540
543
|
str.encoding = "Shift_JIS"
|
|
541
544
|
str.eol = "LF" #<= needed to pass the test
|
|
542
|
-
expected = ["
|
|
545
|
+
expected = ["表","計","算","a"," ","b"].map{|c|NKF.nkf("--sjis",c)}
|
|
543
546
|
assert_equal(expected, str.split_to_char)
|
|
544
547
|
end
|
|
545
548
|
def test_sjis_split_to_char_with_cr()
|
|
546
|
-
str = NKF.nkf("
|
|
549
|
+
str = NKF.nkf("--sjis", "表計算a b\r").extend CharString
|
|
547
550
|
str.encoding = "Shift_JIS"
|
|
548
551
|
str.eol = "CR"
|
|
549
|
-
expected = ["
|
|
552
|
+
expected = ["表","計","算","a"," ","b","\r"].map{|c|NKF.nkf("--sjis",c)}
|
|
550
553
|
assert_equal(expected, str.split_to_char)
|
|
551
554
|
end
|
|
552
555
|
def test_sjis_split_to_char_with_lf()
|
|
553
|
-
str = NKF.nkf("
|
|
556
|
+
str = NKF.nkf("--sjis", "表計算a b\n").extend CharString
|
|
554
557
|
str.encoding = "Shift_JIS"
|
|
555
558
|
str.eol = "LF"
|
|
556
|
-
expected = ["
|
|
559
|
+
expected = ["表","計","算","a"," ","b","\n"].map{|c|NKF.nkf("--sjis",c)}
|
|
557
560
|
assert_equal(expected, str.split_to_char)
|
|
558
561
|
end
|
|
559
562
|
def test_sjis_split_to_char_with_crlf()
|
|
560
|
-
str = NKF.nkf("
|
|
563
|
+
str = NKF.nkf("--sjis", "表計算a b\r\n").extend CharString
|
|
561
564
|
str.encoding = "Shift_JIS"
|
|
562
565
|
str.eol = "CRLF"
|
|
563
|
-
expected = ["
|
|
566
|
+
expected = ["表","計","算","a"," ","b","\r\n"].map{|c|NKF.nkf("--sjis",c)}
|
|
564
567
|
assert_equal(expected, str.split_to_char)
|
|
565
568
|
end
|
|
566
569
|
def test_sjis_count_char()
|
|
567
|
-
str = NKF.nkf("
|
|
570
|
+
str = NKF.nkf("--sjis", "日本語a b\r\n").extend CharString
|
|
568
571
|
str.encoding = "Shift_JIS"
|
|
569
572
|
str.eol = "CRLF"
|
|
570
573
|
expected = 7
|
|
571
574
|
assert_equal(expected, str.count_char)
|
|
572
575
|
end
|
|
573
576
|
def test_sjis_count_latin_graph_char()
|
|
574
|
-
str = NKF.nkf("
|
|
577
|
+
str = NKF.nkf("--sjis", "日本語a b\r\n").extend CharString
|
|
575
578
|
str.encoding = "Shift_JIS"
|
|
576
579
|
str.eol = "CRLF"
|
|
577
580
|
expected = 2
|
|
578
581
|
assert_equal(expected, str.count_latin_graph_char)
|
|
579
582
|
end
|
|
580
583
|
def test_sjis_count_ja_graph_char()
|
|
581
|
-
str = NKF.nkf("
|
|
584
|
+
str = NKF.nkf("--sjis", "日本語a b\r\n").extend CharString
|
|
582
585
|
str.encoding = "Shift_JIS"
|
|
583
586
|
str.eol = "CRLF"
|
|
584
587
|
expected = 3
|
|
585
588
|
assert_equal(expected, str.count_ja_graph_char)
|
|
586
589
|
end
|
|
587
590
|
def test_sjis_count_graph_char()
|
|
588
|
-
str = NKF.nkf("
|
|
591
|
+
str = NKF.nkf("--sjis", "日本語a b\r\n").extend CharString
|
|
589
592
|
str.encoding = "Shift_JIS"
|
|
590
593
|
str.eol = "CRLF"
|
|
591
594
|
expected = 5
|
|
592
595
|
assert_equal(expected, str.count_graph_char)
|
|
593
596
|
end
|
|
594
597
|
def test_sjis_count_latin_blank_char()
|
|
595
|
-
str = NKF.nkf("
|
|
598
|
+
str = NKF.nkf("--sjis", "日本語\ta b\r\n").extend CharString
|
|
596
599
|
str.encoding = "Shift_JIS"
|
|
597
600
|
str.eol = "CRLF"
|
|
598
601
|
expected = 2
|
|
599
602
|
assert_equal(expected, str.count_latin_blank_char)
|
|
600
603
|
end
|
|
601
604
|
def test_sjis_count_ja_blank_char()
|
|
602
|
-
str = NKF.nkf("
|
|
605
|
+
str = NKF.nkf("--sjis", "日本 語\ta b\r\n").extend CharString
|
|
603
606
|
str.encoding = "Shift_JIS"
|
|
604
607
|
str.eol = "CRLF"
|
|
605
608
|
expected = 1
|
|
606
609
|
assert_equal(expected, str.count_ja_blank_char)
|
|
607
610
|
end
|
|
608
611
|
def test_sjis_count_blank_char()
|
|
609
|
-
str = NKF.nkf("
|
|
612
|
+
str = NKF.nkf("--sjis", "日本 語\ta b\r\n").extend CharString
|
|
610
613
|
str.encoding = "Shift_JIS"
|
|
611
614
|
str.eol = "CRLF"
|
|
612
615
|
expected = 3
|
|
613
616
|
assert_equal(expected, str.count_blank_char)
|
|
614
617
|
end
|
|
615
618
|
def test_sjis_count_word()
|
|
616
|
-
str = NKF.nkf("
|
|
619
|
+
str = NKF.nkf("--sjis", "日本 語a b --\r\n").extend CharString
|
|
617
620
|
str.encoding = "Shift_JIS"
|
|
618
621
|
str.eol = "CRLF"
|
|
619
622
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
620
623
|
assert_equal(expected, str.count_word)
|
|
621
624
|
end
|
|
622
625
|
def test_sjis_count_ja_word()
|
|
623
|
-
str = NKF.nkf("
|
|
626
|
+
str = NKF.nkf("--sjis", "日本 語a b --\r\n").extend CharString
|
|
624
627
|
str.encoding = "Shift_JIS"
|
|
625
628
|
str.eol = "CRLF"
|
|
626
629
|
expected = 3
|
|
627
630
|
assert_equal(expected, str.count_ja_word)
|
|
628
631
|
end
|
|
629
632
|
def test_sjis_count_latin_valid_word()
|
|
630
|
-
str = NKF.nkf("
|
|
633
|
+
str = NKF.nkf("--sjis", "日本 語a b --\r\n").extend CharString
|
|
631
634
|
str.encoding = "Shift_JIS"
|
|
632
635
|
str.eol = "CRLF"
|
|
633
636
|
expected = 2
|
|
634
637
|
assert_equal(expected, str.count_latin_valid_word)
|
|
635
638
|
end
|
|
636
639
|
def test_sjis_count_ja_valid_word()
|
|
637
|
-
str = NKF.nkf("
|
|
640
|
+
str = NKF.nkf("--sjis", "日本 語a b --\r\n").extend CharString
|
|
638
641
|
str.encoding = "Shift_JIS"
|
|
639
642
|
str.eol = "CRLF"
|
|
640
643
|
expected = 2
|
|
641
644
|
assert_equal(expected, str.count_ja_valid_word)
|
|
642
645
|
end
|
|
643
646
|
def test_sjis_count_valid_word()
|
|
644
|
-
str = NKF.nkf("
|
|
647
|
+
str = NKF.nkf("--sjis", "日本 語a b --\r\n").extend CharString
|
|
645
648
|
str.encoding = "Shift_JIS"
|
|
646
649
|
str.eol = "CRLF"
|
|
647
650
|
expected = 4
|
|
648
651
|
assert_equal(expected, str.count_valid_word)
|
|
649
652
|
end
|
|
650
653
|
def test_sjis_count_line()
|
|
651
|
-
str = NKF.nkf("
|
|
654
|
+
str = NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
652
655
|
str.encoding = "Shift_JIS"
|
|
653
656
|
str.eol = "CRLF"
|
|
654
657
|
expected = 6
|
|
655
658
|
assert_equal(expected, str.count_line)
|
|
656
659
|
end
|
|
657
660
|
def test_sjis_count_graph_line()
|
|
658
|
-
str = NKF.nkf("
|
|
661
|
+
str = NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
659
662
|
str.encoding = "Shift_JIS"
|
|
660
663
|
str.eol = "CRLF"
|
|
661
664
|
expected = 3
|
|
662
665
|
assert_equal(expected, str.count_graph_line)
|
|
663
666
|
end
|
|
664
667
|
def test_sjis_count_empty_line()
|
|
665
|
-
str = NKF.nkf("
|
|
668
|
+
str = NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
666
669
|
str.encoding = "Shift_JIS"
|
|
667
670
|
str.eol = "CRLF"
|
|
668
671
|
expected = 1
|
|
669
672
|
assert_equal(expected, str.count_empty_line)
|
|
670
673
|
end
|
|
671
674
|
def test_sjis_count_blank_line()
|
|
672
|
-
str = NKF.nkf("
|
|
675
|
+
str = NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
673
676
|
str.encoding = "Shift_JIS"
|
|
674
677
|
str.eol = "CRLF"
|
|
675
678
|
expected = 2
|
|
@@ -678,176 +681,176 @@ class TC_DocDiff_CharString < Test::Unit::TestCase
|
|
|
678
681
|
|
|
679
682
|
# test UTF8 module
|
|
680
683
|
def test_utf8_split_to_word()
|
|
681
|
-
str = NKF.nkf("
|
|
684
|
+
str = NKF.nkf("--utf8", "日本語の文字foo bar").extend CharString
|
|
682
685
|
str.encoding = "UTF-8"
|
|
683
|
-
expected = ["
|
|
686
|
+
expected = ["日本語の", "文字", "foo ", "bar"].map{|c| NKF.nkf("--utf8", c)}
|
|
684
687
|
assert_equal(expected, str.split_to_word)
|
|
685
688
|
end
|
|
686
689
|
def test_utf8_split_to_word_kanhira()
|
|
687
|
-
str = NKF.nkf("
|
|
690
|
+
str = NKF.nkf("--utf8", "日本語の文字").extend CharString
|
|
688
691
|
str.encoding = "UTF-8"
|
|
689
|
-
expected = ["
|
|
692
|
+
expected = ["日本語の", "文字"].map{|c| NKF.nkf("--utf8", c)}
|
|
690
693
|
assert_equal(expected, str.split_to_word)
|
|
691
694
|
end
|
|
692
695
|
def test_utf8_split_to_word_katahira()
|
|
693
|
-
str = NKF.nkf("
|
|
696
|
+
str = NKF.nkf("--utf8", "カタカナの文字").extend CharString
|
|
694
697
|
str.encoding = "UTF-8"
|
|
695
|
-
expected = ["
|
|
698
|
+
expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--utf8", c)}
|
|
696
699
|
assert_equal(expected, str.split_to_word)
|
|
697
700
|
end
|
|
698
701
|
def test_utf8_split_to_word_kataonbiki()
|
|
699
|
-
str = NKF.nkf("
|
|
702
|
+
str = NKF.nkf("--utf8", "ルビーの指輪").extend CharString
|
|
700
703
|
str.encoding = "UTF-8"
|
|
701
|
-
expected = ["
|
|
704
|
+
expected = ["ルビーの", "指輪"].map{|c| NKF.nkf("--utf8", c)}
|
|
702
705
|
assert_equal(expected, str.split_to_word)
|
|
703
706
|
end
|
|
704
707
|
def test_utf8_split_to_word_hiraonbiki()
|
|
705
|
-
str = NKF.nkf("
|
|
708
|
+
str = NKF.nkf("--utf8", "わールビーだ").extend CharString
|
|
706
709
|
str.encoding = "UTF-8"
|
|
707
|
-
expected = ["
|
|
710
|
+
expected = ["わー", "ルビーだ"].map{|c| NKF.nkf("--utf8", c)}
|
|
708
711
|
assert_equal(expected, str.split_to_word)
|
|
709
712
|
end
|
|
710
713
|
def test_utf8_split_to_word_latinmix()
|
|
711
|
-
str = NKF.nkf("
|
|
714
|
+
str = NKF.nkf("--utf8", "日本語とLatinの文字").extend CharString
|
|
712
715
|
str.encoding = "UTF-8"
|
|
713
|
-
expected = ["
|
|
716
|
+
expected = ["日本語と", "Latin", "の", "文字"].map{|c| NKF.nkf("--utf8", c)}
|
|
714
717
|
assert_equal(expected, str.split_to_word)
|
|
715
718
|
end
|
|
716
719
|
def test_utf8_split_to_char()
|
|
717
|
-
str = NKF.nkf("
|
|
720
|
+
str = NKF.nkf("--utf8", "日本語a b").extend CharString
|
|
718
721
|
str.encoding = "UTF-8" #<= needed to pass the test
|
|
719
722
|
str.eol = "LF" #<= needed to pass the test
|
|
720
|
-
expected = ["
|
|
723
|
+
expected = ["日", "本", "語", "a", " ", "b"].map{|c| NKF.nkf("--utf8", c)}
|
|
721
724
|
assert_equal(expected, str.split_to_char)
|
|
722
725
|
end
|
|
723
726
|
def test_utf8_split_to_char_with_cr()
|
|
724
|
-
str = NKF.nkf("
|
|
727
|
+
str = NKF.nkf("--utf8", "日本語a b\r").extend CharString
|
|
725
728
|
str.encoding = "UTF-8" #<= needed to pass the test
|
|
726
729
|
str.eol = "CR"
|
|
727
|
-
expected = ["
|
|
730
|
+
expected = ["日","本","語","a"," ","b","\r"].map{|c| NKF.nkf("--utf8", c)}
|
|
728
731
|
assert_equal(expected, str.split_to_char)
|
|
729
732
|
end
|
|
730
733
|
def test_utf8_split_to_char_with_lf()
|
|
731
|
-
str = NKF.nkf("
|
|
734
|
+
str = NKF.nkf("--utf8", "日本語a b\n").extend CharString
|
|
732
735
|
str.encoding = "UTF-8" #<= needed to pass the test
|
|
733
736
|
str.eol = "LF"
|
|
734
|
-
expected = ["
|
|
737
|
+
expected = ["日","本","語","a"," ","b","\n"].map{|c| NKF.nkf("--utf8", c)}
|
|
735
738
|
assert_equal(expected, str.split_to_char)
|
|
736
739
|
end
|
|
737
740
|
def test_utf8_split_to_char_with_crlf()
|
|
738
|
-
str = NKF.nkf("
|
|
741
|
+
str = NKF.nkf("--utf8", "日本語a b\r\n").extend CharString
|
|
739
742
|
str.encoding = "UTF-8"#<= needed to pass the test
|
|
740
743
|
str.eol = "CRLF"
|
|
741
|
-
expected = ["
|
|
744
|
+
expected = ["日","本","語","a"," ","b","\r\n"].map{|c| NKF.nkf("--utf8", c)}
|
|
742
745
|
assert_equal(expected, str.split_to_char)
|
|
743
746
|
end
|
|
744
747
|
def test_utf8_count_char()
|
|
745
|
-
str = NKF.nkf("
|
|
748
|
+
str = NKF.nkf("--utf8", "日本語a b\r\n").extend CharString
|
|
746
749
|
str.encoding = "UTF-8" #<= needed to pass the test
|
|
747
750
|
str.eol = "CRLF"
|
|
748
751
|
expected = 7
|
|
749
752
|
assert_equal(expected, str.count_char)
|
|
750
753
|
end
|
|
751
754
|
def test_utf8_count_latin_graph_char()
|
|
752
|
-
str = NKF.nkf("
|
|
755
|
+
str = NKF.nkf("--utf8", "日本語a b\r\n").extend CharString
|
|
753
756
|
str.encoding = "UTF-8" #<= needed to pass the test
|
|
754
757
|
str.eol = "CRLF"
|
|
755
758
|
expected = 2
|
|
756
759
|
assert_equal(expected, str.count_latin_graph_char)
|
|
757
760
|
end
|
|
758
761
|
def test_utf8_count_ja_graph_char()
|
|
759
|
-
str = NKF.nkf("
|
|
762
|
+
str = NKF.nkf("--utf8", "日本語a b\r\n").extend CharString
|
|
760
763
|
str.encoding = "UTF-8" #<= needed to pass the test
|
|
761
764
|
str.eol = "CRLF"
|
|
762
765
|
expected = 3
|
|
763
766
|
assert_equal(expected, str.count_ja_graph_char)
|
|
764
767
|
end
|
|
765
768
|
def test_utf8_count_graph_char()
|
|
766
|
-
str = NKF.nkf("
|
|
769
|
+
str = NKF.nkf("--utf8", "日本語a b\r\n").extend CharString
|
|
767
770
|
str.encoding = "UTF-8" #<= needed to passs the test
|
|
768
771
|
str.eol = "CRLF"
|
|
769
772
|
expected = 5
|
|
770
773
|
assert_equal(expected, str.count_graph_char)
|
|
771
774
|
end
|
|
772
775
|
def test_utf8_count_latin_blank_char()
|
|
773
|
-
str = NKF.nkf("
|
|
776
|
+
str = NKF.nkf("--utf8", "日本語\ta b\r\n").extend CharString
|
|
774
777
|
str.encoding = "UTF-8"
|
|
775
778
|
str.eol = "CRLF"
|
|
776
779
|
expected = 2
|
|
777
780
|
assert_equal(expected, str.count_latin_blank_char)
|
|
778
781
|
end
|
|
779
782
|
def test_utf8_count_ja_blank_char()
|
|
780
|
-
str = NKF.nkf("
|
|
783
|
+
str = NKF.nkf("--utf8", "日本 語\ta b\r\n").extend CharString
|
|
781
784
|
str.encoding = "UTF-8"
|
|
782
785
|
str.eol = "CRLF"
|
|
783
786
|
expected = 1
|
|
784
787
|
assert_equal(expected, str.count_ja_blank_char)
|
|
785
788
|
end
|
|
786
789
|
def test_utf8_count_blank_char()
|
|
787
|
-
str = NKF.nkf("
|
|
790
|
+
str = NKF.nkf("--utf8", "日本 語\ta b\r\n").extend CharString
|
|
788
791
|
str.encoding = "UTF-8"
|
|
789
792
|
str.eol = "CRLF"
|
|
790
793
|
expected = 3
|
|
791
794
|
assert_equal(expected, str.count_blank_char)
|
|
792
795
|
end
|
|
793
796
|
def test_utf8_count_word()
|
|
794
|
-
str = NKF.nkf("
|
|
797
|
+
str = NKF.nkf("--utf8", "日本 語a b --\r\n").extend CharString
|
|
795
798
|
str.encoding = "UTF-8"
|
|
796
799
|
str.eol = "CRLF"
|
|
797
800
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
798
801
|
assert_equal(expected, str.count_word)
|
|
799
802
|
end
|
|
800
803
|
def test_utf8_count_ja_word()
|
|
801
|
-
str = NKF.nkf("
|
|
804
|
+
str = NKF.nkf("--utf8", "日本 語a b --\r\n").extend CharString
|
|
802
805
|
str.encoding = "UTF-8"
|
|
803
806
|
str.eol = "CRLF"
|
|
804
807
|
expected = 3
|
|
805
808
|
assert_equal(expected, str.count_ja_word)
|
|
806
809
|
end
|
|
807
810
|
def test_utf8_count_latin_valid_word()
|
|
808
|
-
str = NKF.nkf("
|
|
811
|
+
str = NKF.nkf("--utf8", "日本 語a b --\r\n").extend CharString
|
|
809
812
|
str.encoding = "UTF-8"
|
|
810
813
|
str.eol = "CRLF"
|
|
811
814
|
expected = 2
|
|
812
815
|
assert_equal(expected, str.count_latin_valid_word)
|
|
813
816
|
end
|
|
814
817
|
def test_utf8_count_ja_valid_word()
|
|
815
|
-
str = NKF.nkf("
|
|
818
|
+
str = NKF.nkf("--utf8", "日本 語a b --\r\n").extend CharString
|
|
816
819
|
str.encoding = "UTF-8"
|
|
817
820
|
str.eol = "CRLF"
|
|
818
821
|
expected = 2
|
|
819
822
|
assert_equal(expected, str.count_ja_valid_word)
|
|
820
823
|
end
|
|
821
824
|
def test_utf8_count_valid_word()
|
|
822
|
-
str = NKF.nkf("
|
|
825
|
+
str = NKF.nkf("--utf8", "日本 語a b --\r\n").extend CharString
|
|
823
826
|
str.encoding = "UTF-8"
|
|
824
827
|
str.eol = "CRLF"
|
|
825
828
|
expected = 4
|
|
826
829
|
assert_equal(expected, str.count_valid_word)
|
|
827
830
|
end
|
|
828
831
|
def test_utf8_count_line()
|
|
829
|
-
str = NKF.nkf("
|
|
832
|
+
str = NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
830
833
|
str.encoding = "UTF-8"
|
|
831
834
|
str.eol = "CRLF"
|
|
832
835
|
expected = 6
|
|
833
836
|
assert_equal(expected, str.count_line)
|
|
834
837
|
end
|
|
835
838
|
def test_utf8_count_graph_line()
|
|
836
|
-
str = NKF.nkf("
|
|
839
|
+
str = NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
837
840
|
str.encoding = "UTF-8"
|
|
838
841
|
str.eol = "CRLF"
|
|
839
842
|
expected = 3
|
|
840
843
|
assert_equal(expected, str.count_graph_line)
|
|
841
844
|
end
|
|
842
845
|
def test_utf8_count_empty_line()
|
|
843
|
-
str = NKF.nkf("
|
|
846
|
+
str = NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
844
847
|
str.encoding = "UTF-8"
|
|
845
848
|
str.eol = "CRLF"
|
|
846
849
|
expected = 1
|
|
847
850
|
assert_equal(expected, str.count_empty_line)
|
|
848
851
|
end
|
|
849
852
|
def test_utf8_count_blank_line()
|
|
850
|
-
str = NKF.nkf("
|
|
853
|
+
str = NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
851
854
|
str.encoding = "UTF-8"
|
|
852
855
|
str.eol = "CRLF"
|
|
853
856
|
expected = 2
|
|
@@ -887,52 +890,52 @@ class TC_DocDiff_CharString < Test::Unit::TestCase
|
|
|
887
890
|
end
|
|
888
891
|
# CharString.guess_encoding mistakes JIS for ASCII sometimes, due to Iconv.
|
|
889
892
|
# def test_guess_encoding_jis_1()
|
|
890
|
-
# str = NKF.nkf("
|
|
893
|
+
# str = NKF.nkf("--jis", "漢字とカタカナとひらがな\n")
|
|
891
894
|
# expected = "JIS"
|
|
892
895
|
# assert_guess_encoding(expected, str)
|
|
893
896
|
# end
|
|
894
897
|
# def test_guess_encoding_jis_2()
|
|
895
|
-
# str = NKF.nkf("
|
|
898
|
+
# str = NKF.nkf("--jis", "漢字とカタカナとひらがなとLatinの文字と空白( )と記号@\n" * 100)
|
|
896
899
|
# expected = "JIS"
|
|
897
900
|
# assert_guess_encoding(expected, str)
|
|
898
901
|
# end
|
|
899
902
|
def test_guess_encoding_eucjp_1()
|
|
900
|
-
str = NKF.nkf("
|
|
903
|
+
str = NKF.nkf("--euc", "日本語とLatinの文字")
|
|
901
904
|
expected = "EUC-JP"
|
|
902
905
|
assert_guess_encoding(expected, str)
|
|
903
906
|
end
|
|
904
907
|
def test_guess_encoding_eucjp_2()
|
|
905
|
-
str = NKF.nkf(
|
|
908
|
+
str = NKF.nkf("--euc", "漢字とカタカナとひらがなとLatinの文字と空白( )\n" * 10)
|
|
906
909
|
expected = "EUC-JP"
|
|
907
910
|
assert_guess_encoding(expected, str)
|
|
908
911
|
end
|
|
909
912
|
def test_guess_encoding_eucjp_3()
|
|
910
|
-
str = NKF.nkf(
|
|
913
|
+
str = NKF.nkf("--euc", "こんばんは、私の名前はまつもとです。\nRubyを作ったのは私です。私はRuby Hackerです。\n")
|
|
911
914
|
expected = "EUC-JP"
|
|
912
915
|
assert_guess_encoding(expected, str)
|
|
913
916
|
end
|
|
914
917
|
def test_guess_encoding_sjis_1()
|
|
915
|
-
str = NKF.nkf("
|
|
918
|
+
str = NKF.nkf("--sjis", "日本語とLatinの文字")
|
|
916
919
|
expected = "Shift_JIS"
|
|
917
920
|
assert_guess_encoding(expected, str)
|
|
918
921
|
end
|
|
919
922
|
def test_guess_encoding_sjis_2()
|
|
920
|
-
str = NKF.nkf(
|
|
923
|
+
str = NKF.nkf("--sjis", "漢字と\nカタカナと\nひらがなと\nLatin")
|
|
921
924
|
expected = "Shift_JIS"
|
|
922
925
|
assert_guess_encoding(expected, str)
|
|
923
926
|
end
|
|
924
927
|
def test_guess_encoding_cp932_1()
|
|
925
|
-
str = NKF.nkf(
|
|
928
|
+
str = NKF.nkf("--oc=CP932", "\\u2460") # CIRCLED DIGIT ONE
|
|
926
929
|
expected = "Windows-31J" # CP932 == Windows-31J in Ruby 1.9+
|
|
927
930
|
assert_guess_encoding(expected, str)
|
|
928
931
|
end
|
|
929
932
|
def test_guess_encoding_utf8_1()
|
|
930
|
-
str = NKF.nkf("
|
|
933
|
+
str = NKF.nkf("--utf8", "日本語とLatinの文字")
|
|
931
934
|
expected = "UTF-8"
|
|
932
935
|
assert_guess_encoding(expected, str)
|
|
933
936
|
end
|
|
934
937
|
def test_guess_encoding_utf8_2()
|
|
935
|
-
str = NKF.nkf("
|
|
938
|
+
str = NKF.nkf("--utf8", "いろは\nにほへと\n")
|
|
936
939
|
expected = "UTF-8"
|
|
937
940
|
assert_guess_encoding(expected, str)
|
|
938
941
|
end
|