docdiff 0.6.2 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Makefile +27 -49
- data/README.md +351 -0
- data/README_ja.md +351 -0
- data/Rakefile +2 -42
- data/bin/docdiff +53 -30
- data/{docdiff.conf.example → doc/example/docdiff.conf.example} +4 -3
- data/doc/man/docdiff.adoc +146 -0
- data/doc/news.md +180 -0
- data/doc/shell_completion/_docdiff.zsh +51 -0
- data/doc/shell_completion/docdiff.bash +68 -0
- data/docdiff.gemspec +1 -0
- data/lib/doc_diff.rb +13 -0
- data/lib/docdiff/version.rb +1 -1
- data/lib/docdiff/view.rb +4 -4
- data/test/charstring_test.rb +121 -121
- data/test/docdiff_test.rb +1 -1
- data/test/document_test.rb +109 -109
- data/test/fixture/01_ja_utf8_lf.txt +2 -0
- data/test/fixture/02_ja_utf8_lf.txt +2 -0
- data/test/view_test.rb +135 -111
- metadata +39 -36
- data/devutil/changelog.sh +0 -40
- data/index.html +0 -181
- data/langfilter.rb +0 -10
- data/readme.html +0 -750
- data/readme.md +0 -185
- /data/{docdiffwebui.cgi → doc/example/docdiffwebui.cgi} +0 -0
- /data/{docdiffwebui.html → doc/example/docdiffwebui.html} +0 -0
- /data/{img/docdiff-screenshot-format-html-digest-firefox.png → doc/img/screenshot-format-html-digest-firefox.png} +0 -0
- /data/{img/docdiff-screenshot-format-html-firefox.png → doc/img/screenshot-format-html-firefox.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-cmdexe-en.png → doc/img/screenshot-format-tty-cmdexe-en.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-cmdexe-ja.png → doc/img/screenshot-format-tty-cmdexe-ja.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-rxvtunicode-en.png → doc/img/screenshot-format-tty-rxvtunicode-en.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-rxvtunicode-ja.png → doc/img/screenshot-format-tty-rxvtunicode-ja.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-xterm-en.png → doc/img/screenshot-format-tty-xterm-en.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-xterm-ja.png → doc/img/screenshot-format-tty-xterm-ja.png} +0 -0
- /data/{img/docdiff-screenshot-resolution-linewordchar-xterm.png → doc/img/screenshot-resolution-linewordchar-xterm.png} +0 -0
- /data/{sample/01.en.ascii.cr → test/fixture/01_en_ascii_cr.txt} +0 -0
- /data/{sample/01.en.ascii.crlf → test/fixture/01_en_ascii_crlf.txt} +0 -0
- /data/{sample/01.en.ascii.lf → test/fixture/01_en_ascii_lf.txt} +0 -0
- /data/{sample/01.ja.eucjp.lf → test/fixture/01_ja_eucjp_lf.txt} +0 -0
- /data/{sample/01.ja.sjis.cr → test/fixture/01_ja_sjis_cr.txt} +0 -0
- /data/{sample/01.ja.sjis.crlf → test/fixture/01_ja_sjis_crlf.txt} +0 -0
- /data/{sample/01.ja.utf8.crlf → test/fixture/01_ja_utf8_crlf.txt} +0 -0
- /data/{sample/02.en.ascii.cr → test/fixture/02_en_ascii_cr.txt} +0 -0
- /data/{sample/02.en.ascii.crlf → test/fixture/02_en_ascii_crlf.txt} +0 -0
- /data/{sample/02.en.ascii.lf → test/fixture/02_en_ascii_lf.txt} +0 -0
- /data/{sample/02.ja.eucjp.lf → test/fixture/02_ja_eucjp_lf.txt} +0 -0
- /data/{sample/02.ja.sjis.cr → test/fixture/02_ja_sjis_cr.txt} +0 -0
- /data/{sample/02.ja.sjis.crlf → test/fixture/02_ja_sjis_crlf.txt} +0 -0
- /data/{sample/02.ja.utf8.crlf → test/fixture/02_ja_utf8_crlf.txt} +0 -0
- /data/{sample/humpty_dumpty01.ascii.lf → test/fixture/humpty_dumpty01_ascii_lf.txt} +0 -0
- /data/{sample/humpty_dumpty02.ascii.lf → test/fixture/humpty_dumpty02_ascii_lf.txt} +0 -0
data/test/charstring_test.rb
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/ruby
|
|
2
|
-
# -*- coding:
|
|
2
|
+
# -*- coding: utf-8; -*-
|
|
3
3
|
|
|
4
4
|
# frozen_string_literal: false
|
|
5
5
|
|
|
@@ -90,12 +90,12 @@ class TC_DocDiff_CharString < Test::Unit::TestCase
|
|
|
90
90
|
assert_equal(expected, str.eol_char)
|
|
91
91
|
end
|
|
92
92
|
def test_eol_char_none_eucjp()
|
|
93
|
-
str = NKF.nkf("
|
|
93
|
+
str = NKF.nkf("--euc", "日本語a b").extend CharString
|
|
94
94
|
expected = nil
|
|
95
95
|
assert_equal(expected, str.eol_char)
|
|
96
96
|
end
|
|
97
97
|
def test_eol_char_none_sjis()
|
|
98
|
-
str = NKF.nkf("
|
|
98
|
+
str = NKF.nkf("--sjis", "日本語a b").extend CharString
|
|
99
99
|
expected = nil
|
|
100
100
|
assert_equal(expected, str.eol_char)
|
|
101
101
|
end
|
|
@@ -325,176 +325,176 @@ class TC_DocDiff_CharString < Test::Unit::TestCase
|
|
|
325
325
|
|
|
326
326
|
# test EUCJP module
|
|
327
327
|
def test_eucjp_split_to_word()
|
|
328
|
-
str = NKF.nkf("
|
|
328
|
+
str = NKF.nkf("--euc", "日本語の文字foo bar").extend CharString
|
|
329
329
|
str.encoding = "EUC-JP"
|
|
330
|
-
expected = ["
|
|
330
|
+
expected = ["日本語の","文字","foo ","bar"].map{|c| NKF.nkf("--euc", c)}
|
|
331
331
|
assert_equal(expected, str.split_to_word)
|
|
332
332
|
end
|
|
333
333
|
def test_eucjp_split_to_word_kanhira()
|
|
334
|
-
str = NKF.nkf("
|
|
334
|
+
str = NKF.nkf("--euc", "日本語の文字").extend CharString
|
|
335
335
|
str.encoding = "EUC-JP"
|
|
336
|
-
expected = ["
|
|
336
|
+
expected = ["日本語の", "文字"].map{|c| NKF.nkf("--euc", c)}
|
|
337
337
|
assert_equal(expected, str.split_to_word)
|
|
338
338
|
end
|
|
339
339
|
def test_eucjp_split_to_word_katahira()
|
|
340
|
-
str = NKF.nkf("
|
|
340
|
+
str = NKF.nkf("--euc", "カタカナの文字").extend CharString
|
|
341
341
|
str.encoding = "EUC-JP"
|
|
342
|
-
expected = ["
|
|
342
|
+
expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--euc", c)}
|
|
343
343
|
assert_equal(expected, str.split_to_word)
|
|
344
344
|
end
|
|
345
345
|
def test_eucjp_split_to_word_kataonbiki()
|
|
346
|
-
str = NKF.nkf("
|
|
346
|
+
str = NKF.nkf("--euc", "ルビー色の石").extend CharString
|
|
347
347
|
str.encoding = "EUC-JP" #<= needed to pass the test
|
|
348
|
-
expected = ["
|
|
348
|
+
expected = ["ルビー", "色の", "石"].map{|c| NKF.nkf("--euc", c)}
|
|
349
349
|
assert_equal(expected, str.split_to_word)
|
|
350
350
|
end
|
|
351
351
|
def test_eucjp_split_to_word_hiraonbiki()
|
|
352
|
-
str = NKF.nkf("
|
|
352
|
+
str = NKF.nkf("--euc", "わールビーだ").extend CharString
|
|
353
353
|
str.encoding = "EUC-JP" #<= needed to pass the test
|
|
354
|
-
expected = ["
|
|
354
|
+
expected = ["わー", "ルビーだ"].map{|c| NKF.nkf("--euc", c)}
|
|
355
355
|
assert_equal(expected, str.split_to_word)
|
|
356
356
|
end
|
|
357
357
|
def test_eucjp_split_to_word_latinmix()
|
|
358
|
-
str = NKF.nkf("
|
|
358
|
+
str = NKF.nkf("--euc", "日本語とLatinの文字").extend CharString
|
|
359
359
|
str.encoding = "EUC-JP"
|
|
360
|
-
expected = ["
|
|
360
|
+
expected = ["日本語と", "Latin", "の", "文字"].map{|c| NKF.nkf("--euc", c)}
|
|
361
361
|
assert_equal(expected, str.split_to_word)
|
|
362
362
|
end
|
|
363
363
|
def test_eucjp_split_to_char()
|
|
364
|
-
str = NKF.nkf("
|
|
364
|
+
str = NKF.nkf("--euc", "日本語a b").extend CharString
|
|
365
365
|
str.encoding = "EUC-JP"
|
|
366
366
|
str.eol = "LF" #<= needed to pass the test
|
|
367
|
-
expected = ["
|
|
367
|
+
expected = ["日","本","語","a"," ","b"].map{|c|NKF.nkf("--euc",c)}
|
|
368
368
|
assert_equal(expected, str.split_to_char)
|
|
369
369
|
end
|
|
370
370
|
def test_eucjp_split_to_char_with_cr()
|
|
371
|
-
str = NKF.nkf("
|
|
371
|
+
str = NKF.nkf("--euc", "日本語a b\r").extend CharString
|
|
372
372
|
str.encoding = "EUC-JP"
|
|
373
373
|
str.eol = "CR"
|
|
374
|
-
expected = ["
|
|
374
|
+
expected = ["日","本","語","a"," ","b","\r"].map{|c|NKF.nkf("--euc",c)}
|
|
375
375
|
assert_equal(expected, str.split_to_char)
|
|
376
376
|
end
|
|
377
377
|
def test_eucjp_split_to_char_with_lf()
|
|
378
|
-
str = NKF.nkf("
|
|
378
|
+
str = NKF.nkf("--euc", "日本語a b\n").extend CharString
|
|
379
379
|
str.encoding = "EUC-JP"
|
|
380
380
|
str.eol = "LF"
|
|
381
|
-
expected = ["
|
|
381
|
+
expected = ["日","本","語","a"," ","b","\n"].map{|c|NKF.nkf("--euc",c)}
|
|
382
382
|
assert_equal(expected, str.split_to_char)
|
|
383
383
|
end
|
|
384
384
|
def test_eucjp_split_to_char_with_crlf()
|
|
385
|
-
str = NKF.nkf("
|
|
385
|
+
str = NKF.nkf("--euc", "日本語a b\r\n").extend CharString
|
|
386
386
|
str.encoding = "EUC-JP"
|
|
387
387
|
str.eol = "CRLF"
|
|
388
|
-
expected = ["
|
|
388
|
+
expected = ["日","本","語","a"," ","b","\r\n"].map{|c|NKF.nkf("--euc",c)}
|
|
389
389
|
assert_equal(expected, str.split_to_char)
|
|
390
390
|
end
|
|
391
391
|
def test_eucjp_count_char()
|
|
392
|
-
str = NKF.nkf("
|
|
392
|
+
str = NKF.nkf("--euc", "日本語a b\r\n").extend CharString
|
|
393
393
|
str.encoding = "EUC-JP"
|
|
394
394
|
str.eol = "CRLF"
|
|
395
395
|
expected = 7
|
|
396
396
|
assert_equal(expected, str.count_char)
|
|
397
397
|
end
|
|
398
398
|
def test_eucjp_count_latin_graph_char()
|
|
399
|
-
str = NKF.nkf("
|
|
399
|
+
str = NKF.nkf("--euc", "日本語a b\r\n").extend CharString
|
|
400
400
|
str.encoding = "EUC-JP"
|
|
401
401
|
str.eol = "CRLF"
|
|
402
402
|
expected = 2
|
|
403
403
|
assert_equal(expected, str.count_latin_graph_char)
|
|
404
404
|
end
|
|
405
405
|
def test_eucjp_count_ja_graph_char()
|
|
406
|
-
str = NKF.nkf("
|
|
406
|
+
str = NKF.nkf("--euc", "日本語a b\r\n").extend CharString
|
|
407
407
|
str.encoding = "EUC-JP"
|
|
408
408
|
str.eol = "CRLF"
|
|
409
409
|
expected = 3
|
|
410
410
|
assert_equal(expected, str.count_ja_graph_char)
|
|
411
411
|
end
|
|
412
412
|
def test_eucjp_count_graph_char()
|
|
413
|
-
str = NKF.nkf("
|
|
413
|
+
str = NKF.nkf("--euc", "日本語a b\r\n").extend CharString
|
|
414
414
|
str.encoding = "EUC-JP"
|
|
415
415
|
str.eol = "CRLF"
|
|
416
416
|
expected = 5
|
|
417
417
|
assert_equal(expected, str.count_graph_char)
|
|
418
418
|
end
|
|
419
419
|
def test_eucjp_count_latin_blank_char()
|
|
420
|
-
str = NKF.nkf("
|
|
420
|
+
str = NKF.nkf("--euc", "日本語\ta b\r\n").extend CharString
|
|
421
421
|
str.encoding = "EUC-JP"
|
|
422
422
|
str.eol = "CRLF"
|
|
423
423
|
expected = 2
|
|
424
424
|
assert_equal(expected, str.count_latin_blank_char)
|
|
425
425
|
end
|
|
426
426
|
def test_eucjp_count_ja_blank_char()
|
|
427
|
-
str = NKF.nkf("
|
|
427
|
+
str = NKF.nkf("--euc", "日本 語\ta b\r\n").extend CharString
|
|
428
428
|
str.encoding = "EUC-JP"
|
|
429
429
|
str.eol = "CRLF"
|
|
430
430
|
expected = 1
|
|
431
431
|
assert_equal(expected, str.count_ja_blank_char)
|
|
432
432
|
end
|
|
433
433
|
def test_eucjp_count_blank_char()
|
|
434
|
-
str = NKF.nkf("
|
|
434
|
+
str = NKF.nkf("--euc", "日本 語\ta b\r\n").extend CharString
|
|
435
435
|
str.encoding = "EUC-JP"
|
|
436
436
|
str.eol = "CRLF"
|
|
437
437
|
expected = 3
|
|
438
438
|
assert_equal(expected, str.count_blank_char)
|
|
439
439
|
end
|
|
440
440
|
def test_eucjp_count_word()
|
|
441
|
-
str = NKF.nkf("
|
|
441
|
+
str = NKF.nkf("--euc", "日本 語a b --\r\n").extend CharString
|
|
442
442
|
str.encoding = "EUC-JP"
|
|
443
443
|
str.eol = "CRLF"
|
|
444
444
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
445
445
|
assert_equal(expected, str.count_word)
|
|
446
446
|
end
|
|
447
447
|
def test_eucjp_count_ja_word()
|
|
448
|
-
str = NKF.nkf("
|
|
448
|
+
str = NKF.nkf("--euc", "日本 語a b --\r\n").extend CharString
|
|
449
449
|
str.encoding = "EUC-JP"
|
|
450
450
|
str.eol = "CRLF"
|
|
451
451
|
expected = 3
|
|
452
452
|
assert_equal(expected, str.count_ja_word)
|
|
453
453
|
end
|
|
454
454
|
def test_eucjp_count_latin_valid_word()
|
|
455
|
-
str = NKF.nkf("
|
|
455
|
+
str = NKF.nkf("--euc", "日本 語a b --\r\n").extend CharString
|
|
456
456
|
str.encoding = "EUC-JP"
|
|
457
457
|
str.eol = "CRLF"
|
|
458
458
|
expected = 2
|
|
459
459
|
assert_equal(expected, str.count_latin_valid_word)
|
|
460
460
|
end
|
|
461
461
|
def test_eucjp_count_ja_valid_word()
|
|
462
|
-
str = NKF.nkf("
|
|
462
|
+
str = NKF.nkf("--euc", "日本 語a b --\r\n").extend CharString
|
|
463
463
|
str.encoding = "EUC-JP"
|
|
464
464
|
str.eol = "CRLF"
|
|
465
465
|
expected = 2
|
|
466
466
|
assert_equal(expected, str.count_ja_valid_word)
|
|
467
467
|
end
|
|
468
468
|
def test_eucjp_count_valid_word()
|
|
469
|
-
str = NKF.nkf("
|
|
469
|
+
str = NKF.nkf("--euc", "日本 語a b --\r\n").extend CharString
|
|
470
470
|
str.encoding = "EUC-JP"
|
|
471
471
|
str.eol = "CRLF"
|
|
472
472
|
expected = 4
|
|
473
473
|
assert_equal(expected, str.count_valid_word)
|
|
474
474
|
end
|
|
475
475
|
def test_eucjp_count_line()
|
|
476
|
-
str = NKF.nkf("
|
|
476
|
+
str = NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
477
477
|
str.encoding = "EUC-JP"
|
|
478
478
|
str.eol = "CRLF"
|
|
479
479
|
expected = 6
|
|
480
480
|
assert_equal(expected, str.count_line)
|
|
481
481
|
end
|
|
482
482
|
def test_eucjp_count_graph_line()
|
|
483
|
-
str = NKF.nkf("
|
|
483
|
+
str = NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
484
484
|
str.encoding = "EUC-JP"
|
|
485
485
|
str.eol = "CRLF"
|
|
486
486
|
expected = 3
|
|
487
487
|
assert_equal(expected, str.count_graph_line)
|
|
488
488
|
end
|
|
489
489
|
def test_eucjp_count_empty_line()
|
|
490
|
-
str = NKF.nkf("
|
|
490
|
+
str = NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
491
491
|
str.encoding = "EUC-JP"
|
|
492
492
|
str.eol = "CRLF"
|
|
493
493
|
expected = 1
|
|
494
494
|
assert_equal(expected, str.count_empty_line)
|
|
495
495
|
end
|
|
496
496
|
def test_eucjp_count_blank_line()
|
|
497
|
-
str = NKF.nkf("
|
|
497
|
+
str = NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
498
498
|
str.encoding = "EUC-JP"
|
|
499
499
|
str.eol = "CRLF"
|
|
500
500
|
expected = 2
|
|
@@ -503,176 +503,176 @@ class TC_DocDiff_CharString < Test::Unit::TestCase
|
|
|
503
503
|
|
|
504
504
|
# test SJIS module
|
|
505
505
|
def test_sjis_split_to_word()
|
|
506
|
-
str = NKF.nkf("
|
|
506
|
+
str = NKF.nkf("--sjis", "日本語の文字foo bar").extend CharString
|
|
507
507
|
str.encoding = "Shift_JIS"
|
|
508
|
-
expected = ["
|
|
508
|
+
expected = ["日本語の", "文字", "foo ", "bar"].map{|c|NKF.nkf("--sjis",c)}
|
|
509
509
|
assert_equal(expected, str.split_to_word)
|
|
510
510
|
end
|
|
511
511
|
def test_sjisplit_s_to_word_kanhira()
|
|
512
|
-
str = NKF.nkf("
|
|
512
|
+
str = NKF.nkf("--sjis", "日本語の文字").extend CharString
|
|
513
513
|
str.encoding = "Shift_JIS"
|
|
514
|
-
expected = ["
|
|
514
|
+
expected = ["日本語の", "文字"].map{|c| NKF.nkf("--sjis", c)}
|
|
515
515
|
assert_equal(expected, str.split_to_word)
|
|
516
516
|
end
|
|
517
517
|
def test_sjis_split_to_word_katahira()
|
|
518
|
-
str = NKF.nkf("
|
|
518
|
+
str = NKF.nkf("--sjis", "カタカナの文字").extend CharString
|
|
519
519
|
str.encoding = "Shift_JIS"
|
|
520
|
-
expected = ["
|
|
520
|
+
expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--sjis", c)}
|
|
521
521
|
assert_equal(expected, str.split_to_word)
|
|
522
522
|
end
|
|
523
523
|
def test_sjis_split_to_word_kataonbiki()
|
|
524
|
-
str = NKF.nkf("
|
|
524
|
+
str = NKF.nkf("--sjis", "ルビーの指輪").extend CharString
|
|
525
525
|
str.encoding = "Shift_JIS"
|
|
526
|
-
expected = ["
|
|
526
|
+
expected = ["ルビーの", "指輪"].map{|c| NKF.nkf("--sjis", c)}
|
|
527
527
|
assert_equal(expected, str.split_to_word)
|
|
528
528
|
end
|
|
529
529
|
def test_sjis_split_to_word_hiraonbiki()
|
|
530
|
-
str = NKF.nkf("
|
|
530
|
+
str = NKF.nkf("--sjis", "わールビーだ").extend CharString
|
|
531
531
|
str.encoding = "Shift_JIS"
|
|
532
|
-
expected = ["
|
|
532
|
+
expected = ["わー", "ルビーだ"].map{|c| NKF.nkf("--sjis", c)}
|
|
533
533
|
assert_equal(expected, str.split_to_word)
|
|
534
534
|
end
|
|
535
535
|
def test_sjis_split_to_word_latinmix()
|
|
536
|
-
str = NKF.nkf("
|
|
536
|
+
str = NKF.nkf("--sjis", "日本語とLatinの文字").extend CharString
|
|
537
537
|
str.encoding = "Shift_JIS"
|
|
538
|
-
expected = ["
|
|
538
|
+
expected = ["日本語と","Latin","の","文字"].map{|c| NKF.nkf("--sjis", c)}
|
|
539
539
|
assert_equal(expected, str.split_to_word)
|
|
540
540
|
end
|
|
541
541
|
def test_sjis_split_to_char()
|
|
542
|
-
str = NKF.nkf("
|
|
542
|
+
str = NKF.nkf("--sjis", "表計算a b").extend CharString
|
|
543
543
|
str.encoding = "Shift_JIS"
|
|
544
544
|
str.eol = "LF" #<= needed to pass the test
|
|
545
|
-
expected = ["
|
|
545
|
+
expected = ["表","計","算","a"," ","b"].map{|c|NKF.nkf("--sjis",c)}
|
|
546
546
|
assert_equal(expected, str.split_to_char)
|
|
547
547
|
end
|
|
548
548
|
def test_sjis_split_to_char_with_cr()
|
|
549
|
-
str = NKF.nkf("
|
|
549
|
+
str = NKF.nkf("--sjis", "表計算a b\r").extend CharString
|
|
550
550
|
str.encoding = "Shift_JIS"
|
|
551
551
|
str.eol = "CR"
|
|
552
|
-
expected = ["
|
|
552
|
+
expected = ["表","計","算","a"," ","b","\r"].map{|c|NKF.nkf("--sjis",c)}
|
|
553
553
|
assert_equal(expected, str.split_to_char)
|
|
554
554
|
end
|
|
555
555
|
def test_sjis_split_to_char_with_lf()
|
|
556
|
-
str = NKF.nkf("
|
|
556
|
+
str = NKF.nkf("--sjis", "表計算a b\n").extend CharString
|
|
557
557
|
str.encoding = "Shift_JIS"
|
|
558
558
|
str.eol = "LF"
|
|
559
|
-
expected = ["
|
|
559
|
+
expected = ["表","計","算","a"," ","b","\n"].map{|c|NKF.nkf("--sjis",c)}
|
|
560
560
|
assert_equal(expected, str.split_to_char)
|
|
561
561
|
end
|
|
562
562
|
def test_sjis_split_to_char_with_crlf()
|
|
563
|
-
str = NKF.nkf("
|
|
563
|
+
str = NKF.nkf("--sjis", "表計算a b\r\n").extend CharString
|
|
564
564
|
str.encoding = "Shift_JIS"
|
|
565
565
|
str.eol = "CRLF"
|
|
566
|
-
expected = ["
|
|
566
|
+
expected = ["表","計","算","a"," ","b","\r\n"].map{|c|NKF.nkf("--sjis",c)}
|
|
567
567
|
assert_equal(expected, str.split_to_char)
|
|
568
568
|
end
|
|
569
569
|
def test_sjis_count_char()
|
|
570
|
-
str = NKF.nkf("
|
|
570
|
+
str = NKF.nkf("--sjis", "日本語a b\r\n").extend CharString
|
|
571
571
|
str.encoding = "Shift_JIS"
|
|
572
572
|
str.eol = "CRLF"
|
|
573
573
|
expected = 7
|
|
574
574
|
assert_equal(expected, str.count_char)
|
|
575
575
|
end
|
|
576
576
|
def test_sjis_count_latin_graph_char()
|
|
577
|
-
str = NKF.nkf("
|
|
577
|
+
str = NKF.nkf("--sjis", "日本語a b\r\n").extend CharString
|
|
578
578
|
str.encoding = "Shift_JIS"
|
|
579
579
|
str.eol = "CRLF"
|
|
580
580
|
expected = 2
|
|
581
581
|
assert_equal(expected, str.count_latin_graph_char)
|
|
582
582
|
end
|
|
583
583
|
def test_sjis_count_ja_graph_char()
|
|
584
|
-
str = NKF.nkf("
|
|
584
|
+
str = NKF.nkf("--sjis", "日本語a b\r\n").extend CharString
|
|
585
585
|
str.encoding = "Shift_JIS"
|
|
586
586
|
str.eol = "CRLF"
|
|
587
587
|
expected = 3
|
|
588
588
|
assert_equal(expected, str.count_ja_graph_char)
|
|
589
589
|
end
|
|
590
590
|
def test_sjis_count_graph_char()
|
|
591
|
-
str = NKF.nkf("
|
|
591
|
+
str = NKF.nkf("--sjis", "日本語a b\r\n").extend CharString
|
|
592
592
|
str.encoding = "Shift_JIS"
|
|
593
593
|
str.eol = "CRLF"
|
|
594
594
|
expected = 5
|
|
595
595
|
assert_equal(expected, str.count_graph_char)
|
|
596
596
|
end
|
|
597
597
|
def test_sjis_count_latin_blank_char()
|
|
598
|
-
str = NKF.nkf("
|
|
598
|
+
str = NKF.nkf("--sjis", "日本語\ta b\r\n").extend CharString
|
|
599
599
|
str.encoding = "Shift_JIS"
|
|
600
600
|
str.eol = "CRLF"
|
|
601
601
|
expected = 2
|
|
602
602
|
assert_equal(expected, str.count_latin_blank_char)
|
|
603
603
|
end
|
|
604
604
|
def test_sjis_count_ja_blank_char()
|
|
605
|
-
str = NKF.nkf("
|
|
605
|
+
str = NKF.nkf("--sjis", "日本 語\ta b\r\n").extend CharString
|
|
606
606
|
str.encoding = "Shift_JIS"
|
|
607
607
|
str.eol = "CRLF"
|
|
608
608
|
expected = 1
|
|
609
609
|
assert_equal(expected, str.count_ja_blank_char)
|
|
610
610
|
end
|
|
611
611
|
def test_sjis_count_blank_char()
|
|
612
|
-
str = NKF.nkf("
|
|
612
|
+
str = NKF.nkf("--sjis", "日本 語\ta b\r\n").extend CharString
|
|
613
613
|
str.encoding = "Shift_JIS"
|
|
614
614
|
str.eol = "CRLF"
|
|
615
615
|
expected = 3
|
|
616
616
|
assert_equal(expected, str.count_blank_char)
|
|
617
617
|
end
|
|
618
618
|
def test_sjis_count_word()
|
|
619
|
-
str = NKF.nkf("
|
|
619
|
+
str = NKF.nkf("--sjis", "日本 語a b --\r\n").extend CharString
|
|
620
620
|
str.encoding = "Shift_JIS"
|
|
621
621
|
str.eol = "CRLF"
|
|
622
622
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
623
623
|
assert_equal(expected, str.count_word)
|
|
624
624
|
end
|
|
625
625
|
def test_sjis_count_ja_word()
|
|
626
|
-
str = NKF.nkf("
|
|
626
|
+
str = NKF.nkf("--sjis", "日本 語a b --\r\n").extend CharString
|
|
627
627
|
str.encoding = "Shift_JIS"
|
|
628
628
|
str.eol = "CRLF"
|
|
629
629
|
expected = 3
|
|
630
630
|
assert_equal(expected, str.count_ja_word)
|
|
631
631
|
end
|
|
632
632
|
def test_sjis_count_latin_valid_word()
|
|
633
|
-
str = NKF.nkf("
|
|
633
|
+
str = NKF.nkf("--sjis", "日本 語a b --\r\n").extend CharString
|
|
634
634
|
str.encoding = "Shift_JIS"
|
|
635
635
|
str.eol = "CRLF"
|
|
636
636
|
expected = 2
|
|
637
637
|
assert_equal(expected, str.count_latin_valid_word)
|
|
638
638
|
end
|
|
639
639
|
def test_sjis_count_ja_valid_word()
|
|
640
|
-
str = NKF.nkf("
|
|
640
|
+
str = NKF.nkf("--sjis", "日本 語a b --\r\n").extend CharString
|
|
641
641
|
str.encoding = "Shift_JIS"
|
|
642
642
|
str.eol = "CRLF"
|
|
643
643
|
expected = 2
|
|
644
644
|
assert_equal(expected, str.count_ja_valid_word)
|
|
645
645
|
end
|
|
646
646
|
def test_sjis_count_valid_word()
|
|
647
|
-
str = NKF.nkf("
|
|
647
|
+
str = NKF.nkf("--sjis", "日本 語a b --\r\n").extend CharString
|
|
648
648
|
str.encoding = "Shift_JIS"
|
|
649
649
|
str.eol = "CRLF"
|
|
650
650
|
expected = 4
|
|
651
651
|
assert_equal(expected, str.count_valid_word)
|
|
652
652
|
end
|
|
653
653
|
def test_sjis_count_line()
|
|
654
|
-
str = NKF.nkf("
|
|
654
|
+
str = NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
655
655
|
str.encoding = "Shift_JIS"
|
|
656
656
|
str.eol = "CRLF"
|
|
657
657
|
expected = 6
|
|
658
658
|
assert_equal(expected, str.count_line)
|
|
659
659
|
end
|
|
660
660
|
def test_sjis_count_graph_line()
|
|
661
|
-
str = NKF.nkf("
|
|
661
|
+
str = NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
662
662
|
str.encoding = "Shift_JIS"
|
|
663
663
|
str.eol = "CRLF"
|
|
664
664
|
expected = 3
|
|
665
665
|
assert_equal(expected, str.count_graph_line)
|
|
666
666
|
end
|
|
667
667
|
def test_sjis_count_empty_line()
|
|
668
|
-
str = NKF.nkf("
|
|
668
|
+
str = NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
669
669
|
str.encoding = "Shift_JIS"
|
|
670
670
|
str.eol = "CRLF"
|
|
671
671
|
expected = 1
|
|
672
672
|
assert_equal(expected, str.count_empty_line)
|
|
673
673
|
end
|
|
674
674
|
def test_sjis_count_blank_line()
|
|
675
|
-
str = NKF.nkf("
|
|
675
|
+
str = NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
676
676
|
str.encoding = "Shift_JIS"
|
|
677
677
|
str.eol = "CRLF"
|
|
678
678
|
expected = 2
|
|
@@ -681,176 +681,176 @@ class TC_DocDiff_CharString < Test::Unit::TestCase
|
|
|
681
681
|
|
|
682
682
|
# test UTF8 module
|
|
683
683
|
def test_utf8_split_to_word()
|
|
684
|
-
str = NKF.nkf("
|
|
684
|
+
str = NKF.nkf("--utf8", "日本語の文字foo bar").extend CharString
|
|
685
685
|
str.encoding = "UTF-8"
|
|
686
|
-
expected = ["
|
|
686
|
+
expected = ["日本語の", "文字", "foo ", "bar"].map{|c| NKF.nkf("--utf8", c)}
|
|
687
687
|
assert_equal(expected, str.split_to_word)
|
|
688
688
|
end
|
|
689
689
|
def test_utf8_split_to_word_kanhira()
|
|
690
|
-
str = NKF.nkf("
|
|
690
|
+
str = NKF.nkf("--utf8", "日本語の文字").extend CharString
|
|
691
691
|
str.encoding = "UTF-8"
|
|
692
|
-
expected = ["
|
|
692
|
+
expected = ["日本語の", "文字"].map{|c| NKF.nkf("--utf8", c)}
|
|
693
693
|
assert_equal(expected, str.split_to_word)
|
|
694
694
|
end
|
|
695
695
|
def test_utf8_split_to_word_katahira()
|
|
696
|
-
str = NKF.nkf("
|
|
696
|
+
str = NKF.nkf("--utf8", "カタカナの文字").extend CharString
|
|
697
697
|
str.encoding = "UTF-8"
|
|
698
|
-
expected = ["
|
|
698
|
+
expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--utf8", c)}
|
|
699
699
|
assert_equal(expected, str.split_to_word)
|
|
700
700
|
end
|
|
701
701
|
def test_utf8_split_to_word_kataonbiki()
|
|
702
|
-
str = NKF.nkf("
|
|
702
|
+
str = NKF.nkf("--utf8", "ルビーの指輪").extend CharString
|
|
703
703
|
str.encoding = "UTF-8"
|
|
704
|
-
expected = ["
|
|
704
|
+
expected = ["ルビーの", "指輪"].map{|c| NKF.nkf("--utf8", c)}
|
|
705
705
|
assert_equal(expected, str.split_to_word)
|
|
706
706
|
end
|
|
707
707
|
def test_utf8_split_to_word_hiraonbiki()
|
|
708
|
-
str = NKF.nkf("
|
|
708
|
+
str = NKF.nkf("--utf8", "わールビーだ").extend CharString
|
|
709
709
|
str.encoding = "UTF-8"
|
|
710
|
-
expected = ["
|
|
710
|
+
expected = ["わー", "ルビーだ"].map{|c| NKF.nkf("--utf8", c)}
|
|
711
711
|
assert_equal(expected, str.split_to_word)
|
|
712
712
|
end
|
|
713
713
|
def test_utf8_split_to_word_latinmix()
|
|
714
|
-
str = NKF.nkf("
|
|
714
|
+
str = NKF.nkf("--utf8", "日本語とLatinの文字").extend CharString
|
|
715
715
|
str.encoding = "UTF-8"
|
|
716
|
-
expected = ["
|
|
716
|
+
expected = ["日本語と", "Latin", "の", "文字"].map{|c| NKF.nkf("--utf8", c)}
|
|
717
717
|
assert_equal(expected, str.split_to_word)
|
|
718
718
|
end
|
|
719
719
|
def test_utf8_split_to_char()
|
|
720
|
-
str = NKF.nkf("
|
|
720
|
+
str = NKF.nkf("--utf8", "日本語a b").extend CharString
|
|
721
721
|
str.encoding = "UTF-8" #<= needed to pass the test
|
|
722
722
|
str.eol = "LF" #<= needed to pass the test
|
|
723
|
-
expected = ["
|
|
723
|
+
expected = ["日", "本", "語", "a", " ", "b"].map{|c| NKF.nkf("--utf8", c)}
|
|
724
724
|
assert_equal(expected, str.split_to_char)
|
|
725
725
|
end
|
|
726
726
|
def test_utf8_split_to_char_with_cr()
|
|
727
|
-
str = NKF.nkf("
|
|
727
|
+
str = NKF.nkf("--utf8", "日本語a b\r").extend CharString
|
|
728
728
|
str.encoding = "UTF-8" #<= needed to pass the test
|
|
729
729
|
str.eol = "CR"
|
|
730
|
-
expected = ["
|
|
730
|
+
expected = ["日","本","語","a"," ","b","\r"].map{|c| NKF.nkf("--utf8", c)}
|
|
731
731
|
assert_equal(expected, str.split_to_char)
|
|
732
732
|
end
|
|
733
733
|
def test_utf8_split_to_char_with_lf()
|
|
734
|
-
str = NKF.nkf("
|
|
734
|
+
str = NKF.nkf("--utf8", "日本語a b\n").extend CharString
|
|
735
735
|
str.encoding = "UTF-8" #<= needed to pass the test
|
|
736
736
|
str.eol = "LF"
|
|
737
|
-
expected = ["
|
|
737
|
+
expected = ["日","本","語","a"," ","b","\n"].map{|c| NKF.nkf("--utf8", c)}
|
|
738
738
|
assert_equal(expected, str.split_to_char)
|
|
739
739
|
end
|
|
740
740
|
def test_utf8_split_to_char_with_crlf()
|
|
741
|
-
str = NKF.nkf("
|
|
741
|
+
str = NKF.nkf("--utf8", "日本語a b\r\n").extend CharString
|
|
742
742
|
str.encoding = "UTF-8"#<= needed to pass the test
|
|
743
743
|
str.eol = "CRLF"
|
|
744
|
-
expected = ["
|
|
744
|
+
expected = ["日","本","語","a"," ","b","\r\n"].map{|c| NKF.nkf("--utf8", c)}
|
|
745
745
|
assert_equal(expected, str.split_to_char)
|
|
746
746
|
end
|
|
747
747
|
def test_utf8_count_char()
|
|
748
|
-
str = NKF.nkf("
|
|
748
|
+
str = NKF.nkf("--utf8", "日本語a b\r\n").extend CharString
|
|
749
749
|
str.encoding = "UTF-8" #<= needed to pass the test
|
|
750
750
|
str.eol = "CRLF"
|
|
751
751
|
expected = 7
|
|
752
752
|
assert_equal(expected, str.count_char)
|
|
753
753
|
end
|
|
754
754
|
def test_utf8_count_latin_graph_char()
|
|
755
|
-
str = NKF.nkf("
|
|
755
|
+
str = NKF.nkf("--utf8", "日本語a b\r\n").extend CharString
|
|
756
756
|
str.encoding = "UTF-8" #<= needed to pass the test
|
|
757
757
|
str.eol = "CRLF"
|
|
758
758
|
expected = 2
|
|
759
759
|
assert_equal(expected, str.count_latin_graph_char)
|
|
760
760
|
end
|
|
761
761
|
def test_utf8_count_ja_graph_char()
|
|
762
|
-
str = NKF.nkf("
|
|
762
|
+
str = NKF.nkf("--utf8", "日本語a b\r\n").extend CharString
|
|
763
763
|
str.encoding = "UTF-8" #<= needed to pass the test
|
|
764
764
|
str.eol = "CRLF"
|
|
765
765
|
expected = 3
|
|
766
766
|
assert_equal(expected, str.count_ja_graph_char)
|
|
767
767
|
end
|
|
768
768
|
def test_utf8_count_graph_char()
|
|
769
|
-
str = NKF.nkf("
|
|
769
|
+
str = NKF.nkf("--utf8", "日本語a b\r\n").extend CharString
|
|
770
770
|
str.encoding = "UTF-8" #<= needed to passs the test
|
|
771
771
|
str.eol = "CRLF"
|
|
772
772
|
expected = 5
|
|
773
773
|
assert_equal(expected, str.count_graph_char)
|
|
774
774
|
end
|
|
775
775
|
def test_utf8_count_latin_blank_char()
|
|
776
|
-
str = NKF.nkf("
|
|
776
|
+
str = NKF.nkf("--utf8", "日本語\ta b\r\n").extend CharString
|
|
777
777
|
str.encoding = "UTF-8"
|
|
778
778
|
str.eol = "CRLF"
|
|
779
779
|
expected = 2
|
|
780
780
|
assert_equal(expected, str.count_latin_blank_char)
|
|
781
781
|
end
|
|
782
782
|
def test_utf8_count_ja_blank_char()
|
|
783
|
-
str = NKF.nkf("
|
|
783
|
+
str = NKF.nkf("--utf8", "日本 語\ta b\r\n").extend CharString
|
|
784
784
|
str.encoding = "UTF-8"
|
|
785
785
|
str.eol = "CRLF"
|
|
786
786
|
expected = 1
|
|
787
787
|
assert_equal(expected, str.count_ja_blank_char)
|
|
788
788
|
end
|
|
789
789
|
def test_utf8_count_blank_char()
|
|
790
|
-
str = NKF.nkf("
|
|
790
|
+
str = NKF.nkf("--utf8", "日本 語\ta b\r\n").extend CharString
|
|
791
791
|
str.encoding = "UTF-8"
|
|
792
792
|
str.eol = "CRLF"
|
|
793
793
|
expected = 3
|
|
794
794
|
assert_equal(expected, str.count_blank_char)
|
|
795
795
|
end
|
|
796
796
|
def test_utf8_count_word()
|
|
797
|
-
str = NKF.nkf("
|
|
797
|
+
str = NKF.nkf("--utf8", "日本 語a b --\r\n").extend CharString
|
|
798
798
|
str.encoding = "UTF-8"
|
|
799
799
|
str.eol = "CRLF"
|
|
800
800
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
801
801
|
assert_equal(expected, str.count_word)
|
|
802
802
|
end
|
|
803
803
|
def test_utf8_count_ja_word()
|
|
804
|
-
str = NKF.nkf("
|
|
804
|
+
str = NKF.nkf("--utf8", "日本 語a b --\r\n").extend CharString
|
|
805
805
|
str.encoding = "UTF-8"
|
|
806
806
|
str.eol = "CRLF"
|
|
807
807
|
expected = 3
|
|
808
808
|
assert_equal(expected, str.count_ja_word)
|
|
809
809
|
end
|
|
810
810
|
def test_utf8_count_latin_valid_word()
|
|
811
|
-
str = NKF.nkf("
|
|
811
|
+
str = NKF.nkf("--utf8", "日本 語a b --\r\n").extend CharString
|
|
812
812
|
str.encoding = "UTF-8"
|
|
813
813
|
str.eol = "CRLF"
|
|
814
814
|
expected = 2
|
|
815
815
|
assert_equal(expected, str.count_latin_valid_word)
|
|
816
816
|
end
|
|
817
817
|
def test_utf8_count_ja_valid_word()
|
|
818
|
-
str = NKF.nkf("
|
|
818
|
+
str = NKF.nkf("--utf8", "日本 語a b --\r\n").extend CharString
|
|
819
819
|
str.encoding = "UTF-8"
|
|
820
820
|
str.eol = "CRLF"
|
|
821
821
|
expected = 2
|
|
822
822
|
assert_equal(expected, str.count_ja_valid_word)
|
|
823
823
|
end
|
|
824
824
|
def test_utf8_count_valid_word()
|
|
825
|
-
str = NKF.nkf("
|
|
825
|
+
str = NKF.nkf("--utf8", "日本 語a b --\r\n").extend CharString
|
|
826
826
|
str.encoding = "UTF-8"
|
|
827
827
|
str.eol = "CRLF"
|
|
828
828
|
expected = 4
|
|
829
829
|
assert_equal(expected, str.count_valid_word)
|
|
830
830
|
end
|
|
831
831
|
def test_utf8_count_line()
|
|
832
|
-
str = NKF.nkf("
|
|
832
|
+
str = NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
833
833
|
str.encoding = "UTF-8"
|
|
834
834
|
str.eol = "CRLF"
|
|
835
835
|
expected = 6
|
|
836
836
|
assert_equal(expected, str.count_line)
|
|
837
837
|
end
|
|
838
838
|
def test_utf8_count_graph_line()
|
|
839
|
-
str = NKF.nkf("
|
|
839
|
+
str = NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
840
840
|
str.encoding = "UTF-8"
|
|
841
841
|
str.eol = "CRLF"
|
|
842
842
|
expected = 3
|
|
843
843
|
assert_equal(expected, str.count_graph_line)
|
|
844
844
|
end
|
|
845
845
|
def test_utf8_count_empty_line()
|
|
846
|
-
str = NKF.nkf("
|
|
846
|
+
str = NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
847
847
|
str.encoding = "UTF-8"
|
|
848
848
|
str.eol = "CRLF"
|
|
849
849
|
expected = 1
|
|
850
850
|
assert_equal(expected, str.count_empty_line)
|
|
851
851
|
end
|
|
852
852
|
def test_utf8_count_blank_line()
|
|
853
|
-
str = NKF.nkf("
|
|
853
|
+
str = NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend CharString
|
|
854
854
|
str.encoding = "UTF-8"
|
|
855
855
|
str.eol = "CRLF"
|
|
856
856
|
expected = 2
|
|
@@ -890,52 +890,52 @@ class TC_DocDiff_CharString < Test::Unit::TestCase
|
|
|
890
890
|
end
|
|
891
891
|
# CharString.guess_encoding mistakes JIS for ASCII sometimes, due to Iconv.
|
|
892
892
|
# def test_guess_encoding_jis_1()
|
|
893
|
-
# str = NKF.nkf("
|
|
893
|
+
# str = NKF.nkf("--jis", "漢字とカタカナとひらがな\n")
|
|
894
894
|
# expected = "JIS"
|
|
895
895
|
# assert_guess_encoding(expected, str)
|
|
896
896
|
# end
|
|
897
897
|
# def test_guess_encoding_jis_2()
|
|
898
|
-
# str = NKF.nkf("
|
|
898
|
+
# str = NKF.nkf("--jis", "漢字とカタカナとひらがなとLatinの文字と空白( )と記号@\n" * 100)
|
|
899
899
|
# expected = "JIS"
|
|
900
900
|
# assert_guess_encoding(expected, str)
|
|
901
901
|
# end
|
|
902
902
|
def test_guess_encoding_eucjp_1()
|
|
903
|
-
str = NKF.nkf("
|
|
903
|
+
str = NKF.nkf("--euc", "日本語とLatinの文字")
|
|
904
904
|
expected = "EUC-JP"
|
|
905
905
|
assert_guess_encoding(expected, str)
|
|
906
906
|
end
|
|
907
907
|
def test_guess_encoding_eucjp_2()
|
|
908
|
-
str = NKF.nkf(
|
|
908
|
+
str = NKF.nkf("--euc", "漢字とカタカナとひらがなとLatinの文字と空白( )\n" * 10)
|
|
909
909
|
expected = "EUC-JP"
|
|
910
910
|
assert_guess_encoding(expected, str)
|
|
911
911
|
end
|
|
912
912
|
def test_guess_encoding_eucjp_3()
|
|
913
|
-
str = NKF.nkf(
|
|
913
|
+
str = NKF.nkf("--euc", "こんばんは、私の名前はまつもとです。\nRubyを作ったのは私です。私はRuby Hackerです。\n")
|
|
914
914
|
expected = "EUC-JP"
|
|
915
915
|
assert_guess_encoding(expected, str)
|
|
916
916
|
end
|
|
917
917
|
def test_guess_encoding_sjis_1()
|
|
918
|
-
str = NKF.nkf("
|
|
918
|
+
str = NKF.nkf("--sjis", "日本語とLatinの文字")
|
|
919
919
|
expected = "Shift_JIS"
|
|
920
920
|
assert_guess_encoding(expected, str)
|
|
921
921
|
end
|
|
922
922
|
def test_guess_encoding_sjis_2()
|
|
923
|
-
str = NKF.nkf(
|
|
923
|
+
str = NKF.nkf("--sjis", "漢字と\nカタカナと\nひらがなと\nLatin")
|
|
924
924
|
expected = "Shift_JIS"
|
|
925
925
|
assert_guess_encoding(expected, str)
|
|
926
926
|
end
|
|
927
927
|
def test_guess_encoding_cp932_1()
|
|
928
|
-
str = NKF.nkf(
|
|
928
|
+
str = NKF.nkf("--oc=CP932", "\\u2460") # CIRCLED DIGIT ONE
|
|
929
929
|
expected = "Windows-31J" # CP932 == Windows-31J in Ruby 1.9+
|
|
930
930
|
assert_guess_encoding(expected, str)
|
|
931
931
|
end
|
|
932
932
|
def test_guess_encoding_utf8_1()
|
|
933
|
-
str = NKF.nkf("
|
|
933
|
+
str = NKF.nkf("--utf8", "日本語とLatinの文字")
|
|
934
934
|
expected = "UTF-8"
|
|
935
935
|
assert_guess_encoding(expected, str)
|
|
936
936
|
end
|
|
937
937
|
def test_guess_encoding_utf8_2()
|
|
938
|
-
str = NKF.nkf("
|
|
938
|
+
str = NKF.nkf("--utf8", "いろは\nにほへと\n")
|
|
939
939
|
expected = "UTF-8"
|
|
940
940
|
assert_guess_encoding(expected, str)
|
|
941
941
|
end
|