docdiff 0.6.2 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Makefile +27 -49
- data/README.md +351 -0
- data/README_ja.md +351 -0
- data/Rakefile +2 -42
- data/bin/docdiff +53 -30
- data/{docdiff.conf.example → doc/example/docdiff.conf.example} +4 -3
- data/doc/man/docdiff.adoc +146 -0
- data/doc/news.md +180 -0
- data/doc/shell_completion/_docdiff.zsh +51 -0
- data/doc/shell_completion/docdiff.bash +68 -0
- data/docdiff.gemspec +1 -0
- data/lib/doc_diff.rb +13 -0
- data/lib/docdiff/version.rb +1 -1
- data/lib/docdiff/view.rb +4 -4
- data/test/charstring_test.rb +121 -121
- data/test/docdiff_test.rb +1 -1
- data/test/document_test.rb +109 -109
- data/test/fixture/01_ja_utf8_lf.txt +2 -0
- data/test/fixture/02_ja_utf8_lf.txt +2 -0
- data/test/view_test.rb +135 -111
- metadata +39 -36
- data/devutil/changelog.sh +0 -40
- data/index.html +0 -181
- data/langfilter.rb +0 -10
- data/readme.html +0 -750
- data/readme.md +0 -185
- /data/{docdiffwebui.cgi → doc/example/docdiffwebui.cgi} +0 -0
- /data/{docdiffwebui.html → doc/example/docdiffwebui.html} +0 -0
- /data/{img/docdiff-screenshot-format-html-digest-firefox.png → doc/img/screenshot-format-html-digest-firefox.png} +0 -0
- /data/{img/docdiff-screenshot-format-html-firefox.png → doc/img/screenshot-format-html-firefox.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-cmdexe-en.png → doc/img/screenshot-format-tty-cmdexe-en.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-cmdexe-ja.png → doc/img/screenshot-format-tty-cmdexe-ja.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-rxvtunicode-en.png → doc/img/screenshot-format-tty-rxvtunicode-en.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-rxvtunicode-ja.png → doc/img/screenshot-format-tty-rxvtunicode-ja.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-xterm-en.png → doc/img/screenshot-format-tty-xterm-en.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-xterm-ja.png → doc/img/screenshot-format-tty-xterm-ja.png} +0 -0
- /data/{img/docdiff-screenshot-resolution-linewordchar-xterm.png → doc/img/screenshot-resolution-linewordchar-xterm.png} +0 -0
- /data/{sample/01.en.ascii.cr → test/fixture/01_en_ascii_cr.txt} +0 -0
- /data/{sample/01.en.ascii.crlf → test/fixture/01_en_ascii_crlf.txt} +0 -0
- /data/{sample/01.en.ascii.lf → test/fixture/01_en_ascii_lf.txt} +0 -0
- /data/{sample/01.ja.eucjp.lf → test/fixture/01_ja_eucjp_lf.txt} +0 -0
- /data/{sample/01.ja.sjis.cr → test/fixture/01_ja_sjis_cr.txt} +0 -0
- /data/{sample/01.ja.sjis.crlf → test/fixture/01_ja_sjis_crlf.txt} +0 -0
- /data/{sample/01.ja.utf8.crlf → test/fixture/01_ja_utf8_crlf.txt} +0 -0
- /data/{sample/02.en.ascii.cr → test/fixture/02_en_ascii_cr.txt} +0 -0
- /data/{sample/02.en.ascii.crlf → test/fixture/02_en_ascii_crlf.txt} +0 -0
- /data/{sample/02.en.ascii.lf → test/fixture/02_en_ascii_lf.txt} +0 -0
- /data/{sample/02.ja.eucjp.lf → test/fixture/02_ja_eucjp_lf.txt} +0 -0
- /data/{sample/02.ja.sjis.cr → test/fixture/02_ja_sjis_cr.txt} +0 -0
- /data/{sample/02.ja.sjis.crlf → test/fixture/02_ja_sjis_crlf.txt} +0 -0
- /data/{sample/02.ja.utf8.crlf → test/fixture/02_ja_utf8_crlf.txt} +0 -0
- /data/{sample/humpty_dumpty01.ascii.lf → test/fixture/humpty_dumpty01_ascii_lf.txt} +0 -0
- /data/{sample/humpty_dumpty02.ascii.lf → test/fixture/humpty_dumpty02_ascii_lf.txt} +0 -0
data/test/docdiff_test.rb
CHANGED
|
@@ -190,7 +190,7 @@ class TC_DocDiff < Test::Unit::TestCase
|
|
|
190
190
|
def test_cli()
|
|
191
191
|
expected = "Hello, my name is [-Watanabe.-]{+matz.+}\n"
|
|
192
192
|
cmd = "ruby -I lib bin/docdiff --wdiff" +
|
|
193
|
-
"
|
|
193
|
+
" test/fixture/01_en_ascii_lf.txt test/fixture/02_en_ascii_lf.txt"
|
|
194
194
|
actual = `#{cmd}`.scan(/^.*?$\n/m).first
|
|
195
195
|
assert_equal(expected, actual)
|
|
196
196
|
end
|
data/test/document_test.rb
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/ruby
|
|
2
|
-
# -*- coding:
|
|
2
|
+
# -*- coding: utf-8; -*-
|
|
3
3
|
|
|
4
4
|
# frozen_string_literal: false
|
|
5
5
|
|
|
@@ -223,396 +223,396 @@ class TC_DocDiff_Document < Test::Unit::TestCase
|
|
|
223
223
|
|
|
224
224
|
# test EUCJP module
|
|
225
225
|
def test_eucjp_split_to_word()
|
|
226
|
-
doc = Document.new(NKF.nkf("
|
|
227
|
-
expected = ["
|
|
226
|
+
doc = Document.new(NKF.nkf("--euc", "日本語の文字foo bar"))
|
|
227
|
+
expected = ["日本語の","文字","foo ","bar"].map{|c| NKF.nkf("--euc", c)}
|
|
228
228
|
assert_equal(expected, doc.split_to_word)
|
|
229
229
|
end
|
|
230
230
|
def test_eucjp_split_to_word_kanhira()
|
|
231
|
-
doc = Document.new(NKF.nkf("
|
|
232
|
-
expected = ["
|
|
231
|
+
doc = Document.new(NKF.nkf("--euc", "日本語の文字"))
|
|
232
|
+
expected = ["日本語の", "文字"].map{|c| NKF.nkf("--euc", c)}
|
|
233
233
|
assert_equal(expected, doc.split_to_word)
|
|
234
234
|
end
|
|
235
235
|
def test_eucjp_split_to_word_katahira()
|
|
236
|
-
doc = Document.new(NKF.nkf("
|
|
237
|
-
expected = ["
|
|
236
|
+
doc = Document.new(NKF.nkf("--euc", "カタカナの文字"))
|
|
237
|
+
expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--euc", c)}
|
|
238
238
|
assert_equal(expected, doc.split_to_word)
|
|
239
239
|
end
|
|
240
240
|
def test_eucjp_split_to_word_kataonbiki()
|
|
241
|
-
doc = Document.new(NKF.nkf("
|
|
242
|
-
expected = ["
|
|
241
|
+
doc = Document.new(NKF.nkf("--euc", "ルビー色の石"), "EUC-JP")
|
|
242
|
+
expected = ["ルビー", "色の", "石"].map{|c| NKF.nkf("--euc", c)}
|
|
243
243
|
assert_equal(expected, doc.split_to_word)
|
|
244
244
|
end
|
|
245
245
|
def test_eucjp_split_to_word_hiraonbiki()
|
|
246
|
-
doc = Document.new(NKF.nkf("
|
|
247
|
-
expected = (["
|
|
246
|
+
doc = Document.new(NKF.nkf("--euc", "わールビーだ"), "EUC-JP")
|
|
247
|
+
expected = (["わー", "ルビーだ"]).map{|c| NKF.nkf("--euc", c)}
|
|
248
248
|
assert_equal(expected, doc.split_to_word)
|
|
249
249
|
end
|
|
250
250
|
def test_eucjp_split_to_word_latinmix()
|
|
251
|
-
doc = Document.new(NKF.nkf("
|
|
252
|
-
expected = ["
|
|
251
|
+
doc = Document.new(NKF.nkf("--euc", "日本語とLatinの文字"))
|
|
252
|
+
expected = ["日本語と", "Latin", "の", "文字"].map{|c| NKF.nkf("--euc", c)}
|
|
253
253
|
assert_equal(expected, doc.split_to_word)
|
|
254
254
|
end
|
|
255
255
|
def test_eucjp_split_to_char()
|
|
256
|
-
doc = Document.new(NKF.nkf("
|
|
257
|
-
expected = ["
|
|
256
|
+
doc = Document.new(NKF.nkf("--euc", "日本語a b"))
|
|
257
|
+
expected = ["日","本","語","a"," ","b"].map{|c|NKF.nkf("--euc",c)}
|
|
258
258
|
assert_equal(expected, doc.split_to_char)
|
|
259
259
|
end
|
|
260
260
|
def test_eucjp_split_to_char_with_cr()
|
|
261
|
-
doc = Document.new(NKF.nkf("
|
|
262
|
-
expected = ["
|
|
261
|
+
doc = Document.new(NKF.nkf("--euc", "日本語a b\r"))
|
|
262
|
+
expected = ["日","本","語","a"," ","b","\r"].map{|c|NKF.nkf("--euc",c)}
|
|
263
263
|
assert_equal(expected, doc.split_to_char)
|
|
264
264
|
end
|
|
265
265
|
def test_eucjp_split_to_char_with_lf()
|
|
266
|
-
doc = Document.new(NKF.nkf("
|
|
267
|
-
expected = ["
|
|
266
|
+
doc = Document.new(NKF.nkf("--euc", "日本語a b\n"))
|
|
267
|
+
expected = ["日","本","語","a"," ","b","\n"].map{|c|NKF.nkf("--euc",c)}
|
|
268
268
|
assert_equal(expected, doc.split_to_char)
|
|
269
269
|
end
|
|
270
270
|
def test_eucjp_split_to_char_with_crlf()
|
|
271
|
-
doc = Document.new(NKF.nkf("
|
|
272
|
-
expected = ["
|
|
271
|
+
doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
|
|
272
|
+
expected = ["日","本","語","a"," ","b","\r\n"].map{|c|NKF.nkf("--euc",c)}
|
|
273
273
|
assert_equal(expected, doc.split_to_char)
|
|
274
274
|
end
|
|
275
275
|
def test_eucjp_count_char()
|
|
276
|
-
doc = Document.new(NKF.nkf("
|
|
276
|
+
doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
|
|
277
277
|
expected = 7
|
|
278
278
|
assert_equal(expected, doc.count_char)
|
|
279
279
|
end
|
|
280
280
|
def test_eucjp_count_latin_graph_char()
|
|
281
|
-
doc = Document.new(NKF.nkf("
|
|
281
|
+
doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
|
|
282
282
|
expected = 2
|
|
283
283
|
assert_equal(expected, doc.count_latin_graph_char)
|
|
284
284
|
end
|
|
285
285
|
def test_eucjp_count_ja_graph_char()
|
|
286
|
-
doc = Document.new(NKF.nkf("
|
|
286
|
+
doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
|
|
287
287
|
expected = 3
|
|
288
288
|
assert_equal(expected, doc.count_ja_graph_char)
|
|
289
289
|
end
|
|
290
290
|
def test_eucjp_count_graph_char()
|
|
291
|
-
doc = Document.new(NKF.nkf("
|
|
291
|
+
doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
|
|
292
292
|
expected = 5
|
|
293
293
|
assert_equal(expected, doc.count_graph_char)
|
|
294
294
|
end
|
|
295
295
|
def test_eucjp_count_latin_blank_char()
|
|
296
|
-
doc = Document.new(NKF.nkf("
|
|
296
|
+
doc = Document.new(NKF.nkf("--euc", "日本語\ta b\r\n"))
|
|
297
297
|
expected = 2
|
|
298
298
|
assert_equal(expected, doc.count_latin_blank_char)
|
|
299
299
|
end
|
|
300
300
|
def test_eucjp_count_ja_blank_char()
|
|
301
|
-
doc = Document.new(NKF.nkf("
|
|
301
|
+
doc = Document.new(NKF.nkf("--euc", "日本 語\ta b\r\n"))
|
|
302
302
|
expected = 1
|
|
303
303
|
assert_equal(expected, doc.count_ja_blank_char)
|
|
304
304
|
end
|
|
305
305
|
def test_eucjp_count_blank_char()
|
|
306
|
-
doc = Document.new(NKF.nkf("
|
|
306
|
+
doc = Document.new(NKF.nkf("--euc", "日本 語\ta b\r\n"))
|
|
307
307
|
expected = 3
|
|
308
308
|
assert_equal(expected, doc.count_blank_char)
|
|
309
309
|
end
|
|
310
310
|
def test_eucjp_count_word()
|
|
311
|
-
doc = Document.new(NKF.nkf("
|
|
311
|
+
doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
|
|
312
312
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
313
313
|
assert_equal(expected, doc.count_word)
|
|
314
314
|
end
|
|
315
315
|
def test_eucjp_count_ja_word()
|
|
316
|
-
doc = Document.new(NKF.nkf("
|
|
316
|
+
doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
|
|
317
317
|
expected = 3
|
|
318
318
|
assert_equal(expected, doc.count_ja_word)
|
|
319
319
|
end
|
|
320
320
|
def test_eucjp_count_latin_valid_word()
|
|
321
|
-
doc = Document.new(NKF.nkf("
|
|
321
|
+
doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
|
|
322
322
|
expected = 2
|
|
323
323
|
assert_equal(expected, doc.count_latin_valid_word)
|
|
324
324
|
end
|
|
325
325
|
def test_eucjp_count_ja_valid_word()
|
|
326
|
-
doc = Document.new(NKF.nkf("
|
|
326
|
+
doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
|
|
327
327
|
expected = 2
|
|
328
328
|
assert_equal(expected, doc.count_ja_valid_word)
|
|
329
329
|
end
|
|
330
330
|
def test_eucjp_count_valid_word()
|
|
331
|
-
doc = Document.new(NKF.nkf("
|
|
331
|
+
doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
|
|
332
332
|
expected = 4
|
|
333
333
|
assert_equal(expected, doc.count_valid_word)
|
|
334
334
|
end
|
|
335
335
|
def test_eucjp_count_line()
|
|
336
|
-
doc = Document.new(NKF.nkf("
|
|
336
|
+
doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
337
337
|
expected = 6
|
|
338
338
|
assert_equal(expected, doc.count_line)
|
|
339
339
|
end
|
|
340
340
|
def test_eucjp_count_graph_line()
|
|
341
|
-
doc = Document.new(NKF.nkf("
|
|
341
|
+
doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
342
342
|
expected = 3
|
|
343
343
|
assert_equal(expected, doc.count_graph_line)
|
|
344
344
|
end
|
|
345
345
|
def test_eucjp_count_empty_line()
|
|
346
|
-
doc = Document.new(NKF.nkf("
|
|
346
|
+
doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
347
347
|
expected = 1
|
|
348
348
|
assert_equal(expected, doc.count_empty_line)
|
|
349
349
|
end
|
|
350
350
|
def test_eucjp_count_blank_line()
|
|
351
|
-
doc = Document.new(NKF.nkf("
|
|
351
|
+
doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
352
352
|
expected = 2
|
|
353
353
|
assert_equal(expected, doc.count_blank_line)
|
|
354
354
|
end
|
|
355
355
|
|
|
356
356
|
# test SJIS module
|
|
357
357
|
def test_sjis_split_to_word()
|
|
358
|
-
doc = Document.new(NKF.nkf("
|
|
359
|
-
expected = ["
|
|
358
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語の文字foo bar"))
|
|
359
|
+
expected = ["日本語の", "文字", "foo ", "bar"].map{|c|NKF.nkf("--sjis",c)}
|
|
360
360
|
assert_equal(expected, doc.split_to_word)
|
|
361
361
|
end
|
|
362
362
|
def test_sjisplit_s_to_word_kanhira()
|
|
363
|
-
doc = Document.new(NKF.nkf("
|
|
364
|
-
expected = ["
|
|
363
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語の文字"))
|
|
364
|
+
expected = ["日本語の", "文字"].map{|c| NKF.nkf("--sjis", c)}
|
|
365
365
|
assert_equal(expected, doc.split_to_word)
|
|
366
366
|
end
|
|
367
367
|
def test_sjis_split_to_word_katahira()
|
|
368
|
-
doc = Document.new(NKF.nkf("
|
|
369
|
-
expected = ["
|
|
368
|
+
doc = Document.new(NKF.nkf("--sjis", "カタカナの文字"))
|
|
369
|
+
expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--sjis", c)}
|
|
370
370
|
assert_equal(expected, doc.split_to_word)
|
|
371
371
|
end
|
|
372
372
|
def test_sjis_split_to_word_kataonbiki()
|
|
373
|
-
doc = Document.new(NKF.nkf("
|
|
374
|
-
expected = ["
|
|
373
|
+
doc = Document.new(NKF.nkf("--sjis", "ルビーの指輪"))
|
|
374
|
+
expected = ["ルビーの", "指輪"].map{|c| NKF.nkf("--sjis", c)}
|
|
375
375
|
assert_equal(expected, doc.split_to_word)
|
|
376
376
|
end
|
|
377
377
|
def test_sjis_split_to_word_hiraonbiki()
|
|
378
|
-
doc = Document.new(NKF.nkf("
|
|
379
|
-
expected = ["
|
|
378
|
+
doc = Document.new(NKF.nkf("--sjis", "わールビーだ"))
|
|
379
|
+
expected = ["わー", "ルビーだ"].map{|c| NKF.nkf("--sjis", c)}
|
|
380
380
|
assert_equal(expected, doc.split_to_word)
|
|
381
381
|
end
|
|
382
382
|
def test_sjis_split_to_word_latinmix()
|
|
383
|
-
doc = Document.new(NKF.nkf("
|
|
384
|
-
expected = ["
|
|
383
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語とLatinの文字"))
|
|
384
|
+
expected = ["日本語と","Latin","の","文字"].map{|c| NKF.nkf("--sjis", c)}
|
|
385
385
|
assert_equal(expected, doc.split_to_word)
|
|
386
386
|
end
|
|
387
387
|
def test_sjis_split_to_char()
|
|
388
|
-
doc = Document.new(NKF.nkf("
|
|
389
|
-
expected = ["
|
|
388
|
+
doc = Document.new(NKF.nkf("--sjis", "表計算a b"))
|
|
389
|
+
expected = ["表","計","算","a"," ","b"].map{|c|NKF.nkf("--sjis",c)}
|
|
390
390
|
assert_equal(expected, doc.split_to_char)
|
|
391
391
|
end
|
|
392
392
|
def test_sjis_split_to_char_with_cr()
|
|
393
|
-
doc = Document.new(NKF.nkf("
|
|
394
|
-
expected = ["
|
|
393
|
+
doc = Document.new(NKF.nkf("--sjis", "表計算a b\r"))
|
|
394
|
+
expected = ["表","計","算","a"," ","b","\r"].map{|c|NKF.nkf("--sjis",c)}
|
|
395
395
|
assert_equal(expected, doc.split_to_char)
|
|
396
396
|
end
|
|
397
397
|
def test_sjis_split_to_char_with_lf()
|
|
398
|
-
doc = Document.new(NKF.nkf("
|
|
399
|
-
expected = ["
|
|
398
|
+
doc = Document.new(NKF.nkf("--sjis", "表計算a b\n"))
|
|
399
|
+
expected = ["表","計","算","a"," ","b","\n"].map{|c|NKF.nkf("--sjis",c)}
|
|
400
400
|
assert_equal(expected, doc.split_to_char)
|
|
401
401
|
end
|
|
402
402
|
def test_sjis_split_to_char_with_crlf()
|
|
403
|
-
doc = Document.new(NKF.nkf("
|
|
404
|
-
expected = ["
|
|
403
|
+
doc = Document.new(NKF.nkf("--sjis", "表計算a b\r\n"))
|
|
404
|
+
expected = ["表","計","算","a"," ","b","\r\n"].map{|c|NKF.nkf("--sjis",c)}
|
|
405
405
|
assert_equal(expected, doc.split_to_char)
|
|
406
406
|
end
|
|
407
407
|
def test_sjis_count_char()
|
|
408
|
-
doc = Document.new(NKF.nkf("
|
|
408
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
|
|
409
409
|
expected = 7
|
|
410
410
|
assert_equal(expected, doc.count_char)
|
|
411
411
|
end
|
|
412
412
|
def test_sjis_count_latin_graph_char()
|
|
413
|
-
doc = Document.new(NKF.nkf("
|
|
413
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
|
|
414
414
|
expected = 2
|
|
415
415
|
assert_equal(expected, doc.count_latin_graph_char)
|
|
416
416
|
end
|
|
417
417
|
def test_sjis_count_ja_graph_char()
|
|
418
|
-
doc = Document.new(NKF.nkf("
|
|
418
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
|
|
419
419
|
expected = 3
|
|
420
420
|
assert_equal(expected, doc.count_ja_graph_char)
|
|
421
421
|
end
|
|
422
422
|
def test_sjis_count_graph_char()
|
|
423
|
-
doc = Document.new(NKF.nkf("
|
|
423
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
|
|
424
424
|
expected = 5
|
|
425
425
|
assert_equal(expected, doc.count_graph_char)
|
|
426
426
|
end
|
|
427
427
|
def test_sjis_count_latin_blank_char()
|
|
428
|
-
doc = Document.new(NKF.nkf("
|
|
428
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語\ta b\r\n"))
|
|
429
429
|
expected = 2
|
|
430
430
|
assert_equal(expected, doc.count_latin_blank_char)
|
|
431
431
|
end
|
|
432
432
|
def test_sjis_count_ja_blank_char()
|
|
433
|
-
doc = Document.new(NKF.nkf("
|
|
433
|
+
doc = Document.new(NKF.nkf("--sjis", "日本 語\ta b\r\n"))
|
|
434
434
|
expected = 1
|
|
435
435
|
assert_equal(expected, doc.count_ja_blank_char)
|
|
436
436
|
end
|
|
437
437
|
def test_sjis_count_blank_char()
|
|
438
|
-
doc = Document.new(NKF.nkf("
|
|
438
|
+
doc = Document.new(NKF.nkf("--sjis", "日本 語\ta b\r\n"))
|
|
439
439
|
expected = 3
|
|
440
440
|
assert_equal(expected, doc.count_blank_char)
|
|
441
441
|
end
|
|
442
442
|
def test_sjis_count_word()
|
|
443
|
-
doc = Document.new(NKF.nkf("
|
|
443
|
+
doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
|
|
444
444
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
445
445
|
assert_equal(expected, doc.count_word)
|
|
446
446
|
end
|
|
447
447
|
def test_sjis_count_ja_word()
|
|
448
|
-
doc = Document.new(NKF.nkf("
|
|
448
|
+
doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
|
|
449
449
|
expected = 3
|
|
450
450
|
assert_equal(expected, doc.count_ja_word)
|
|
451
451
|
end
|
|
452
452
|
def test_sjis_count_latin_valid_word()
|
|
453
|
-
doc = Document.new(NKF.nkf("
|
|
453
|
+
doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
|
|
454
454
|
expected = 2
|
|
455
455
|
assert_equal(expected, doc.count_latin_valid_word)
|
|
456
456
|
end
|
|
457
457
|
def test_sjis_count_ja_valid_word()
|
|
458
|
-
doc = Document.new(NKF.nkf("
|
|
458
|
+
doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
|
|
459
459
|
expected = 2
|
|
460
460
|
assert_equal(expected, doc.count_ja_valid_word)
|
|
461
461
|
end
|
|
462
462
|
def test_sjis_count_valid_word()
|
|
463
|
-
doc = Document.new(NKF.nkf("
|
|
463
|
+
doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
|
|
464
464
|
expected = 4
|
|
465
465
|
assert_equal(expected, doc.count_valid_word)
|
|
466
466
|
end
|
|
467
467
|
def test_sjis_count_line()
|
|
468
|
-
doc = Document.new(NKF.nkf("
|
|
468
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
469
469
|
expected = 6
|
|
470
470
|
assert_equal(expected, doc.count_line)
|
|
471
471
|
end
|
|
472
472
|
def test_sjis_count_graph_line()
|
|
473
|
-
doc = Document.new(NKF.nkf("
|
|
473
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
474
474
|
expected = 3
|
|
475
475
|
assert_equal(expected, doc.count_graph_line)
|
|
476
476
|
end
|
|
477
477
|
def test_sjis_count_empty_line()
|
|
478
|
-
doc = Document.new(NKF.nkf("
|
|
478
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
479
479
|
expected = 1
|
|
480
480
|
assert_equal(expected, doc.count_empty_line)
|
|
481
481
|
end
|
|
482
482
|
def test_sjis_count_blank_line()
|
|
483
|
-
doc = Document.new(NKF.nkf("
|
|
483
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
484
484
|
expected = 2
|
|
485
485
|
assert_equal(expected, doc.count_blank_line)
|
|
486
486
|
end
|
|
487
487
|
|
|
488
488
|
# test UTF8 module
|
|
489
489
|
def test_utf8_split_to_word()
|
|
490
|
-
doc = Document.new(NKF.nkf("
|
|
491
|
-
expected = ["
|
|
490
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語の文字foo bar"))
|
|
491
|
+
expected = ["日本語の", "文字", "foo ", "bar"].map{|c| NKF.nkf("--utf8", c)}
|
|
492
492
|
assert_equal(expected, doc.split_to_word)
|
|
493
493
|
end
|
|
494
494
|
def test_utf8_split_to_word_kanhira()
|
|
495
|
-
doc = Document.new(NKF.nkf("
|
|
496
|
-
expected = ["
|
|
495
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語の文字"))
|
|
496
|
+
expected = ["日本語の", "文字"].map{|c| NKF.nkf("--utf8", c)}
|
|
497
497
|
assert_equal(expected, doc.split_to_word)
|
|
498
498
|
end
|
|
499
499
|
def test_utf8_split_to_word_katahira()
|
|
500
|
-
doc = Document.new(NKF.nkf("
|
|
501
|
-
expected = ["
|
|
500
|
+
doc = Document.new(NKF.nkf("--utf8", "カタカナの文字"))
|
|
501
|
+
expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--utf8", c)}
|
|
502
502
|
assert_equal(expected, doc.split_to_word)
|
|
503
503
|
end
|
|
504
504
|
def test_utf8_split_to_word_kataonbiki()
|
|
505
|
-
doc = Document.new(NKF.nkf("
|
|
506
|
-
expected = ["
|
|
505
|
+
doc = Document.new(NKF.nkf("--utf8", "ルビーの指輪"))
|
|
506
|
+
expected = ["ルビーの", "指輪"].map{|c| NKF.nkf("--utf8", c)}
|
|
507
507
|
assert_equal(expected, doc.split_to_word)
|
|
508
508
|
end
|
|
509
509
|
def test_utf8_split_to_word_hiraonbiki()
|
|
510
|
-
doc = Document.new(NKF.nkf("
|
|
511
|
-
expected = ["
|
|
510
|
+
doc = Document.new(NKF.nkf("--utf8", "わールビーだ"))
|
|
511
|
+
expected = ["わー", "ルビーだ"].map{|c| NKF.nkf("--utf8", c)}
|
|
512
512
|
assert_equal(expected, doc.split_to_word)
|
|
513
513
|
end
|
|
514
514
|
def test_utf8_split_to_word_latinmix()
|
|
515
|
-
doc = Document.new(NKF.nkf("
|
|
516
|
-
expected = ["
|
|
515
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語とLatinの文字"))
|
|
516
|
+
expected = ["日本語と", "Latin", "の", "文字"].map{|c| NKF.nkf("--utf8", c)}
|
|
517
517
|
assert_equal(expected, doc.split_to_word)
|
|
518
518
|
end
|
|
519
519
|
def test_utf8_split_to_char()
|
|
520
|
-
doc = Document.new(NKF.nkf("
|
|
521
|
-
expected = ["
|
|
520
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語a b"), "UTF-8")
|
|
521
|
+
expected = ["日", "本", "語", "a", " ", "b"].map{|c| NKF.nkf("--utf8", c)}
|
|
522
522
|
assert_equal(expected, doc.split_to_char)
|
|
523
523
|
end
|
|
524
524
|
def test_utf8_split_to_char_with_cr()
|
|
525
|
-
doc = Document.new(NKF.nkf("
|
|
526
|
-
expected = ["
|
|
525
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r"), "UTF-8")
|
|
526
|
+
expected = ["日","本","語","a"," ","b","\r"].map{|c| NKF.nkf("--utf8", c)}
|
|
527
527
|
assert_equal(expected, doc.split_to_char)
|
|
528
528
|
end
|
|
529
529
|
def test_utf8_split_to_char_with_lf()
|
|
530
|
-
doc = Document.new(NKF.nkf("
|
|
531
|
-
expected = ["
|
|
530
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語a b\n"), "UTF-8")
|
|
531
|
+
expected = ["日","本","語","a"," ","b","\n"].map{|c| NKF.nkf("--utf8", c)}
|
|
532
532
|
assert_equal(expected, doc.split_to_char)
|
|
533
533
|
end
|
|
534
534
|
def test_utf8_split_to_char_with_crlf()
|
|
535
|
-
doc = Document.new(NKF.nkf("
|
|
536
|
-
expected = ["
|
|
535
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
|
|
536
|
+
expected = ["日","本","語","a"," ","b","\r\n"].map{|c| NKF.nkf("--utf8", c)}
|
|
537
537
|
assert_equal(expected, doc.split_to_char)
|
|
538
538
|
end
|
|
539
539
|
def test_utf8_count_char()
|
|
540
|
-
doc = Document.new(NKF.nkf("
|
|
540
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
|
|
541
541
|
expected = 7
|
|
542
542
|
assert_equal(expected, doc.count_char)
|
|
543
543
|
end
|
|
544
544
|
def test_utf8_count_latin_graph_char()
|
|
545
|
-
doc = Document.new(NKF.nkf("
|
|
545
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
|
|
546
546
|
expected = 2
|
|
547
547
|
assert_equal(expected, doc.count_latin_graph_char)
|
|
548
548
|
end
|
|
549
549
|
def test_utf8_count_ja_graph_char()
|
|
550
|
-
doc = Document.new(NKF.nkf("
|
|
550
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
|
|
551
551
|
expected = 3
|
|
552
552
|
assert_equal(expected, doc.count_ja_graph_char)
|
|
553
553
|
end
|
|
554
554
|
def test_utf8_count_graph_char()
|
|
555
|
-
doc = Document.new(NKF.nkf("
|
|
555
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
|
|
556
556
|
expected = 5
|
|
557
557
|
assert_equal(expected, doc.count_graph_char)
|
|
558
558
|
end
|
|
559
559
|
def test_utf8_count_latin_blank_char()
|
|
560
|
-
doc = Document.new(NKF.nkf("
|
|
560
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語\ta b\r\n"))
|
|
561
561
|
expected = 2
|
|
562
562
|
assert_equal(expected, doc.count_latin_blank_char)
|
|
563
563
|
end
|
|
564
564
|
def test_utf8_count_ja_blank_char()
|
|
565
|
-
doc = Document.new(NKF.nkf("
|
|
565
|
+
doc = Document.new(NKF.nkf("--utf8", "日本 語\ta b\r\n"))
|
|
566
566
|
expected = 1
|
|
567
567
|
assert_equal(expected, doc.count_ja_blank_char)
|
|
568
568
|
end
|
|
569
569
|
def test_utf8_count_blank_char()
|
|
570
|
-
doc = Document.new(NKF.nkf("
|
|
570
|
+
doc = Document.new(NKF.nkf("--utf8", "日本 語\ta b\r\n"))
|
|
571
571
|
expected = 3
|
|
572
572
|
assert_equal(expected, doc.count_blank_char)
|
|
573
573
|
end
|
|
574
574
|
def test_utf8_count_word()
|
|
575
|
-
doc = Document.new(NKF.nkf("
|
|
575
|
+
doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
|
|
576
576
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
577
577
|
assert_equal(expected, doc.count_word)
|
|
578
578
|
end
|
|
579
579
|
def test_utf8_count_ja_word()
|
|
580
|
-
doc = Document.new(NKF.nkf("
|
|
580
|
+
doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
|
|
581
581
|
expected = 3
|
|
582
582
|
assert_equal(expected, doc.count_ja_word)
|
|
583
583
|
end
|
|
584
584
|
def test_utf8_count_latin_valid_word()
|
|
585
|
-
doc = Document.new(NKF.nkf("
|
|
585
|
+
doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
|
|
586
586
|
expected = 2
|
|
587
587
|
assert_equal(expected, doc.count_latin_valid_word)
|
|
588
588
|
end
|
|
589
589
|
def test_utf8_count_ja_valid_word()
|
|
590
|
-
doc = Document.new(NKF.nkf("
|
|
590
|
+
doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
|
|
591
591
|
expected = 2
|
|
592
592
|
assert_equal(expected, doc.count_ja_valid_word)
|
|
593
593
|
end
|
|
594
594
|
def test_utf8_count_valid_word()
|
|
595
|
-
doc = Document.new(NKF.nkf("
|
|
595
|
+
doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
|
|
596
596
|
expected = 4
|
|
597
597
|
assert_equal(expected, doc.count_valid_word)
|
|
598
598
|
end
|
|
599
599
|
def test_utf8_count_line()
|
|
600
|
-
doc = Document.new(NKF.nkf("
|
|
600
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
601
601
|
expected = 6
|
|
602
602
|
assert_equal(expected, doc.count_line)
|
|
603
603
|
end
|
|
604
604
|
def test_utf8_count_graph_line()
|
|
605
|
-
doc = Document.new(NKF.nkf("
|
|
605
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
606
606
|
expected = 3
|
|
607
607
|
assert_equal(expected, doc.count_graph_line)
|
|
608
608
|
end
|
|
609
609
|
def test_utf8_count_empty_line()
|
|
610
|
-
doc = Document.new(NKF.nkf("
|
|
610
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
611
611
|
expected = 1
|
|
612
612
|
assert_equal(expected, doc.count_empty_line)
|
|
613
613
|
end
|
|
614
614
|
def test_utf8_count_blank_line()
|
|
615
|
-
doc = Document.new(NKF.nkf("
|
|
615
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
616
616
|
expected = 2
|
|
617
617
|
assert_equal(expected, doc.count_blank_line)
|
|
618
618
|
end
|