docdiff 0.6.1 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Makefile +27 -49
- data/README.md +351 -0
- data/README_ja.md +351 -0
- data/Rakefile +2 -42
- data/bin/docdiff +66 -29
- data/{docdiff.conf.example → doc/example/docdiff.conf.example} +4 -3
- data/doc/man/docdiff.adoc +146 -0
- data/doc/news.md +180 -0
- data/doc/shell_completion/_docdiff.zsh +51 -0
- data/doc/shell_completion/docdiff.bash +68 -0
- data/docdiff.gemspec +2 -1
- data/lib/doc_diff.rb +13 -0
- data/lib/docdiff/charstring.rb +4 -3
- data/lib/docdiff/diff/unidiff.rb +0 -1
- data/lib/docdiff/encoding/en_ascii.rb +12 -39
- data/lib/docdiff/encoding/ja_eucjp.rb +12 -39
- data/lib/docdiff/encoding/ja_sjis.rb +12 -39
- data/lib/docdiff/encoding/ja_utf8.rb +12 -39
- data/lib/docdiff/version.rb +1 -1
- data/lib/docdiff/view.rb +16 -8
- data/test/charstring_test.rb +124 -121
- data/test/docdiff_test.rb +5 -3
- data/test/document_test.rb +112 -109
- data/test/fixture/01_ja_utf8_lf.txt +2 -0
- data/test/fixture/02_ja_utf8_lf.txt +2 -0
- data/test/view_test.rb +135 -111
- metadata +41 -43
- data/devutil/changelog.sh +0 -40
- data/index.html +0 -181
- data/langfilter.rb +0 -10
- data/lib/viewdiff.rb +0 -379
- data/readme.html +0 -733
- data/readme.md +0 -184
- data/test/viewdiff_test.rb +0 -911
- /data/{docdiffwebui.cgi → doc/example/docdiffwebui.cgi} +0 -0
- /data/{docdiffwebui.html → doc/example/docdiffwebui.html} +0 -0
- /data/{img/docdiff-screenshot-format-html-digest-firefox.png → doc/img/screenshot-format-html-digest-firefox.png} +0 -0
- /data/{img/docdiff-screenshot-format-html-firefox.png → doc/img/screenshot-format-html-firefox.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-cmdexe-en.png → doc/img/screenshot-format-tty-cmdexe-en.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-cmdexe-ja.png → doc/img/screenshot-format-tty-cmdexe-ja.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-rxvtunicode-en.png → doc/img/screenshot-format-tty-rxvtunicode-en.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-rxvtunicode-ja.png → doc/img/screenshot-format-tty-rxvtunicode-ja.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-xterm-en.png → doc/img/screenshot-format-tty-xterm-en.png} +0 -0
- /data/{img/docdiff-screenshot-format-tty-xterm-ja.png → doc/img/screenshot-format-tty-xterm-ja.png} +0 -0
- /data/{img/docdiff-screenshot-resolution-linewordchar-xterm.png → doc/img/screenshot-resolution-linewordchar-xterm.png} +0 -0
- /data/{sample/01.en.ascii.cr → test/fixture/01_en_ascii_cr.txt} +0 -0
- /data/{sample/01.en.ascii.crlf → test/fixture/01_en_ascii_crlf.txt} +0 -0
- /data/{sample/01.en.ascii.lf → test/fixture/01_en_ascii_lf.txt} +0 -0
- /data/{sample/01.ja.eucjp.lf → test/fixture/01_ja_eucjp_lf.txt} +0 -0
- /data/{sample/01.ja.sjis.cr → test/fixture/01_ja_sjis_cr.txt} +0 -0
- /data/{sample/01.ja.sjis.crlf → test/fixture/01_ja_sjis_crlf.txt} +0 -0
- /data/{sample/01.ja.utf8.crlf → test/fixture/01_ja_utf8_crlf.txt} +0 -0
- /data/{sample/02.en.ascii.cr → test/fixture/02_en_ascii_cr.txt} +0 -0
- /data/{sample/02.en.ascii.crlf → test/fixture/02_en_ascii_crlf.txt} +0 -0
- /data/{sample/02.en.ascii.lf → test/fixture/02_en_ascii_lf.txt} +0 -0
- /data/{sample/02.ja.eucjp.lf → test/fixture/02_ja_eucjp_lf.txt} +0 -0
- /data/{sample/02.ja.sjis.cr → test/fixture/02_ja_sjis_cr.txt} +0 -0
- /data/{sample/02.ja.sjis.crlf → test/fixture/02_ja_sjis_crlf.txt} +0 -0
- /data/{sample/02.ja.utf8.crlf → test/fixture/02_ja_utf8_crlf.txt} +0 -0
- /data/{sample/humpty_dumpty01.ascii.lf → test/fixture/humpty_dumpty01_ascii_lf.txt} +0 -0
- /data/{sample/humpty_dumpty02.ascii.lf → test/fixture/humpty_dumpty02_ascii_lf.txt} +0 -0
data/test/docdiff_test.rb
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
#!/usr/bin/ruby
|
|
2
2
|
# -*- coding: us-ascii; -*-
|
|
3
|
+
|
|
4
|
+
# frozen_string_literal: false
|
|
5
|
+
|
|
3
6
|
require 'test/unit'
|
|
4
7
|
require 'docdiff'
|
|
5
8
|
require 'nkf'
|
|
6
9
|
|
|
7
|
-
class
|
|
10
|
+
class TC_DocDiff < Test::Unit::TestCase
|
|
8
11
|
Document = DocDiff::Document
|
|
9
12
|
|
|
10
13
|
def setup()
|
|
@@ -126,7 +129,6 @@ class TC_DocDiff_Document < Test::Unit::TestCase
|
|
|
126
129
|
"",
|
|
127
130
|
nil].join
|
|
128
131
|
expected = {:foo1=>true, :foo2=>"bar baz", :foo3=>123, :foo4=>false}
|
|
129
|
-
docdiff = DocDiff.new
|
|
130
132
|
assert_equal(expected,
|
|
131
133
|
DocDiff.parse_config_file_content(content))
|
|
132
134
|
end
|
|
@@ -188,7 +190,7 @@ class TC_DocDiff_Document < Test::Unit::TestCase
|
|
|
188
190
|
def test_cli()
|
|
189
191
|
expected = "Hello, my name is [-Watanabe.-]{+matz.+}\n"
|
|
190
192
|
cmd = "ruby -I lib bin/docdiff --wdiff" +
|
|
191
|
-
"
|
|
193
|
+
" test/fixture/01_en_ascii_lf.txt test/fixture/02_en_ascii_lf.txt"
|
|
192
194
|
actual = `#{cmd}`.scan(/^.*?$\n/m).first
|
|
193
195
|
assert_equal(expected, actual)
|
|
194
196
|
end
|
data/test/document_test.rb
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
#!/usr/bin/ruby
|
|
2
|
-
# -*- coding:
|
|
2
|
+
# -*- coding: utf-8; -*-
|
|
3
|
+
|
|
4
|
+
# frozen_string_literal: false
|
|
5
|
+
|
|
3
6
|
require 'test/unit'
|
|
4
7
|
require 'docdiff/document'
|
|
5
8
|
require 'nkf'
|
|
@@ -220,396 +223,396 @@ class TC_DocDiff_Document < Test::Unit::TestCase
|
|
|
220
223
|
|
|
221
224
|
# test EUCJP module
|
|
222
225
|
def test_eucjp_split_to_word()
|
|
223
|
-
doc = Document.new(NKF.nkf("
|
|
224
|
-
expected = ["
|
|
226
|
+
doc = Document.new(NKF.nkf("--euc", "日本語の文字foo bar"))
|
|
227
|
+
expected = ["日本語の","文字","foo ","bar"].map{|c| NKF.nkf("--euc", c)}
|
|
225
228
|
assert_equal(expected, doc.split_to_word)
|
|
226
229
|
end
|
|
227
230
|
def test_eucjp_split_to_word_kanhira()
|
|
228
|
-
doc = Document.new(NKF.nkf("
|
|
229
|
-
expected = ["
|
|
231
|
+
doc = Document.new(NKF.nkf("--euc", "日本語の文字"))
|
|
232
|
+
expected = ["日本語の", "文字"].map{|c| NKF.nkf("--euc", c)}
|
|
230
233
|
assert_equal(expected, doc.split_to_word)
|
|
231
234
|
end
|
|
232
235
|
def test_eucjp_split_to_word_katahira()
|
|
233
|
-
doc = Document.new(NKF.nkf("
|
|
234
|
-
expected = ["
|
|
236
|
+
doc = Document.new(NKF.nkf("--euc", "カタカナの文字"))
|
|
237
|
+
expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--euc", c)}
|
|
235
238
|
assert_equal(expected, doc.split_to_word)
|
|
236
239
|
end
|
|
237
240
|
def test_eucjp_split_to_word_kataonbiki()
|
|
238
|
-
doc = Document.new(NKF.nkf("
|
|
239
|
-
expected = ["
|
|
241
|
+
doc = Document.new(NKF.nkf("--euc", "ルビー色の石"), "EUC-JP")
|
|
242
|
+
expected = ["ルビー", "色の", "石"].map{|c| NKF.nkf("--euc", c)}
|
|
240
243
|
assert_equal(expected, doc.split_to_word)
|
|
241
244
|
end
|
|
242
245
|
def test_eucjp_split_to_word_hiraonbiki()
|
|
243
|
-
doc = Document.new(NKF.nkf("
|
|
244
|
-
expected = (["
|
|
246
|
+
doc = Document.new(NKF.nkf("--euc", "わールビーだ"), "EUC-JP")
|
|
247
|
+
expected = (["わー", "ルビーだ"]).map{|c| NKF.nkf("--euc", c)}
|
|
245
248
|
assert_equal(expected, doc.split_to_word)
|
|
246
249
|
end
|
|
247
250
|
def test_eucjp_split_to_word_latinmix()
|
|
248
|
-
doc = Document.new(NKF.nkf("
|
|
249
|
-
expected = ["
|
|
251
|
+
doc = Document.new(NKF.nkf("--euc", "日本語とLatinの文字"))
|
|
252
|
+
expected = ["日本語と", "Latin", "の", "文字"].map{|c| NKF.nkf("--euc", c)}
|
|
250
253
|
assert_equal(expected, doc.split_to_word)
|
|
251
254
|
end
|
|
252
255
|
def test_eucjp_split_to_char()
|
|
253
|
-
doc = Document.new(NKF.nkf("
|
|
254
|
-
expected = ["
|
|
256
|
+
doc = Document.new(NKF.nkf("--euc", "日本語a b"))
|
|
257
|
+
expected = ["日","本","語","a"," ","b"].map{|c|NKF.nkf("--euc",c)}
|
|
255
258
|
assert_equal(expected, doc.split_to_char)
|
|
256
259
|
end
|
|
257
260
|
def test_eucjp_split_to_char_with_cr()
|
|
258
|
-
doc = Document.new(NKF.nkf("
|
|
259
|
-
expected = ["
|
|
261
|
+
doc = Document.new(NKF.nkf("--euc", "日本語a b\r"))
|
|
262
|
+
expected = ["日","本","語","a"," ","b","\r"].map{|c|NKF.nkf("--euc",c)}
|
|
260
263
|
assert_equal(expected, doc.split_to_char)
|
|
261
264
|
end
|
|
262
265
|
def test_eucjp_split_to_char_with_lf()
|
|
263
|
-
doc = Document.new(NKF.nkf("
|
|
264
|
-
expected = ["
|
|
266
|
+
doc = Document.new(NKF.nkf("--euc", "日本語a b\n"))
|
|
267
|
+
expected = ["日","本","語","a"," ","b","\n"].map{|c|NKF.nkf("--euc",c)}
|
|
265
268
|
assert_equal(expected, doc.split_to_char)
|
|
266
269
|
end
|
|
267
270
|
def test_eucjp_split_to_char_with_crlf()
|
|
268
|
-
doc = Document.new(NKF.nkf("
|
|
269
|
-
expected = ["
|
|
271
|
+
doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
|
|
272
|
+
expected = ["日","本","語","a"," ","b","\r\n"].map{|c|NKF.nkf("--euc",c)}
|
|
270
273
|
assert_equal(expected, doc.split_to_char)
|
|
271
274
|
end
|
|
272
275
|
def test_eucjp_count_char()
|
|
273
|
-
doc = Document.new(NKF.nkf("
|
|
276
|
+
doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
|
|
274
277
|
expected = 7
|
|
275
278
|
assert_equal(expected, doc.count_char)
|
|
276
279
|
end
|
|
277
280
|
def test_eucjp_count_latin_graph_char()
|
|
278
|
-
doc = Document.new(NKF.nkf("
|
|
281
|
+
doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
|
|
279
282
|
expected = 2
|
|
280
283
|
assert_equal(expected, doc.count_latin_graph_char)
|
|
281
284
|
end
|
|
282
285
|
def test_eucjp_count_ja_graph_char()
|
|
283
|
-
doc = Document.new(NKF.nkf("
|
|
286
|
+
doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
|
|
284
287
|
expected = 3
|
|
285
288
|
assert_equal(expected, doc.count_ja_graph_char)
|
|
286
289
|
end
|
|
287
290
|
def test_eucjp_count_graph_char()
|
|
288
|
-
doc = Document.new(NKF.nkf("
|
|
291
|
+
doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
|
|
289
292
|
expected = 5
|
|
290
293
|
assert_equal(expected, doc.count_graph_char)
|
|
291
294
|
end
|
|
292
295
|
def test_eucjp_count_latin_blank_char()
|
|
293
|
-
doc = Document.new(NKF.nkf("
|
|
296
|
+
doc = Document.new(NKF.nkf("--euc", "日本語\ta b\r\n"))
|
|
294
297
|
expected = 2
|
|
295
298
|
assert_equal(expected, doc.count_latin_blank_char)
|
|
296
299
|
end
|
|
297
300
|
def test_eucjp_count_ja_blank_char()
|
|
298
|
-
doc = Document.new(NKF.nkf("
|
|
301
|
+
doc = Document.new(NKF.nkf("--euc", "日本 語\ta b\r\n"))
|
|
299
302
|
expected = 1
|
|
300
303
|
assert_equal(expected, doc.count_ja_blank_char)
|
|
301
304
|
end
|
|
302
305
|
def test_eucjp_count_blank_char()
|
|
303
|
-
doc = Document.new(NKF.nkf("
|
|
306
|
+
doc = Document.new(NKF.nkf("--euc", "日本 語\ta b\r\n"))
|
|
304
307
|
expected = 3
|
|
305
308
|
assert_equal(expected, doc.count_blank_char)
|
|
306
309
|
end
|
|
307
310
|
def test_eucjp_count_word()
|
|
308
|
-
doc = Document.new(NKF.nkf("
|
|
311
|
+
doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
|
|
309
312
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
310
313
|
assert_equal(expected, doc.count_word)
|
|
311
314
|
end
|
|
312
315
|
def test_eucjp_count_ja_word()
|
|
313
|
-
doc = Document.new(NKF.nkf("
|
|
316
|
+
doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
|
|
314
317
|
expected = 3
|
|
315
318
|
assert_equal(expected, doc.count_ja_word)
|
|
316
319
|
end
|
|
317
320
|
def test_eucjp_count_latin_valid_word()
|
|
318
|
-
doc = Document.new(NKF.nkf("
|
|
321
|
+
doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
|
|
319
322
|
expected = 2
|
|
320
323
|
assert_equal(expected, doc.count_latin_valid_word)
|
|
321
324
|
end
|
|
322
325
|
def test_eucjp_count_ja_valid_word()
|
|
323
|
-
doc = Document.new(NKF.nkf("
|
|
326
|
+
doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
|
|
324
327
|
expected = 2
|
|
325
328
|
assert_equal(expected, doc.count_ja_valid_word)
|
|
326
329
|
end
|
|
327
330
|
def test_eucjp_count_valid_word()
|
|
328
|
-
doc = Document.new(NKF.nkf("
|
|
331
|
+
doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
|
|
329
332
|
expected = 4
|
|
330
333
|
assert_equal(expected, doc.count_valid_word)
|
|
331
334
|
end
|
|
332
335
|
def test_eucjp_count_line()
|
|
333
|
-
doc = Document.new(NKF.nkf("
|
|
336
|
+
doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
334
337
|
expected = 6
|
|
335
338
|
assert_equal(expected, doc.count_line)
|
|
336
339
|
end
|
|
337
340
|
def test_eucjp_count_graph_line()
|
|
338
|
-
doc = Document.new(NKF.nkf("
|
|
341
|
+
doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
339
342
|
expected = 3
|
|
340
343
|
assert_equal(expected, doc.count_graph_line)
|
|
341
344
|
end
|
|
342
345
|
def test_eucjp_count_empty_line()
|
|
343
|
-
doc = Document.new(NKF.nkf("
|
|
346
|
+
doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
344
347
|
expected = 1
|
|
345
348
|
assert_equal(expected, doc.count_empty_line)
|
|
346
349
|
end
|
|
347
350
|
def test_eucjp_count_blank_line()
|
|
348
|
-
doc = Document.new(NKF.nkf("
|
|
351
|
+
doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
349
352
|
expected = 2
|
|
350
353
|
assert_equal(expected, doc.count_blank_line)
|
|
351
354
|
end
|
|
352
355
|
|
|
353
356
|
# test SJIS module
|
|
354
357
|
def test_sjis_split_to_word()
|
|
355
|
-
doc = Document.new(NKF.nkf("
|
|
356
|
-
expected = ["
|
|
358
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語の文字foo bar"))
|
|
359
|
+
expected = ["日本語の", "文字", "foo ", "bar"].map{|c|NKF.nkf("--sjis",c)}
|
|
357
360
|
assert_equal(expected, doc.split_to_word)
|
|
358
361
|
end
|
|
359
362
|
def test_sjisplit_s_to_word_kanhira()
|
|
360
|
-
doc = Document.new(NKF.nkf("
|
|
361
|
-
expected = ["
|
|
363
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語の文字"))
|
|
364
|
+
expected = ["日本語の", "文字"].map{|c| NKF.nkf("--sjis", c)}
|
|
362
365
|
assert_equal(expected, doc.split_to_word)
|
|
363
366
|
end
|
|
364
367
|
def test_sjis_split_to_word_katahira()
|
|
365
|
-
doc = Document.new(NKF.nkf("
|
|
366
|
-
expected = ["
|
|
368
|
+
doc = Document.new(NKF.nkf("--sjis", "カタカナの文字"))
|
|
369
|
+
expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--sjis", c)}
|
|
367
370
|
assert_equal(expected, doc.split_to_word)
|
|
368
371
|
end
|
|
369
372
|
def test_sjis_split_to_word_kataonbiki()
|
|
370
|
-
doc = Document.new(NKF.nkf("
|
|
371
|
-
expected = ["
|
|
373
|
+
doc = Document.new(NKF.nkf("--sjis", "ルビーの指輪"))
|
|
374
|
+
expected = ["ルビーの", "指輪"].map{|c| NKF.nkf("--sjis", c)}
|
|
372
375
|
assert_equal(expected, doc.split_to_word)
|
|
373
376
|
end
|
|
374
377
|
def test_sjis_split_to_word_hiraonbiki()
|
|
375
|
-
doc = Document.new(NKF.nkf("
|
|
376
|
-
expected = ["
|
|
378
|
+
doc = Document.new(NKF.nkf("--sjis", "わールビーだ"))
|
|
379
|
+
expected = ["わー", "ルビーだ"].map{|c| NKF.nkf("--sjis", c)}
|
|
377
380
|
assert_equal(expected, doc.split_to_word)
|
|
378
381
|
end
|
|
379
382
|
def test_sjis_split_to_word_latinmix()
|
|
380
|
-
doc = Document.new(NKF.nkf("
|
|
381
|
-
expected = ["
|
|
383
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語とLatinの文字"))
|
|
384
|
+
expected = ["日本語と","Latin","の","文字"].map{|c| NKF.nkf("--sjis", c)}
|
|
382
385
|
assert_equal(expected, doc.split_to_word)
|
|
383
386
|
end
|
|
384
387
|
def test_sjis_split_to_char()
|
|
385
|
-
doc = Document.new(NKF.nkf("
|
|
386
|
-
expected = ["
|
|
388
|
+
doc = Document.new(NKF.nkf("--sjis", "表計算a b"))
|
|
389
|
+
expected = ["表","計","算","a"," ","b"].map{|c|NKF.nkf("--sjis",c)}
|
|
387
390
|
assert_equal(expected, doc.split_to_char)
|
|
388
391
|
end
|
|
389
392
|
def test_sjis_split_to_char_with_cr()
|
|
390
|
-
doc = Document.new(NKF.nkf("
|
|
391
|
-
expected = ["
|
|
393
|
+
doc = Document.new(NKF.nkf("--sjis", "表計算a b\r"))
|
|
394
|
+
expected = ["表","計","算","a"," ","b","\r"].map{|c|NKF.nkf("--sjis",c)}
|
|
392
395
|
assert_equal(expected, doc.split_to_char)
|
|
393
396
|
end
|
|
394
397
|
def test_sjis_split_to_char_with_lf()
|
|
395
|
-
doc = Document.new(NKF.nkf("
|
|
396
|
-
expected = ["
|
|
398
|
+
doc = Document.new(NKF.nkf("--sjis", "表計算a b\n"))
|
|
399
|
+
expected = ["表","計","算","a"," ","b","\n"].map{|c|NKF.nkf("--sjis",c)}
|
|
397
400
|
assert_equal(expected, doc.split_to_char)
|
|
398
401
|
end
|
|
399
402
|
def test_sjis_split_to_char_with_crlf()
|
|
400
|
-
doc = Document.new(NKF.nkf("
|
|
401
|
-
expected = ["
|
|
403
|
+
doc = Document.new(NKF.nkf("--sjis", "表計算a b\r\n"))
|
|
404
|
+
expected = ["表","計","算","a"," ","b","\r\n"].map{|c|NKF.nkf("--sjis",c)}
|
|
402
405
|
assert_equal(expected, doc.split_to_char)
|
|
403
406
|
end
|
|
404
407
|
def test_sjis_count_char()
|
|
405
|
-
doc = Document.new(NKF.nkf("
|
|
408
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
|
|
406
409
|
expected = 7
|
|
407
410
|
assert_equal(expected, doc.count_char)
|
|
408
411
|
end
|
|
409
412
|
def test_sjis_count_latin_graph_char()
|
|
410
|
-
doc = Document.new(NKF.nkf("
|
|
413
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
|
|
411
414
|
expected = 2
|
|
412
415
|
assert_equal(expected, doc.count_latin_graph_char)
|
|
413
416
|
end
|
|
414
417
|
def test_sjis_count_ja_graph_char()
|
|
415
|
-
doc = Document.new(NKF.nkf("
|
|
418
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
|
|
416
419
|
expected = 3
|
|
417
420
|
assert_equal(expected, doc.count_ja_graph_char)
|
|
418
421
|
end
|
|
419
422
|
def test_sjis_count_graph_char()
|
|
420
|
-
doc = Document.new(NKF.nkf("
|
|
423
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
|
|
421
424
|
expected = 5
|
|
422
425
|
assert_equal(expected, doc.count_graph_char)
|
|
423
426
|
end
|
|
424
427
|
def test_sjis_count_latin_blank_char()
|
|
425
|
-
doc = Document.new(NKF.nkf("
|
|
428
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語\ta b\r\n"))
|
|
426
429
|
expected = 2
|
|
427
430
|
assert_equal(expected, doc.count_latin_blank_char)
|
|
428
431
|
end
|
|
429
432
|
def test_sjis_count_ja_blank_char()
|
|
430
|
-
doc = Document.new(NKF.nkf("
|
|
433
|
+
doc = Document.new(NKF.nkf("--sjis", "日本 語\ta b\r\n"))
|
|
431
434
|
expected = 1
|
|
432
435
|
assert_equal(expected, doc.count_ja_blank_char)
|
|
433
436
|
end
|
|
434
437
|
def test_sjis_count_blank_char()
|
|
435
|
-
doc = Document.new(NKF.nkf("
|
|
438
|
+
doc = Document.new(NKF.nkf("--sjis", "日本 語\ta b\r\n"))
|
|
436
439
|
expected = 3
|
|
437
440
|
assert_equal(expected, doc.count_blank_char)
|
|
438
441
|
end
|
|
439
442
|
def test_sjis_count_word()
|
|
440
|
-
doc = Document.new(NKF.nkf("
|
|
443
|
+
doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
|
|
441
444
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
442
445
|
assert_equal(expected, doc.count_word)
|
|
443
446
|
end
|
|
444
447
|
def test_sjis_count_ja_word()
|
|
445
|
-
doc = Document.new(NKF.nkf("
|
|
448
|
+
doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
|
|
446
449
|
expected = 3
|
|
447
450
|
assert_equal(expected, doc.count_ja_word)
|
|
448
451
|
end
|
|
449
452
|
def test_sjis_count_latin_valid_word()
|
|
450
|
-
doc = Document.new(NKF.nkf("
|
|
453
|
+
doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
|
|
451
454
|
expected = 2
|
|
452
455
|
assert_equal(expected, doc.count_latin_valid_word)
|
|
453
456
|
end
|
|
454
457
|
def test_sjis_count_ja_valid_word()
|
|
455
|
-
doc = Document.new(NKF.nkf("
|
|
458
|
+
doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
|
|
456
459
|
expected = 2
|
|
457
460
|
assert_equal(expected, doc.count_ja_valid_word)
|
|
458
461
|
end
|
|
459
462
|
def test_sjis_count_valid_word()
|
|
460
|
-
doc = Document.new(NKF.nkf("
|
|
463
|
+
doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
|
|
461
464
|
expected = 4
|
|
462
465
|
assert_equal(expected, doc.count_valid_word)
|
|
463
466
|
end
|
|
464
467
|
def test_sjis_count_line()
|
|
465
|
-
doc = Document.new(NKF.nkf("
|
|
468
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
466
469
|
expected = 6
|
|
467
470
|
assert_equal(expected, doc.count_line)
|
|
468
471
|
end
|
|
469
472
|
def test_sjis_count_graph_line()
|
|
470
|
-
doc = Document.new(NKF.nkf("
|
|
473
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
471
474
|
expected = 3
|
|
472
475
|
assert_equal(expected, doc.count_graph_line)
|
|
473
476
|
end
|
|
474
477
|
def test_sjis_count_empty_line()
|
|
475
|
-
doc = Document.new(NKF.nkf("
|
|
478
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
476
479
|
expected = 1
|
|
477
480
|
assert_equal(expected, doc.count_empty_line)
|
|
478
481
|
end
|
|
479
482
|
def test_sjis_count_blank_line()
|
|
480
|
-
doc = Document.new(NKF.nkf("
|
|
483
|
+
doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
481
484
|
expected = 2
|
|
482
485
|
assert_equal(expected, doc.count_blank_line)
|
|
483
486
|
end
|
|
484
487
|
|
|
485
488
|
# test UTF8 module
|
|
486
489
|
def test_utf8_split_to_word()
|
|
487
|
-
doc = Document.new(NKF.nkf("
|
|
488
|
-
expected = ["
|
|
490
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語の文字foo bar"))
|
|
491
|
+
expected = ["日本語の", "文字", "foo ", "bar"].map{|c| NKF.nkf("--utf8", c)}
|
|
489
492
|
assert_equal(expected, doc.split_to_word)
|
|
490
493
|
end
|
|
491
494
|
def test_utf8_split_to_word_kanhira()
|
|
492
|
-
doc = Document.new(NKF.nkf("
|
|
493
|
-
expected = ["
|
|
495
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語の文字"))
|
|
496
|
+
expected = ["日本語の", "文字"].map{|c| NKF.nkf("--utf8", c)}
|
|
494
497
|
assert_equal(expected, doc.split_to_word)
|
|
495
498
|
end
|
|
496
499
|
def test_utf8_split_to_word_katahira()
|
|
497
|
-
doc = Document.new(NKF.nkf("
|
|
498
|
-
expected = ["
|
|
500
|
+
doc = Document.new(NKF.nkf("--utf8", "カタカナの文字"))
|
|
501
|
+
expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--utf8", c)}
|
|
499
502
|
assert_equal(expected, doc.split_to_word)
|
|
500
503
|
end
|
|
501
504
|
def test_utf8_split_to_word_kataonbiki()
|
|
502
|
-
doc = Document.new(NKF.nkf("
|
|
503
|
-
expected = ["
|
|
505
|
+
doc = Document.new(NKF.nkf("--utf8", "ルビーの指輪"))
|
|
506
|
+
expected = ["ルビーの", "指輪"].map{|c| NKF.nkf("--utf8", c)}
|
|
504
507
|
assert_equal(expected, doc.split_to_word)
|
|
505
508
|
end
|
|
506
509
|
def test_utf8_split_to_word_hiraonbiki()
|
|
507
|
-
doc = Document.new(NKF.nkf("
|
|
508
|
-
expected = ["
|
|
510
|
+
doc = Document.new(NKF.nkf("--utf8", "わールビーだ"))
|
|
511
|
+
expected = ["わー", "ルビーだ"].map{|c| NKF.nkf("--utf8", c)}
|
|
509
512
|
assert_equal(expected, doc.split_to_word)
|
|
510
513
|
end
|
|
511
514
|
def test_utf8_split_to_word_latinmix()
|
|
512
|
-
doc = Document.new(NKF.nkf("
|
|
513
|
-
expected = ["
|
|
515
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語とLatinの文字"))
|
|
516
|
+
expected = ["日本語と", "Latin", "の", "文字"].map{|c| NKF.nkf("--utf8", c)}
|
|
514
517
|
assert_equal(expected, doc.split_to_word)
|
|
515
518
|
end
|
|
516
519
|
def test_utf8_split_to_char()
|
|
517
|
-
doc = Document.new(NKF.nkf("
|
|
518
|
-
expected = ["
|
|
520
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語a b"), "UTF-8")
|
|
521
|
+
expected = ["日", "本", "語", "a", " ", "b"].map{|c| NKF.nkf("--utf8", c)}
|
|
519
522
|
assert_equal(expected, doc.split_to_char)
|
|
520
523
|
end
|
|
521
524
|
def test_utf8_split_to_char_with_cr()
|
|
522
|
-
doc = Document.new(NKF.nkf("
|
|
523
|
-
expected = ["
|
|
525
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r"), "UTF-8")
|
|
526
|
+
expected = ["日","本","語","a"," ","b","\r"].map{|c| NKF.nkf("--utf8", c)}
|
|
524
527
|
assert_equal(expected, doc.split_to_char)
|
|
525
528
|
end
|
|
526
529
|
def test_utf8_split_to_char_with_lf()
|
|
527
|
-
doc = Document.new(NKF.nkf("
|
|
528
|
-
expected = ["
|
|
530
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語a b\n"), "UTF-8")
|
|
531
|
+
expected = ["日","本","語","a"," ","b","\n"].map{|c| NKF.nkf("--utf8", c)}
|
|
529
532
|
assert_equal(expected, doc.split_to_char)
|
|
530
533
|
end
|
|
531
534
|
def test_utf8_split_to_char_with_crlf()
|
|
532
|
-
doc = Document.new(NKF.nkf("
|
|
533
|
-
expected = ["
|
|
535
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
|
|
536
|
+
expected = ["日","本","語","a"," ","b","\r\n"].map{|c| NKF.nkf("--utf8", c)}
|
|
534
537
|
assert_equal(expected, doc.split_to_char)
|
|
535
538
|
end
|
|
536
539
|
def test_utf8_count_char()
|
|
537
|
-
doc = Document.new(NKF.nkf("
|
|
540
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
|
|
538
541
|
expected = 7
|
|
539
542
|
assert_equal(expected, doc.count_char)
|
|
540
543
|
end
|
|
541
544
|
def test_utf8_count_latin_graph_char()
|
|
542
|
-
doc = Document.new(NKF.nkf("
|
|
545
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
|
|
543
546
|
expected = 2
|
|
544
547
|
assert_equal(expected, doc.count_latin_graph_char)
|
|
545
548
|
end
|
|
546
549
|
def test_utf8_count_ja_graph_char()
|
|
547
|
-
doc = Document.new(NKF.nkf("
|
|
550
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
|
|
548
551
|
expected = 3
|
|
549
552
|
assert_equal(expected, doc.count_ja_graph_char)
|
|
550
553
|
end
|
|
551
554
|
def test_utf8_count_graph_char()
|
|
552
|
-
doc = Document.new(NKF.nkf("
|
|
555
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
|
|
553
556
|
expected = 5
|
|
554
557
|
assert_equal(expected, doc.count_graph_char)
|
|
555
558
|
end
|
|
556
559
|
def test_utf8_count_latin_blank_char()
|
|
557
|
-
doc = Document.new(NKF.nkf("
|
|
560
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語\ta b\r\n"))
|
|
558
561
|
expected = 2
|
|
559
562
|
assert_equal(expected, doc.count_latin_blank_char)
|
|
560
563
|
end
|
|
561
564
|
def test_utf8_count_ja_blank_char()
|
|
562
|
-
doc = Document.new(NKF.nkf("
|
|
565
|
+
doc = Document.new(NKF.nkf("--utf8", "日本 語\ta b\r\n"))
|
|
563
566
|
expected = 1
|
|
564
567
|
assert_equal(expected, doc.count_ja_blank_char)
|
|
565
568
|
end
|
|
566
569
|
def test_utf8_count_blank_char()
|
|
567
|
-
doc = Document.new(NKF.nkf("
|
|
570
|
+
doc = Document.new(NKF.nkf("--utf8", "日本 語\ta b\r\n"))
|
|
568
571
|
expected = 3
|
|
569
572
|
assert_equal(expected, doc.count_blank_char)
|
|
570
573
|
end
|
|
571
574
|
def test_utf8_count_word()
|
|
572
|
-
doc = Document.new(NKF.nkf("
|
|
575
|
+
doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
|
|
573
576
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
574
577
|
assert_equal(expected, doc.count_word)
|
|
575
578
|
end
|
|
576
579
|
def test_utf8_count_ja_word()
|
|
577
|
-
doc = Document.new(NKF.nkf("
|
|
580
|
+
doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
|
|
578
581
|
expected = 3
|
|
579
582
|
assert_equal(expected, doc.count_ja_word)
|
|
580
583
|
end
|
|
581
584
|
def test_utf8_count_latin_valid_word()
|
|
582
|
-
doc = Document.new(NKF.nkf("
|
|
585
|
+
doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
|
|
583
586
|
expected = 2
|
|
584
587
|
assert_equal(expected, doc.count_latin_valid_word)
|
|
585
588
|
end
|
|
586
589
|
def test_utf8_count_ja_valid_word()
|
|
587
|
-
doc = Document.new(NKF.nkf("
|
|
590
|
+
doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
|
|
588
591
|
expected = 2
|
|
589
592
|
assert_equal(expected, doc.count_ja_valid_word)
|
|
590
593
|
end
|
|
591
594
|
def test_utf8_count_valid_word()
|
|
592
|
-
doc = Document.new(NKF.nkf("
|
|
595
|
+
doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
|
|
593
596
|
expected = 4
|
|
594
597
|
assert_equal(expected, doc.count_valid_word)
|
|
595
598
|
end
|
|
596
599
|
def test_utf8_count_line()
|
|
597
|
-
doc = Document.new(NKF.nkf("
|
|
600
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
598
601
|
expected = 6
|
|
599
602
|
assert_equal(expected, doc.count_line)
|
|
600
603
|
end
|
|
601
604
|
def test_utf8_count_graph_line()
|
|
602
|
-
doc = Document.new(NKF.nkf("
|
|
605
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
603
606
|
expected = 3
|
|
604
607
|
assert_equal(expected, doc.count_graph_line)
|
|
605
608
|
end
|
|
606
609
|
def test_utf8_count_empty_line()
|
|
607
|
-
doc = Document.new(NKF.nkf("
|
|
610
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
608
611
|
expected = 1
|
|
609
612
|
assert_equal(expected, doc.count_empty_line)
|
|
610
613
|
end
|
|
611
614
|
def test_utf8_count_blank_line()
|
|
612
|
-
doc = Document.new(NKF.nkf("
|
|
615
|
+
doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
613
616
|
expected = 2
|
|
614
617
|
assert_equal(expected, doc.count_blank_line)
|
|
615
618
|
end
|