docdiff 0.6.5 → 0.6.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/Gemfile +7 -7
  3. data/Guardfile +4 -4
  4. data/Makefile +1 -1
  5. data/Rakefile +6 -6
  6. data/bin/docdiff +1 -1
  7. data/devutil/Rakefile +12 -5
  8. data/devutil/char_by_charclass.rb +43 -20
  9. data/devutil/charclass_by_char.rb +40 -19
  10. data/devutil/jis0208.rb +263 -231
  11. data/devutil/jis0208_test.rb +196 -0
  12. data/doc/news.md +8 -0
  13. data/docdiff.gemspec +12 -10
  14. data/lib/doc_diff.rb +59 -60
  15. data/lib/docdiff/charstring.rb +225 -241
  16. data/lib/docdiff/cli.rb +285 -250
  17. data/lib/docdiff/diff/contours.rb +1 -1
  18. data/lib/docdiff/diff/editscript.rb +1 -1
  19. data/lib/docdiff/diff/rcsdiff.rb +1 -1
  20. data/lib/docdiff/diff/shortestpath.rb +1 -1
  21. data/lib/docdiff/diff/speculative.rb +1 -1
  22. data/lib/docdiff/diff/subsequence.rb +1 -1
  23. data/lib/docdiff/diff/unidiff.rb +1 -1
  24. data/lib/docdiff/diff.rb +1 -1
  25. data/lib/docdiff/difference.rb +71 -70
  26. data/lib/docdiff/document.rb +129 -109
  27. data/lib/docdiff/encoding/en_ascii.rb +64 -58
  28. data/lib/docdiff/encoding/ja_eucjp.rb +250 -235
  29. data/lib/docdiff/encoding/ja_sjis.rb +240 -226
  30. data/lib/docdiff/encoding/ja_utf8.rb +6952 -6939
  31. data/lib/docdiff/version.rb +1 -1
  32. data/lib/docdiff/view.rb +522 -438
  33. data/lib/docdiff.rb +2 -2
  34. data/test/charstring_test.rb +475 -351
  35. data/test/cli_test.rb +103 -101
  36. data/test/diff_test.rb +15 -16
  37. data/test/difference_test.rb +40 -31
  38. data/test/docdiff_test.rb +162 -136
  39. data/test/document_test.rb +280 -175
  40. data/test/test_helper.rb +2 -1
  41. data/test/view_test.rb +636 -497
  42. metadata +8 -8
  43. data/devutil/testjis0208.rb +0 -38
@@ -3,625 +3,730 @@
3
3
 
4
4
  # frozen_string_literal: false
5
5
 
6
- require 'test/unit'
7
- require 'docdiff/document'
8
- require 'nkf'
6
+ require "test/unit"
7
+ require "docdiff/document"
8
+ require "nkf"
9
9
 
10
- class TC_DocDiff_Document < Test::Unit::TestCase
10
+ class TestDocument < Test::Unit::TestCase
11
11
  Document = DocDiff::Document
12
12
  CharString = DocDiff::CharString
13
13
 
14
- def setup()
15
- #
14
+ def setup
16
15
  end
17
16
 
18
- def test_encoding()
17
+ def test_encoding
19
18
  doc = Document.new("Foo bar.\nBaz quux.")
20
- doc.encoding = 'US-ASCII'
21
- doc.eol = 'LF'
22
- expected = 'US-ASCII'
19
+ doc.encoding = "US-ASCII"
20
+ doc.eol = "LF"
21
+ expected = "US-ASCII"
23
22
  assert_equal(expected, doc.encoding)
24
23
  end
25
- def test_encoding_auto()
24
+
25
+ def test_encoding_auto
26
26
  doc = Document.new("Foo bar.\nBaz quux.".encode("US-ASCII"))
27
- expected = 'US-ASCII'
27
+ expected = "US-ASCII"
28
28
  assert_equal(expected, doc.encoding)
29
29
  end
30
- def test_eol()
30
+
31
+ def test_eol
31
32
  doc = Document.new("Foo bar.\nBaz quux.")
32
- doc.encoding = 'US-ASCII'
33
- doc.eol = 'LF'
34
- expected = 'LF'
33
+ doc.encoding = "US-ASCII"
34
+ doc.eol = "LF"
35
+ expected = "LF"
35
36
  assert_equal(expected, doc.eol)
36
37
  end
37
- def test_eol_auto_lf()
38
+
39
+ def test_eol_auto_lf
38
40
  doc = Document.new("Foo bar.\nBaz quux.")
39
- expected = 'LF'
41
+ expected = "LF"
40
42
  assert_equal(expected, doc.eol)
41
43
  end
42
- def test_eol_auto_none()
44
+
45
+ def test_eol_auto_none
43
46
  doc = Document.new("Foo bar.")
44
47
  expected = "NONE"
45
48
  assert_equal(expected, doc.eol)
46
49
  end
47
- def test_eol_char_lf()
50
+
51
+ def test_eol_char_lf
48
52
  doc = Document.new("Foo bar.\nBaz quux.")
49
- # doc.encoding = "US-ASCII"
50
- # doc.eol = "LF"
53
+ # doc.encoding = "US-ASCII"
54
+ # doc.eol = "LF"
51
55
  expected = "\n"
52
56
  assert_equal(expected, doc.eol_char)
53
57
  end
54
- def test_split_by_line()
58
+
59
+ def test_split_by_line
55
60
  doc = Document.new("Hello, my name is Watanabe.\nI am just another Ruby porter.\n")
56
61
  expected = ["Hello, my name is Watanabe.\n", "I am just another Ruby porter.\n"]
57
62
  assert_equal(expected, doc.split_to_line)
58
63
  end
59
64
 
60
65
  # test eol split_to_line() method
61
- def test_cr_split_to_line()
66
+ def test_cr_split_to_line
62
67
  doc = Document.new("foo\rbar\r")
63
68
  expected = ["foo\r", "bar\r"]
64
69
  assert_equal(expected, doc.split_to_line)
65
70
  end
66
- def test_cr_split_to_line_chomped_lastline()
71
+
72
+ def test_cr_split_to_line_chomped_lastline
67
73
  doc = Document.new("foo\rbar")
68
74
  expected = ["foo\r", "bar"]
69
75
  assert_equal(expected, doc.split_to_line)
70
76
  end
71
- def test_cr_split_to_line_empty_line()
77
+
78
+ def test_cr_split_to_line_empty_line
72
79
  doc = Document.new("foo\r\rbar\r")
73
80
  expected = ["foo\r", "\r", "bar\r"]
74
81
  assert_equal(expected, doc.split_to_line)
75
82
  end
76
- def test_lf_split_to_line()
83
+
84
+ def test_lf_split_to_line
77
85
  doc = Document.new("foo\nbar\n")
78
86
  expected = ["foo\n", "bar\n"]
79
87
  assert_equal(expected, doc.split_to_line)
80
88
  end
81
- def test_lf_split_to_line_chomped_lastline()
89
+
90
+ def test_lf_split_to_line_chomped_lastline
82
91
  doc = Document.new("foo\nbar")
83
92
  expected = ["foo\n", "bar"]
84
93
  assert_equal(expected, doc.split_to_line)
85
94
  end
86
- def test_lf_split_to_line_empty_line()
95
+
96
+ def test_lf_split_to_line_empty_line
87
97
  doc = Document.new("foo\n\nbar\n")
88
98
  expected = ["foo\n", "\n", "bar\n"]
89
99
  assert_equal(expected, doc.split_to_line)
90
100
  end
91
- def test_crlf_split_to_line()
101
+
102
+ def test_crlf_split_to_line
92
103
  doc = Document.new("foo\r\nbar\r\n")
93
104
  expected = ["foo\r\n", "bar\r\n"]
94
105
  assert_equal(expected, doc.split_to_line)
95
106
  end
96
- def test_crlf_split_to_line_chomped_lastline()
107
+
108
+ def test_crlf_split_to_line_chomped_lastline
97
109
  doc = Document.new("foo\r\nbar")
98
110
  expected = ["foo\r\n", "bar"]
99
111
  assert_equal(expected, doc.split_to_line)
100
112
  end
101
- def test_crlf_split_to_line_empty_line()
113
+
114
+ def test_crlf_split_to_line_empty_line
102
115
  doc = Document.new("foo\r\n\r\nbar\r\n")
103
116
  expected = ["foo\r\n", "\r\n", "bar\r\n"]
104
117
  assert_equal(expected, doc.split_to_line)
105
118
  end
106
119
 
107
120
  # test ASCII module
108
- def test_ascii_split_to_word()
121
+ def test_ascii_split_to_word
109
122
  doc = Document.new("foo bar")
110
123
  expected = ["foo ", "bar"]
111
124
  assert_equal(expected, doc.split_to_word)
112
125
  end
113
- def test_ascii_split_to_word_withsymbol()
126
+
127
+ def test_ascii_split_to_word_withsymbol
114
128
  doc = Document.new("foo (bar) baz-baz")
115
129
  expected = ["foo ", "(bar) ", "baz-baz"]
116
130
  assert_equal(expected, doc.split_to_word)
117
131
  end
118
- def test_ascii_split_to_word_withquote()
132
+
133
+ def test_ascii_split_to_word_withquote
119
134
  doc = Document.new("foo's 'foo' \"bar\" 'baz.'")
120
135
  expected = ["foo's ", "'foo' ", "\"bar\" ", "'baz.'"]
121
136
  assert_equal(expected, doc.split_to_word)
122
137
  end
123
- def test_ascii_split_to_word_withlongspace()
138
+
139
+ def test_ascii_split_to_word_withlongspace
124
140
  doc = Document.new(" foo bar")
125
141
  expected = [" ", "foo ", " ", "bar"]
126
142
  assert_equal(expected, doc.split_to_word)
127
143
  end
128
- def test_ascii_split_to_word_withdash()
144
+
145
+ def test_ascii_split_to_word_withdash
129
146
  doc = Document.new("foo -- bar, baz - quux")
130
147
  expected = ["foo ", "-- ", "bar, ", "baz ", "- ", "quux"]
131
148
  assert_equal(expected, doc.split_to_word)
132
149
  end
133
- def test_ascii_split_to_char()
150
+
151
+ def test_ascii_split_to_char
134
152
  doc = Document.new("foo bar")
135
- expected = ["f","o","o"," ","b","a","r"]
153
+ expected = ["f", "o", "o", " ", "b", "a", "r"]
136
154
  assert_equal(expected, doc.split_to_char)
137
155
  end
138
- def test_ascii_split_to_char_with_eol_cr()
156
+
157
+ def test_ascii_split_to_char_with_eol_cr
139
158
  doc = Document.new("foo bar\r")
140
- expected = ["f","o","o"," ","b","a","r","\r"]
159
+ expected = ["f", "o", "o", " ", "b", "a", "r", "\r"]
141
160
  assert_equal(expected, doc.split_to_char)
142
161
  end
143
- def test_ascii_split_to_char_with_eol_lf()
162
+
163
+ def test_ascii_split_to_char_with_eol_lf
144
164
  doc = Document.new("foo bar\n")
145
- expected = ["f","o","o"," ","b","a","r","\n"]
165
+ expected = ["f", "o", "o", " ", "b", "a", "r", "\n"]
146
166
  assert_equal(expected, doc.split_to_char)
147
167
  end
148
- def test_ascii_split_to_char_with_eol_crlf()
168
+
169
+ def test_ascii_split_to_char_with_eol_crlf
149
170
  doc = Document.new("foo bar\r\n")
150
- expected = ["f","o","o"," ","b","a","r","\r\n"]
171
+ expected = ["f", "o", "o", " ", "b", "a", "r", "\r\n"]
151
172
  assert_equal(expected, doc.split_to_char)
152
173
  end
153
- def test_ascii_split_to_byte()
174
+
175
+ def test_ascii_split_to_byte
154
176
  doc = Document.new("foo bar\r\n")
155
- expected = ["f","o","o"," ","b","a","r","\r","\n"]
177
+ expected = ["f", "o", "o", " ", "b", "a", "r", "\r", "\n"]
156
178
  assert_equal(expected, doc.split_to_byte)
157
179
  end
158
- def test_ascii_count_byte()
180
+
181
+ def test_ascii_count_byte
159
182
  doc = Document.new("foo bar\r\n")
160
183
  expected = 9
161
184
  assert_equal(expected, doc.count_byte)
162
185
  end
163
- def test_ascii_count_char()
186
+
187
+ def test_ascii_count_char
164
188
  doc = Document.new("foo bar\r\nbaz quux\r\n")
165
189
  expected = 17
166
190
  assert_equal(expected, doc.count_char)
167
191
  end
168
- def test_ascii_count_latin_graph_char()
192
+
193
+ def test_ascii_count_latin_graph_char
169
194
  doc = Document.new("foo bar\r\nbaz quux\r\n")
170
195
  expected = 13
171
196
  assert_equal(expected, doc.count_latin_graph_char)
172
197
  end
173
- def test_ascii_count_graph_char()
198
+
199
+ def test_ascii_count_graph_char
174
200
  doc = Document.new("foo bar\r\nbaz quux\r\n")
175
201
  expected = 13
176
202
  assert_equal(expected, doc.count_graph_char)
177
203
  end
178
- def test_ascii_count_latin_blank_char()
204
+
205
+ def test_ascii_count_latin_blank_char
179
206
  doc = Document.new("foo bar\r\nbaz\tquux\r\n")
180
207
  expected = 2
181
208
  assert_equal(expected, doc.count_latin_blank_char)
182
209
  end
183
- def test_ascii_count_blank_char()
210
+
211
+ def test_ascii_count_blank_char
184
212
  doc = Document.new("foo bar\r\nbaz\tquux\r\n")
185
213
  expected = 2
186
214
  assert_equal(expected, doc.count_blank_char)
187
215
  end
188
- def test_ascii_count_word()
216
+
217
+ def test_ascii_count_word
189
218
  doc = Document.new("foo bar \r\nbaz quux\r\n")
190
219
  expected = 6
191
220
  assert_equal(expected, doc.count_word)
192
221
  end
193
- def test_ascii_count_latin_word()
222
+
223
+ def test_ascii_count_latin_word
194
224
  doc = Document.new("foo bar \r\nbaz quux\r\n")
195
- expected = 5 # " " is also counted as a word
225
+ expected = 5 # " " is also counted as a word
196
226
  assert_equal(expected, doc.count_latin_word)
197
227
  end
198
- def test_ascii_count_latin_valid_word()
228
+
229
+ def test_ascii_count_latin_valid_word
199
230
  doc = Document.new("1 foo \r\n%%% ()\r\n")
200
231
  expected = 2
201
232
  assert_equal(expected, doc.count_latin_valid_word)
202
233
  end
203
- def test_ascii_count_line()
234
+
235
+ def test_ascii_count_line
204
236
  doc = Document.new("foo\r\nbar")
205
237
  expected = 2
206
238
  assert_equal(expected, doc.count_line)
207
239
  end
208
- def test_ascii_count_graph_line()
240
+
241
+ def test_ascii_count_graph_line
209
242
  doc = Document.new("foo\r\n ")
210
243
  expected = 1
211
244
  assert_equal(expected, doc.count_graph_line)
212
245
  end
213
- def test_ascii_count_empty_line()
246
+
247
+ def test_ascii_count_empty_line
214
248
  doc = Document.new("foo\r\n \r\n\t\r\n\r\n")
215
249
  expected = 1
216
250
  assert_equal(expected, doc.count_empty_line)
217
251
  end
218
- def test_ascii_count_blank_line()
252
+
253
+ def test_ascii_count_blank_line
219
254
  doc = Document.new("\r\n \r\n\t\r\n ")
220
255
  expected = 3
221
256
  assert_equal(expected, doc.count_blank_line)
222
257
  end
223
258
 
224
259
  # test EUCJP module
225
- def test_eucjp_split_to_word()
260
+ def test_eucjp_split_to_word
226
261
  doc = Document.new(NKF.nkf("--euc", "日本語の文字foo bar"))
227
- expected = ["日本語の","文字","foo ","bar"].map{|c| NKF.nkf("--euc", c)}
262
+ expected = ["日本語の", "文字", "foo ", "bar"].map { |c| NKF.nkf("--euc", c) }
228
263
  assert_equal(expected, doc.split_to_word)
229
264
  end
230
- def test_eucjp_split_to_word_kanhira()
265
+
266
+ def test_eucjp_split_to_word_kanhira
231
267
  doc = Document.new(NKF.nkf("--euc", "日本語の文字"))
232
- expected = ["日本語の", "文字"].map{|c| NKF.nkf("--euc", c)}
268
+ expected = ["日本語の", "文字"].map { |c| NKF.nkf("--euc", c) }
233
269
  assert_equal(expected, doc.split_to_word)
234
270
  end
235
- def test_eucjp_split_to_word_katahira()
271
+
272
+ def test_eucjp_split_to_word_katahira
236
273
  doc = Document.new(NKF.nkf("--euc", "カタカナの文字"))
237
- expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--euc", c)}
274
+ expected = ["カタカナの", "文字"].map { |c| NKF.nkf("--euc", c) }
238
275
  assert_equal(expected, doc.split_to_word)
239
276
  end
240
- def test_eucjp_split_to_word_kataonbiki()
277
+
278
+ def test_eucjp_split_to_word_kataonbiki
241
279
  doc = Document.new(NKF.nkf("--euc", "ルビー色の石"), "EUC-JP")
242
- expected = ["ルビー", "色の", "石"].map{|c| NKF.nkf("--euc", c)}
280
+ expected = ["ルビー", "色の", "石"].map { |c| NKF.nkf("--euc", c) }
243
281
  assert_equal(expected, doc.split_to_word)
244
282
  end
245
- def test_eucjp_split_to_word_hiraonbiki()
283
+
284
+ def test_eucjp_split_to_word_hiraonbiki
246
285
  doc = Document.new(NKF.nkf("--euc", "わールビーだ"), "EUC-JP")
247
- expected = (["わー", "ルビーだ"]).map{|c| NKF.nkf("--euc", c)}
286
+ expected = ["わー", "ルビーだ"].map { |c| NKF.nkf("--euc", c) }
248
287
  assert_equal(expected, doc.split_to_word)
249
288
  end
250
- def test_eucjp_split_to_word_latinmix()
289
+
290
+ def test_eucjp_split_to_word_latinmix
251
291
  doc = Document.new(NKF.nkf("--euc", "日本語とLatinの文字"))
252
- expected = ["日本語と", "Latin", "の", "文字"].map{|c| NKF.nkf("--euc", c)}
292
+ expected = ["日本語と", "Latin", "の", "文字"].map { |c| NKF.nkf("--euc", c) }
253
293
  assert_equal(expected, doc.split_to_word)
254
294
  end
255
- def test_eucjp_split_to_char()
295
+
296
+ def test_eucjp_split_to_char
256
297
  doc = Document.new(NKF.nkf("--euc", "日本語a b"))
257
- expected = ["日","本","語","a"," ","b"].map{|c|NKF.nkf("--euc",c)}
298
+ expected = ["日", "本", "語", "a", " ", "b"].map { |c| NKF.nkf("--euc", c) }
258
299
  assert_equal(expected, doc.split_to_char)
259
300
  end
260
- def test_eucjp_split_to_char_with_cr()
301
+
302
+ def test_eucjp_split_to_char_with_cr
261
303
  doc = Document.new(NKF.nkf("--euc", "日本語a b\r"))
262
- expected = ["日","本","語","a"," ","b","\r"].map{|c|NKF.nkf("--euc",c)}
304
+ expected = ["日", "本", "語", "a", " ", "b", "\r"].map { |c| NKF.nkf("--euc", c) }
263
305
  assert_equal(expected, doc.split_to_char)
264
306
  end
265
- def test_eucjp_split_to_char_with_lf()
307
+
308
+ def test_eucjp_split_to_char_with_lf
266
309
  doc = Document.new(NKF.nkf("--euc", "日本語a b\n"))
267
- expected = ["日","本","語","a"," ","b","\n"].map{|c|NKF.nkf("--euc",c)}
310
+ expected = ["日", "本", "語", "a", " ", "b", "\n"].map { |c| NKF.nkf("--euc", c) }
268
311
  assert_equal(expected, doc.split_to_char)
269
312
  end
270
- def test_eucjp_split_to_char_with_crlf()
313
+
314
+ def test_eucjp_split_to_char_with_crlf
271
315
  doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
272
- expected = ["日","本","語","a"," ","b","\r\n"].map{|c|NKF.nkf("--euc",c)}
316
+ expected = ["日", "本", "語", "a", " ", "b", "\r\n"].map { |c| NKF.nkf("--euc", c) }
273
317
  assert_equal(expected, doc.split_to_char)
274
318
  end
275
- def test_eucjp_count_char()
319
+
320
+ def test_eucjp_count_char
276
321
  doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
277
322
  expected = 7
278
323
  assert_equal(expected, doc.count_char)
279
324
  end
280
- def test_eucjp_count_latin_graph_char()
325
+
326
+ def test_eucjp_count_latin_graph_char
281
327
  doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
282
328
  expected = 2
283
329
  assert_equal(expected, doc.count_latin_graph_char)
284
330
  end
285
- def test_eucjp_count_ja_graph_char()
331
+
332
+ def test_eucjp_count_ja_graph_char
286
333
  doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
287
334
  expected = 3
288
335
  assert_equal(expected, doc.count_ja_graph_char)
289
336
  end
290
- def test_eucjp_count_graph_char()
337
+
338
+ def test_eucjp_count_graph_char
291
339
  doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
292
340
  expected = 5
293
341
  assert_equal(expected, doc.count_graph_char)
294
342
  end
295
- def test_eucjp_count_latin_blank_char()
343
+
344
+ def test_eucjp_count_latin_blank_char
296
345
  doc = Document.new(NKF.nkf("--euc", "日本語\ta b\r\n"))
297
346
  expected = 2
298
347
  assert_equal(expected, doc.count_latin_blank_char)
299
348
  end
300
- def test_eucjp_count_ja_blank_char()
349
+
350
+ def test_eucjp_count_ja_blank_char
301
351
  doc = Document.new(NKF.nkf("--euc", "日本 語\ta b\r\n"))
302
352
  expected = 1
303
353
  assert_equal(expected, doc.count_ja_blank_char)
304
354
  end
305
- def test_eucjp_count_blank_char()
355
+
356
+ def test_eucjp_count_blank_char
306
357
  doc = Document.new(NKF.nkf("--euc", "日本 語\ta b\r\n"))
307
358
  expected = 3
308
359
  assert_equal(expected, doc.count_blank_char)
309
360
  end
310
- def test_eucjp_count_word()
361
+
362
+ def test_eucjp_count_word
311
363
  doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
312
364
  expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
313
365
  assert_equal(expected, doc.count_word)
314
366
  end
315
- def test_eucjp_count_ja_word()
367
+
368
+ def test_eucjp_count_ja_word
316
369
  doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
317
370
  expected = 3
318
371
  assert_equal(expected, doc.count_ja_word)
319
372
  end
320
- def test_eucjp_count_latin_valid_word()
373
+
374
+ def test_eucjp_count_latin_valid_word
321
375
  doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
322
376
  expected = 2
323
377
  assert_equal(expected, doc.count_latin_valid_word)
324
378
  end
325
- def test_eucjp_count_ja_valid_word()
379
+
380
+ def test_eucjp_count_ja_valid_word
326
381
  doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
327
382
  expected = 2
328
383
  assert_equal(expected, doc.count_ja_valid_word)
329
384
  end
330
- def test_eucjp_count_valid_word()
385
+
386
+ def test_eucjp_count_valid_word
331
387
  doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
332
388
  expected = 4
333
389
  assert_equal(expected, doc.count_valid_word)
334
390
  end
335
- def test_eucjp_count_line()
391
+
392
+ def test_eucjp_count_line
336
393
  doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
337
394
  expected = 6
338
395
  assert_equal(expected, doc.count_line)
339
396
  end
340
- def test_eucjp_count_graph_line()
397
+
398
+ def test_eucjp_count_graph_line
341
399
  doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
342
400
  expected = 3
343
401
  assert_equal(expected, doc.count_graph_line)
344
402
  end
345
- def test_eucjp_count_empty_line()
403
+
404
+ def test_eucjp_count_empty_line
346
405
  doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
347
406
  expected = 1
348
407
  assert_equal(expected, doc.count_empty_line)
349
408
  end
350
- def test_eucjp_count_blank_line()
409
+
410
+ def test_eucjp_count_blank_line
351
411
  doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
352
412
  expected = 2
353
413
  assert_equal(expected, doc.count_blank_line)
354
414
  end
355
415
 
356
416
  # test SJIS module
357
- def test_sjis_split_to_word()
417
+ def test_sjis_split_to_word
358
418
  doc = Document.new(NKF.nkf("--sjis", "日本語の文字foo bar"))
359
- expected = ["日本語の", "文字", "foo ", "bar"].map{|c|NKF.nkf("--sjis",c)}
419
+ expected = ["日本語の", "文字", "foo ", "bar"].map { |c| NKF.nkf("--sjis", c) }
360
420
  assert_equal(expected, doc.split_to_word)
361
421
  end
362
- def test_sjisplit_s_to_word_kanhira()
422
+
423
+ def test_sjisplit_s_to_word_kanhira
363
424
  doc = Document.new(NKF.nkf("--sjis", "日本語の文字"))
364
- expected = ["日本語の", "文字"].map{|c| NKF.nkf("--sjis", c)}
425
+ expected = ["日本語の", "文字"].map { |c| NKF.nkf("--sjis", c) }
365
426
  assert_equal(expected, doc.split_to_word)
366
427
  end
367
- def test_sjis_split_to_word_katahira()
428
+
429
+ def test_sjis_split_to_word_katahira
368
430
  doc = Document.new(NKF.nkf("--sjis", "カタカナの文字"))
369
- expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--sjis", c)}
431
+ expected = ["カタカナの", "文字"].map { |c| NKF.nkf("--sjis", c) }
370
432
  assert_equal(expected, doc.split_to_word)
371
433
  end
372
- def test_sjis_split_to_word_kataonbiki()
434
+
435
+ def test_sjis_split_to_word_kataonbiki
373
436
  doc = Document.new(NKF.nkf("--sjis", "ルビーの指輪"))
374
- expected = ["ルビーの", "指輪"].map{|c| NKF.nkf("--sjis", c)}
437
+ expected = ["ルビーの", "指輪"].map { |c| NKF.nkf("--sjis", c) }
375
438
  assert_equal(expected, doc.split_to_word)
376
439
  end
377
- def test_sjis_split_to_word_hiraonbiki()
440
+
441
+ def test_sjis_split_to_word_hiraonbiki
378
442
  doc = Document.new(NKF.nkf("--sjis", "わールビーだ"))
379
- expected = ["わー", "ルビーだ"].map{|c| NKF.nkf("--sjis", c)}
443
+ expected = ["わー", "ルビーだ"].map { |c| NKF.nkf("--sjis", c) }
380
444
  assert_equal(expected, doc.split_to_word)
381
445
  end
382
- def test_sjis_split_to_word_latinmix()
446
+
447
+ def test_sjis_split_to_word_latinmix
383
448
  doc = Document.new(NKF.nkf("--sjis", "日本語とLatinの文字"))
384
- expected = ["日本語と","Latin","の","文字"].map{|c| NKF.nkf("--sjis", c)}
449
+ expected = ["日本語と", "Latin", "の", "文字"].map { |c| NKF.nkf("--sjis", c) }
385
450
  assert_equal(expected, doc.split_to_word)
386
451
  end
387
- def test_sjis_split_to_char()
452
+
453
+ def test_sjis_split_to_char
388
454
  doc = Document.new(NKF.nkf("--sjis", "表計算a b"))
389
- expected = ["表","計","算","a"," ","b"].map{|c|NKF.nkf("--sjis",c)}
455
+ expected = ["表", "計", "算", "a", " ", "b"].map { |c| NKF.nkf("--sjis", c) }
390
456
  assert_equal(expected, doc.split_to_char)
391
457
  end
392
- def test_sjis_split_to_char_with_cr()
458
+
459
+ def test_sjis_split_to_char_with_cr
393
460
  doc = Document.new(NKF.nkf("--sjis", "表計算a b\r"))
394
- expected = ["表","計","算","a"," ","b","\r"].map{|c|NKF.nkf("--sjis",c)}
461
+ expected = ["表", "計", "算", "a", " ", "b", "\r"].map { |c| NKF.nkf("--sjis", c) }
395
462
  assert_equal(expected, doc.split_to_char)
396
463
  end
397
- def test_sjis_split_to_char_with_lf()
464
+
465
+ def test_sjis_split_to_char_with_lf
398
466
  doc = Document.new(NKF.nkf("--sjis", "表計算a b\n"))
399
- expected = ["表","計","算","a"," ","b","\n"].map{|c|NKF.nkf("--sjis",c)}
467
+ expected = ["表", "計", "算", "a", " ", "b", "\n"].map { |c| NKF.nkf("--sjis", c) }
400
468
  assert_equal(expected, doc.split_to_char)
401
469
  end
402
- def test_sjis_split_to_char_with_crlf()
470
+
471
+ def test_sjis_split_to_char_with_crlf
403
472
  doc = Document.new(NKF.nkf("--sjis", "表計算a b\r\n"))
404
- expected = ["表","計","算","a"," ","b","\r\n"].map{|c|NKF.nkf("--sjis",c)}
473
+ expected = ["表", "計", "算", "a", " ", "b", "\r\n"].map { |c| NKF.nkf("--sjis", c) }
405
474
  assert_equal(expected, doc.split_to_char)
406
475
  end
407
- def test_sjis_count_char()
476
+
477
+ def test_sjis_count_char
408
478
  doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
409
479
  expected = 7
410
480
  assert_equal(expected, doc.count_char)
411
481
  end
412
- def test_sjis_count_latin_graph_char()
482
+
483
+ def test_sjis_count_latin_graph_char
413
484
  doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
414
485
  expected = 2
415
486
  assert_equal(expected, doc.count_latin_graph_char)
416
487
  end
417
- def test_sjis_count_ja_graph_char()
488
+
489
+ def test_sjis_count_ja_graph_char
418
490
  doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
419
491
  expected = 3
420
492
  assert_equal(expected, doc.count_ja_graph_char)
421
493
  end
422
- def test_sjis_count_graph_char()
494
+
495
+ def test_sjis_count_graph_char
423
496
  doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
424
497
  expected = 5
425
498
  assert_equal(expected, doc.count_graph_char)
426
499
  end
427
- def test_sjis_count_latin_blank_char()
500
+
501
+ def test_sjis_count_latin_blank_char
428
502
  doc = Document.new(NKF.nkf("--sjis", "日本語\ta b\r\n"))
429
503
  expected = 2
430
504
  assert_equal(expected, doc.count_latin_blank_char)
431
505
  end
432
- def test_sjis_count_ja_blank_char()
506
+
507
+ def test_sjis_count_ja_blank_char
433
508
  doc = Document.new(NKF.nkf("--sjis", "日本 語\ta b\r\n"))
434
509
  expected = 1
435
510
  assert_equal(expected, doc.count_ja_blank_char)
436
511
  end
437
- def test_sjis_count_blank_char()
512
+
513
+ def test_sjis_count_blank_char
438
514
  doc = Document.new(NKF.nkf("--sjis", "日本 語\ta b\r\n"))
439
515
  expected = 3
440
516
  assert_equal(expected, doc.count_blank_char)
441
517
  end
442
- def test_sjis_count_word()
518
+
519
+ def test_sjis_count_word
443
520
  doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
444
521
  expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
445
522
  assert_equal(expected, doc.count_word)
446
523
  end
447
- def test_sjis_count_ja_word()
524
+
525
+ def test_sjis_count_ja_word
448
526
  doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
449
527
  expected = 3
450
528
  assert_equal(expected, doc.count_ja_word)
451
529
  end
452
- def test_sjis_count_latin_valid_word()
530
+
531
+ def test_sjis_count_latin_valid_word
453
532
  doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
454
533
  expected = 2
455
534
  assert_equal(expected, doc.count_latin_valid_word)
456
535
  end
457
- def test_sjis_count_ja_valid_word()
536
+
537
+ def test_sjis_count_ja_valid_word
458
538
  doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
459
539
  expected = 2
460
540
  assert_equal(expected, doc.count_ja_valid_word)
461
541
  end
462
- def test_sjis_count_valid_word()
542
+
543
+ def test_sjis_count_valid_word
463
544
  doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
464
545
  expected = 4
465
546
  assert_equal(expected, doc.count_valid_word)
466
547
  end
467
- def test_sjis_count_line()
548
+
549
+ def test_sjis_count_line
468
550
  doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
469
551
  expected = 6
470
552
  assert_equal(expected, doc.count_line)
471
553
  end
472
- def test_sjis_count_graph_line()
554
+
555
+ def test_sjis_count_graph_line
473
556
  doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
474
557
  expected = 3
475
558
  assert_equal(expected, doc.count_graph_line)
476
559
  end
477
- def test_sjis_count_empty_line()
560
+
561
+ def test_sjis_count_empty_line
478
562
  doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
479
563
  expected = 1
480
564
  assert_equal(expected, doc.count_empty_line)
481
565
  end
482
- def test_sjis_count_blank_line()
566
+
567
+ def test_sjis_count_blank_line
483
568
  doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
484
569
  expected = 2
485
570
  assert_equal(expected, doc.count_blank_line)
486
571
  end
487
572
 
488
573
  # test UTF8 module
489
- def test_utf8_split_to_word()
574
+ def test_utf8_split_to_word
490
575
  doc = Document.new(NKF.nkf("--utf8", "日本語の文字foo bar"))
491
- expected = ["日本語の", "文字", "foo ", "bar"].map{|c| NKF.nkf("--utf8", c)}
576
+ expected = ["日本語の", "文字", "foo ", "bar"].map { |c| NKF.nkf("--utf8", c) }
492
577
  assert_equal(expected, doc.split_to_word)
493
578
  end
494
- def test_utf8_split_to_word_kanhira()
579
+
580
+ def test_utf8_split_to_word_kanhira
495
581
  doc = Document.new(NKF.nkf("--utf8", "日本語の文字"))
496
- expected = ["日本語の", "文字"].map{|c| NKF.nkf("--utf8", c)}
582
+ expected = ["日本語の", "文字"].map { |c| NKF.nkf("--utf8", c) }
497
583
  assert_equal(expected, doc.split_to_word)
498
584
  end
499
- def test_utf8_split_to_word_katahira()
585
+
586
+ def test_utf8_split_to_word_katahira
500
587
  doc = Document.new(NKF.nkf("--utf8", "カタカナの文字"))
501
- expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--utf8", c)}
588
+ expected = ["カタカナの", "文字"].map { |c| NKF.nkf("--utf8", c) }
502
589
  assert_equal(expected, doc.split_to_word)
503
590
  end
504
- def test_utf8_split_to_word_kataonbiki()
591
+
592
+ def test_utf8_split_to_word_kataonbiki
505
593
  doc = Document.new(NKF.nkf("--utf8", "ルビーの指輪"))
506
- expected = ["ルビーの", "指輪"].map{|c| NKF.nkf("--utf8", c)}
594
+ expected = ["ルビーの", "指輪"].map { |c| NKF.nkf("--utf8", c) }
507
595
  assert_equal(expected, doc.split_to_word)
508
596
  end
509
- def test_utf8_split_to_word_hiraonbiki()
597
+
598
+ def test_utf8_split_to_word_hiraonbiki
510
599
  doc = Document.new(NKF.nkf("--utf8", "わールビーだ"))
511
- expected = ["わー", "ルビーだ"].map{|c| NKF.nkf("--utf8", c)}
600
+ expected = ["わー", "ルビーだ"].map { |c| NKF.nkf("--utf8", c) }
512
601
  assert_equal(expected, doc.split_to_word)
513
602
  end
514
- def test_utf8_split_to_word_latinmix()
603
+
604
+ def test_utf8_split_to_word_latinmix
515
605
  doc = Document.new(NKF.nkf("--utf8", "日本語とLatinの文字"))
516
- expected = ["日本語と", "Latin", "の", "文字"].map{|c| NKF.nkf("--utf8", c)}
606
+ expected = ["日本語と", "Latin", "の", "文字"].map { |c| NKF.nkf("--utf8", c) }
517
607
  assert_equal(expected, doc.split_to_word)
518
608
  end
519
- def test_utf8_split_to_char()
609
+
610
+ def test_utf8_split_to_char
520
611
  doc = Document.new(NKF.nkf("--utf8", "日本語a b"), "UTF-8")
521
- expected = ["日", "本", "語", "a", " ", "b"].map{|c| NKF.nkf("--utf8", c)}
612
+ expected = ["日", "本", "語", "a", " ", "b"].map { |c| NKF.nkf("--utf8", c) }
522
613
  assert_equal(expected, doc.split_to_char)
523
614
  end
524
- def test_utf8_split_to_char_with_cr()
615
+
616
+ def test_utf8_split_to_char_with_cr
525
617
  doc = Document.new(NKF.nkf("--utf8", "日本語a b\r"), "UTF-8")
526
- expected = ["日","本","語","a"," ","b","\r"].map{|c| NKF.nkf("--utf8", c)}
618
+ expected = ["日", "本", "語", "a", " ", "b", "\r"].map { |c| NKF.nkf("--utf8", c) }
527
619
  assert_equal(expected, doc.split_to_char)
528
620
  end
529
- def test_utf8_split_to_char_with_lf()
621
+
622
+ def test_utf8_split_to_char_with_lf
530
623
  doc = Document.new(NKF.nkf("--utf8", "日本語a b\n"), "UTF-8")
531
- expected = ["日","本","語","a"," ","b","\n"].map{|c| NKF.nkf("--utf8", c)}
624
+ expected = ["日", "本", "語", "a", " ", "b", "\n"].map { |c| NKF.nkf("--utf8", c) }
532
625
  assert_equal(expected, doc.split_to_char)
533
626
  end
534
- def test_utf8_split_to_char_with_crlf()
627
+
628
+ def test_utf8_split_to_char_with_crlf
535
629
  doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
536
- expected = ["日","本","語","a"," ","b","\r\n"].map{|c| NKF.nkf("--utf8", c)}
630
+ expected = ["日", "本", "語", "a", " ", "b", "\r\n"].map { |c| NKF.nkf("--utf8", c) }
537
631
  assert_equal(expected, doc.split_to_char)
538
632
  end
539
- def test_utf8_count_char()
633
+
634
+ def test_utf8_count_char
540
635
  doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
541
636
  expected = 7
542
637
  assert_equal(expected, doc.count_char)
543
638
  end
544
- def test_utf8_count_latin_graph_char()
639
+
640
+ def test_utf8_count_latin_graph_char
545
641
  doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
546
642
  expected = 2
547
643
  assert_equal(expected, doc.count_latin_graph_char)
548
644
  end
549
- def test_utf8_count_ja_graph_char()
645
+
646
+ def test_utf8_count_ja_graph_char
550
647
  doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
551
648
  expected = 3
552
649
  assert_equal(expected, doc.count_ja_graph_char)
553
650
  end
554
- def test_utf8_count_graph_char()
651
+
652
+ def test_utf8_count_graph_char
555
653
  doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
556
654
  expected = 5
557
655
  assert_equal(expected, doc.count_graph_char)
558
656
  end
559
- def test_utf8_count_latin_blank_char()
657
+
658
+ def test_utf8_count_latin_blank_char
560
659
  doc = Document.new(NKF.nkf("--utf8", "日本語\ta b\r\n"))
561
660
  expected = 2
562
661
  assert_equal(expected, doc.count_latin_blank_char)
563
662
  end
564
- def test_utf8_count_ja_blank_char()
663
+
664
+ def test_utf8_count_ja_blank_char
565
665
  doc = Document.new(NKF.nkf("--utf8", "日本 語\ta b\r\n"))
566
666
  expected = 1
567
667
  assert_equal(expected, doc.count_ja_blank_char)
568
668
  end
569
- def test_utf8_count_blank_char()
669
+
670
+ def test_utf8_count_blank_char
570
671
  doc = Document.new(NKF.nkf("--utf8", "日本 語\ta b\r\n"))
571
672
  expected = 3
572
673
  assert_equal(expected, doc.count_blank_char)
573
674
  end
574
- def test_utf8_count_word()
675
+
676
+ def test_utf8_count_word
575
677
  doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
576
678
  expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
577
679
  assert_equal(expected, doc.count_word)
578
680
  end
579
- def test_utf8_count_ja_word()
681
+
682
+ def test_utf8_count_ja_word
580
683
  doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
581
684
  expected = 3
582
685
  assert_equal(expected, doc.count_ja_word)
583
686
  end
584
- def test_utf8_count_latin_valid_word()
687
+
688
+ def test_utf8_count_latin_valid_word
585
689
  doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
586
690
  expected = 2
587
691
  assert_equal(expected, doc.count_latin_valid_word)
588
692
  end
589
- def test_utf8_count_ja_valid_word()
693
+
694
+ def test_utf8_count_ja_valid_word
590
695
  doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
591
696
  expected = 2
592
697
  assert_equal(expected, doc.count_ja_valid_word)
593
698
  end
594
- def test_utf8_count_valid_word()
699
+
700
+ def test_utf8_count_valid_word
595
701
  doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
596
702
  expected = 4
597
703
  assert_equal(expected, doc.count_valid_word)
598
704
  end
599
- def test_utf8_count_line()
705
+
706
+ def test_utf8_count_line
600
707
  doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
601
708
  expected = 6
602
709
  assert_equal(expected, doc.count_line)
603
710
  end
604
- def test_utf8_count_graph_line()
711
+
712
+ def test_utf8_count_graph_line
605
713
  doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
606
714
  expected = 3
607
715
  assert_equal(expected, doc.count_graph_line)
608
716
  end
609
- def test_utf8_count_empty_line()
717
+
718
+ def test_utf8_count_empty_line
610
719
  doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
611
720
  expected = 1
612
721
  assert_equal(expected, doc.count_empty_line)
613
722
  end
614
- def test_utf8_count_blank_line()
723
+
724
+ def test_utf8_count_blank_line
615
725
  doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
616
726
  expected = 2
617
727
  assert_equal(expected, doc.count_blank_line)
618
728
  end
619
729
 
620
-
621
-
622
-
623
- def teardown()
624
- #
730
+ def teardown
625
731
  end
626
-
627
732
  end