docdiff 0.6.5 → 0.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +7 -7
- data/Guardfile +4 -4
- data/Makefile +1 -1
- data/Rakefile +6 -6
- data/bin/docdiff +1 -1
- data/devutil/Rakefile +12 -5
- data/devutil/char_by_charclass.rb +43 -20
- data/devutil/charclass_by_char.rb +40 -19
- data/devutil/jis0208.rb +263 -231
- data/devutil/jis0208_test.rb +196 -0
- data/doc/news.md +8 -0
- data/docdiff.gemspec +12 -10
- data/lib/doc_diff.rb +59 -60
- data/lib/docdiff/charstring.rb +225 -241
- data/lib/docdiff/cli.rb +285 -250
- data/lib/docdiff/diff/contours.rb +1 -1
- data/lib/docdiff/diff/editscript.rb +1 -1
- data/lib/docdiff/diff/rcsdiff.rb +1 -1
- data/lib/docdiff/diff/shortestpath.rb +1 -1
- data/lib/docdiff/diff/speculative.rb +1 -1
- data/lib/docdiff/diff/subsequence.rb +1 -1
- data/lib/docdiff/diff/unidiff.rb +1 -1
- data/lib/docdiff/diff.rb +1 -1
- data/lib/docdiff/difference.rb +71 -70
- data/lib/docdiff/document.rb +129 -109
- data/lib/docdiff/encoding/en_ascii.rb +64 -58
- data/lib/docdiff/encoding/ja_eucjp.rb +250 -235
- data/lib/docdiff/encoding/ja_sjis.rb +240 -226
- data/lib/docdiff/encoding/ja_utf8.rb +6952 -6939
- data/lib/docdiff/version.rb +1 -1
- data/lib/docdiff/view.rb +522 -438
- data/lib/docdiff.rb +2 -2
- data/test/charstring_test.rb +475 -351
- data/test/cli_test.rb +103 -101
- data/test/diff_test.rb +15 -16
- data/test/difference_test.rb +40 -31
- data/test/docdiff_test.rb +162 -136
- data/test/document_test.rb +280 -175
- data/test/test_helper.rb +2 -1
- data/test/view_test.rb +636 -497
- metadata +8 -8
- data/devutil/testjis0208.rb +0 -38
data/test/document_test.rb
CHANGED
|
@@ -3,625 +3,730 @@
|
|
|
3
3
|
|
|
4
4
|
# frozen_string_literal: false
|
|
5
5
|
|
|
6
|
-
require
|
|
7
|
-
require
|
|
8
|
-
require
|
|
6
|
+
require "test/unit"
|
|
7
|
+
require "docdiff/document"
|
|
8
|
+
require "nkf"
|
|
9
9
|
|
|
10
|
-
class
|
|
10
|
+
class TestDocument < Test::Unit::TestCase
|
|
11
11
|
Document = DocDiff::Document
|
|
12
12
|
CharString = DocDiff::CharString
|
|
13
13
|
|
|
14
|
-
def setup
|
|
15
|
-
#
|
|
14
|
+
def setup
|
|
16
15
|
end
|
|
17
16
|
|
|
18
|
-
def test_encoding
|
|
17
|
+
def test_encoding
|
|
19
18
|
doc = Document.new("Foo bar.\nBaz quux.")
|
|
20
|
-
doc.encoding =
|
|
21
|
-
doc.eol =
|
|
22
|
-
expected =
|
|
19
|
+
doc.encoding = "US-ASCII"
|
|
20
|
+
doc.eol = "LF"
|
|
21
|
+
expected = "US-ASCII"
|
|
23
22
|
assert_equal(expected, doc.encoding)
|
|
24
23
|
end
|
|
25
|
-
|
|
24
|
+
|
|
25
|
+
def test_encoding_auto
|
|
26
26
|
doc = Document.new("Foo bar.\nBaz quux.".encode("US-ASCII"))
|
|
27
|
-
expected =
|
|
27
|
+
expected = "US-ASCII"
|
|
28
28
|
assert_equal(expected, doc.encoding)
|
|
29
29
|
end
|
|
30
|
-
|
|
30
|
+
|
|
31
|
+
def test_eol
|
|
31
32
|
doc = Document.new("Foo bar.\nBaz quux.")
|
|
32
|
-
doc.encoding =
|
|
33
|
-
doc.eol =
|
|
34
|
-
expected =
|
|
33
|
+
doc.encoding = "US-ASCII"
|
|
34
|
+
doc.eol = "LF"
|
|
35
|
+
expected = "LF"
|
|
35
36
|
assert_equal(expected, doc.eol)
|
|
36
37
|
end
|
|
37
|
-
|
|
38
|
+
|
|
39
|
+
def test_eol_auto_lf
|
|
38
40
|
doc = Document.new("Foo bar.\nBaz quux.")
|
|
39
|
-
expected =
|
|
41
|
+
expected = "LF"
|
|
40
42
|
assert_equal(expected, doc.eol)
|
|
41
43
|
end
|
|
42
|
-
|
|
44
|
+
|
|
45
|
+
def test_eol_auto_none
|
|
43
46
|
doc = Document.new("Foo bar.")
|
|
44
47
|
expected = "NONE"
|
|
45
48
|
assert_equal(expected, doc.eol)
|
|
46
49
|
end
|
|
47
|
-
|
|
50
|
+
|
|
51
|
+
def test_eol_char_lf
|
|
48
52
|
doc = Document.new("Foo bar.\nBaz quux.")
|
|
49
|
-
#
|
|
50
|
-
#
|
|
53
|
+
# doc.encoding = "US-ASCII"
|
|
54
|
+
# doc.eol = "LF"
|
|
51
55
|
expected = "\n"
|
|
52
56
|
assert_equal(expected, doc.eol_char)
|
|
53
57
|
end
|
|
54
|
-
|
|
58
|
+
|
|
59
|
+
def test_split_by_line
|
|
55
60
|
doc = Document.new("Hello, my name is Watanabe.\nI am just another Ruby porter.\n")
|
|
56
61
|
expected = ["Hello, my name is Watanabe.\n", "I am just another Ruby porter.\n"]
|
|
57
62
|
assert_equal(expected, doc.split_to_line)
|
|
58
63
|
end
|
|
59
64
|
|
|
60
65
|
# test eol split_to_line() method
|
|
61
|
-
def test_cr_split_to_line
|
|
66
|
+
def test_cr_split_to_line
|
|
62
67
|
doc = Document.new("foo\rbar\r")
|
|
63
68
|
expected = ["foo\r", "bar\r"]
|
|
64
69
|
assert_equal(expected, doc.split_to_line)
|
|
65
70
|
end
|
|
66
|
-
|
|
71
|
+
|
|
72
|
+
def test_cr_split_to_line_chomped_lastline
|
|
67
73
|
doc = Document.new("foo\rbar")
|
|
68
74
|
expected = ["foo\r", "bar"]
|
|
69
75
|
assert_equal(expected, doc.split_to_line)
|
|
70
76
|
end
|
|
71
|
-
|
|
77
|
+
|
|
78
|
+
def test_cr_split_to_line_empty_line
|
|
72
79
|
doc = Document.new("foo\r\rbar\r")
|
|
73
80
|
expected = ["foo\r", "\r", "bar\r"]
|
|
74
81
|
assert_equal(expected, doc.split_to_line)
|
|
75
82
|
end
|
|
76
|
-
|
|
83
|
+
|
|
84
|
+
def test_lf_split_to_line
|
|
77
85
|
doc = Document.new("foo\nbar\n")
|
|
78
86
|
expected = ["foo\n", "bar\n"]
|
|
79
87
|
assert_equal(expected, doc.split_to_line)
|
|
80
88
|
end
|
|
81
|
-
|
|
89
|
+
|
|
90
|
+
def test_lf_split_to_line_chomped_lastline
|
|
82
91
|
doc = Document.new("foo\nbar")
|
|
83
92
|
expected = ["foo\n", "bar"]
|
|
84
93
|
assert_equal(expected, doc.split_to_line)
|
|
85
94
|
end
|
|
86
|
-
|
|
95
|
+
|
|
96
|
+
def test_lf_split_to_line_empty_line
|
|
87
97
|
doc = Document.new("foo\n\nbar\n")
|
|
88
98
|
expected = ["foo\n", "\n", "bar\n"]
|
|
89
99
|
assert_equal(expected, doc.split_to_line)
|
|
90
100
|
end
|
|
91
|
-
|
|
101
|
+
|
|
102
|
+
def test_crlf_split_to_line
|
|
92
103
|
doc = Document.new("foo\r\nbar\r\n")
|
|
93
104
|
expected = ["foo\r\n", "bar\r\n"]
|
|
94
105
|
assert_equal(expected, doc.split_to_line)
|
|
95
106
|
end
|
|
96
|
-
|
|
107
|
+
|
|
108
|
+
def test_crlf_split_to_line_chomped_lastline
|
|
97
109
|
doc = Document.new("foo\r\nbar")
|
|
98
110
|
expected = ["foo\r\n", "bar"]
|
|
99
111
|
assert_equal(expected, doc.split_to_line)
|
|
100
112
|
end
|
|
101
|
-
|
|
113
|
+
|
|
114
|
+
def test_crlf_split_to_line_empty_line
|
|
102
115
|
doc = Document.new("foo\r\n\r\nbar\r\n")
|
|
103
116
|
expected = ["foo\r\n", "\r\n", "bar\r\n"]
|
|
104
117
|
assert_equal(expected, doc.split_to_line)
|
|
105
118
|
end
|
|
106
119
|
|
|
107
120
|
# test ASCII module
|
|
108
|
-
def test_ascii_split_to_word
|
|
121
|
+
def test_ascii_split_to_word
|
|
109
122
|
doc = Document.new("foo bar")
|
|
110
123
|
expected = ["foo ", "bar"]
|
|
111
124
|
assert_equal(expected, doc.split_to_word)
|
|
112
125
|
end
|
|
113
|
-
|
|
126
|
+
|
|
127
|
+
def test_ascii_split_to_word_withsymbol
|
|
114
128
|
doc = Document.new("foo (bar) baz-baz")
|
|
115
129
|
expected = ["foo ", "(bar) ", "baz-baz"]
|
|
116
130
|
assert_equal(expected, doc.split_to_word)
|
|
117
131
|
end
|
|
118
|
-
|
|
132
|
+
|
|
133
|
+
def test_ascii_split_to_word_withquote
|
|
119
134
|
doc = Document.new("foo's 'foo' \"bar\" 'baz.'")
|
|
120
135
|
expected = ["foo's ", "'foo' ", "\"bar\" ", "'baz.'"]
|
|
121
136
|
assert_equal(expected, doc.split_to_word)
|
|
122
137
|
end
|
|
123
|
-
|
|
138
|
+
|
|
139
|
+
def test_ascii_split_to_word_withlongspace
|
|
124
140
|
doc = Document.new(" foo bar")
|
|
125
141
|
expected = [" ", "foo ", " ", "bar"]
|
|
126
142
|
assert_equal(expected, doc.split_to_word)
|
|
127
143
|
end
|
|
128
|
-
|
|
144
|
+
|
|
145
|
+
def test_ascii_split_to_word_withdash
|
|
129
146
|
doc = Document.new("foo -- bar, baz - quux")
|
|
130
147
|
expected = ["foo ", "-- ", "bar, ", "baz ", "- ", "quux"]
|
|
131
148
|
assert_equal(expected, doc.split_to_word)
|
|
132
149
|
end
|
|
133
|
-
|
|
150
|
+
|
|
151
|
+
def test_ascii_split_to_char
|
|
134
152
|
doc = Document.new("foo bar")
|
|
135
|
-
expected = ["f","o","o"," ","b","a","r"]
|
|
153
|
+
expected = ["f", "o", "o", " ", "b", "a", "r"]
|
|
136
154
|
assert_equal(expected, doc.split_to_char)
|
|
137
155
|
end
|
|
138
|
-
|
|
156
|
+
|
|
157
|
+
def test_ascii_split_to_char_with_eol_cr
|
|
139
158
|
doc = Document.new("foo bar\r")
|
|
140
|
-
expected = ["f","o","o"," ","b","a","r","\r"]
|
|
159
|
+
expected = ["f", "o", "o", " ", "b", "a", "r", "\r"]
|
|
141
160
|
assert_equal(expected, doc.split_to_char)
|
|
142
161
|
end
|
|
143
|
-
|
|
162
|
+
|
|
163
|
+
def test_ascii_split_to_char_with_eol_lf
|
|
144
164
|
doc = Document.new("foo bar\n")
|
|
145
|
-
expected = ["f","o","o"," ","b","a","r","\n"]
|
|
165
|
+
expected = ["f", "o", "o", " ", "b", "a", "r", "\n"]
|
|
146
166
|
assert_equal(expected, doc.split_to_char)
|
|
147
167
|
end
|
|
148
|
-
|
|
168
|
+
|
|
169
|
+
def test_ascii_split_to_char_with_eol_crlf
|
|
149
170
|
doc = Document.new("foo bar\r\n")
|
|
150
|
-
expected = ["f","o","o"," ","b","a","r","\r\n"]
|
|
171
|
+
expected = ["f", "o", "o", " ", "b", "a", "r", "\r\n"]
|
|
151
172
|
assert_equal(expected, doc.split_to_char)
|
|
152
173
|
end
|
|
153
|
-
|
|
174
|
+
|
|
175
|
+
def test_ascii_split_to_byte
|
|
154
176
|
doc = Document.new("foo bar\r\n")
|
|
155
|
-
expected = ["f","o","o"," ","b","a","r","\r","\n"]
|
|
177
|
+
expected = ["f", "o", "o", " ", "b", "a", "r", "\r", "\n"]
|
|
156
178
|
assert_equal(expected, doc.split_to_byte)
|
|
157
179
|
end
|
|
158
|
-
|
|
180
|
+
|
|
181
|
+
def test_ascii_count_byte
|
|
159
182
|
doc = Document.new("foo bar\r\n")
|
|
160
183
|
expected = 9
|
|
161
184
|
assert_equal(expected, doc.count_byte)
|
|
162
185
|
end
|
|
163
|
-
|
|
186
|
+
|
|
187
|
+
def test_ascii_count_char
|
|
164
188
|
doc = Document.new("foo bar\r\nbaz quux\r\n")
|
|
165
189
|
expected = 17
|
|
166
190
|
assert_equal(expected, doc.count_char)
|
|
167
191
|
end
|
|
168
|
-
|
|
192
|
+
|
|
193
|
+
def test_ascii_count_latin_graph_char
|
|
169
194
|
doc = Document.new("foo bar\r\nbaz quux\r\n")
|
|
170
195
|
expected = 13
|
|
171
196
|
assert_equal(expected, doc.count_latin_graph_char)
|
|
172
197
|
end
|
|
173
|
-
|
|
198
|
+
|
|
199
|
+
def test_ascii_count_graph_char
|
|
174
200
|
doc = Document.new("foo bar\r\nbaz quux\r\n")
|
|
175
201
|
expected = 13
|
|
176
202
|
assert_equal(expected, doc.count_graph_char)
|
|
177
203
|
end
|
|
178
|
-
|
|
204
|
+
|
|
205
|
+
def test_ascii_count_latin_blank_char
|
|
179
206
|
doc = Document.new("foo bar\r\nbaz\tquux\r\n")
|
|
180
207
|
expected = 2
|
|
181
208
|
assert_equal(expected, doc.count_latin_blank_char)
|
|
182
209
|
end
|
|
183
|
-
|
|
210
|
+
|
|
211
|
+
def test_ascii_count_blank_char
|
|
184
212
|
doc = Document.new("foo bar\r\nbaz\tquux\r\n")
|
|
185
213
|
expected = 2
|
|
186
214
|
assert_equal(expected, doc.count_blank_char)
|
|
187
215
|
end
|
|
188
|
-
|
|
216
|
+
|
|
217
|
+
def test_ascii_count_word
|
|
189
218
|
doc = Document.new("foo bar \r\nbaz quux\r\n")
|
|
190
219
|
expected = 6
|
|
191
220
|
assert_equal(expected, doc.count_word)
|
|
192
221
|
end
|
|
193
|
-
|
|
222
|
+
|
|
223
|
+
def test_ascii_count_latin_word
|
|
194
224
|
doc = Document.new("foo bar \r\nbaz quux\r\n")
|
|
195
|
-
expected = 5
|
|
225
|
+
expected = 5 # " " is also counted as a word
|
|
196
226
|
assert_equal(expected, doc.count_latin_word)
|
|
197
227
|
end
|
|
198
|
-
|
|
228
|
+
|
|
229
|
+
def test_ascii_count_latin_valid_word
|
|
199
230
|
doc = Document.new("1 foo \r\n%%% ()\r\n")
|
|
200
231
|
expected = 2
|
|
201
232
|
assert_equal(expected, doc.count_latin_valid_word)
|
|
202
233
|
end
|
|
203
|
-
|
|
234
|
+
|
|
235
|
+
def test_ascii_count_line
|
|
204
236
|
doc = Document.new("foo\r\nbar")
|
|
205
237
|
expected = 2
|
|
206
238
|
assert_equal(expected, doc.count_line)
|
|
207
239
|
end
|
|
208
|
-
|
|
240
|
+
|
|
241
|
+
def test_ascii_count_graph_line
|
|
209
242
|
doc = Document.new("foo\r\n ")
|
|
210
243
|
expected = 1
|
|
211
244
|
assert_equal(expected, doc.count_graph_line)
|
|
212
245
|
end
|
|
213
|
-
|
|
246
|
+
|
|
247
|
+
def test_ascii_count_empty_line
|
|
214
248
|
doc = Document.new("foo\r\n \r\n\t\r\n\r\n")
|
|
215
249
|
expected = 1
|
|
216
250
|
assert_equal(expected, doc.count_empty_line)
|
|
217
251
|
end
|
|
218
|
-
|
|
252
|
+
|
|
253
|
+
def test_ascii_count_blank_line
|
|
219
254
|
doc = Document.new("\r\n \r\n\t\r\n ")
|
|
220
255
|
expected = 3
|
|
221
256
|
assert_equal(expected, doc.count_blank_line)
|
|
222
257
|
end
|
|
223
258
|
|
|
224
259
|
# test EUCJP module
|
|
225
|
-
def test_eucjp_split_to_word
|
|
260
|
+
def test_eucjp_split_to_word
|
|
226
261
|
doc = Document.new(NKF.nkf("--euc", "日本語の文字foo bar"))
|
|
227
|
-
expected = ["日本語の","文字","foo ","bar"].map{|c| NKF.nkf("--euc", c)}
|
|
262
|
+
expected = ["日本語の", "文字", "foo ", "bar"].map { |c| NKF.nkf("--euc", c) }
|
|
228
263
|
assert_equal(expected, doc.split_to_word)
|
|
229
264
|
end
|
|
230
|
-
|
|
265
|
+
|
|
266
|
+
def test_eucjp_split_to_word_kanhira
|
|
231
267
|
doc = Document.new(NKF.nkf("--euc", "日本語の文字"))
|
|
232
|
-
expected = ["日本語の", "文字"].map{|c| NKF.nkf("--euc", c)}
|
|
268
|
+
expected = ["日本語の", "文字"].map { |c| NKF.nkf("--euc", c) }
|
|
233
269
|
assert_equal(expected, doc.split_to_word)
|
|
234
270
|
end
|
|
235
|
-
|
|
271
|
+
|
|
272
|
+
def test_eucjp_split_to_word_katahira
|
|
236
273
|
doc = Document.new(NKF.nkf("--euc", "カタカナの文字"))
|
|
237
|
-
expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--euc", c)}
|
|
274
|
+
expected = ["カタカナの", "文字"].map { |c| NKF.nkf("--euc", c) }
|
|
238
275
|
assert_equal(expected, doc.split_to_word)
|
|
239
276
|
end
|
|
240
|
-
|
|
277
|
+
|
|
278
|
+
def test_eucjp_split_to_word_kataonbiki
|
|
241
279
|
doc = Document.new(NKF.nkf("--euc", "ルビー色の石"), "EUC-JP")
|
|
242
|
-
expected = ["ルビー", "色の", "石"].map{|c| NKF.nkf("--euc", c)}
|
|
280
|
+
expected = ["ルビー", "色の", "石"].map { |c| NKF.nkf("--euc", c) }
|
|
243
281
|
assert_equal(expected, doc.split_to_word)
|
|
244
282
|
end
|
|
245
|
-
|
|
283
|
+
|
|
284
|
+
def test_eucjp_split_to_word_hiraonbiki
|
|
246
285
|
doc = Document.new(NKF.nkf("--euc", "わールビーだ"), "EUC-JP")
|
|
247
|
-
expected =
|
|
286
|
+
expected = ["わー", "ルビーだ"].map { |c| NKF.nkf("--euc", c) }
|
|
248
287
|
assert_equal(expected, doc.split_to_word)
|
|
249
288
|
end
|
|
250
|
-
|
|
289
|
+
|
|
290
|
+
def test_eucjp_split_to_word_latinmix
|
|
251
291
|
doc = Document.new(NKF.nkf("--euc", "日本語とLatinの文字"))
|
|
252
|
-
expected = ["日本語と", "Latin", "の", "文字"].map{|c| NKF.nkf("--euc", c)}
|
|
292
|
+
expected = ["日本語と", "Latin", "の", "文字"].map { |c| NKF.nkf("--euc", c) }
|
|
253
293
|
assert_equal(expected, doc.split_to_word)
|
|
254
294
|
end
|
|
255
|
-
|
|
295
|
+
|
|
296
|
+
def test_eucjp_split_to_char
|
|
256
297
|
doc = Document.new(NKF.nkf("--euc", "日本語a b"))
|
|
257
|
-
expected = ["日","本","語","a"," ","b"].map{|c|NKF.nkf("--euc",c)}
|
|
298
|
+
expected = ["日", "本", "語", "a", " ", "b"].map { |c| NKF.nkf("--euc", c) }
|
|
258
299
|
assert_equal(expected, doc.split_to_char)
|
|
259
300
|
end
|
|
260
|
-
|
|
301
|
+
|
|
302
|
+
def test_eucjp_split_to_char_with_cr
|
|
261
303
|
doc = Document.new(NKF.nkf("--euc", "日本語a b\r"))
|
|
262
|
-
expected = ["日","本","語","a"," ","b","\r"].map{|c|NKF.nkf("--euc",c)}
|
|
304
|
+
expected = ["日", "本", "語", "a", " ", "b", "\r"].map { |c| NKF.nkf("--euc", c) }
|
|
263
305
|
assert_equal(expected, doc.split_to_char)
|
|
264
306
|
end
|
|
265
|
-
|
|
307
|
+
|
|
308
|
+
def test_eucjp_split_to_char_with_lf
|
|
266
309
|
doc = Document.new(NKF.nkf("--euc", "日本語a b\n"))
|
|
267
|
-
expected = ["日","本","語","a"," ","b","\n"].map{|c|NKF.nkf("--euc",c)}
|
|
310
|
+
expected = ["日", "本", "語", "a", " ", "b", "\n"].map { |c| NKF.nkf("--euc", c) }
|
|
268
311
|
assert_equal(expected, doc.split_to_char)
|
|
269
312
|
end
|
|
270
|
-
|
|
313
|
+
|
|
314
|
+
def test_eucjp_split_to_char_with_crlf
|
|
271
315
|
doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
|
|
272
|
-
expected = ["日","本","語","a"," ","b","\r\n"].map{|c|NKF.nkf("--euc",c)}
|
|
316
|
+
expected = ["日", "本", "語", "a", " ", "b", "\r\n"].map { |c| NKF.nkf("--euc", c) }
|
|
273
317
|
assert_equal(expected, doc.split_to_char)
|
|
274
318
|
end
|
|
275
|
-
|
|
319
|
+
|
|
320
|
+
def test_eucjp_count_char
|
|
276
321
|
doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
|
|
277
322
|
expected = 7
|
|
278
323
|
assert_equal(expected, doc.count_char)
|
|
279
324
|
end
|
|
280
|
-
|
|
325
|
+
|
|
326
|
+
def test_eucjp_count_latin_graph_char
|
|
281
327
|
doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
|
|
282
328
|
expected = 2
|
|
283
329
|
assert_equal(expected, doc.count_latin_graph_char)
|
|
284
330
|
end
|
|
285
|
-
|
|
331
|
+
|
|
332
|
+
def test_eucjp_count_ja_graph_char
|
|
286
333
|
doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
|
|
287
334
|
expected = 3
|
|
288
335
|
assert_equal(expected, doc.count_ja_graph_char)
|
|
289
336
|
end
|
|
290
|
-
|
|
337
|
+
|
|
338
|
+
def test_eucjp_count_graph_char
|
|
291
339
|
doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
|
|
292
340
|
expected = 5
|
|
293
341
|
assert_equal(expected, doc.count_graph_char)
|
|
294
342
|
end
|
|
295
|
-
|
|
343
|
+
|
|
344
|
+
def test_eucjp_count_latin_blank_char
|
|
296
345
|
doc = Document.new(NKF.nkf("--euc", "日本語\ta b\r\n"))
|
|
297
346
|
expected = 2
|
|
298
347
|
assert_equal(expected, doc.count_latin_blank_char)
|
|
299
348
|
end
|
|
300
|
-
|
|
349
|
+
|
|
350
|
+
def test_eucjp_count_ja_blank_char
|
|
301
351
|
doc = Document.new(NKF.nkf("--euc", "日本 語\ta b\r\n"))
|
|
302
352
|
expected = 1
|
|
303
353
|
assert_equal(expected, doc.count_ja_blank_char)
|
|
304
354
|
end
|
|
305
|
-
|
|
355
|
+
|
|
356
|
+
def test_eucjp_count_blank_char
|
|
306
357
|
doc = Document.new(NKF.nkf("--euc", "日本 語\ta b\r\n"))
|
|
307
358
|
expected = 3
|
|
308
359
|
assert_equal(expected, doc.count_blank_char)
|
|
309
360
|
end
|
|
310
|
-
|
|
361
|
+
|
|
362
|
+
def test_eucjp_count_word
|
|
311
363
|
doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
|
|
312
364
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
313
365
|
assert_equal(expected, doc.count_word)
|
|
314
366
|
end
|
|
315
|
-
|
|
367
|
+
|
|
368
|
+
def test_eucjp_count_ja_word
|
|
316
369
|
doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
|
|
317
370
|
expected = 3
|
|
318
371
|
assert_equal(expected, doc.count_ja_word)
|
|
319
372
|
end
|
|
320
|
-
|
|
373
|
+
|
|
374
|
+
def test_eucjp_count_latin_valid_word
|
|
321
375
|
doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
|
|
322
376
|
expected = 2
|
|
323
377
|
assert_equal(expected, doc.count_latin_valid_word)
|
|
324
378
|
end
|
|
325
|
-
|
|
379
|
+
|
|
380
|
+
def test_eucjp_count_ja_valid_word
|
|
326
381
|
doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
|
|
327
382
|
expected = 2
|
|
328
383
|
assert_equal(expected, doc.count_ja_valid_word)
|
|
329
384
|
end
|
|
330
|
-
|
|
385
|
+
|
|
386
|
+
def test_eucjp_count_valid_word
|
|
331
387
|
doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
|
|
332
388
|
expected = 4
|
|
333
389
|
assert_equal(expected, doc.count_valid_word)
|
|
334
390
|
end
|
|
335
|
-
|
|
391
|
+
|
|
392
|
+
def test_eucjp_count_line
|
|
336
393
|
doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
337
394
|
expected = 6
|
|
338
395
|
assert_equal(expected, doc.count_line)
|
|
339
396
|
end
|
|
340
|
-
|
|
397
|
+
|
|
398
|
+
def test_eucjp_count_graph_line
|
|
341
399
|
doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
342
400
|
expected = 3
|
|
343
401
|
assert_equal(expected, doc.count_graph_line)
|
|
344
402
|
end
|
|
345
|
-
|
|
403
|
+
|
|
404
|
+
def test_eucjp_count_empty_line
|
|
346
405
|
doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
347
406
|
expected = 1
|
|
348
407
|
assert_equal(expected, doc.count_empty_line)
|
|
349
408
|
end
|
|
350
|
-
|
|
409
|
+
|
|
410
|
+
def test_eucjp_count_blank_line
|
|
351
411
|
doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
352
412
|
expected = 2
|
|
353
413
|
assert_equal(expected, doc.count_blank_line)
|
|
354
414
|
end
|
|
355
415
|
|
|
356
416
|
# test SJIS module
|
|
357
|
-
def test_sjis_split_to_word
|
|
417
|
+
def test_sjis_split_to_word
|
|
358
418
|
doc = Document.new(NKF.nkf("--sjis", "日本語の文字foo bar"))
|
|
359
|
-
expected = ["日本語の", "文字", "foo ", "bar"].map{|c|NKF.nkf("--sjis",c)}
|
|
419
|
+
expected = ["日本語の", "文字", "foo ", "bar"].map { |c| NKF.nkf("--sjis", c) }
|
|
360
420
|
assert_equal(expected, doc.split_to_word)
|
|
361
421
|
end
|
|
362
|
-
|
|
422
|
+
|
|
423
|
+
def test_sjisplit_s_to_word_kanhira
|
|
363
424
|
doc = Document.new(NKF.nkf("--sjis", "日本語の文字"))
|
|
364
|
-
expected = ["日本語の", "文字"].map{|c| NKF.nkf("--sjis", c)}
|
|
425
|
+
expected = ["日本語の", "文字"].map { |c| NKF.nkf("--sjis", c) }
|
|
365
426
|
assert_equal(expected, doc.split_to_word)
|
|
366
427
|
end
|
|
367
|
-
|
|
428
|
+
|
|
429
|
+
def test_sjis_split_to_word_katahira
|
|
368
430
|
doc = Document.new(NKF.nkf("--sjis", "カタカナの文字"))
|
|
369
|
-
expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--sjis", c)}
|
|
431
|
+
expected = ["カタカナの", "文字"].map { |c| NKF.nkf("--sjis", c) }
|
|
370
432
|
assert_equal(expected, doc.split_to_word)
|
|
371
433
|
end
|
|
372
|
-
|
|
434
|
+
|
|
435
|
+
def test_sjis_split_to_word_kataonbiki
|
|
373
436
|
doc = Document.new(NKF.nkf("--sjis", "ルビーの指輪"))
|
|
374
|
-
expected = ["ルビーの", "指輪"].map{|c| NKF.nkf("--sjis", c)}
|
|
437
|
+
expected = ["ルビーの", "指輪"].map { |c| NKF.nkf("--sjis", c) }
|
|
375
438
|
assert_equal(expected, doc.split_to_word)
|
|
376
439
|
end
|
|
377
|
-
|
|
440
|
+
|
|
441
|
+
def test_sjis_split_to_word_hiraonbiki
|
|
378
442
|
doc = Document.new(NKF.nkf("--sjis", "わールビーだ"))
|
|
379
|
-
expected = ["わー", "ルビーだ"].map{|c| NKF.nkf("--sjis", c)}
|
|
443
|
+
expected = ["わー", "ルビーだ"].map { |c| NKF.nkf("--sjis", c) }
|
|
380
444
|
assert_equal(expected, doc.split_to_word)
|
|
381
445
|
end
|
|
382
|
-
|
|
446
|
+
|
|
447
|
+
def test_sjis_split_to_word_latinmix
|
|
383
448
|
doc = Document.new(NKF.nkf("--sjis", "日本語とLatinの文字"))
|
|
384
|
-
expected = ["日本語と","Latin","の","文字"].map{|c| NKF.nkf("--sjis", c)}
|
|
449
|
+
expected = ["日本語と", "Latin", "の", "文字"].map { |c| NKF.nkf("--sjis", c) }
|
|
385
450
|
assert_equal(expected, doc.split_to_word)
|
|
386
451
|
end
|
|
387
|
-
|
|
452
|
+
|
|
453
|
+
def test_sjis_split_to_char
|
|
388
454
|
doc = Document.new(NKF.nkf("--sjis", "表計算a b"))
|
|
389
|
-
expected = ["表","計","算","a"," ","b"].map{|c|NKF.nkf("--sjis",c)}
|
|
455
|
+
expected = ["表", "計", "算", "a", " ", "b"].map { |c| NKF.nkf("--sjis", c) }
|
|
390
456
|
assert_equal(expected, doc.split_to_char)
|
|
391
457
|
end
|
|
392
|
-
|
|
458
|
+
|
|
459
|
+
def test_sjis_split_to_char_with_cr
|
|
393
460
|
doc = Document.new(NKF.nkf("--sjis", "表計算a b\r"))
|
|
394
|
-
expected = ["表","計","算","a"," ","b","\r"].map{|c|NKF.nkf("--sjis",c)}
|
|
461
|
+
expected = ["表", "計", "算", "a", " ", "b", "\r"].map { |c| NKF.nkf("--sjis", c) }
|
|
395
462
|
assert_equal(expected, doc.split_to_char)
|
|
396
463
|
end
|
|
397
|
-
|
|
464
|
+
|
|
465
|
+
def test_sjis_split_to_char_with_lf
|
|
398
466
|
doc = Document.new(NKF.nkf("--sjis", "表計算a b\n"))
|
|
399
|
-
expected = ["表","計","算","a"," ","b","\n"].map{|c|NKF.nkf("--sjis",c)}
|
|
467
|
+
expected = ["表", "計", "算", "a", " ", "b", "\n"].map { |c| NKF.nkf("--sjis", c) }
|
|
400
468
|
assert_equal(expected, doc.split_to_char)
|
|
401
469
|
end
|
|
402
|
-
|
|
470
|
+
|
|
471
|
+
def test_sjis_split_to_char_with_crlf
|
|
403
472
|
doc = Document.new(NKF.nkf("--sjis", "表計算a b\r\n"))
|
|
404
|
-
expected = ["表","計","算","a"," ","b","\r\n"].map{|c|NKF.nkf("--sjis",c)}
|
|
473
|
+
expected = ["表", "計", "算", "a", " ", "b", "\r\n"].map { |c| NKF.nkf("--sjis", c) }
|
|
405
474
|
assert_equal(expected, doc.split_to_char)
|
|
406
475
|
end
|
|
407
|
-
|
|
476
|
+
|
|
477
|
+
def test_sjis_count_char
|
|
408
478
|
doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
|
|
409
479
|
expected = 7
|
|
410
480
|
assert_equal(expected, doc.count_char)
|
|
411
481
|
end
|
|
412
|
-
|
|
482
|
+
|
|
483
|
+
def test_sjis_count_latin_graph_char
|
|
413
484
|
doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
|
|
414
485
|
expected = 2
|
|
415
486
|
assert_equal(expected, doc.count_latin_graph_char)
|
|
416
487
|
end
|
|
417
|
-
|
|
488
|
+
|
|
489
|
+
def test_sjis_count_ja_graph_char
|
|
418
490
|
doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
|
|
419
491
|
expected = 3
|
|
420
492
|
assert_equal(expected, doc.count_ja_graph_char)
|
|
421
493
|
end
|
|
422
|
-
|
|
494
|
+
|
|
495
|
+
def test_sjis_count_graph_char
|
|
423
496
|
doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
|
|
424
497
|
expected = 5
|
|
425
498
|
assert_equal(expected, doc.count_graph_char)
|
|
426
499
|
end
|
|
427
|
-
|
|
500
|
+
|
|
501
|
+
def test_sjis_count_latin_blank_char
|
|
428
502
|
doc = Document.new(NKF.nkf("--sjis", "日本語\ta b\r\n"))
|
|
429
503
|
expected = 2
|
|
430
504
|
assert_equal(expected, doc.count_latin_blank_char)
|
|
431
505
|
end
|
|
432
|
-
|
|
506
|
+
|
|
507
|
+
def test_sjis_count_ja_blank_char
|
|
433
508
|
doc = Document.new(NKF.nkf("--sjis", "日本 語\ta b\r\n"))
|
|
434
509
|
expected = 1
|
|
435
510
|
assert_equal(expected, doc.count_ja_blank_char)
|
|
436
511
|
end
|
|
437
|
-
|
|
512
|
+
|
|
513
|
+
def test_sjis_count_blank_char
|
|
438
514
|
doc = Document.new(NKF.nkf("--sjis", "日本 語\ta b\r\n"))
|
|
439
515
|
expected = 3
|
|
440
516
|
assert_equal(expected, doc.count_blank_char)
|
|
441
517
|
end
|
|
442
|
-
|
|
518
|
+
|
|
519
|
+
def test_sjis_count_word
|
|
443
520
|
doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
|
|
444
521
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
445
522
|
assert_equal(expected, doc.count_word)
|
|
446
523
|
end
|
|
447
|
-
|
|
524
|
+
|
|
525
|
+
def test_sjis_count_ja_word
|
|
448
526
|
doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
|
|
449
527
|
expected = 3
|
|
450
528
|
assert_equal(expected, doc.count_ja_word)
|
|
451
529
|
end
|
|
452
|
-
|
|
530
|
+
|
|
531
|
+
def test_sjis_count_latin_valid_word
|
|
453
532
|
doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
|
|
454
533
|
expected = 2
|
|
455
534
|
assert_equal(expected, doc.count_latin_valid_word)
|
|
456
535
|
end
|
|
457
|
-
|
|
536
|
+
|
|
537
|
+
def test_sjis_count_ja_valid_word
|
|
458
538
|
doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
|
|
459
539
|
expected = 2
|
|
460
540
|
assert_equal(expected, doc.count_ja_valid_word)
|
|
461
541
|
end
|
|
462
|
-
|
|
542
|
+
|
|
543
|
+
def test_sjis_count_valid_word
|
|
463
544
|
doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
|
|
464
545
|
expected = 4
|
|
465
546
|
assert_equal(expected, doc.count_valid_word)
|
|
466
547
|
end
|
|
467
|
-
|
|
548
|
+
|
|
549
|
+
def test_sjis_count_line
|
|
468
550
|
doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
469
551
|
expected = 6
|
|
470
552
|
assert_equal(expected, doc.count_line)
|
|
471
553
|
end
|
|
472
|
-
|
|
554
|
+
|
|
555
|
+
def test_sjis_count_graph_line
|
|
473
556
|
doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
474
557
|
expected = 3
|
|
475
558
|
assert_equal(expected, doc.count_graph_line)
|
|
476
559
|
end
|
|
477
|
-
|
|
560
|
+
|
|
561
|
+
def test_sjis_count_empty_line
|
|
478
562
|
doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
479
563
|
expected = 1
|
|
480
564
|
assert_equal(expected, doc.count_empty_line)
|
|
481
565
|
end
|
|
482
|
-
|
|
566
|
+
|
|
567
|
+
def test_sjis_count_blank_line
|
|
483
568
|
doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
484
569
|
expected = 2
|
|
485
570
|
assert_equal(expected, doc.count_blank_line)
|
|
486
571
|
end
|
|
487
572
|
|
|
488
573
|
# test UTF8 module
|
|
489
|
-
def test_utf8_split_to_word
|
|
574
|
+
def test_utf8_split_to_word
|
|
490
575
|
doc = Document.new(NKF.nkf("--utf8", "日本語の文字foo bar"))
|
|
491
|
-
expected = ["日本語の", "文字", "foo ", "bar"].map{|c| NKF.nkf("--utf8", c)}
|
|
576
|
+
expected = ["日本語の", "文字", "foo ", "bar"].map { |c| NKF.nkf("--utf8", c) }
|
|
492
577
|
assert_equal(expected, doc.split_to_word)
|
|
493
578
|
end
|
|
494
|
-
|
|
579
|
+
|
|
580
|
+
def test_utf8_split_to_word_kanhira
|
|
495
581
|
doc = Document.new(NKF.nkf("--utf8", "日本語の文字"))
|
|
496
|
-
expected = ["日本語の", "文字"].map{|c| NKF.nkf("--utf8", c)}
|
|
582
|
+
expected = ["日本語の", "文字"].map { |c| NKF.nkf("--utf8", c) }
|
|
497
583
|
assert_equal(expected, doc.split_to_word)
|
|
498
584
|
end
|
|
499
|
-
|
|
585
|
+
|
|
586
|
+
def test_utf8_split_to_word_katahira
|
|
500
587
|
doc = Document.new(NKF.nkf("--utf8", "カタカナの文字"))
|
|
501
|
-
expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--utf8", c)}
|
|
588
|
+
expected = ["カタカナの", "文字"].map { |c| NKF.nkf("--utf8", c) }
|
|
502
589
|
assert_equal(expected, doc.split_to_word)
|
|
503
590
|
end
|
|
504
|
-
|
|
591
|
+
|
|
592
|
+
def test_utf8_split_to_word_kataonbiki
|
|
505
593
|
doc = Document.new(NKF.nkf("--utf8", "ルビーの指輪"))
|
|
506
|
-
expected = ["ルビーの", "指輪"].map{|c| NKF.nkf("--utf8", c)}
|
|
594
|
+
expected = ["ルビーの", "指輪"].map { |c| NKF.nkf("--utf8", c) }
|
|
507
595
|
assert_equal(expected, doc.split_to_word)
|
|
508
596
|
end
|
|
509
|
-
|
|
597
|
+
|
|
598
|
+
def test_utf8_split_to_word_hiraonbiki
|
|
510
599
|
doc = Document.new(NKF.nkf("--utf8", "わールビーだ"))
|
|
511
|
-
expected = ["わー", "ルビーだ"].map{|c| NKF.nkf("--utf8", c)}
|
|
600
|
+
expected = ["わー", "ルビーだ"].map { |c| NKF.nkf("--utf8", c) }
|
|
512
601
|
assert_equal(expected, doc.split_to_word)
|
|
513
602
|
end
|
|
514
|
-
|
|
603
|
+
|
|
604
|
+
def test_utf8_split_to_word_latinmix
|
|
515
605
|
doc = Document.new(NKF.nkf("--utf8", "日本語とLatinの文字"))
|
|
516
|
-
expected = ["日本語と", "Latin", "の", "文字"].map{|c| NKF.nkf("--utf8", c)}
|
|
606
|
+
expected = ["日本語と", "Latin", "の", "文字"].map { |c| NKF.nkf("--utf8", c) }
|
|
517
607
|
assert_equal(expected, doc.split_to_word)
|
|
518
608
|
end
|
|
519
|
-
|
|
609
|
+
|
|
610
|
+
def test_utf8_split_to_char
|
|
520
611
|
doc = Document.new(NKF.nkf("--utf8", "日本語a b"), "UTF-8")
|
|
521
|
-
expected = ["日", "本", "語", "a", " ", "b"].map{|c| NKF.nkf("--utf8", c)}
|
|
612
|
+
expected = ["日", "本", "語", "a", " ", "b"].map { |c| NKF.nkf("--utf8", c) }
|
|
522
613
|
assert_equal(expected, doc.split_to_char)
|
|
523
614
|
end
|
|
524
|
-
|
|
615
|
+
|
|
616
|
+
def test_utf8_split_to_char_with_cr
|
|
525
617
|
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r"), "UTF-8")
|
|
526
|
-
expected = ["日","本","語","a"," ","b","\r"].map{|c| NKF.nkf("--utf8", c)}
|
|
618
|
+
expected = ["日", "本", "語", "a", " ", "b", "\r"].map { |c| NKF.nkf("--utf8", c) }
|
|
527
619
|
assert_equal(expected, doc.split_to_char)
|
|
528
620
|
end
|
|
529
|
-
|
|
621
|
+
|
|
622
|
+
def test_utf8_split_to_char_with_lf
|
|
530
623
|
doc = Document.new(NKF.nkf("--utf8", "日本語a b\n"), "UTF-8")
|
|
531
|
-
expected = ["日","本","語","a"," ","b","\n"].map{|c| NKF.nkf("--utf8", c)}
|
|
624
|
+
expected = ["日", "本", "語", "a", " ", "b", "\n"].map { |c| NKF.nkf("--utf8", c) }
|
|
532
625
|
assert_equal(expected, doc.split_to_char)
|
|
533
626
|
end
|
|
534
|
-
|
|
627
|
+
|
|
628
|
+
def test_utf8_split_to_char_with_crlf
|
|
535
629
|
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
|
|
536
|
-
expected = ["日","本","語","a"," ","b","\r\n"].map{|c| NKF.nkf("--utf8", c)}
|
|
630
|
+
expected = ["日", "本", "語", "a", " ", "b", "\r\n"].map { |c| NKF.nkf("--utf8", c) }
|
|
537
631
|
assert_equal(expected, doc.split_to_char)
|
|
538
632
|
end
|
|
539
|
-
|
|
633
|
+
|
|
634
|
+
def test_utf8_count_char
|
|
540
635
|
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
|
|
541
636
|
expected = 7
|
|
542
637
|
assert_equal(expected, doc.count_char)
|
|
543
638
|
end
|
|
544
|
-
|
|
639
|
+
|
|
640
|
+
def test_utf8_count_latin_graph_char
|
|
545
641
|
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
|
|
546
642
|
expected = 2
|
|
547
643
|
assert_equal(expected, doc.count_latin_graph_char)
|
|
548
644
|
end
|
|
549
|
-
|
|
645
|
+
|
|
646
|
+
def test_utf8_count_ja_graph_char
|
|
550
647
|
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
|
|
551
648
|
expected = 3
|
|
552
649
|
assert_equal(expected, doc.count_ja_graph_char)
|
|
553
650
|
end
|
|
554
|
-
|
|
651
|
+
|
|
652
|
+
def test_utf8_count_graph_char
|
|
555
653
|
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
|
|
556
654
|
expected = 5
|
|
557
655
|
assert_equal(expected, doc.count_graph_char)
|
|
558
656
|
end
|
|
559
|
-
|
|
657
|
+
|
|
658
|
+
def test_utf8_count_latin_blank_char
|
|
560
659
|
doc = Document.new(NKF.nkf("--utf8", "日本語\ta b\r\n"))
|
|
561
660
|
expected = 2
|
|
562
661
|
assert_equal(expected, doc.count_latin_blank_char)
|
|
563
662
|
end
|
|
564
|
-
|
|
663
|
+
|
|
664
|
+
def test_utf8_count_ja_blank_char
|
|
565
665
|
doc = Document.new(NKF.nkf("--utf8", "日本 語\ta b\r\n"))
|
|
566
666
|
expected = 1
|
|
567
667
|
assert_equal(expected, doc.count_ja_blank_char)
|
|
568
668
|
end
|
|
569
|
-
|
|
669
|
+
|
|
670
|
+
def test_utf8_count_blank_char
|
|
570
671
|
doc = Document.new(NKF.nkf("--utf8", "日本 語\ta b\r\n"))
|
|
571
672
|
expected = 3
|
|
572
673
|
assert_equal(expected, doc.count_blank_char)
|
|
573
674
|
end
|
|
574
|
-
|
|
675
|
+
|
|
676
|
+
def test_utf8_count_word
|
|
575
677
|
doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
|
|
576
678
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
577
679
|
assert_equal(expected, doc.count_word)
|
|
578
680
|
end
|
|
579
|
-
|
|
681
|
+
|
|
682
|
+
def test_utf8_count_ja_word
|
|
580
683
|
doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
|
|
581
684
|
expected = 3
|
|
582
685
|
assert_equal(expected, doc.count_ja_word)
|
|
583
686
|
end
|
|
584
|
-
|
|
687
|
+
|
|
688
|
+
def test_utf8_count_latin_valid_word
|
|
585
689
|
doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
|
|
586
690
|
expected = 2
|
|
587
691
|
assert_equal(expected, doc.count_latin_valid_word)
|
|
588
692
|
end
|
|
589
|
-
|
|
693
|
+
|
|
694
|
+
def test_utf8_count_ja_valid_word
|
|
590
695
|
doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
|
|
591
696
|
expected = 2
|
|
592
697
|
assert_equal(expected, doc.count_ja_valid_word)
|
|
593
698
|
end
|
|
594
|
-
|
|
699
|
+
|
|
700
|
+
def test_utf8_count_valid_word
|
|
595
701
|
doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
|
|
596
702
|
expected = 4
|
|
597
703
|
assert_equal(expected, doc.count_valid_word)
|
|
598
704
|
end
|
|
599
|
-
|
|
705
|
+
|
|
706
|
+
def test_utf8_count_line
|
|
600
707
|
doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
601
708
|
expected = 6
|
|
602
709
|
assert_equal(expected, doc.count_line)
|
|
603
710
|
end
|
|
604
|
-
|
|
711
|
+
|
|
712
|
+
def test_utf8_count_graph_line
|
|
605
713
|
doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
606
714
|
expected = 3
|
|
607
715
|
assert_equal(expected, doc.count_graph_line)
|
|
608
716
|
end
|
|
609
|
-
|
|
717
|
+
|
|
718
|
+
def test_utf8_count_empty_line
|
|
610
719
|
doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
611
720
|
expected = 1
|
|
612
721
|
assert_equal(expected, doc.count_empty_line)
|
|
613
722
|
end
|
|
614
|
-
|
|
723
|
+
|
|
724
|
+
def test_utf8_count_blank_line
|
|
615
725
|
doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
616
726
|
expected = 2
|
|
617
727
|
assert_equal(expected, doc.count_blank_line)
|
|
618
728
|
end
|
|
619
729
|
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
def teardown()
|
|
624
|
-
#
|
|
730
|
+
def teardown
|
|
625
731
|
end
|
|
626
|
-
|
|
627
732
|
end
|