docdiff 0.6.7 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +69 -36
- data/README_ja.md +70 -37
- data/doc/example/docdiff.conf.example +3 -0
- data/doc/img/screenshot-html.png +0 -0
- data/doc/img/screenshot-tty-char.png +0 -0
- data/doc/img/screenshot-tty-digest-block.png +0 -0
- data/doc/img/screenshot-tty-digest-license-block.png +0 -0
- data/doc/img/screenshot-tty-digest-license.png +0 -0
- data/doc/img/screenshot-tty-digest.png +0 -0
- data/doc/img/screenshot-tty-en-ja.png +0 -0
- data/doc/img/screenshot-tty-manued.png +0 -0
- data/doc/img/screenshot-tty-wdiff.png +0 -0
- data/doc/img/screenshot-tty-word-char.png +0 -0
- data/doc/man/docdiff.adoc +3 -3
- data/doc/news.md +11 -0
- data/docdiff.gemspec +1 -1
- data/lib/doc_diff.rb +5 -5
- data/lib/docdiff/charstring.rb +36 -40
- data/lib/docdiff/cli.rb +23 -10
- data/lib/docdiff/document.rb +44 -44
- data/lib/docdiff/encoding/en_ascii.rb +4 -4
- data/lib/docdiff/version.rb +1 -1
- data/lib/docdiff/view.rb +27 -13
- data/test/charstring_test.rb +221 -221
- data/test/cli_test.rb +12 -12
- data/test/document_test.rb +223 -223
- data/test/fixture/humpty_dumpty_01_en.txt +4 -0
- data/test/fixture/humpty_dumpty_01_ja.txt +4 -0
- data/test/fixture/{humpty_dumpty01_ascii_lf.txt → humpty_dumpty_02_en.txt} +2 -2
- data/test/fixture/humpty_dumpty_02_ja.txt +4 -0
- data/test/view_test.rb +38 -31
- metadata +26 -14
- data/test/fixture/humpty_dumpty02_ascii_lf.txt +0 -4
- /data/doc/img/{screenshot-format-html-digest-firefox.png → old/screenshot-format-html-digest-firefox.png} +0 -0
- /data/doc/img/{screenshot-format-html-firefox.png → old/screenshot-format-html-firefox.png} +0 -0
- /data/doc/img/{screenshot-format-tty-cmdexe-en.png → old/screenshot-format-tty-cmdexe-en.png} +0 -0
- /data/doc/img/{screenshot-format-tty-cmdexe-ja.png → old/screenshot-format-tty-cmdexe-ja.png} +0 -0
- /data/doc/img/{screenshot-format-tty-rxvtunicode-en.png → old/screenshot-format-tty-rxvtunicode-en.png} +0 -0
- /data/doc/img/{screenshot-format-tty-rxvtunicode-ja.png → old/screenshot-format-tty-rxvtunicode-ja.png} +0 -0
- /data/doc/img/{screenshot-format-tty-xterm-en.png → old/screenshot-format-tty-xterm-en.png} +0 -0
- /data/doc/img/{screenshot-format-tty-xterm-ja.png → old/screenshot-format-tty-xterm-ja.png} +0 -0
- /data/doc/img/{screenshot-resolution-linewordchar-xterm.png → old/screenshot-resolution-linewordchar-xterm.png} +0 -0
data/test/document_test.rb
CHANGED
|
@@ -56,675 +56,675 @@ class TestDocument < Test::Unit::TestCase
|
|
|
56
56
|
assert_equal(expected, doc.eol_char)
|
|
57
57
|
end
|
|
58
58
|
|
|
59
|
-
def
|
|
59
|
+
def test_to_lines
|
|
60
60
|
doc = Document.new("Hello, my name is Watanabe.\nI am just another Ruby porter.\n")
|
|
61
61
|
expected = ["Hello, my name is Watanabe.\n", "I am just another Ruby porter.\n"]
|
|
62
|
-
assert_equal(expected, doc.
|
|
62
|
+
assert_equal(expected, doc.to_lines)
|
|
63
63
|
end
|
|
64
64
|
|
|
65
|
-
# test eol
|
|
66
|
-
def
|
|
65
|
+
# test eol to_lines method
|
|
66
|
+
def test_cr_to_lines
|
|
67
67
|
doc = Document.new("foo\rbar\r")
|
|
68
68
|
expected = ["foo\r", "bar\r"]
|
|
69
|
-
assert_equal(expected, doc.
|
|
69
|
+
assert_equal(expected, doc.to_lines)
|
|
70
70
|
end
|
|
71
71
|
|
|
72
|
-
def
|
|
72
|
+
def test_cr_to_lines_chomped_lastline
|
|
73
73
|
doc = Document.new("foo\rbar")
|
|
74
74
|
expected = ["foo\r", "bar"]
|
|
75
|
-
assert_equal(expected, doc.
|
|
75
|
+
assert_equal(expected, doc.to_lines)
|
|
76
76
|
end
|
|
77
77
|
|
|
78
|
-
def
|
|
78
|
+
def test_cr_to_lines_empty_line
|
|
79
79
|
doc = Document.new("foo\r\rbar\r")
|
|
80
80
|
expected = ["foo\r", "\r", "bar\r"]
|
|
81
|
-
assert_equal(expected, doc.
|
|
81
|
+
assert_equal(expected, doc.to_lines)
|
|
82
82
|
end
|
|
83
83
|
|
|
84
|
-
def
|
|
84
|
+
def test_lf_to_lines
|
|
85
85
|
doc = Document.new("foo\nbar\n")
|
|
86
86
|
expected = ["foo\n", "bar\n"]
|
|
87
|
-
assert_equal(expected, doc.
|
|
87
|
+
assert_equal(expected, doc.to_lines)
|
|
88
88
|
end
|
|
89
89
|
|
|
90
|
-
def
|
|
90
|
+
def test_lf_to_lines_chomped_lastline
|
|
91
91
|
doc = Document.new("foo\nbar")
|
|
92
92
|
expected = ["foo\n", "bar"]
|
|
93
|
-
assert_equal(expected, doc.
|
|
93
|
+
assert_equal(expected, doc.to_lines)
|
|
94
94
|
end
|
|
95
95
|
|
|
96
|
-
def
|
|
96
|
+
def test_lf_to_lines_empty_line
|
|
97
97
|
doc = Document.new("foo\n\nbar\n")
|
|
98
98
|
expected = ["foo\n", "\n", "bar\n"]
|
|
99
|
-
assert_equal(expected, doc.
|
|
99
|
+
assert_equal(expected, doc.to_lines)
|
|
100
100
|
end
|
|
101
101
|
|
|
102
|
-
def
|
|
102
|
+
def test_crlf_to_lines
|
|
103
103
|
doc = Document.new("foo\r\nbar\r\n")
|
|
104
104
|
expected = ["foo\r\n", "bar\r\n"]
|
|
105
|
-
assert_equal(expected, doc.
|
|
105
|
+
assert_equal(expected, doc.to_lines)
|
|
106
106
|
end
|
|
107
107
|
|
|
108
|
-
def
|
|
108
|
+
def test_crlf_to_lines_chomped_lastline
|
|
109
109
|
doc = Document.new("foo\r\nbar")
|
|
110
110
|
expected = ["foo\r\n", "bar"]
|
|
111
|
-
assert_equal(expected, doc.
|
|
111
|
+
assert_equal(expected, doc.to_lines)
|
|
112
112
|
end
|
|
113
113
|
|
|
114
|
-
def
|
|
114
|
+
def test_crlf_to_lines_empty_line
|
|
115
115
|
doc = Document.new("foo\r\n\r\nbar\r\n")
|
|
116
116
|
expected = ["foo\r\n", "\r\n", "bar\r\n"]
|
|
117
|
-
assert_equal(expected, doc.
|
|
117
|
+
assert_equal(expected, doc.to_lines)
|
|
118
118
|
end
|
|
119
119
|
|
|
120
120
|
# test ASCII module
|
|
121
|
-
def
|
|
121
|
+
def test_ascii_to_words
|
|
122
122
|
doc = Document.new("foo bar")
|
|
123
123
|
expected = ["foo ", "bar"]
|
|
124
|
-
assert_equal(expected, doc.
|
|
124
|
+
assert_equal(expected, doc.to_words)
|
|
125
125
|
end
|
|
126
126
|
|
|
127
|
-
def
|
|
127
|
+
def test_ascii_to_words_withsymbol
|
|
128
128
|
doc = Document.new("foo (bar) baz-baz")
|
|
129
129
|
expected = ["foo ", "(bar) ", "baz-baz"]
|
|
130
|
-
assert_equal(expected, doc.
|
|
130
|
+
assert_equal(expected, doc.to_words)
|
|
131
131
|
end
|
|
132
132
|
|
|
133
|
-
def
|
|
133
|
+
def test_ascii_to_words_withquote
|
|
134
134
|
doc = Document.new("foo's 'foo' \"bar\" 'baz.'")
|
|
135
135
|
expected = ["foo's ", "'foo' ", "\"bar\" ", "'baz.'"]
|
|
136
|
-
assert_equal(expected, doc.
|
|
136
|
+
assert_equal(expected, doc.to_words)
|
|
137
137
|
end
|
|
138
138
|
|
|
139
|
-
def
|
|
139
|
+
def test_ascii_to_words_withlongspace
|
|
140
140
|
doc = Document.new(" foo bar")
|
|
141
141
|
expected = [" ", "foo ", " ", "bar"]
|
|
142
|
-
assert_equal(expected, doc.
|
|
142
|
+
assert_equal(expected, doc.to_words)
|
|
143
143
|
end
|
|
144
144
|
|
|
145
|
-
def
|
|
145
|
+
def test_ascii_to_words_withdash
|
|
146
146
|
doc = Document.new("foo -- bar, baz - quux")
|
|
147
147
|
expected = ["foo ", "-- ", "bar, ", "baz ", "- ", "quux"]
|
|
148
|
-
assert_equal(expected, doc.
|
|
148
|
+
assert_equal(expected, doc.to_words)
|
|
149
149
|
end
|
|
150
150
|
|
|
151
|
-
def
|
|
151
|
+
def test_ascii_to_chars
|
|
152
152
|
doc = Document.new("foo bar")
|
|
153
153
|
expected = ["f", "o", "o", " ", "b", "a", "r"]
|
|
154
|
-
assert_equal(expected, doc.
|
|
154
|
+
assert_equal(expected, doc.to_chars)
|
|
155
155
|
end
|
|
156
156
|
|
|
157
|
-
def
|
|
157
|
+
def test_ascii_to_chars_with_eol_cr
|
|
158
158
|
doc = Document.new("foo bar\r")
|
|
159
159
|
expected = ["f", "o", "o", " ", "b", "a", "r", "\r"]
|
|
160
|
-
assert_equal(expected, doc.
|
|
160
|
+
assert_equal(expected, doc.to_chars)
|
|
161
161
|
end
|
|
162
162
|
|
|
163
|
-
def
|
|
163
|
+
def test_ascii_to_chars_with_eol_lf
|
|
164
164
|
doc = Document.new("foo bar\n")
|
|
165
165
|
expected = ["f", "o", "o", " ", "b", "a", "r", "\n"]
|
|
166
|
-
assert_equal(expected, doc.
|
|
166
|
+
assert_equal(expected, doc.to_chars)
|
|
167
167
|
end
|
|
168
168
|
|
|
169
|
-
def
|
|
169
|
+
def test_ascii_to_chars_with_eol_crlf
|
|
170
170
|
doc = Document.new("foo bar\r\n")
|
|
171
171
|
expected = ["f", "o", "o", " ", "b", "a", "r", "\r\n"]
|
|
172
|
-
assert_equal(expected, doc.
|
|
172
|
+
assert_equal(expected, doc.to_chars)
|
|
173
173
|
end
|
|
174
174
|
|
|
175
|
-
def
|
|
175
|
+
def test_ascii_to_bytes
|
|
176
176
|
doc = Document.new("foo bar\r\n")
|
|
177
177
|
expected = ["f", "o", "o", " ", "b", "a", "r", "\r", "\n"]
|
|
178
|
-
assert_equal(expected, doc.
|
|
178
|
+
assert_equal(expected, doc.to_bytes)
|
|
179
179
|
end
|
|
180
180
|
|
|
181
|
-
def
|
|
181
|
+
def test_ascii_count_bytes
|
|
182
182
|
doc = Document.new("foo bar\r\n")
|
|
183
183
|
expected = 9
|
|
184
|
-
assert_equal(expected, doc.
|
|
184
|
+
assert_equal(expected, doc.count_bytes)
|
|
185
185
|
end
|
|
186
186
|
|
|
187
|
-
def
|
|
187
|
+
def test_ascii_count_chars
|
|
188
188
|
doc = Document.new("foo bar\r\nbaz quux\r\n")
|
|
189
189
|
expected = 17
|
|
190
|
-
assert_equal(expected, doc.
|
|
190
|
+
assert_equal(expected, doc.count_chars)
|
|
191
191
|
end
|
|
192
192
|
|
|
193
|
-
def
|
|
193
|
+
def test_ascii_count_latin_graph_chars
|
|
194
194
|
doc = Document.new("foo bar\r\nbaz quux\r\n")
|
|
195
195
|
expected = 13
|
|
196
|
-
assert_equal(expected, doc.
|
|
196
|
+
assert_equal(expected, doc.count_latin_graph_chars)
|
|
197
197
|
end
|
|
198
198
|
|
|
199
|
-
def
|
|
199
|
+
def test_ascii_count_graph_chars
|
|
200
200
|
doc = Document.new("foo bar\r\nbaz quux\r\n")
|
|
201
201
|
expected = 13
|
|
202
|
-
assert_equal(expected, doc.
|
|
202
|
+
assert_equal(expected, doc.count_graph_chars)
|
|
203
203
|
end
|
|
204
204
|
|
|
205
|
-
def
|
|
205
|
+
def test_ascii_count_latin_blank_chars
|
|
206
206
|
doc = Document.new("foo bar\r\nbaz\tquux\r\n")
|
|
207
207
|
expected = 2
|
|
208
|
-
assert_equal(expected, doc.
|
|
208
|
+
assert_equal(expected, doc.count_latin_blank_chars)
|
|
209
209
|
end
|
|
210
210
|
|
|
211
|
-
def
|
|
211
|
+
def test_ascii_count_blank_chars
|
|
212
212
|
doc = Document.new("foo bar\r\nbaz\tquux\r\n")
|
|
213
213
|
expected = 2
|
|
214
|
-
assert_equal(expected, doc.
|
|
214
|
+
assert_equal(expected, doc.count_blank_chars)
|
|
215
215
|
end
|
|
216
216
|
|
|
217
|
-
def
|
|
217
|
+
def test_ascii_count_words
|
|
218
218
|
doc = Document.new("foo bar \r\nbaz quux\r\n")
|
|
219
219
|
expected = 6
|
|
220
|
-
assert_equal(expected, doc.
|
|
220
|
+
assert_equal(expected, doc.count_words)
|
|
221
221
|
end
|
|
222
222
|
|
|
223
|
-
def
|
|
223
|
+
def test_ascii_count_latin_words
|
|
224
224
|
doc = Document.new("foo bar \r\nbaz quux\r\n")
|
|
225
225
|
expected = 5 # " " is also counted as a word
|
|
226
|
-
assert_equal(expected, doc.
|
|
226
|
+
assert_equal(expected, doc.count_latin_words)
|
|
227
227
|
end
|
|
228
228
|
|
|
229
|
-
def
|
|
229
|
+
def test_ascii_count_latin_valid_words
|
|
230
230
|
doc = Document.new("1 foo \r\n%%% ()\r\n")
|
|
231
231
|
expected = 2
|
|
232
|
-
assert_equal(expected, doc.
|
|
232
|
+
assert_equal(expected, doc.count_latin_valid_words)
|
|
233
233
|
end
|
|
234
234
|
|
|
235
|
-
def
|
|
235
|
+
def test_ascii_count_lines
|
|
236
236
|
doc = Document.new("foo\r\nbar")
|
|
237
237
|
expected = 2
|
|
238
|
-
assert_equal(expected, doc.
|
|
238
|
+
assert_equal(expected, doc.count_lines)
|
|
239
239
|
end
|
|
240
240
|
|
|
241
|
-
def
|
|
241
|
+
def test_ascii_count_graph_lines
|
|
242
242
|
doc = Document.new("foo\r\n ")
|
|
243
243
|
expected = 1
|
|
244
|
-
assert_equal(expected, doc.
|
|
244
|
+
assert_equal(expected, doc.count_graph_lines)
|
|
245
245
|
end
|
|
246
246
|
|
|
247
|
-
def
|
|
247
|
+
def test_ascii_count_empty_lines
|
|
248
248
|
doc = Document.new("foo\r\n \r\n\t\r\n\r\n")
|
|
249
249
|
expected = 1
|
|
250
|
-
assert_equal(expected, doc.
|
|
250
|
+
assert_equal(expected, doc.count_empty_lines)
|
|
251
251
|
end
|
|
252
252
|
|
|
253
|
-
def
|
|
253
|
+
def test_ascii_count_blank_lines
|
|
254
254
|
doc = Document.new("\r\n \r\n\t\r\n ")
|
|
255
255
|
expected = 3
|
|
256
|
-
assert_equal(expected, doc.
|
|
256
|
+
assert_equal(expected, doc.count_blank_lines)
|
|
257
257
|
end
|
|
258
258
|
|
|
259
259
|
# test EUCJP module
|
|
260
|
-
def
|
|
260
|
+
def test_eucjp_to_words
|
|
261
261
|
doc = Document.new(NKF.nkf("--euc", "日本語の文字foo bar"))
|
|
262
262
|
expected = ["日本語の", "文字", "foo ", "bar"].map { |c| NKF.nkf("--euc", c) }
|
|
263
|
-
assert_equal(expected, doc.
|
|
263
|
+
assert_equal(expected, doc.to_words)
|
|
264
264
|
end
|
|
265
265
|
|
|
266
|
-
def
|
|
266
|
+
def test_eucjp_to_words_kanhira
|
|
267
267
|
doc = Document.new(NKF.nkf("--euc", "日本語の文字"))
|
|
268
268
|
expected = ["日本語の", "文字"].map { |c| NKF.nkf("--euc", c) }
|
|
269
|
-
assert_equal(expected, doc.
|
|
269
|
+
assert_equal(expected, doc.to_words)
|
|
270
270
|
end
|
|
271
271
|
|
|
272
|
-
def
|
|
272
|
+
def test_eucjp_to_words_katahira
|
|
273
273
|
doc = Document.new(NKF.nkf("--euc", "カタカナの文字"))
|
|
274
274
|
expected = ["カタカナの", "文字"].map { |c| NKF.nkf("--euc", c) }
|
|
275
|
-
assert_equal(expected, doc.
|
|
275
|
+
assert_equal(expected, doc.to_words)
|
|
276
276
|
end
|
|
277
277
|
|
|
278
|
-
def
|
|
278
|
+
def test_eucjp_to_words_kataonbiki
|
|
279
279
|
doc = Document.new(NKF.nkf("--euc", "ルビー色の石"), "EUC-JP")
|
|
280
280
|
expected = ["ルビー", "色の", "石"].map { |c| NKF.nkf("--euc", c) }
|
|
281
|
-
assert_equal(expected, doc.
|
|
281
|
+
assert_equal(expected, doc.to_words)
|
|
282
282
|
end
|
|
283
283
|
|
|
284
|
-
def
|
|
284
|
+
def test_eucjp_to_words_hiraonbiki
|
|
285
285
|
doc = Document.new(NKF.nkf("--euc", "わールビーだ"), "EUC-JP")
|
|
286
286
|
expected = ["わー", "ルビーだ"].map { |c| NKF.nkf("--euc", c) }
|
|
287
|
-
assert_equal(expected, doc.
|
|
287
|
+
assert_equal(expected, doc.to_words)
|
|
288
288
|
end
|
|
289
289
|
|
|
290
|
-
def
|
|
290
|
+
def test_eucjp_to_words_latinmix
|
|
291
291
|
doc = Document.new(NKF.nkf("--euc", "日本語とLatinの文字"))
|
|
292
292
|
expected = ["日本語と", "Latin", "の", "文字"].map { |c| NKF.nkf("--euc", c) }
|
|
293
|
-
assert_equal(expected, doc.
|
|
293
|
+
assert_equal(expected, doc.to_words)
|
|
294
294
|
end
|
|
295
295
|
|
|
296
|
-
def
|
|
296
|
+
def test_eucjp_to_chars
|
|
297
297
|
doc = Document.new(NKF.nkf("--euc", "日本語a b"))
|
|
298
298
|
expected = ["日", "本", "語", "a", " ", "b"].map { |c| NKF.nkf("--euc", c) }
|
|
299
|
-
assert_equal(expected, doc.
|
|
299
|
+
assert_equal(expected, doc.to_chars)
|
|
300
300
|
end
|
|
301
301
|
|
|
302
|
-
def
|
|
302
|
+
def test_eucjp_to_chars_with_cr
|
|
303
303
|
doc = Document.new(NKF.nkf("--euc", "日本語a b\r"))
|
|
304
304
|
expected = ["日", "本", "語", "a", " ", "b", "\r"].map { |c| NKF.nkf("--euc", c) }
|
|
305
|
-
assert_equal(expected, doc.
|
|
305
|
+
assert_equal(expected, doc.to_chars)
|
|
306
306
|
end
|
|
307
307
|
|
|
308
|
-
def
|
|
308
|
+
def test_eucjp_to_chars_with_lf
|
|
309
309
|
doc = Document.new(NKF.nkf("--euc", "日本語a b\n"))
|
|
310
310
|
expected = ["日", "本", "語", "a", " ", "b", "\n"].map { |c| NKF.nkf("--euc", c) }
|
|
311
|
-
assert_equal(expected, doc.
|
|
311
|
+
assert_equal(expected, doc.to_chars)
|
|
312
312
|
end
|
|
313
313
|
|
|
314
|
-
def
|
|
314
|
+
def test_eucjp_to_chars_with_crlf
|
|
315
315
|
doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
|
|
316
316
|
expected = ["日", "本", "語", "a", " ", "b", "\r\n"].map { |c| NKF.nkf("--euc", c) }
|
|
317
|
-
assert_equal(expected, doc.
|
|
317
|
+
assert_equal(expected, doc.to_chars)
|
|
318
318
|
end
|
|
319
319
|
|
|
320
|
-
def
|
|
320
|
+
def test_eucjp_count_chars
|
|
321
321
|
doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
|
|
322
322
|
expected = 7
|
|
323
|
-
assert_equal(expected, doc.
|
|
323
|
+
assert_equal(expected, doc.count_chars)
|
|
324
324
|
end
|
|
325
325
|
|
|
326
|
-
def
|
|
326
|
+
def test_eucjp_count_latin_graph_chars
|
|
327
327
|
doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
|
|
328
328
|
expected = 2
|
|
329
|
-
assert_equal(expected, doc.
|
|
329
|
+
assert_equal(expected, doc.count_latin_graph_chars)
|
|
330
330
|
end
|
|
331
331
|
|
|
332
|
-
def
|
|
332
|
+
def test_eucjp_count_ja_graph_chars
|
|
333
333
|
doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
|
|
334
334
|
expected = 3
|
|
335
|
-
assert_equal(expected, doc.
|
|
335
|
+
assert_equal(expected, doc.count_ja_graph_chars)
|
|
336
336
|
end
|
|
337
337
|
|
|
338
|
-
def
|
|
338
|
+
def test_eucjp_count_graph_chars
|
|
339
339
|
doc = Document.new(NKF.nkf("--euc", "日本語a b\r\n"))
|
|
340
340
|
expected = 5
|
|
341
|
-
assert_equal(expected, doc.
|
|
341
|
+
assert_equal(expected, doc.count_graph_chars)
|
|
342
342
|
end
|
|
343
343
|
|
|
344
|
-
def
|
|
344
|
+
def test_eucjp_count_latin_blank_chars
|
|
345
345
|
doc = Document.new(NKF.nkf("--euc", "日本語\ta b\r\n"))
|
|
346
346
|
expected = 2
|
|
347
|
-
assert_equal(expected, doc.
|
|
347
|
+
assert_equal(expected, doc.count_latin_blank_chars)
|
|
348
348
|
end
|
|
349
349
|
|
|
350
|
-
def
|
|
350
|
+
def test_eucjp_count_ja_blank_chars
|
|
351
351
|
doc = Document.new(NKF.nkf("--euc", "日本 語\ta b\r\n"))
|
|
352
352
|
expected = 1
|
|
353
|
-
assert_equal(expected, doc.
|
|
353
|
+
assert_equal(expected, doc.count_ja_blank_chars)
|
|
354
354
|
end
|
|
355
355
|
|
|
356
|
-
def
|
|
356
|
+
def test_eucjp_count_blank_chars
|
|
357
357
|
doc = Document.new(NKF.nkf("--euc", "日本 語\ta b\r\n"))
|
|
358
358
|
expected = 3
|
|
359
|
-
assert_equal(expected, doc.
|
|
359
|
+
assert_equal(expected, doc.count_blank_chars)
|
|
360
360
|
end
|
|
361
361
|
|
|
362
|
-
def
|
|
362
|
+
def test_eucjp_count_words
|
|
363
363
|
doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
|
|
364
364
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
365
|
-
assert_equal(expected, doc.
|
|
365
|
+
assert_equal(expected, doc.count_words)
|
|
366
366
|
end
|
|
367
367
|
|
|
368
|
-
def
|
|
368
|
+
def test_eucjp_count_ja_words
|
|
369
369
|
doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
|
|
370
370
|
expected = 3
|
|
371
|
-
assert_equal(expected, doc.
|
|
371
|
+
assert_equal(expected, doc.count_ja_words)
|
|
372
372
|
end
|
|
373
373
|
|
|
374
|
-
def
|
|
374
|
+
def test_eucjp_count_latin_valid_words
|
|
375
375
|
doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
|
|
376
376
|
expected = 2
|
|
377
|
-
assert_equal(expected, doc.
|
|
377
|
+
assert_equal(expected, doc.count_latin_valid_words)
|
|
378
378
|
end
|
|
379
379
|
|
|
380
|
-
def
|
|
380
|
+
def test_eucjp_count_ja_valid_words
|
|
381
381
|
doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
|
|
382
382
|
expected = 2
|
|
383
|
-
assert_equal(expected, doc.
|
|
383
|
+
assert_equal(expected, doc.count_ja_valid_words)
|
|
384
384
|
end
|
|
385
385
|
|
|
386
|
-
def
|
|
386
|
+
def test_eucjp_count_valid_words
|
|
387
387
|
doc = Document.new(NKF.nkf("--euc", "日本 語a b --\r\n"))
|
|
388
388
|
expected = 4
|
|
389
|
-
assert_equal(expected, doc.
|
|
389
|
+
assert_equal(expected, doc.count_valid_words)
|
|
390
390
|
end
|
|
391
391
|
|
|
392
|
-
def
|
|
392
|
+
def test_eucjp_count_lines
|
|
393
393
|
doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
394
394
|
expected = 6
|
|
395
|
-
assert_equal(expected, doc.
|
|
395
|
+
assert_equal(expected, doc.count_lines)
|
|
396
396
|
end
|
|
397
397
|
|
|
398
|
-
def
|
|
398
|
+
def test_eucjp_count_graph_lines
|
|
399
399
|
doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
400
400
|
expected = 3
|
|
401
|
-
assert_equal(expected, doc.
|
|
401
|
+
assert_equal(expected, doc.count_graph_lines)
|
|
402
402
|
end
|
|
403
403
|
|
|
404
|
-
def
|
|
404
|
+
def test_eucjp_count_empty_lines
|
|
405
405
|
doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
406
406
|
expected = 1
|
|
407
|
-
assert_equal(expected, doc.
|
|
407
|
+
assert_equal(expected, doc.count_empty_lines)
|
|
408
408
|
end
|
|
409
409
|
|
|
410
|
-
def
|
|
410
|
+
def test_eucjp_count_blank_lines
|
|
411
411
|
doc = Document.new(NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
412
412
|
expected = 2
|
|
413
|
-
assert_equal(expected, doc.
|
|
413
|
+
assert_equal(expected, doc.count_blank_lines)
|
|
414
414
|
end
|
|
415
415
|
|
|
416
416
|
# test SJIS module
|
|
417
|
-
def
|
|
417
|
+
def test_sjis_to_words
|
|
418
418
|
doc = Document.new(NKF.nkf("--sjis", "日本語の文字foo bar"))
|
|
419
419
|
expected = ["日本語の", "文字", "foo ", "bar"].map { |c| NKF.nkf("--sjis", c) }
|
|
420
|
-
assert_equal(expected, doc.
|
|
420
|
+
assert_equal(expected, doc.to_words)
|
|
421
421
|
end
|
|
422
422
|
|
|
423
|
-
def
|
|
423
|
+
def test_sjis_to_words_kanhira
|
|
424
424
|
doc = Document.new(NKF.nkf("--sjis", "日本語の文字"))
|
|
425
425
|
expected = ["日本語の", "文字"].map { |c| NKF.nkf("--sjis", c) }
|
|
426
|
-
assert_equal(expected, doc.
|
|
426
|
+
assert_equal(expected, doc.to_words)
|
|
427
427
|
end
|
|
428
428
|
|
|
429
|
-
def
|
|
429
|
+
def test_sjis_to_words_katahira
|
|
430
430
|
doc = Document.new(NKF.nkf("--sjis", "カタカナの文字"))
|
|
431
431
|
expected = ["カタカナの", "文字"].map { |c| NKF.nkf("--sjis", c) }
|
|
432
|
-
assert_equal(expected, doc.
|
|
432
|
+
assert_equal(expected, doc.to_words)
|
|
433
433
|
end
|
|
434
434
|
|
|
435
|
-
def
|
|
435
|
+
def test_sjis_to_words_kataonbiki
|
|
436
436
|
doc = Document.new(NKF.nkf("--sjis", "ルビーの指輪"))
|
|
437
437
|
expected = ["ルビーの", "指輪"].map { |c| NKF.nkf("--sjis", c) }
|
|
438
|
-
assert_equal(expected, doc.
|
|
438
|
+
assert_equal(expected, doc.to_words)
|
|
439
439
|
end
|
|
440
440
|
|
|
441
|
-
def
|
|
441
|
+
def test_sjis_to_words_hiraonbiki
|
|
442
442
|
doc = Document.new(NKF.nkf("--sjis", "わールビーだ"))
|
|
443
443
|
expected = ["わー", "ルビーだ"].map { |c| NKF.nkf("--sjis", c) }
|
|
444
|
-
assert_equal(expected, doc.
|
|
444
|
+
assert_equal(expected, doc.to_words)
|
|
445
445
|
end
|
|
446
446
|
|
|
447
|
-
def
|
|
447
|
+
def test_sjis_to_words_latinmix
|
|
448
448
|
doc = Document.new(NKF.nkf("--sjis", "日本語とLatinの文字"))
|
|
449
449
|
expected = ["日本語と", "Latin", "の", "文字"].map { |c| NKF.nkf("--sjis", c) }
|
|
450
|
-
assert_equal(expected, doc.
|
|
450
|
+
assert_equal(expected, doc.to_words)
|
|
451
451
|
end
|
|
452
452
|
|
|
453
|
-
def
|
|
453
|
+
def test_sjis_to_chars
|
|
454
454
|
doc = Document.new(NKF.nkf("--sjis", "表計算a b"))
|
|
455
455
|
expected = ["表", "計", "算", "a", " ", "b"].map { |c| NKF.nkf("--sjis", c) }
|
|
456
|
-
assert_equal(expected, doc.
|
|
456
|
+
assert_equal(expected, doc.to_chars)
|
|
457
457
|
end
|
|
458
458
|
|
|
459
|
-
def
|
|
459
|
+
def test_sjis_to_chars_with_cr
|
|
460
460
|
doc = Document.new(NKF.nkf("--sjis", "表計算a b\r"))
|
|
461
461
|
expected = ["表", "計", "算", "a", " ", "b", "\r"].map { |c| NKF.nkf("--sjis", c) }
|
|
462
|
-
assert_equal(expected, doc.
|
|
462
|
+
assert_equal(expected, doc.to_chars)
|
|
463
463
|
end
|
|
464
464
|
|
|
465
|
-
def
|
|
465
|
+
def test_sjis_to_chars_with_lf
|
|
466
466
|
doc = Document.new(NKF.nkf("--sjis", "表計算a b\n"))
|
|
467
467
|
expected = ["表", "計", "算", "a", " ", "b", "\n"].map { |c| NKF.nkf("--sjis", c) }
|
|
468
|
-
assert_equal(expected, doc.
|
|
468
|
+
assert_equal(expected, doc.to_chars)
|
|
469
469
|
end
|
|
470
470
|
|
|
471
|
-
def
|
|
471
|
+
def test_sjis_to_chars_with_crlf
|
|
472
472
|
doc = Document.new(NKF.nkf("--sjis", "表計算a b\r\n"))
|
|
473
473
|
expected = ["表", "計", "算", "a", " ", "b", "\r\n"].map { |c| NKF.nkf("--sjis", c) }
|
|
474
|
-
assert_equal(expected, doc.
|
|
474
|
+
assert_equal(expected, doc.to_chars)
|
|
475
475
|
end
|
|
476
476
|
|
|
477
|
-
def
|
|
477
|
+
def test_sjis_count_chars
|
|
478
478
|
doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
|
|
479
479
|
expected = 7
|
|
480
|
-
assert_equal(expected, doc.
|
|
480
|
+
assert_equal(expected, doc.count_chars)
|
|
481
481
|
end
|
|
482
482
|
|
|
483
|
-
def
|
|
483
|
+
def test_sjis_count_latin_graph_chars
|
|
484
484
|
doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
|
|
485
485
|
expected = 2
|
|
486
|
-
assert_equal(expected, doc.
|
|
486
|
+
assert_equal(expected, doc.count_latin_graph_chars)
|
|
487
487
|
end
|
|
488
488
|
|
|
489
|
-
def
|
|
489
|
+
def test_sjis_count_ja_graph_chars
|
|
490
490
|
doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
|
|
491
491
|
expected = 3
|
|
492
|
-
assert_equal(expected, doc.
|
|
492
|
+
assert_equal(expected, doc.count_ja_graph_chars)
|
|
493
493
|
end
|
|
494
494
|
|
|
495
|
-
def
|
|
495
|
+
def test_sjis_count_graph_chars
|
|
496
496
|
doc = Document.new(NKF.nkf("--sjis", "日本語a b\r\n"))
|
|
497
497
|
expected = 5
|
|
498
|
-
assert_equal(expected, doc.
|
|
498
|
+
assert_equal(expected, doc.count_graph_chars)
|
|
499
499
|
end
|
|
500
500
|
|
|
501
|
-
def
|
|
501
|
+
def test_sjis_count_latin_blank_chars
|
|
502
502
|
doc = Document.new(NKF.nkf("--sjis", "日本語\ta b\r\n"))
|
|
503
503
|
expected = 2
|
|
504
|
-
assert_equal(expected, doc.
|
|
504
|
+
assert_equal(expected, doc.count_latin_blank_chars)
|
|
505
505
|
end
|
|
506
506
|
|
|
507
|
-
def
|
|
507
|
+
def test_sjis_count_ja_blank_chars
|
|
508
508
|
doc = Document.new(NKF.nkf("--sjis", "日本 語\ta b\r\n"))
|
|
509
509
|
expected = 1
|
|
510
|
-
assert_equal(expected, doc.
|
|
510
|
+
assert_equal(expected, doc.count_ja_blank_chars)
|
|
511
511
|
end
|
|
512
512
|
|
|
513
|
-
def
|
|
513
|
+
def test_sjis_count_blank_chars
|
|
514
514
|
doc = Document.new(NKF.nkf("--sjis", "日本 語\ta b\r\n"))
|
|
515
515
|
expected = 3
|
|
516
|
-
assert_equal(expected, doc.
|
|
516
|
+
assert_equal(expected, doc.count_blank_chars)
|
|
517
517
|
end
|
|
518
518
|
|
|
519
|
-
def
|
|
519
|
+
def test_sjis_count_words
|
|
520
520
|
doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
|
|
521
521
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
522
|
-
assert_equal(expected, doc.
|
|
522
|
+
assert_equal(expected, doc.count_words)
|
|
523
523
|
end
|
|
524
524
|
|
|
525
|
-
def
|
|
525
|
+
def test_sjis_count_ja_words
|
|
526
526
|
doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
|
|
527
527
|
expected = 3
|
|
528
|
-
assert_equal(expected, doc.
|
|
528
|
+
assert_equal(expected, doc.count_ja_words)
|
|
529
529
|
end
|
|
530
530
|
|
|
531
|
-
def
|
|
531
|
+
def test_sjis_count_latin_valid_words
|
|
532
532
|
doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
|
|
533
533
|
expected = 2
|
|
534
|
-
assert_equal(expected, doc.
|
|
534
|
+
assert_equal(expected, doc.count_latin_valid_words)
|
|
535
535
|
end
|
|
536
536
|
|
|
537
|
-
def
|
|
537
|
+
def test_sjis_count_ja_valid_words
|
|
538
538
|
doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
|
|
539
539
|
expected = 2
|
|
540
|
-
assert_equal(expected, doc.
|
|
540
|
+
assert_equal(expected, doc.count_ja_valid_words)
|
|
541
541
|
end
|
|
542
542
|
|
|
543
|
-
def
|
|
543
|
+
def test_sjis_count_valid_words
|
|
544
544
|
doc = Document.new(NKF.nkf("--sjis", "日本 語a b --\r\n"))
|
|
545
545
|
expected = 4
|
|
546
|
-
assert_equal(expected, doc.
|
|
546
|
+
assert_equal(expected, doc.count_valid_words)
|
|
547
547
|
end
|
|
548
548
|
|
|
549
|
-
def
|
|
549
|
+
def test_sjis_count_lines
|
|
550
550
|
doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
551
551
|
expected = 6
|
|
552
|
-
assert_equal(expected, doc.
|
|
552
|
+
assert_equal(expected, doc.count_lines)
|
|
553
553
|
end
|
|
554
554
|
|
|
555
|
-
def
|
|
555
|
+
def test_sjis_count_graph_lines
|
|
556
556
|
doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
557
557
|
expected = 3
|
|
558
|
-
assert_equal(expected, doc.
|
|
558
|
+
assert_equal(expected, doc.count_graph_lines)
|
|
559
559
|
end
|
|
560
560
|
|
|
561
|
-
def
|
|
561
|
+
def test_sjis_count_empty_lines
|
|
562
562
|
doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
563
563
|
expected = 1
|
|
564
|
-
assert_equal(expected, doc.
|
|
564
|
+
assert_equal(expected, doc.count_empty_lines)
|
|
565
565
|
end
|
|
566
566
|
|
|
567
|
-
def
|
|
567
|
+
def test_sjis_count_blank_lines
|
|
568
568
|
doc = Document.new(NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
569
569
|
expected = 2
|
|
570
|
-
assert_equal(expected, doc.
|
|
570
|
+
assert_equal(expected, doc.count_blank_lines)
|
|
571
571
|
end
|
|
572
572
|
|
|
573
573
|
# test UTF8 module
|
|
574
|
-
def
|
|
574
|
+
def test_utf8_to_words
|
|
575
575
|
doc = Document.new(NKF.nkf("--utf8", "日本語の文字foo bar"))
|
|
576
576
|
expected = ["日本語の", "文字", "foo ", "bar"].map { |c| NKF.nkf("--utf8", c) }
|
|
577
|
-
assert_equal(expected, doc.
|
|
577
|
+
assert_equal(expected, doc.to_words)
|
|
578
578
|
end
|
|
579
579
|
|
|
580
|
-
def
|
|
580
|
+
def test_utf8_to_words_kanhira
|
|
581
581
|
doc = Document.new(NKF.nkf("--utf8", "日本語の文字"))
|
|
582
582
|
expected = ["日本語の", "文字"].map { |c| NKF.nkf("--utf8", c) }
|
|
583
|
-
assert_equal(expected, doc.
|
|
583
|
+
assert_equal(expected, doc.to_words)
|
|
584
584
|
end
|
|
585
585
|
|
|
586
|
-
def
|
|
586
|
+
def test_utf8_to_words_katahira
|
|
587
587
|
doc = Document.new(NKF.nkf("--utf8", "カタカナの文字"))
|
|
588
588
|
expected = ["カタカナの", "文字"].map { |c| NKF.nkf("--utf8", c) }
|
|
589
|
-
assert_equal(expected, doc.
|
|
589
|
+
assert_equal(expected, doc.to_words)
|
|
590
590
|
end
|
|
591
591
|
|
|
592
|
-
def
|
|
592
|
+
def test_utf8_to_words_kataonbiki
|
|
593
593
|
doc = Document.new(NKF.nkf("--utf8", "ルビーの指輪"))
|
|
594
594
|
expected = ["ルビーの", "指輪"].map { |c| NKF.nkf("--utf8", c) }
|
|
595
|
-
assert_equal(expected, doc.
|
|
595
|
+
assert_equal(expected, doc.to_words)
|
|
596
596
|
end
|
|
597
597
|
|
|
598
|
-
def
|
|
598
|
+
def test_utf8_to_words_hiraonbiki
|
|
599
599
|
doc = Document.new(NKF.nkf("--utf8", "わールビーだ"))
|
|
600
600
|
expected = ["わー", "ルビーだ"].map { |c| NKF.nkf("--utf8", c) }
|
|
601
|
-
assert_equal(expected, doc.
|
|
601
|
+
assert_equal(expected, doc.to_words)
|
|
602
602
|
end
|
|
603
603
|
|
|
604
|
-
def
|
|
604
|
+
def test_utf8_to_words_latinmix
|
|
605
605
|
doc = Document.new(NKF.nkf("--utf8", "日本語とLatinの文字"))
|
|
606
606
|
expected = ["日本語と", "Latin", "の", "文字"].map { |c| NKF.nkf("--utf8", c) }
|
|
607
|
-
assert_equal(expected, doc.
|
|
607
|
+
assert_equal(expected, doc.to_words)
|
|
608
608
|
end
|
|
609
609
|
|
|
610
|
-
def
|
|
610
|
+
def test_utf8_to_chars
|
|
611
611
|
doc = Document.new(NKF.nkf("--utf8", "日本語a b"), "UTF-8")
|
|
612
612
|
expected = ["日", "本", "語", "a", " ", "b"].map { |c| NKF.nkf("--utf8", c) }
|
|
613
|
-
assert_equal(expected, doc.
|
|
613
|
+
assert_equal(expected, doc.to_chars)
|
|
614
614
|
end
|
|
615
615
|
|
|
616
|
-
def
|
|
616
|
+
def test_utf8_to_chars_with_cr
|
|
617
617
|
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r"), "UTF-8")
|
|
618
618
|
expected = ["日", "本", "語", "a", " ", "b", "\r"].map { |c| NKF.nkf("--utf8", c) }
|
|
619
|
-
assert_equal(expected, doc.
|
|
619
|
+
assert_equal(expected, doc.to_chars)
|
|
620
620
|
end
|
|
621
621
|
|
|
622
|
-
def
|
|
622
|
+
def test_utf8_to_chars_with_lf
|
|
623
623
|
doc = Document.new(NKF.nkf("--utf8", "日本語a b\n"), "UTF-8")
|
|
624
624
|
expected = ["日", "本", "語", "a", " ", "b", "\n"].map { |c| NKF.nkf("--utf8", c) }
|
|
625
|
-
assert_equal(expected, doc.
|
|
625
|
+
assert_equal(expected, doc.to_chars)
|
|
626
626
|
end
|
|
627
627
|
|
|
628
|
-
def
|
|
628
|
+
def test_utf8_to_chars_with_crlf
|
|
629
629
|
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
|
|
630
630
|
expected = ["日", "本", "語", "a", " ", "b", "\r\n"].map { |c| NKF.nkf("--utf8", c) }
|
|
631
|
-
assert_equal(expected, doc.
|
|
631
|
+
assert_equal(expected, doc.to_chars)
|
|
632
632
|
end
|
|
633
633
|
|
|
634
|
-
def
|
|
634
|
+
def test_utf8_count_chars
|
|
635
635
|
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
|
|
636
636
|
expected = 7
|
|
637
|
-
assert_equal(expected, doc.
|
|
637
|
+
assert_equal(expected, doc.count_chars)
|
|
638
638
|
end
|
|
639
639
|
|
|
640
|
-
def
|
|
640
|
+
def test_utf8_count_latin_graph_chars
|
|
641
641
|
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
|
|
642
642
|
expected = 2
|
|
643
|
-
assert_equal(expected, doc.
|
|
643
|
+
assert_equal(expected, doc.count_latin_graph_chars)
|
|
644
644
|
end
|
|
645
645
|
|
|
646
|
-
def
|
|
646
|
+
def test_utf8_count_ja_graph_chars
|
|
647
647
|
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
|
|
648
648
|
expected = 3
|
|
649
|
-
assert_equal(expected, doc.
|
|
649
|
+
assert_equal(expected, doc.count_ja_graph_chars)
|
|
650
650
|
end
|
|
651
651
|
|
|
652
|
-
def
|
|
652
|
+
def test_utf8_count_graph_chars
|
|
653
653
|
doc = Document.new(NKF.nkf("--utf8", "日本語a b\r\n"), "UTF-8")
|
|
654
654
|
expected = 5
|
|
655
|
-
assert_equal(expected, doc.
|
|
655
|
+
assert_equal(expected, doc.count_graph_chars)
|
|
656
656
|
end
|
|
657
657
|
|
|
658
|
-
def
|
|
658
|
+
def test_utf8_count_latin_blank_chars
|
|
659
659
|
doc = Document.new(NKF.nkf("--utf8", "日本語\ta b\r\n"))
|
|
660
660
|
expected = 2
|
|
661
|
-
assert_equal(expected, doc.
|
|
661
|
+
assert_equal(expected, doc.count_latin_blank_chars)
|
|
662
662
|
end
|
|
663
663
|
|
|
664
|
-
def
|
|
664
|
+
def test_utf8_count_ja_blank_chars
|
|
665
665
|
doc = Document.new(NKF.nkf("--utf8", "日本 語\ta b\r\n"))
|
|
666
666
|
expected = 1
|
|
667
|
-
assert_equal(expected, doc.
|
|
667
|
+
assert_equal(expected, doc.count_ja_blank_chars)
|
|
668
668
|
end
|
|
669
669
|
|
|
670
|
-
def
|
|
670
|
+
def test_utf8_count_blank_chars
|
|
671
671
|
doc = Document.new(NKF.nkf("--utf8", "日本 語\ta b\r\n"))
|
|
672
672
|
expected = 3
|
|
673
|
-
assert_equal(expected, doc.
|
|
673
|
+
assert_equal(expected, doc.count_blank_chars)
|
|
674
674
|
end
|
|
675
675
|
|
|
676
|
-
def
|
|
676
|
+
def test_utf8_count_words
|
|
677
677
|
doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
|
|
678
678
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
679
|
-
assert_equal(expected, doc.
|
|
679
|
+
assert_equal(expected, doc.count_words)
|
|
680
680
|
end
|
|
681
681
|
|
|
682
|
-
def
|
|
682
|
+
def test_utf8_count_ja_words
|
|
683
683
|
doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
|
|
684
684
|
expected = 3
|
|
685
|
-
assert_equal(expected, doc.
|
|
685
|
+
assert_equal(expected, doc.count_ja_words)
|
|
686
686
|
end
|
|
687
687
|
|
|
688
|
-
def
|
|
688
|
+
def test_utf8_count_latin_valid_words
|
|
689
689
|
doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
|
|
690
690
|
expected = 2
|
|
691
|
-
assert_equal(expected, doc.
|
|
691
|
+
assert_equal(expected, doc.count_latin_valid_words)
|
|
692
692
|
end
|
|
693
693
|
|
|
694
|
-
def
|
|
694
|
+
def test_utf8_count_ja_valid_words
|
|
695
695
|
doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
|
|
696
696
|
expected = 2
|
|
697
|
-
assert_equal(expected, doc.
|
|
697
|
+
assert_equal(expected, doc.count_ja_valid_words)
|
|
698
698
|
end
|
|
699
699
|
|
|
700
|
-
def
|
|
700
|
+
def test_utf8_count_valid_words
|
|
701
701
|
doc = Document.new(NKF.nkf("--utf8", "日本 語a b --\r\n"))
|
|
702
702
|
expected = 4
|
|
703
|
-
assert_equal(expected, doc.
|
|
703
|
+
assert_equal(expected, doc.count_valid_words)
|
|
704
704
|
end
|
|
705
705
|
|
|
706
|
-
def
|
|
706
|
+
def test_utf8_count_lines
|
|
707
707
|
doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
708
708
|
expected = 6
|
|
709
|
-
assert_equal(expected, doc.
|
|
709
|
+
assert_equal(expected, doc.count_lines)
|
|
710
710
|
end
|
|
711
711
|
|
|
712
|
-
def
|
|
712
|
+
def test_utf8_count_graph_lines
|
|
713
713
|
doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
714
714
|
expected = 3
|
|
715
|
-
assert_equal(expected, doc.
|
|
715
|
+
assert_equal(expected, doc.count_graph_lines)
|
|
716
716
|
end
|
|
717
717
|
|
|
718
|
-
def
|
|
718
|
+
def test_utf8_count_empty_lines
|
|
719
719
|
doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
720
720
|
expected = 1
|
|
721
|
-
assert_equal(expected, doc.
|
|
721
|
+
assert_equal(expected, doc.count_empty_lines)
|
|
722
722
|
end
|
|
723
723
|
|
|
724
|
-
def
|
|
724
|
+
def test_utf8_count_blank_lines
|
|
725
725
|
doc = Document.new(NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar"))
|
|
726
726
|
expected = 2
|
|
727
|
-
assert_equal(expected, doc.
|
|
727
|
+
assert_equal(expected, doc.count_blank_lines)
|
|
728
728
|
end
|
|
729
729
|
|
|
730
730
|
def teardown
|