docdiff 0.6.7 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +69 -36
- data/README_ja.md +70 -37
- data/doc/example/docdiff.conf.example +3 -0
- data/doc/img/screenshot-html.png +0 -0
- data/doc/img/screenshot-tty-char.png +0 -0
- data/doc/img/screenshot-tty-digest-block.png +0 -0
- data/doc/img/screenshot-tty-digest-license-block.png +0 -0
- data/doc/img/screenshot-tty-digest-license.png +0 -0
- data/doc/img/screenshot-tty-digest.png +0 -0
- data/doc/img/screenshot-tty-en-ja.png +0 -0
- data/doc/img/screenshot-tty-manued.png +0 -0
- data/doc/img/screenshot-tty-wdiff.png +0 -0
- data/doc/img/screenshot-tty-word-char.png +0 -0
- data/doc/man/docdiff.adoc +3 -3
- data/doc/news.md +11 -0
- data/docdiff.gemspec +1 -1
- data/lib/doc_diff.rb +5 -5
- data/lib/docdiff/charstring.rb +36 -40
- data/lib/docdiff/cli.rb +23 -10
- data/lib/docdiff/document.rb +44 -44
- data/lib/docdiff/encoding/en_ascii.rb +4 -4
- data/lib/docdiff/version.rb +1 -1
- data/lib/docdiff/view.rb +27 -13
- data/test/charstring_test.rb +221 -221
- data/test/cli_test.rb +12 -12
- data/test/document_test.rb +223 -223
- data/test/fixture/humpty_dumpty_01_en.txt +4 -0
- data/test/fixture/humpty_dumpty_01_ja.txt +4 -0
- data/test/fixture/{humpty_dumpty01_ascii_lf.txt → humpty_dumpty_02_en.txt} +2 -2
- data/test/fixture/humpty_dumpty_02_ja.txt +4 -0
- data/test/view_test.rb +38 -31
- metadata +26 -14
- data/test/fixture/humpty_dumpty02_ascii_lf.txt +0 -4
- /data/doc/img/{screenshot-format-html-digest-firefox.png → old/screenshot-format-html-digest-firefox.png} +0 -0
- /data/doc/img/{screenshot-format-html-firefox.png → old/screenshot-format-html-firefox.png} +0 -0
- /data/doc/img/{screenshot-format-tty-cmdexe-en.png → old/screenshot-format-tty-cmdexe-en.png} +0 -0
- /data/doc/img/{screenshot-format-tty-cmdexe-ja.png → old/screenshot-format-tty-cmdexe-ja.png} +0 -0
- /data/doc/img/{screenshot-format-tty-rxvtunicode-en.png → old/screenshot-format-tty-rxvtunicode-en.png} +0 -0
- /data/doc/img/{screenshot-format-tty-rxvtunicode-ja.png → old/screenshot-format-tty-rxvtunicode-ja.png} +0 -0
- /data/doc/img/{screenshot-format-tty-xterm-en.png → old/screenshot-format-tty-xterm-en.png} +0 -0
- /data/doc/img/{screenshot-format-tty-xterm-ja.png → old/screenshot-format-tty-xterm-ja.png} +0 -0
- /data/doc/img/{screenshot-resolution-linewordchar-xterm.png → old/screenshot-resolution-linewordchar-xterm.png} +0 -0
data/test/charstring_test.rb
CHANGED
|
@@ -110,853 +110,853 @@ class TestCharString < Test::Unit::TestCase
|
|
|
110
110
|
assert_equal(expected, str.eol_char)
|
|
111
111
|
end
|
|
112
112
|
|
|
113
|
-
# test eol
|
|
114
|
-
def
|
|
113
|
+
# test eol to_lines method
|
|
114
|
+
def test_cr_to_lines
|
|
115
115
|
str = "foo\rbar\r".extend(CharString)
|
|
116
116
|
str.encoding = "US-ASCII"
|
|
117
117
|
str.eol = "CR"
|
|
118
118
|
expected = ["foo\r", "bar\r"]
|
|
119
|
-
assert_equal(expected, str.
|
|
119
|
+
assert_equal(expected, str.to_lines)
|
|
120
120
|
end
|
|
121
121
|
|
|
122
|
-
def
|
|
122
|
+
def test_cr_to_lines_chomped_lastline
|
|
123
123
|
str = "foo\rbar".extend(CharString)
|
|
124
124
|
str.encoding = "US-ASCII"
|
|
125
125
|
str.eol = "CR"
|
|
126
126
|
expected = ["foo\r", "bar"]
|
|
127
|
-
assert_equal(expected, str.
|
|
127
|
+
assert_equal(expected, str.to_lines)
|
|
128
128
|
end
|
|
129
129
|
|
|
130
|
-
def
|
|
130
|
+
def test_cr_to_lines_empty_line
|
|
131
131
|
str = "foo\r\rbar\r".extend(CharString)
|
|
132
132
|
str.encoding = "US-ASCII"
|
|
133
133
|
str.eol = "CR"
|
|
134
134
|
expected = ["foo\r", "\r", "bar\r"]
|
|
135
|
-
assert_equal(expected, str.
|
|
135
|
+
assert_equal(expected, str.to_lines)
|
|
136
136
|
end
|
|
137
137
|
|
|
138
|
-
def
|
|
138
|
+
def test_lf_to_lines
|
|
139
139
|
str = "foo\nbar\n".extend(CharString)
|
|
140
140
|
str.encoding = "US-ASCII"
|
|
141
141
|
str.eol = "LF"
|
|
142
142
|
expected = ["foo\n", "bar\n"]
|
|
143
|
-
assert_equal(expected, str.
|
|
143
|
+
assert_equal(expected, str.to_lines)
|
|
144
144
|
end
|
|
145
145
|
|
|
146
|
-
def
|
|
146
|
+
def test_lf_to_lines_chomped_lastline
|
|
147
147
|
str = "foo\nbar".extend(CharString)
|
|
148
148
|
str.encoding = "US-ASCII"
|
|
149
149
|
str.eol = "LF"
|
|
150
150
|
expected = ["foo\n", "bar"]
|
|
151
|
-
assert_equal(expected, str.
|
|
151
|
+
assert_equal(expected, str.to_lines)
|
|
152
152
|
end
|
|
153
153
|
|
|
154
|
-
def
|
|
154
|
+
def test_lf_to_lines_empty_line
|
|
155
155
|
str = "foo\n\nbar\n".extend(CharString)
|
|
156
156
|
str.encoding = "US-ASCII"
|
|
157
157
|
str.eol = "LF"
|
|
158
158
|
expected = ["foo\n", "\n", "bar\n"]
|
|
159
|
-
assert_equal(expected, str.
|
|
159
|
+
assert_equal(expected, str.to_lines)
|
|
160
160
|
end
|
|
161
161
|
|
|
162
|
-
def
|
|
162
|
+
def test_crlf_to_lines
|
|
163
163
|
str = "foo\r\nbar\r\n".extend(CharString)
|
|
164
164
|
str.encoding = "US-ASCII"
|
|
165
165
|
str.eol = "CRLF"
|
|
166
166
|
expected = ["foo\r\n", "bar\r\n"]
|
|
167
|
-
assert_equal(expected, str.
|
|
167
|
+
assert_equal(expected, str.to_lines)
|
|
168
168
|
end
|
|
169
169
|
|
|
170
|
-
def
|
|
170
|
+
def test_crlf_to_lines_chomped_lastline
|
|
171
171
|
str = "foo\r\nbar".extend(CharString)
|
|
172
172
|
str.encoding = "US-ASCII"
|
|
173
173
|
str.eol = "CRLF"
|
|
174
174
|
expected = ["foo\r\n", "bar"]
|
|
175
|
-
assert_equal(expected, str.
|
|
175
|
+
assert_equal(expected, str.to_lines)
|
|
176
176
|
end
|
|
177
177
|
|
|
178
|
-
def
|
|
178
|
+
def test_crlf_to_lines_empty_line
|
|
179
179
|
str = "foo\r\n\r\nbar\r\n".extend(CharString)
|
|
180
180
|
str.encoding = "US-ASCII"
|
|
181
181
|
str.eol = "CRLF"
|
|
182
182
|
expected = ["foo\r\n", "\r\n", "bar\r\n"]
|
|
183
|
-
assert_equal(expected, str.
|
|
183
|
+
assert_equal(expected, str.to_lines)
|
|
184
184
|
end
|
|
185
185
|
|
|
186
186
|
# test ASCII module
|
|
187
|
-
def
|
|
187
|
+
def test_ascii_to_words
|
|
188
188
|
str = "foo bar".extend(CharString)
|
|
189
189
|
str.encoding = "US-ASCII"
|
|
190
190
|
expected = ["foo ", "bar"]
|
|
191
|
-
assert_equal(expected, str.
|
|
191
|
+
assert_equal(expected, str.to_words)
|
|
192
192
|
end
|
|
193
193
|
|
|
194
|
-
def
|
|
194
|
+
def test_ascii_to_words_withsymbol
|
|
195
195
|
str = "foo (bar) baz-baz".extend(CharString)
|
|
196
196
|
str.encoding = "US-ASCII"
|
|
197
197
|
expected = ["foo ", "(bar) ", "baz-baz"]
|
|
198
|
-
assert_equal(expected, str.
|
|
198
|
+
assert_equal(expected, str.to_words)
|
|
199
199
|
end
|
|
200
200
|
|
|
201
|
-
def
|
|
201
|
+
def test_ascii_to_words_withquote
|
|
202
202
|
str = "foo's 'foo' \"bar\" 'baz.'".extend(CharString)
|
|
203
203
|
str.encoding = "US-ASCII"
|
|
204
204
|
expected = ["foo's ", "'foo' ", "\"bar\" ", "'baz.'"]
|
|
205
|
-
assert_equal(expected, str.
|
|
205
|
+
assert_equal(expected, str.to_words)
|
|
206
206
|
end
|
|
207
207
|
|
|
208
|
-
def
|
|
208
|
+
def test_ascii_to_words_withlongspace
|
|
209
209
|
str = " foo bar".extend(CharString)
|
|
210
210
|
str.encoding = "US-ASCII"
|
|
211
211
|
expected = [" ", "foo ", " ", "bar"]
|
|
212
|
-
assert_equal(expected, str.
|
|
212
|
+
assert_equal(expected, str.to_words)
|
|
213
213
|
end
|
|
214
214
|
|
|
215
|
-
def
|
|
215
|
+
def test_ascii_to_words_withdash
|
|
216
216
|
str = "foo -- bar, baz - quux".extend(CharString)
|
|
217
217
|
str.encoding = "US-ASCII"
|
|
218
218
|
expected = ["foo ", "-- ", "bar, ", "baz ", "- ", "quux"]
|
|
219
|
-
assert_equal(expected, str.
|
|
219
|
+
assert_equal(expected, str.to_words)
|
|
220
220
|
end
|
|
221
221
|
|
|
222
|
-
def
|
|
222
|
+
def test_ascii_to_chars
|
|
223
223
|
str = "foo bar".extend(CharString)
|
|
224
224
|
str.encoding = "US-ASCII"
|
|
225
225
|
str.eol = "LF"
|
|
226
226
|
expected = ["f", "o", "o", " ", "b", "a", "r"]
|
|
227
|
-
assert_equal(expected, str.
|
|
227
|
+
assert_equal(expected, str.to_chars)
|
|
228
228
|
end
|
|
229
229
|
|
|
230
|
-
def
|
|
230
|
+
def test_ascii_to_chars_with_eol_cr
|
|
231
231
|
str = "foo bar\r".extend(CharString)
|
|
232
232
|
str.encoding = "US-ASCII"
|
|
233
233
|
str.eol = "CR"
|
|
234
234
|
expected = ["f", "o", "o", " ", "b", "a", "r", "\r"]
|
|
235
|
-
assert_equal(expected, str.
|
|
235
|
+
assert_equal(expected, str.to_chars)
|
|
236
236
|
end
|
|
237
237
|
|
|
238
|
-
def
|
|
238
|
+
def test_ascii_to_chars_with_eol_lf
|
|
239
239
|
str = "foo bar\n".extend(CharString)
|
|
240
240
|
str.encoding = "US-ASCII"
|
|
241
241
|
str.eol = "LF"
|
|
242
242
|
expected = ["f", "o", "o", " ", "b", "a", "r", "\n"]
|
|
243
|
-
assert_equal(expected, str.
|
|
243
|
+
assert_equal(expected, str.to_chars)
|
|
244
244
|
end
|
|
245
245
|
|
|
246
|
-
def
|
|
246
|
+
def test_ascii_to_chars_with_eol_crlf
|
|
247
247
|
str = "foo bar\r\n".extend(CharString)
|
|
248
248
|
str.encoding = "US-ASCII"
|
|
249
249
|
str.eol = "CRLF"
|
|
250
250
|
expected = ["f", "o", "o", " ", "b", "a", "r", "\r\n"]
|
|
251
|
-
assert_equal(expected, str.
|
|
251
|
+
assert_equal(expected, str.to_chars)
|
|
252
252
|
end
|
|
253
253
|
|
|
254
|
-
def
|
|
254
|
+
def test_ascii_to_bytes
|
|
255
255
|
str = "foo bar\r\n".extend(CharString)
|
|
256
256
|
str.encoding = "US-ASCII"
|
|
257
257
|
str.eol = "CRLF"
|
|
258
258
|
expected = ["f", "o", "o", " ", "b", "a", "r", "\r", "\n"]
|
|
259
|
-
assert_equal(expected, str.
|
|
259
|
+
assert_equal(expected, str.to_bytes)
|
|
260
260
|
end
|
|
261
261
|
|
|
262
|
-
def
|
|
262
|
+
def test_ascii_count_bytes
|
|
263
263
|
str = "foo bar\r\n".extend(CharString)
|
|
264
264
|
str.encoding = "US-ASCII"
|
|
265
265
|
str.eol = "CRLF"
|
|
266
266
|
expected = 9
|
|
267
|
-
assert_equal(expected, str.
|
|
267
|
+
assert_equal(expected, str.count_bytes)
|
|
268
268
|
end
|
|
269
269
|
|
|
270
|
-
def
|
|
270
|
+
def test_ascii_count_chars
|
|
271
271
|
str = "foo bar\r\nbaz quux\r\n".extend(CharString)
|
|
272
272
|
str.encoding = "US-ASCII"
|
|
273
273
|
str.eol = "CRLF"
|
|
274
274
|
expected = 17
|
|
275
|
-
assert_equal(expected, str.
|
|
275
|
+
assert_equal(expected, str.count_chars)
|
|
276
276
|
end
|
|
277
277
|
|
|
278
|
-
def
|
|
278
|
+
def test_ascii_count_latin_graph_chars
|
|
279
279
|
str = "foo bar\r\nbaz quux\r\n".extend(CharString)
|
|
280
280
|
str.encoding = "US-ASCII"
|
|
281
281
|
str.eol = "CRLF"
|
|
282
282
|
expected = 13
|
|
283
|
-
assert_equal(expected, str.
|
|
283
|
+
assert_equal(expected, str.count_latin_graph_chars)
|
|
284
284
|
end
|
|
285
285
|
|
|
286
|
-
def
|
|
286
|
+
def test_ascii_count_graph_chars
|
|
287
287
|
str = "foo bar\r\nbaz quux\r\n".extend(CharString)
|
|
288
288
|
str.encoding = "US-ASCII"
|
|
289
289
|
str.eol = "CRLF"
|
|
290
290
|
expected = 13
|
|
291
|
-
assert_equal(expected, str.
|
|
291
|
+
assert_equal(expected, str.count_graph_chars)
|
|
292
292
|
end
|
|
293
293
|
|
|
294
|
-
def
|
|
294
|
+
def test_ascii_count_latin_blank_chars
|
|
295
295
|
str = "foo bar\r\nbaz\tquux\r\n".extend(CharString)
|
|
296
296
|
str.encoding = "US-ASCII"
|
|
297
297
|
str.eol = "CRLF"
|
|
298
298
|
expected = 2
|
|
299
|
-
assert_equal(expected, str.
|
|
299
|
+
assert_equal(expected, str.count_latin_blank_chars)
|
|
300
300
|
end
|
|
301
301
|
|
|
302
|
-
def
|
|
302
|
+
def test_ascii_count_blank_chars
|
|
303
303
|
str = "foo bar\r\nbaz\tquux\r\n".extend(CharString)
|
|
304
304
|
str.encoding = "US-ASCII"
|
|
305
305
|
str.eol = "CRLF"
|
|
306
306
|
expected = 2
|
|
307
|
-
assert_equal(expected, str.
|
|
307
|
+
assert_equal(expected, str.count_blank_chars)
|
|
308
308
|
end
|
|
309
309
|
|
|
310
|
-
def
|
|
310
|
+
def test_ascii_count_words
|
|
311
311
|
str = "foo bar \r\nbaz quux\r\n".extend(CharString)
|
|
312
312
|
str.encoding = "US-ASCII"
|
|
313
313
|
str.eol = "CRLF"
|
|
314
314
|
expected = 6
|
|
315
|
-
assert_equal(expected, str.
|
|
315
|
+
assert_equal(expected, str.count_words)
|
|
316
316
|
end
|
|
317
317
|
|
|
318
|
-
def
|
|
318
|
+
def test_ascii_count_latin_words
|
|
319
319
|
str = "foo bar \r\nbaz quux\r\n".extend(CharString)
|
|
320
320
|
str.encoding = "US-ASCII"
|
|
321
321
|
str.eol = "CRLF"
|
|
322
322
|
expected = 5 # " " is also counted as a word
|
|
323
|
-
assert_equal(expected, str.
|
|
323
|
+
assert_equal(expected, str.count_latin_words)
|
|
324
324
|
end
|
|
325
325
|
|
|
326
|
-
def
|
|
326
|
+
def test_ascii_count_latin_valid_words
|
|
327
327
|
str = "1 foo \r\n%%% ()\r\n".extend(CharString)
|
|
328
328
|
str.encoding = "US-ASCII"
|
|
329
329
|
str.eol = "CRLF"
|
|
330
330
|
expected = 2
|
|
331
|
-
assert_equal(expected, str.
|
|
331
|
+
assert_equal(expected, str.count_latin_valid_words)
|
|
332
332
|
end
|
|
333
333
|
|
|
334
|
-
def
|
|
334
|
+
def test_ascii_count_lines
|
|
335
335
|
str = "foo\r\nbar".extend(CharString)
|
|
336
336
|
str.encoding = "US-ASCII"
|
|
337
337
|
str.eol = "CRLF"
|
|
338
338
|
expected = 2
|
|
339
|
-
assert_equal(expected, str.
|
|
339
|
+
assert_equal(expected, str.count_lines)
|
|
340
340
|
end
|
|
341
341
|
|
|
342
|
-
def
|
|
342
|
+
def test_ascii_count_graph_lines
|
|
343
343
|
str = "foo\r\n ".extend(CharString)
|
|
344
344
|
str.encoding = "US-ASCII"
|
|
345
345
|
str.eol = "CRLF"
|
|
346
346
|
expected = 1
|
|
347
|
-
assert_equal(expected, str.
|
|
347
|
+
assert_equal(expected, str.count_graph_lines)
|
|
348
348
|
end
|
|
349
349
|
|
|
350
|
-
def
|
|
350
|
+
def test_ascii_count_empty_lines
|
|
351
351
|
str = "foo\r\n \r\n\t\r\n\r\n".extend(CharString)
|
|
352
352
|
str.encoding = "US-ASCII"
|
|
353
353
|
str.eol = "CRLF"
|
|
354
354
|
expected = 1
|
|
355
|
-
assert_equal(expected, str.
|
|
355
|
+
assert_equal(expected, str.count_empty_lines)
|
|
356
356
|
end
|
|
357
357
|
|
|
358
|
-
def
|
|
358
|
+
def test_ascii_count_blank_lines
|
|
359
359
|
str = "\r\n \r\n\t\r\n ".extend(CharString)
|
|
360
360
|
str.encoding = "US-ASCII"
|
|
361
361
|
str.eol = "CRLF"
|
|
362
362
|
expected = 3
|
|
363
|
-
assert_equal(expected, str.
|
|
363
|
+
assert_equal(expected, str.count_blank_lines)
|
|
364
364
|
end
|
|
365
365
|
|
|
366
366
|
# test EUCJP module
|
|
367
|
-
def
|
|
367
|
+
def test_eucjp_to_words
|
|
368
368
|
str = NKF.nkf("--euc", "日本語の文字foo bar").extend(CharString)
|
|
369
369
|
str.encoding = "EUC-JP"
|
|
370
370
|
expected = ["日本語の", "文字", "foo ", "bar"].map { |c| NKF.nkf("--euc", c) }
|
|
371
|
-
assert_equal(expected, str.
|
|
371
|
+
assert_equal(expected, str.to_words)
|
|
372
372
|
end
|
|
373
373
|
|
|
374
|
-
def
|
|
374
|
+
def test_eucjp_to_words_kanhira
|
|
375
375
|
str = NKF.nkf("--euc", "日本語の文字").extend(CharString)
|
|
376
376
|
str.encoding = "EUC-JP"
|
|
377
377
|
expected = ["日本語の", "文字"].map { |c| NKF.nkf("--euc", c) }
|
|
378
|
-
assert_equal(expected, str.
|
|
378
|
+
assert_equal(expected, str.to_words)
|
|
379
379
|
end
|
|
380
380
|
|
|
381
|
-
def
|
|
381
|
+
def test_eucjp_to_words_katahira
|
|
382
382
|
str = NKF.nkf("--euc", "カタカナの文字").extend(CharString)
|
|
383
383
|
str.encoding = "EUC-JP"
|
|
384
384
|
expected = ["カタカナの", "文字"].map { |c| NKF.nkf("--euc", c) }
|
|
385
|
-
assert_equal(expected, str.
|
|
385
|
+
assert_equal(expected, str.to_words)
|
|
386
386
|
end
|
|
387
387
|
|
|
388
|
-
def
|
|
388
|
+
def test_eucjp_to_words_kataonbiki
|
|
389
389
|
str = NKF.nkf("--euc", "ルビー色の石").extend(CharString)
|
|
390
390
|
expected = ["ルビー", "色の", "石"].map { |c| NKF.nkf("--euc", c) }
|
|
391
|
-
assert_equal(expected, str.
|
|
391
|
+
assert_equal(expected, str.to_words)
|
|
392
392
|
end
|
|
393
393
|
|
|
394
|
-
def
|
|
394
|
+
def test_eucjp_to_words_hiraonbiki
|
|
395
395
|
str = NKF.nkf("--euc", "わールビーだ").extend(CharString)
|
|
396
396
|
expected = ["わー", "ルビーだ"].map { |c| NKF.nkf("--euc", c) }
|
|
397
|
-
assert_equal(expected, str.
|
|
397
|
+
assert_equal(expected, str.to_words)
|
|
398
398
|
end
|
|
399
399
|
|
|
400
|
-
def
|
|
400
|
+
def test_eucjp_to_words_latinmix
|
|
401
401
|
str = NKF.nkf("--euc", "日本語とLatinの文字").extend(CharString)
|
|
402
402
|
str.encoding = "EUC-JP"
|
|
403
403
|
expected = ["日本語と", "Latin", "の", "文字"].map { |c| NKF.nkf("--euc", c) }
|
|
404
|
-
assert_equal(expected, str.
|
|
404
|
+
assert_equal(expected, str.to_words)
|
|
405
405
|
end
|
|
406
406
|
|
|
407
|
-
def
|
|
407
|
+
def test_eucjp_to_chars
|
|
408
408
|
str = NKF.nkf("--euc", "日本語a b").extend(CharString)
|
|
409
409
|
str.encoding = "EUC-JP"
|
|
410
410
|
expected = ["日", "本", "語", "a", " ", "b"].map { |c| NKF.nkf("--euc", c) }
|
|
411
|
-
assert_equal(expected, str.
|
|
411
|
+
assert_equal(expected, str.to_chars)
|
|
412
412
|
end
|
|
413
413
|
|
|
414
|
-
def
|
|
414
|
+
def test_eucjp_to_chars_with_cr
|
|
415
415
|
str = NKF.nkf("--euc", "日本語a b\r").extend(CharString)
|
|
416
416
|
str.encoding = "EUC-JP"
|
|
417
417
|
str.eol = "CR"
|
|
418
418
|
expected = ["日", "本", "語", "a", " ", "b", "\r"].map { |c| NKF.nkf("--euc", c) }
|
|
419
|
-
assert_equal(expected, str.
|
|
419
|
+
assert_equal(expected, str.to_chars)
|
|
420
420
|
end
|
|
421
421
|
|
|
422
|
-
def
|
|
422
|
+
def test_eucjp_to_chars_with_lf
|
|
423
423
|
str = NKF.nkf("--euc", "日本語a b\n").extend(CharString)
|
|
424
424
|
str.encoding = "EUC-JP"
|
|
425
425
|
str.eol = "LF"
|
|
426
426
|
expected = ["日", "本", "語", "a", " ", "b", "\n"].map { |c| NKF.nkf("--euc", c) }
|
|
427
|
-
assert_equal(expected, str.
|
|
427
|
+
assert_equal(expected, str.to_chars)
|
|
428
428
|
end
|
|
429
429
|
|
|
430
|
-
def
|
|
430
|
+
def test_eucjp_to_chars_with_crlf
|
|
431
431
|
str = NKF.nkf("--euc", "日本語a b\r\n").extend(CharString)
|
|
432
432
|
str.encoding = "EUC-JP"
|
|
433
433
|
str.eol = "CRLF"
|
|
434
434
|
expected = ["日", "本", "語", "a", " ", "b", "\r\n"].map { |c| NKF.nkf("--euc", c) }
|
|
435
|
-
assert_equal(expected, str.
|
|
435
|
+
assert_equal(expected, str.to_chars)
|
|
436
436
|
end
|
|
437
437
|
|
|
438
|
-
def
|
|
438
|
+
def test_eucjp_count_chars
|
|
439
439
|
str = NKF.nkf("--euc", "日本語a b\r\n").extend(CharString)
|
|
440
440
|
str.encoding = "EUC-JP"
|
|
441
441
|
str.eol = "CRLF"
|
|
442
442
|
expected = 7
|
|
443
|
-
assert_equal(expected, str.
|
|
443
|
+
assert_equal(expected, str.count_chars)
|
|
444
444
|
end
|
|
445
445
|
|
|
446
|
-
def
|
|
446
|
+
def test_eucjp_count_latin_graph_chars
|
|
447
447
|
str = NKF.nkf("--euc", "日本語a b\r\n").extend(CharString)
|
|
448
448
|
str.encoding = "EUC-JP"
|
|
449
449
|
str.eol = "CRLF"
|
|
450
450
|
expected = 2
|
|
451
|
-
assert_equal(expected, str.
|
|
451
|
+
assert_equal(expected, str.count_latin_graph_chars)
|
|
452
452
|
end
|
|
453
453
|
|
|
454
|
-
def
|
|
454
|
+
def test_eucjp_count_ja_graph_chars
|
|
455
455
|
str = NKF.nkf("--euc", "日本語a b\r\n").extend(CharString)
|
|
456
456
|
str.encoding = "EUC-JP"
|
|
457
457
|
str.eol = "CRLF"
|
|
458
458
|
expected = 3
|
|
459
|
-
assert_equal(expected, str.
|
|
459
|
+
assert_equal(expected, str.count_ja_graph_chars)
|
|
460
460
|
end
|
|
461
461
|
|
|
462
|
-
def
|
|
462
|
+
def test_eucjp_count_graph_chars
|
|
463
463
|
str = NKF.nkf("--euc", "日本語a b\r\n").extend(CharString)
|
|
464
464
|
str.encoding = "EUC-JP"
|
|
465
465
|
str.eol = "CRLF"
|
|
466
466
|
expected = 5
|
|
467
|
-
assert_equal(expected, str.
|
|
467
|
+
assert_equal(expected, str.count_graph_chars)
|
|
468
468
|
end
|
|
469
469
|
|
|
470
|
-
def
|
|
470
|
+
def test_eucjp_count_latin_blank_chars
|
|
471
471
|
str = NKF.nkf("--euc", "日本語\ta b\r\n").extend(CharString)
|
|
472
472
|
str.encoding = "EUC-JP"
|
|
473
473
|
str.eol = "CRLF"
|
|
474
474
|
expected = 2
|
|
475
|
-
assert_equal(expected, str.
|
|
475
|
+
assert_equal(expected, str.count_latin_blank_chars)
|
|
476
476
|
end
|
|
477
477
|
|
|
478
|
-
def
|
|
478
|
+
def test_eucjp_count_ja_blank_chars
|
|
479
479
|
str = NKF.nkf("--euc", "日本 語\ta b\r\n").extend(CharString)
|
|
480
480
|
str.encoding = "EUC-JP"
|
|
481
481
|
str.eol = "CRLF"
|
|
482
482
|
expected = 1
|
|
483
|
-
assert_equal(expected, str.
|
|
483
|
+
assert_equal(expected, str.count_ja_blank_chars)
|
|
484
484
|
end
|
|
485
485
|
|
|
486
|
-
def
|
|
486
|
+
def test_eucjp_count_blank_chars
|
|
487
487
|
str = NKF.nkf("--euc", "日本 語\ta b\r\n").extend(CharString)
|
|
488
488
|
str.encoding = "EUC-JP"
|
|
489
489
|
str.eol = "CRLF"
|
|
490
490
|
expected = 3
|
|
491
|
-
assert_equal(expected, str.
|
|
491
|
+
assert_equal(expected, str.count_blank_chars)
|
|
492
492
|
end
|
|
493
493
|
|
|
494
|
-
def
|
|
494
|
+
def test_eucjp_count_words
|
|
495
495
|
str = NKF.nkf("--euc", "日本 語a b --\r\n").extend(CharString)
|
|
496
496
|
str.encoding = "EUC-JP"
|
|
497
497
|
str.eol = "CRLF"
|
|
498
498
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
499
|
-
assert_equal(expected, str.
|
|
499
|
+
assert_equal(expected, str.count_words)
|
|
500
500
|
end
|
|
501
501
|
|
|
502
|
-
def
|
|
502
|
+
def test_eucjp_count_ja_words
|
|
503
503
|
str = NKF.nkf("--euc", "日本 語a b --\r\n").extend(CharString)
|
|
504
504
|
str.encoding = "EUC-JP"
|
|
505
505
|
str.eol = "CRLF"
|
|
506
506
|
expected = 3
|
|
507
|
-
assert_equal(expected, str.
|
|
507
|
+
assert_equal(expected, str.count_ja_words)
|
|
508
508
|
end
|
|
509
509
|
|
|
510
|
-
def
|
|
510
|
+
def test_eucjp_count_latin_valid_words
|
|
511
511
|
str = NKF.nkf("--euc", "日本 語a b --\r\n").extend(CharString)
|
|
512
512
|
str.encoding = "EUC-JP"
|
|
513
513
|
str.eol = "CRLF"
|
|
514
514
|
expected = 2
|
|
515
|
-
assert_equal(expected, str.
|
|
515
|
+
assert_equal(expected, str.count_latin_valid_words)
|
|
516
516
|
end
|
|
517
517
|
|
|
518
|
-
def
|
|
518
|
+
def test_eucjp_count_ja_valid_words
|
|
519
519
|
str = NKF.nkf("--euc", "日本 語a b --\r\n").extend(CharString)
|
|
520
520
|
str.encoding = "EUC-JP"
|
|
521
521
|
str.eol = "CRLF"
|
|
522
522
|
expected = 2
|
|
523
|
-
assert_equal(expected, str.
|
|
523
|
+
assert_equal(expected, str.count_ja_valid_words)
|
|
524
524
|
end
|
|
525
525
|
|
|
526
|
-
def
|
|
526
|
+
def test_eucjp_count_valid_words
|
|
527
527
|
str = NKF.nkf("--euc", "日本 語a b --\r\n").extend(CharString)
|
|
528
528
|
str.encoding = "EUC-JP"
|
|
529
529
|
str.eol = "CRLF"
|
|
530
530
|
expected = 4
|
|
531
|
-
assert_equal(expected, str.
|
|
531
|
+
assert_equal(expected, str.count_valid_words)
|
|
532
532
|
end
|
|
533
533
|
|
|
534
|
-
def
|
|
534
|
+
def test_eucjp_count_lines
|
|
535
535
|
str = NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
536
536
|
str.encoding = "EUC-JP"
|
|
537
537
|
str.eol = "CRLF"
|
|
538
538
|
expected = 6
|
|
539
|
-
assert_equal(expected, str.
|
|
539
|
+
assert_equal(expected, str.count_lines)
|
|
540
540
|
end
|
|
541
541
|
|
|
542
|
-
def
|
|
542
|
+
def test_eucjp_count_graph_lines
|
|
543
543
|
str = NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
544
544
|
str.encoding = "EUC-JP"
|
|
545
545
|
str.eol = "CRLF"
|
|
546
546
|
expected = 3
|
|
547
|
-
assert_equal(expected, str.
|
|
547
|
+
assert_equal(expected, str.count_graph_lines)
|
|
548
548
|
end
|
|
549
549
|
|
|
550
|
-
def
|
|
550
|
+
def test_eucjp_count_empty_lines
|
|
551
551
|
str = NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
552
552
|
str.encoding = "EUC-JP"
|
|
553
553
|
str.eol = "CRLF"
|
|
554
554
|
expected = 1
|
|
555
|
-
assert_equal(expected, str.
|
|
555
|
+
assert_equal(expected, str.count_empty_lines)
|
|
556
556
|
end
|
|
557
557
|
|
|
558
|
-
def
|
|
558
|
+
def test_eucjp_count_blank_lines
|
|
559
559
|
str = NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
560
560
|
str.encoding = "EUC-JP"
|
|
561
561
|
str.eol = "CRLF"
|
|
562
562
|
expected = 2
|
|
563
|
-
assert_equal(expected, str.
|
|
563
|
+
assert_equal(expected, str.count_blank_lines)
|
|
564
564
|
end
|
|
565
565
|
|
|
566
566
|
# test SJIS module
|
|
567
|
-
def
|
|
567
|
+
def test_sjis_to_words
|
|
568
568
|
str = NKF.nkf("--sjis", "日本語の文字foo bar").extend(CharString)
|
|
569
569
|
str.encoding = "Shift_JIS"
|
|
570
570
|
expected = ["日本語の", "文字", "foo ", "bar"].map { |c| NKF.nkf("--sjis", c) }
|
|
571
|
-
assert_equal(expected, str.
|
|
571
|
+
assert_equal(expected, str.to_words)
|
|
572
572
|
end
|
|
573
573
|
|
|
574
|
-
def
|
|
574
|
+
def test_sjis_to_words_kanhira
|
|
575
575
|
str = NKF.nkf("--sjis", "日本語の文字").extend(CharString)
|
|
576
576
|
str.encoding = "Shift_JIS"
|
|
577
577
|
expected = ["日本語の", "文字"].map { |c| NKF.nkf("--sjis", c) }
|
|
578
|
-
assert_equal(expected, str.
|
|
578
|
+
assert_equal(expected, str.to_words)
|
|
579
579
|
end
|
|
580
580
|
|
|
581
|
-
def
|
|
581
|
+
def test_sjis_to_words_katahira
|
|
582
582
|
str = NKF.nkf("--sjis", "カタカナの文字").extend(CharString)
|
|
583
583
|
str.encoding = "Shift_JIS"
|
|
584
584
|
expected = ["カタカナの", "文字"].map { |c| NKF.nkf("--sjis", c) }
|
|
585
|
-
assert_equal(expected, str.
|
|
585
|
+
assert_equal(expected, str.to_words)
|
|
586
586
|
end
|
|
587
587
|
|
|
588
|
-
def
|
|
588
|
+
def test_sjis_to_words_kataonbiki
|
|
589
589
|
str = NKF.nkf("--sjis", "ルビーの指輪").extend(CharString)
|
|
590
590
|
str.encoding = "Shift_JIS"
|
|
591
591
|
expected = ["ルビーの", "指輪"].map { |c| NKF.nkf("--sjis", c) }
|
|
592
|
-
assert_equal(expected, str.
|
|
592
|
+
assert_equal(expected, str.to_words)
|
|
593
593
|
end
|
|
594
594
|
|
|
595
|
-
def
|
|
595
|
+
def test_sjis_to_words_hiraonbiki
|
|
596
596
|
str = NKF.nkf("--sjis", "わールビーだ").extend(CharString)
|
|
597
597
|
str.encoding = "Shift_JIS"
|
|
598
598
|
expected = ["わー", "ルビーだ"].map { |c| NKF.nkf("--sjis", c) }
|
|
599
|
-
assert_equal(expected, str.
|
|
599
|
+
assert_equal(expected, str.to_words)
|
|
600
600
|
end
|
|
601
601
|
|
|
602
|
-
def
|
|
602
|
+
def test_sjis_to_words_latinmix
|
|
603
603
|
str = NKF.nkf("--sjis", "日本語とLatinの文字").extend(CharString)
|
|
604
604
|
str.encoding = "Shift_JIS"
|
|
605
605
|
expected = ["日本語と", "Latin", "の", "文字"].map { |c| NKF.nkf("--sjis", c) }
|
|
606
|
-
assert_equal(expected, str.
|
|
606
|
+
assert_equal(expected, str.to_words)
|
|
607
607
|
end
|
|
608
608
|
|
|
609
|
-
def
|
|
609
|
+
def test_sjis_to_chars
|
|
610
610
|
str = NKF.nkf("--sjis", "表計算a b").extend(CharString)
|
|
611
611
|
str.encoding = "Shift_JIS"
|
|
612
612
|
expected = ["表", "計", "算", "a", " ", "b"].map { |c| NKF.nkf("--sjis", c) }
|
|
613
|
-
assert_equal(expected, str.
|
|
613
|
+
assert_equal(expected, str.to_chars)
|
|
614
614
|
end
|
|
615
615
|
|
|
616
|
-
def
|
|
616
|
+
def test_sjis_to_chars_with_cr
|
|
617
617
|
str = NKF.nkf("--sjis", "表計算a b\r").extend(CharString)
|
|
618
618
|
str.encoding = "Shift_JIS"
|
|
619
619
|
str.eol = "CR"
|
|
620
620
|
expected = ["表", "計", "算", "a", " ", "b", "\r"].map { |c| NKF.nkf("--sjis", c) }
|
|
621
|
-
assert_equal(expected, str.
|
|
621
|
+
assert_equal(expected, str.to_chars)
|
|
622
622
|
end
|
|
623
623
|
|
|
624
|
-
def
|
|
624
|
+
def test_sjis_to_chars_with_lf
|
|
625
625
|
str = NKF.nkf("--sjis", "表計算a b\n").extend(CharString)
|
|
626
626
|
str.encoding = "Shift_JIS"
|
|
627
627
|
str.eol = "LF"
|
|
628
628
|
expected = ["表", "計", "算", "a", " ", "b", "\n"].map { |c| NKF.nkf("--sjis", c) }
|
|
629
|
-
assert_equal(expected, str.
|
|
629
|
+
assert_equal(expected, str.to_chars)
|
|
630
630
|
end
|
|
631
631
|
|
|
632
|
-
def
|
|
632
|
+
def test_sjis_to_chars_with_crlf
|
|
633
633
|
str = NKF.nkf("--sjis", "表計算a b\r\n").extend(CharString)
|
|
634
634
|
str.encoding = "Shift_JIS"
|
|
635
635
|
str.eol = "CRLF"
|
|
636
636
|
expected = ["表", "計", "算", "a", " ", "b", "\r\n"].map { |c| NKF.nkf("--sjis", c) }
|
|
637
|
-
assert_equal(expected, str.
|
|
637
|
+
assert_equal(expected, str.to_chars)
|
|
638
638
|
end
|
|
639
639
|
|
|
640
|
-
def
|
|
640
|
+
def test_sjis_count_chars
|
|
641
641
|
str = NKF.nkf("--sjis", "日本語a b\r\n").extend(CharString)
|
|
642
642
|
str.encoding = "Shift_JIS"
|
|
643
643
|
str.eol = "CRLF"
|
|
644
644
|
expected = 7
|
|
645
|
-
assert_equal(expected, str.
|
|
645
|
+
assert_equal(expected, str.count_chars)
|
|
646
646
|
end
|
|
647
647
|
|
|
648
|
-
def
|
|
648
|
+
def test_sjis_count_latin_graph_chars
|
|
649
649
|
str = NKF.nkf("--sjis", "日本語a b\r\n").extend(CharString)
|
|
650
650
|
str.encoding = "Shift_JIS"
|
|
651
651
|
str.eol = "CRLF"
|
|
652
652
|
expected = 2
|
|
653
|
-
assert_equal(expected, str.
|
|
653
|
+
assert_equal(expected, str.count_latin_graph_chars)
|
|
654
654
|
end
|
|
655
655
|
|
|
656
|
-
def
|
|
656
|
+
def test_sjis_count_ja_graph_chars
|
|
657
657
|
str = NKF.nkf("--sjis", "日本語a b\r\n").extend(CharString)
|
|
658
658
|
str.encoding = "Shift_JIS"
|
|
659
659
|
str.eol = "CRLF"
|
|
660
660
|
expected = 3
|
|
661
|
-
assert_equal(expected, str.
|
|
661
|
+
assert_equal(expected, str.count_ja_graph_chars)
|
|
662
662
|
end
|
|
663
663
|
|
|
664
|
-
def
|
|
664
|
+
def test_sjis_count_graph_chars
|
|
665
665
|
str = NKF.nkf("--sjis", "日本語a b\r\n").extend(CharString)
|
|
666
666
|
str.encoding = "Shift_JIS"
|
|
667
667
|
str.eol = "CRLF"
|
|
668
668
|
expected = 5
|
|
669
|
-
assert_equal(expected, str.
|
|
669
|
+
assert_equal(expected, str.count_graph_chars)
|
|
670
670
|
end
|
|
671
671
|
|
|
672
|
-
def
|
|
672
|
+
def test_sjis_count_latin_blank_chars
|
|
673
673
|
str = NKF.nkf("--sjis", "日本語\ta b\r\n").extend(CharString)
|
|
674
674
|
str.encoding = "Shift_JIS"
|
|
675
675
|
str.eol = "CRLF"
|
|
676
676
|
expected = 2
|
|
677
|
-
assert_equal(expected, str.
|
|
677
|
+
assert_equal(expected, str.count_latin_blank_chars)
|
|
678
678
|
end
|
|
679
679
|
|
|
680
|
-
def
|
|
680
|
+
def test_sjis_count_ja_blank_chars
|
|
681
681
|
str = NKF.nkf("--sjis", "日本 語\ta b\r\n").extend(CharString)
|
|
682
682
|
str.encoding = "Shift_JIS"
|
|
683
683
|
str.eol = "CRLF"
|
|
684
684
|
expected = 1
|
|
685
|
-
assert_equal(expected, str.
|
|
685
|
+
assert_equal(expected, str.count_ja_blank_chars)
|
|
686
686
|
end
|
|
687
687
|
|
|
688
|
-
def
|
|
688
|
+
def test_sjis_count_blank_chars
|
|
689
689
|
str = NKF.nkf("--sjis", "日本 語\ta b\r\n").extend(CharString)
|
|
690
690
|
str.encoding = "Shift_JIS"
|
|
691
691
|
str.eol = "CRLF"
|
|
692
692
|
expected = 3
|
|
693
|
-
assert_equal(expected, str.
|
|
693
|
+
assert_equal(expected, str.count_blank_chars)
|
|
694
694
|
end
|
|
695
695
|
|
|
696
|
-
def
|
|
696
|
+
def test_sjis_count_words
|
|
697
697
|
str = NKF.nkf("--sjis", "日本 語a b --\r\n").extend(CharString)
|
|
698
698
|
str.encoding = "Shift_JIS"
|
|
699
699
|
str.eol = "CRLF"
|
|
700
700
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
701
|
-
assert_equal(expected, str.
|
|
701
|
+
assert_equal(expected, str.count_words)
|
|
702
702
|
end
|
|
703
703
|
|
|
704
|
-
def
|
|
704
|
+
def test_sjis_count_ja_words
|
|
705
705
|
str = NKF.nkf("--sjis", "日本 語a b --\r\n").extend(CharString)
|
|
706
706
|
str.encoding = "Shift_JIS"
|
|
707
707
|
str.eol = "CRLF"
|
|
708
708
|
expected = 3
|
|
709
|
-
assert_equal(expected, str.
|
|
709
|
+
assert_equal(expected, str.count_ja_words)
|
|
710
710
|
end
|
|
711
711
|
|
|
712
|
-
def
|
|
712
|
+
def test_sjis_count_latin_valid_words
|
|
713
713
|
str = NKF.nkf("--sjis", "日本 語a b --\r\n").extend(CharString)
|
|
714
714
|
str.encoding = "Shift_JIS"
|
|
715
715
|
str.eol = "CRLF"
|
|
716
716
|
expected = 2
|
|
717
|
-
assert_equal(expected, str.
|
|
717
|
+
assert_equal(expected, str.count_latin_valid_words)
|
|
718
718
|
end
|
|
719
719
|
|
|
720
|
-
def
|
|
720
|
+
def test_sjis_count_ja_valid_words
|
|
721
721
|
str = NKF.nkf("--sjis", "日本 語a b --\r\n").extend(CharString)
|
|
722
722
|
str.encoding = "Shift_JIS"
|
|
723
723
|
str.eol = "CRLF"
|
|
724
724
|
expected = 2
|
|
725
|
-
assert_equal(expected, str.
|
|
725
|
+
assert_equal(expected, str.count_ja_valid_words)
|
|
726
726
|
end
|
|
727
727
|
|
|
728
|
-
def
|
|
728
|
+
def test_sjis_count_valid_words
|
|
729
729
|
str = NKF.nkf("--sjis", "日本 語a b --\r\n").extend(CharString)
|
|
730
730
|
str.encoding = "Shift_JIS"
|
|
731
731
|
str.eol = "CRLF"
|
|
732
732
|
expected = 4
|
|
733
|
-
assert_equal(expected, str.
|
|
733
|
+
assert_equal(expected, str.count_valid_words)
|
|
734
734
|
end
|
|
735
735
|
|
|
736
|
-
def
|
|
736
|
+
def test_sjis_count_lines
|
|
737
737
|
str = NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
738
738
|
str.encoding = "Shift_JIS"
|
|
739
739
|
str.eol = "CRLF"
|
|
740
740
|
expected = 6
|
|
741
|
-
assert_equal(expected, str.
|
|
741
|
+
assert_equal(expected, str.count_lines)
|
|
742
742
|
end
|
|
743
743
|
|
|
744
|
-
def
|
|
744
|
+
def test_sjis_count_graph_lines
|
|
745
745
|
str = NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
746
746
|
str.encoding = "Shift_JIS"
|
|
747
747
|
str.eol = "CRLF"
|
|
748
748
|
expected = 3
|
|
749
|
-
assert_equal(expected, str.
|
|
749
|
+
assert_equal(expected, str.count_graph_lines)
|
|
750
750
|
end
|
|
751
751
|
|
|
752
|
-
def
|
|
752
|
+
def test_sjis_count_empty_lines
|
|
753
753
|
str = NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
754
754
|
str.encoding = "Shift_JIS"
|
|
755
755
|
str.eol = "CRLF"
|
|
756
756
|
expected = 1
|
|
757
|
-
assert_equal(expected, str.
|
|
757
|
+
assert_equal(expected, str.count_empty_lines)
|
|
758
758
|
end
|
|
759
759
|
|
|
760
|
-
def
|
|
760
|
+
def test_sjis_count_blank_lines
|
|
761
761
|
str = NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
762
762
|
str.encoding = "Shift_JIS"
|
|
763
763
|
str.eol = "CRLF"
|
|
764
764
|
expected = 2
|
|
765
|
-
assert_equal(expected, str.
|
|
765
|
+
assert_equal(expected, str.count_blank_lines)
|
|
766
766
|
end
|
|
767
767
|
|
|
768
768
|
# test UTF8 module
|
|
769
|
-
def
|
|
769
|
+
def test_utf8_to_words
|
|
770
770
|
str = NKF.nkf("--utf8", "日本語の文字foo bar").extend(CharString)
|
|
771
771
|
str.encoding = "UTF-8"
|
|
772
772
|
expected = ["日本語の", "文字", "foo ", "bar"].map { |c| NKF.nkf("--utf8", c) }
|
|
773
|
-
assert_equal(expected, str.
|
|
773
|
+
assert_equal(expected, str.to_words)
|
|
774
774
|
end
|
|
775
775
|
|
|
776
|
-
def
|
|
776
|
+
def test_utf8_to_words_kanhira
|
|
777
777
|
str = NKF.nkf("--utf8", "日本語の文字").extend(CharString)
|
|
778
778
|
str.encoding = "UTF-8"
|
|
779
779
|
expected = ["日本語の", "文字"].map { |c| NKF.nkf("--utf8", c) }
|
|
780
|
-
assert_equal(expected, str.
|
|
780
|
+
assert_equal(expected, str.to_words)
|
|
781
781
|
end
|
|
782
782
|
|
|
783
|
-
def
|
|
783
|
+
def test_utf8_to_words_katahira
|
|
784
784
|
str = NKF.nkf("--utf8", "カタカナの文字").extend(CharString)
|
|
785
785
|
str.encoding = "UTF-8"
|
|
786
786
|
expected = ["カタカナの", "文字"].map { |c| NKF.nkf("--utf8", c) }
|
|
787
|
-
assert_equal(expected, str.
|
|
787
|
+
assert_equal(expected, str.to_words)
|
|
788
788
|
end
|
|
789
789
|
|
|
790
|
-
def
|
|
790
|
+
def test_utf8_to_words_kataonbiki
|
|
791
791
|
str = NKF.nkf("--utf8", "ルビーの指輪").extend(CharString)
|
|
792
792
|
str.encoding = "UTF-8"
|
|
793
793
|
expected = ["ルビーの", "指輪"].map { |c| NKF.nkf("--utf8", c) }
|
|
794
|
-
assert_equal(expected, str.
|
|
794
|
+
assert_equal(expected, str.to_words)
|
|
795
795
|
end
|
|
796
796
|
|
|
797
|
-
def
|
|
797
|
+
def test_utf8_to_words_hiraonbiki
|
|
798
798
|
str = NKF.nkf("--utf8", "わールビーだ").extend(CharString)
|
|
799
799
|
str.encoding = "UTF-8"
|
|
800
800
|
expected = ["わー", "ルビーだ"].map { |c| NKF.nkf("--utf8", c) }
|
|
801
|
-
assert_equal(expected, str.
|
|
801
|
+
assert_equal(expected, str.to_words)
|
|
802
802
|
end
|
|
803
803
|
|
|
804
|
-
def
|
|
804
|
+
def test_utf8_to_words_latinmix
|
|
805
805
|
str = NKF.nkf("--utf8", "日本語とLatinの文字").extend(CharString)
|
|
806
806
|
str.encoding = "UTF-8"
|
|
807
807
|
expected = ["日本語と", "Latin", "の", "文字"].map { |c| NKF.nkf("--utf8", c) }
|
|
808
|
-
assert_equal(expected, str.
|
|
808
|
+
assert_equal(expected, str.to_words)
|
|
809
809
|
end
|
|
810
810
|
|
|
811
|
-
def
|
|
811
|
+
def test_utf8_to_chars
|
|
812
812
|
str = NKF.nkf("--utf8", "日本語a b").extend(CharString)
|
|
813
813
|
expected = ["日", "本", "語", "a", " ", "b"].map { |c| NKF.nkf("--utf8", c) }
|
|
814
|
-
assert_equal(expected, str.
|
|
814
|
+
assert_equal(expected, str.to_chars)
|
|
815
815
|
end
|
|
816
816
|
|
|
817
|
-
def
|
|
817
|
+
def test_utf8_to_chars_with_cr
|
|
818
818
|
str = NKF.nkf("--utf8", "日本語a b\r").extend(CharString)
|
|
819
819
|
str.eol = "CR"
|
|
820
820
|
expected = ["日", "本", "語", "a", " ", "b", "\r"].map { |c| NKF.nkf("--utf8", c) }
|
|
821
|
-
assert_equal(expected, str.
|
|
821
|
+
assert_equal(expected, str.to_chars)
|
|
822
822
|
end
|
|
823
823
|
|
|
824
|
-
def
|
|
824
|
+
def test_utf8_to_chars_with_lf
|
|
825
825
|
str = NKF.nkf("--utf8", "日本語a b\n").extend(CharString)
|
|
826
826
|
str.eol = "LF"
|
|
827
827
|
expected = ["日", "本", "語", "a", " ", "b", "\n"].map { |c| NKF.nkf("--utf8", c) }
|
|
828
|
-
assert_equal(expected, str.
|
|
828
|
+
assert_equal(expected, str.to_chars)
|
|
829
829
|
end
|
|
830
830
|
|
|
831
|
-
def
|
|
831
|
+
def test_utf8_to_chars_with_crlf
|
|
832
832
|
str = NKF.nkf("--utf8", "日本語a b\r\n").extend(CharString)
|
|
833
833
|
str.eol = "CRLF"
|
|
834
834
|
expected = ["日", "本", "語", "a", " ", "b", "\r\n"].map { |c| NKF.nkf("--utf8", c) }
|
|
835
|
-
assert_equal(expected, str.
|
|
835
|
+
assert_equal(expected, str.to_chars)
|
|
836
836
|
end
|
|
837
837
|
|
|
838
|
-
def
|
|
838
|
+
def test_utf8_count_chars
|
|
839
839
|
str = NKF.nkf("--utf8", "日本語a b\r\n").extend(CharString)
|
|
840
840
|
str.eol = "CRLF"
|
|
841
841
|
expected = 7
|
|
842
|
-
assert_equal(expected, str.
|
|
842
|
+
assert_equal(expected, str.count_chars)
|
|
843
843
|
end
|
|
844
844
|
|
|
845
|
-
def
|
|
845
|
+
def test_utf8_count_latin_graph_chars
|
|
846
846
|
str = NKF.nkf("--utf8", "日本語a b\r\n").extend(CharString)
|
|
847
847
|
str.eol = "CRLF"
|
|
848
848
|
expected = 2
|
|
849
|
-
assert_equal(expected, str.
|
|
849
|
+
assert_equal(expected, str.count_latin_graph_chars)
|
|
850
850
|
end
|
|
851
851
|
|
|
852
|
-
def
|
|
852
|
+
def test_utf8_count_ja_graph_chars
|
|
853
853
|
str = NKF.nkf("--utf8", "日本語a b\r\n").extend(CharString)
|
|
854
854
|
str.eol = "CRLF"
|
|
855
855
|
expected = 3
|
|
856
|
-
assert_equal(expected, str.
|
|
856
|
+
assert_equal(expected, str.count_ja_graph_chars)
|
|
857
857
|
end
|
|
858
858
|
|
|
859
|
-
def
|
|
859
|
+
def test_utf8_count_graph_chars
|
|
860
860
|
str = NKF.nkf("--utf8", "日本語a b\r\n").extend(CharString)
|
|
861
861
|
str.eol = "CRLF"
|
|
862
862
|
expected = 5
|
|
863
|
-
assert_equal(expected, str.
|
|
863
|
+
assert_equal(expected, str.count_graph_chars)
|
|
864
864
|
end
|
|
865
865
|
|
|
866
|
-
def
|
|
866
|
+
def test_utf8_count_latin_blank_chars
|
|
867
867
|
str = NKF.nkf("--utf8", "日本語\ta b\r\n").extend(CharString)
|
|
868
868
|
str.encoding = "UTF-8"
|
|
869
869
|
str.eol = "CRLF"
|
|
870
870
|
expected = 2
|
|
871
|
-
assert_equal(expected, str.
|
|
871
|
+
assert_equal(expected, str.count_latin_blank_chars)
|
|
872
872
|
end
|
|
873
873
|
|
|
874
|
-
def
|
|
874
|
+
def test_utf8_count_ja_blank_chars
|
|
875
875
|
str = NKF.nkf("--utf8", "日本 語\ta b\r\n").extend(CharString)
|
|
876
876
|
str.encoding = "UTF-8"
|
|
877
877
|
str.eol = "CRLF"
|
|
878
878
|
expected = 1
|
|
879
|
-
assert_equal(expected, str.
|
|
879
|
+
assert_equal(expected, str.count_ja_blank_chars)
|
|
880
880
|
end
|
|
881
881
|
|
|
882
|
-
def
|
|
882
|
+
def test_utf8_count_blank_chars
|
|
883
883
|
str = NKF.nkf("--utf8", "日本 語\ta b\r\n").extend(CharString)
|
|
884
884
|
str.encoding = "UTF-8"
|
|
885
885
|
str.eol = "CRLF"
|
|
886
886
|
expected = 3
|
|
887
|
-
assert_equal(expected, str.
|
|
887
|
+
assert_equal(expected, str.count_blank_chars)
|
|
888
888
|
end
|
|
889
889
|
|
|
890
|
-
def
|
|
890
|
+
def test_utf8_count_words
|
|
891
891
|
str = NKF.nkf("--utf8", "日本 語a b --\r\n").extend(CharString)
|
|
892
892
|
str.encoding = "UTF-8"
|
|
893
893
|
str.eol = "CRLF"
|
|
894
894
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
895
|
-
assert_equal(expected, str.
|
|
895
|
+
assert_equal(expected, str.count_words)
|
|
896
896
|
end
|
|
897
897
|
|
|
898
|
-
def
|
|
898
|
+
def test_utf8_count_ja_words
|
|
899
899
|
str = NKF.nkf("--utf8", "日本 語a b --\r\n").extend(CharString)
|
|
900
900
|
str.encoding = "UTF-8"
|
|
901
901
|
str.eol = "CRLF"
|
|
902
902
|
expected = 3
|
|
903
|
-
assert_equal(expected, str.
|
|
903
|
+
assert_equal(expected, str.count_ja_words)
|
|
904
904
|
end
|
|
905
905
|
|
|
906
|
-
def
|
|
906
|
+
def test_utf8_count_latin_valid_words
|
|
907
907
|
str = NKF.nkf("--utf8", "日本 語a b --\r\n").extend(CharString)
|
|
908
908
|
str.encoding = "UTF-8"
|
|
909
909
|
str.eol = "CRLF"
|
|
910
910
|
expected = 2
|
|
911
|
-
assert_equal(expected, str.
|
|
911
|
+
assert_equal(expected, str.count_latin_valid_words)
|
|
912
912
|
end
|
|
913
913
|
|
|
914
|
-
def
|
|
914
|
+
def test_utf8_count_ja_valid_words
|
|
915
915
|
str = NKF.nkf("--utf8", "日本 語a b --\r\n").extend(CharString)
|
|
916
916
|
str.encoding = "UTF-8"
|
|
917
917
|
str.eol = "CRLF"
|
|
918
918
|
expected = 2
|
|
919
|
-
assert_equal(expected, str.
|
|
919
|
+
assert_equal(expected, str.count_ja_valid_words)
|
|
920
920
|
end
|
|
921
921
|
|
|
922
|
-
def
|
|
922
|
+
def test_utf8_count_valid_words
|
|
923
923
|
str = NKF.nkf("--utf8", "日本 語a b --\r\n").extend(CharString)
|
|
924
924
|
str.encoding = "UTF-8"
|
|
925
925
|
str.eol = "CRLF"
|
|
926
926
|
expected = 4
|
|
927
|
-
assert_equal(expected, str.
|
|
927
|
+
assert_equal(expected, str.count_valid_words)
|
|
928
928
|
end
|
|
929
929
|
|
|
930
|
-
def
|
|
930
|
+
def test_utf8_count_lines
|
|
931
931
|
str = NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
932
932
|
str.encoding = "UTF-8"
|
|
933
933
|
str.eol = "CRLF"
|
|
934
934
|
expected = 6
|
|
935
|
-
assert_equal(expected, str.
|
|
935
|
+
assert_equal(expected, str.count_lines)
|
|
936
936
|
end
|
|
937
937
|
|
|
938
|
-
def
|
|
938
|
+
def test_utf8_count_graph_lines
|
|
939
939
|
str = NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
940
940
|
str.encoding = "UTF-8"
|
|
941
941
|
str.eol = "CRLF"
|
|
942
942
|
expected = 3
|
|
943
|
-
assert_equal(expected, str.
|
|
943
|
+
assert_equal(expected, str.count_graph_lines)
|
|
944
944
|
end
|
|
945
945
|
|
|
946
|
-
def
|
|
946
|
+
def test_utf8_count_empty_lines
|
|
947
947
|
str = NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
948
948
|
str.encoding = "UTF-8"
|
|
949
949
|
str.eol = "CRLF"
|
|
950
950
|
expected = 1
|
|
951
|
-
assert_equal(expected, str.
|
|
951
|
+
assert_equal(expected, str.count_empty_lines)
|
|
952
952
|
end
|
|
953
953
|
|
|
954
|
-
def
|
|
954
|
+
def test_utf8_count_blank_lines
|
|
955
955
|
str = NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
956
956
|
str.encoding = "UTF-8"
|
|
957
957
|
str.eol = "CRLF"
|
|
958
958
|
expected = 2
|
|
959
|
-
assert_equal(expected, str.
|
|
959
|
+
assert_equal(expected, str.count_blank_lines)
|
|
960
960
|
end
|
|
961
961
|
|
|
962
962
|
# test module functions
|