docdiff 0.6.5 → 0.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +7 -7
- data/Guardfile +4 -4
- data/Makefile +1 -1
- data/Rakefile +6 -6
- data/bin/docdiff +1 -1
- data/devutil/Rakefile +12 -5
- data/devutil/char_by_charclass.rb +43 -20
- data/devutil/charclass_by_char.rb +40 -19
- data/devutil/jis0208.rb +263 -231
- data/devutil/jis0208_test.rb +196 -0
- data/doc/news.md +8 -0
- data/docdiff.gemspec +12 -10
- data/lib/doc_diff.rb +59 -60
- data/lib/docdiff/charstring.rb +225 -241
- data/lib/docdiff/cli.rb +285 -250
- data/lib/docdiff/diff/contours.rb +1 -1
- data/lib/docdiff/diff/editscript.rb +1 -1
- data/lib/docdiff/diff/rcsdiff.rb +1 -1
- data/lib/docdiff/diff/shortestpath.rb +1 -1
- data/lib/docdiff/diff/speculative.rb +1 -1
- data/lib/docdiff/diff/subsequence.rb +1 -1
- data/lib/docdiff/diff/unidiff.rb +1 -1
- data/lib/docdiff/diff.rb +1 -1
- data/lib/docdiff/difference.rb +71 -70
- data/lib/docdiff/document.rb +129 -109
- data/lib/docdiff/encoding/en_ascii.rb +64 -58
- data/lib/docdiff/encoding/ja_eucjp.rb +250 -235
- data/lib/docdiff/encoding/ja_sjis.rb +240 -226
- data/lib/docdiff/encoding/ja_utf8.rb +6952 -6939
- data/lib/docdiff/version.rb +1 -1
- data/lib/docdiff/view.rb +522 -438
- data/lib/docdiff.rb +2 -2
- data/test/charstring_test.rb +475 -351
- data/test/cli_test.rb +103 -101
- data/test/diff_test.rb +15 -16
- data/test/difference_test.rb +40 -31
- data/test/docdiff_test.rb +162 -136
- data/test/document_test.rb +280 -175
- data/test/test_helper.rb +2 -1
- data/test/view_test.rb +636 -497
- metadata +8 -8
- data/devutil/testjis0208.rb +0 -38
data/test/charstring_test.rb
CHANGED
|
@@ -3,162 +3,180 @@
|
|
|
3
3
|
|
|
4
4
|
# frozen_string_literal: false
|
|
5
5
|
|
|
6
|
-
require
|
|
7
|
-
require
|
|
8
|
-
require
|
|
6
|
+
require "test/unit"
|
|
7
|
+
require "docdiff/charstring"
|
|
8
|
+
require "nkf"
|
|
9
9
|
|
|
10
|
-
class
|
|
10
|
+
class TestCharString < Test::Unit::TestCase
|
|
11
11
|
CharString = DocDiff::CharString
|
|
12
12
|
|
|
13
|
-
def setup
|
|
14
|
-
#
|
|
13
|
+
def setup
|
|
15
14
|
end
|
|
16
15
|
|
|
17
16
|
# test encoding module registration
|
|
18
|
-
def test_encoding_ascii
|
|
19
|
-
str = "foo".extend
|
|
17
|
+
def test_encoding_ascii
|
|
18
|
+
str = "foo".extend(CharString)
|
|
20
19
|
str.encoding = "US-ASCII"
|
|
21
20
|
expected = CharString::ASCII
|
|
22
21
|
assert_equal(expected, CharString::Encodings[str.encoding])
|
|
23
22
|
end
|
|
24
|
-
|
|
25
|
-
|
|
23
|
+
|
|
24
|
+
def test_encoding_eucjp
|
|
25
|
+
str = "foo".extend(CharString)
|
|
26
26
|
str.encoding = "EUC-JP"
|
|
27
|
-
expected = CharString::
|
|
27
|
+
expected = CharString::EUCJP
|
|
28
28
|
assert_equal(expected, CharString::Encodings[str.encoding])
|
|
29
29
|
end
|
|
30
|
-
|
|
31
|
-
|
|
30
|
+
|
|
31
|
+
def test_encoding_sjis
|
|
32
|
+
str = "foo".extend(CharString)
|
|
32
33
|
str.encoding = "Shift_JIS"
|
|
33
|
-
expected = CharString::
|
|
34
|
+
expected = CharString::ShiftJIS
|
|
34
35
|
assert_equal(expected, CharString::Encodings[str.encoding])
|
|
35
36
|
end
|
|
36
|
-
|
|
37
|
-
|
|
37
|
+
|
|
38
|
+
def test_encoding_utf8
|
|
39
|
+
str = "foo".extend(CharString)
|
|
38
40
|
str.encoding = "UTF-8"
|
|
39
41
|
expected = CharString::UTF8
|
|
40
42
|
assert_equal(expected, CharString::Encodings[str.encoding])
|
|
41
43
|
end
|
|
42
44
|
|
|
43
45
|
# test eol module registration
|
|
44
|
-
def test_eol_cr
|
|
45
|
-
str = "foo".extend
|
|
46
|
+
def test_eol_cr
|
|
47
|
+
str = "foo".extend(CharString)
|
|
46
48
|
str.eol = "CR"
|
|
47
49
|
expected = CharString::CR
|
|
48
50
|
assert_equal(expected, CharString::EOLChars[str.eol])
|
|
49
51
|
end
|
|
50
|
-
|
|
51
|
-
|
|
52
|
+
|
|
53
|
+
def test_eol_lf
|
|
54
|
+
str = "foo".extend(CharString)
|
|
52
55
|
str.eol = "LF"
|
|
53
56
|
expected = CharString::LF
|
|
54
57
|
assert_equal(expected, CharString::EOLChars[str.eol])
|
|
55
58
|
end
|
|
56
|
-
|
|
57
|
-
|
|
59
|
+
|
|
60
|
+
def test_eol_crlf
|
|
61
|
+
str = "foo".extend(CharString)
|
|
58
62
|
str.eol = "CRLF"
|
|
59
63
|
expected = CharString::CRLF
|
|
60
64
|
assert_equal(expected, CharString::EOLChars[str.eol])
|
|
61
65
|
end
|
|
62
66
|
|
|
63
67
|
# test eol eol_char method
|
|
64
|
-
def test_eol_char_cr
|
|
65
|
-
str = "foo\rbar\r".extend
|
|
68
|
+
def test_eol_char_cr
|
|
69
|
+
str = "foo\rbar\r".extend(CharString)
|
|
66
70
|
str.eol = "CR"
|
|
67
71
|
expected = "\r"
|
|
68
72
|
assert_equal(expected, str.eol_char)
|
|
69
73
|
end
|
|
70
|
-
|
|
71
|
-
|
|
74
|
+
|
|
75
|
+
def test_eol_char_lf
|
|
76
|
+
str = "foo\nbar\n".extend(CharString)
|
|
72
77
|
str.eol = "LF"
|
|
73
78
|
expected = "\n"
|
|
74
79
|
assert_equal(expected, str.eol_char)
|
|
75
80
|
end
|
|
76
|
-
|
|
77
|
-
|
|
81
|
+
|
|
82
|
+
def test_eol_char_crlf
|
|
83
|
+
str = "foo\r\nbar\r\n".extend(CharString)
|
|
78
84
|
str.eol = "CRLF"
|
|
79
85
|
expected = "\r\n"
|
|
80
86
|
assert_equal(expected, str.eol_char)
|
|
81
87
|
end
|
|
82
|
-
|
|
83
|
-
|
|
88
|
+
|
|
89
|
+
def test_eol_char_none
|
|
90
|
+
str = "foobar".extend(CharString)
|
|
84
91
|
expected = nil
|
|
85
92
|
assert_equal(expected, str.eol_char)
|
|
86
93
|
end
|
|
87
|
-
|
|
88
|
-
|
|
94
|
+
|
|
95
|
+
def test_eol_char_none_for_0length_string
|
|
96
|
+
str = "".extend(CharString)
|
|
89
97
|
expected = nil
|
|
90
98
|
assert_equal(expected, str.eol_char)
|
|
91
99
|
end
|
|
92
|
-
|
|
93
|
-
|
|
100
|
+
|
|
101
|
+
def test_eol_char_none_eucjp
|
|
102
|
+
str = NKF.nkf("--euc", "日本語a b").extend(CharString)
|
|
94
103
|
expected = nil
|
|
95
104
|
assert_equal(expected, str.eol_char)
|
|
96
105
|
end
|
|
97
|
-
|
|
98
|
-
|
|
106
|
+
|
|
107
|
+
def test_eol_char_none_sjis
|
|
108
|
+
str = NKF.nkf("--sjis", "日本語a b").extend(CharString)
|
|
99
109
|
expected = nil
|
|
100
110
|
assert_equal(expected, str.eol_char)
|
|
101
111
|
end
|
|
102
112
|
|
|
103
113
|
# test eol split_to_line() method
|
|
104
|
-
def test_cr_split_to_line
|
|
105
|
-
str = "foo\rbar\r".extend
|
|
106
|
-
encoding
|
|
107
|
-
str.
|
|
114
|
+
def test_cr_split_to_line
|
|
115
|
+
str = "foo\rbar\r".extend(CharString)
|
|
116
|
+
str.encoding = "US-ASCII"
|
|
117
|
+
str.eol = "CR"
|
|
108
118
|
expected = ["foo\r", "bar\r"]
|
|
109
119
|
assert_equal(expected, str.split_to_line)
|
|
110
120
|
end
|
|
111
|
-
|
|
112
|
-
|
|
121
|
+
|
|
122
|
+
def test_cr_split_to_line_chomped_lastline
|
|
123
|
+
str = "foo\rbar".extend(CharString)
|
|
113
124
|
str.encoding = "US-ASCII"
|
|
114
125
|
str.eol = "CR"
|
|
115
126
|
expected = ["foo\r", "bar"]
|
|
116
127
|
assert_equal(expected, str.split_to_line)
|
|
117
128
|
end
|
|
118
|
-
|
|
119
|
-
|
|
129
|
+
|
|
130
|
+
def test_cr_split_to_line_empty_line
|
|
131
|
+
str = "foo\r\rbar\r".extend(CharString)
|
|
120
132
|
str.encoding = "US-ASCII"
|
|
121
133
|
str.eol = "CR"
|
|
122
134
|
expected = ["foo\r", "\r", "bar\r"]
|
|
123
135
|
assert_equal(expected, str.split_to_line)
|
|
124
136
|
end
|
|
125
|
-
|
|
126
|
-
|
|
137
|
+
|
|
138
|
+
def test_lf_split_to_line
|
|
139
|
+
str = "foo\nbar\n".extend(CharString)
|
|
127
140
|
str.encoding = "US-ASCII"
|
|
128
141
|
str.eol = "LF"
|
|
129
142
|
expected = ["foo\n", "bar\n"]
|
|
130
143
|
assert_equal(expected, str.split_to_line)
|
|
131
144
|
end
|
|
132
|
-
|
|
133
|
-
|
|
145
|
+
|
|
146
|
+
def test_lf_split_to_line_chomped_lastline
|
|
147
|
+
str = "foo\nbar".extend(CharString)
|
|
134
148
|
str.encoding = "US-ASCII"
|
|
135
149
|
str.eol = "LF"
|
|
136
150
|
expected = ["foo\n", "bar"]
|
|
137
151
|
assert_equal(expected, str.split_to_line)
|
|
138
152
|
end
|
|
139
|
-
|
|
140
|
-
|
|
153
|
+
|
|
154
|
+
def test_lf_split_to_line_empty_line
|
|
155
|
+
str = "foo\n\nbar\n".extend(CharString)
|
|
141
156
|
str.encoding = "US-ASCII"
|
|
142
157
|
str.eol = "LF"
|
|
143
158
|
expected = ["foo\n", "\n", "bar\n"]
|
|
144
159
|
assert_equal(expected, str.split_to_line)
|
|
145
160
|
end
|
|
146
|
-
|
|
147
|
-
|
|
161
|
+
|
|
162
|
+
def test_crlf_split_to_line
|
|
163
|
+
str = "foo\r\nbar\r\n".extend(CharString)
|
|
148
164
|
str.encoding = "US-ASCII"
|
|
149
165
|
str.eol = "CRLF"
|
|
150
166
|
expected = ["foo\r\n", "bar\r\n"]
|
|
151
167
|
assert_equal(expected, str.split_to_line)
|
|
152
168
|
end
|
|
153
|
-
|
|
154
|
-
|
|
169
|
+
|
|
170
|
+
def test_crlf_split_to_line_chomped_lastline
|
|
171
|
+
str = "foo\r\nbar".extend(CharString)
|
|
155
172
|
str.encoding = "US-ASCII"
|
|
156
173
|
str.eol = "CRLF"
|
|
157
174
|
expected = ["foo\r\n", "bar"]
|
|
158
175
|
assert_equal(expected, str.split_to_line)
|
|
159
176
|
end
|
|
160
|
-
|
|
161
|
-
|
|
177
|
+
|
|
178
|
+
def test_crlf_split_to_line_empty_line
|
|
179
|
+
str = "foo\r\n\r\nbar\r\n".extend(CharString)
|
|
162
180
|
str.encoding = "US-ASCII"
|
|
163
181
|
str.eol = "CRLF"
|
|
164
182
|
expected = ["foo\r\n", "\r\n", "bar\r\n"]
|
|
@@ -166,157 +184,179 @@ class TC_DocDiff_CharString < Test::Unit::TestCase
|
|
|
166
184
|
end
|
|
167
185
|
|
|
168
186
|
# test ASCII module
|
|
169
|
-
def test_ascii_split_to_word
|
|
170
|
-
str = "foo bar".extend
|
|
187
|
+
def test_ascii_split_to_word
|
|
188
|
+
str = "foo bar".extend(CharString)
|
|
171
189
|
str.encoding = "US-ASCII"
|
|
172
190
|
expected = ["foo ", "bar"]
|
|
173
191
|
assert_equal(expected, str.split_to_word)
|
|
174
192
|
end
|
|
175
|
-
|
|
176
|
-
|
|
193
|
+
|
|
194
|
+
def test_ascii_split_to_word_withsymbol
|
|
195
|
+
str = "foo (bar) baz-baz".extend(CharString)
|
|
177
196
|
str.encoding = "US-ASCII"
|
|
178
197
|
expected = ["foo ", "(bar) ", "baz-baz"]
|
|
179
198
|
assert_equal(expected, str.split_to_word)
|
|
180
199
|
end
|
|
181
|
-
|
|
182
|
-
|
|
200
|
+
|
|
201
|
+
def test_ascii_split_to_word_withquote
|
|
202
|
+
str = "foo's 'foo' \"bar\" 'baz.'".extend(CharString)
|
|
183
203
|
str.encoding = "US-ASCII"
|
|
184
204
|
expected = ["foo's ", "'foo' ", "\"bar\" ", "'baz.'"]
|
|
185
205
|
assert_equal(expected, str.split_to_word)
|
|
186
206
|
end
|
|
187
|
-
|
|
188
|
-
|
|
207
|
+
|
|
208
|
+
def test_ascii_split_to_word_withlongspace
|
|
209
|
+
str = " foo bar".extend(CharString)
|
|
189
210
|
str.encoding = "US-ASCII"
|
|
190
211
|
expected = [" ", "foo ", " ", "bar"]
|
|
191
212
|
assert_equal(expected, str.split_to_word)
|
|
192
213
|
end
|
|
193
|
-
|
|
194
|
-
|
|
214
|
+
|
|
215
|
+
def test_ascii_split_to_word_withdash
|
|
216
|
+
str = "foo -- bar, baz - quux".extend(CharString)
|
|
195
217
|
str.encoding = "US-ASCII"
|
|
196
218
|
expected = ["foo ", "-- ", "bar, ", "baz ", "- ", "quux"]
|
|
197
219
|
assert_equal(expected, str.split_to_word)
|
|
198
220
|
end
|
|
199
|
-
|
|
200
|
-
|
|
221
|
+
|
|
222
|
+
def test_ascii_split_to_char
|
|
223
|
+
str = "foo bar".extend(CharString)
|
|
201
224
|
str.encoding = "US-ASCII"
|
|
202
225
|
str.eol = "LF"
|
|
203
|
-
expected = ["f","o","o"," ","b","a","r"]
|
|
226
|
+
expected = ["f", "o", "o", " ", "b", "a", "r"]
|
|
204
227
|
assert_equal(expected, str.split_to_char)
|
|
205
228
|
end
|
|
206
|
-
|
|
207
|
-
|
|
229
|
+
|
|
230
|
+
def test_ascii_split_to_char_with_eol_cr
|
|
231
|
+
str = "foo bar\r".extend(CharString)
|
|
208
232
|
str.encoding = "US-ASCII"
|
|
209
233
|
str.eol = "CR"
|
|
210
|
-
expected = ["f","o","o"," ","b","a","r","\r"]
|
|
234
|
+
expected = ["f", "o", "o", " ", "b", "a", "r", "\r"]
|
|
211
235
|
assert_equal(expected, str.split_to_char)
|
|
212
236
|
end
|
|
213
|
-
|
|
214
|
-
|
|
237
|
+
|
|
238
|
+
def test_ascii_split_to_char_with_eol_lf
|
|
239
|
+
str = "foo bar\n".extend(CharString)
|
|
215
240
|
str.encoding = "US-ASCII"
|
|
216
241
|
str.eol = "LF"
|
|
217
|
-
expected = ["f","o","o"," ","b","a","r","\n"]
|
|
242
|
+
expected = ["f", "o", "o", " ", "b", "a", "r", "\n"]
|
|
218
243
|
assert_equal(expected, str.split_to_char)
|
|
219
244
|
end
|
|
220
|
-
|
|
221
|
-
|
|
245
|
+
|
|
246
|
+
def test_ascii_split_to_char_with_eol_crlf
|
|
247
|
+
str = "foo bar\r\n".extend(CharString)
|
|
222
248
|
str.encoding = "US-ASCII"
|
|
223
249
|
str.eol = "CRLF"
|
|
224
|
-
expected = ["f","o","o"," ","b","a","r","\r\n"]
|
|
250
|
+
expected = ["f", "o", "o", " ", "b", "a", "r", "\r\n"]
|
|
225
251
|
assert_equal(expected, str.split_to_char)
|
|
226
252
|
end
|
|
227
|
-
|
|
228
|
-
|
|
253
|
+
|
|
254
|
+
def test_ascii_split_to_byte
|
|
255
|
+
str = "foo bar\r\n".extend(CharString)
|
|
229
256
|
str.encoding = "US-ASCII"
|
|
230
257
|
str.eol = "CRLF"
|
|
231
|
-
expected = ["f","o","o"," ","b","a","r","\r","\n"]
|
|
258
|
+
expected = ["f", "o", "o", " ", "b", "a", "r", "\r", "\n"]
|
|
232
259
|
assert_equal(expected, str.split_to_byte)
|
|
233
260
|
end
|
|
234
|
-
|
|
235
|
-
|
|
261
|
+
|
|
262
|
+
def test_ascii_count_byte
|
|
263
|
+
str = "foo bar\r\n".extend(CharString)
|
|
236
264
|
str.encoding = "US-ASCII"
|
|
237
265
|
str.eol = "CRLF"
|
|
238
266
|
expected = 9
|
|
239
267
|
assert_equal(expected, str.count_byte)
|
|
240
268
|
end
|
|
241
|
-
|
|
242
|
-
|
|
269
|
+
|
|
270
|
+
def test_ascii_count_char
|
|
271
|
+
str = "foo bar\r\nbaz quux\r\n".extend(CharString)
|
|
243
272
|
str.encoding = "US-ASCII"
|
|
244
273
|
str.eol = "CRLF"
|
|
245
274
|
expected = 17
|
|
246
275
|
assert_equal(expected, str.count_char)
|
|
247
276
|
end
|
|
248
|
-
|
|
249
|
-
|
|
277
|
+
|
|
278
|
+
def test_ascii_count_latin_graph_char
|
|
279
|
+
str = "foo bar\r\nbaz quux\r\n".extend(CharString)
|
|
250
280
|
str.encoding = "US-ASCII"
|
|
251
281
|
str.eol = "CRLF"
|
|
252
282
|
expected = 13
|
|
253
283
|
assert_equal(expected, str.count_latin_graph_char)
|
|
254
284
|
end
|
|
255
|
-
|
|
256
|
-
|
|
285
|
+
|
|
286
|
+
def test_ascii_count_graph_char
|
|
287
|
+
str = "foo bar\r\nbaz quux\r\n".extend(CharString)
|
|
257
288
|
str.encoding = "US-ASCII"
|
|
258
289
|
str.eol = "CRLF"
|
|
259
290
|
expected = 13
|
|
260
291
|
assert_equal(expected, str.count_graph_char)
|
|
261
292
|
end
|
|
262
|
-
|
|
263
|
-
|
|
293
|
+
|
|
294
|
+
def test_ascii_count_latin_blank_char
|
|
295
|
+
str = "foo bar\r\nbaz\tquux\r\n".extend(CharString)
|
|
264
296
|
str.encoding = "US-ASCII"
|
|
265
297
|
str.eol = "CRLF"
|
|
266
298
|
expected = 2
|
|
267
299
|
assert_equal(expected, str.count_latin_blank_char)
|
|
268
300
|
end
|
|
269
|
-
|
|
270
|
-
|
|
301
|
+
|
|
302
|
+
def test_ascii_count_blank_char
|
|
303
|
+
str = "foo bar\r\nbaz\tquux\r\n".extend(CharString)
|
|
271
304
|
str.encoding = "US-ASCII"
|
|
272
305
|
str.eol = "CRLF"
|
|
273
306
|
expected = 2
|
|
274
307
|
assert_equal(expected, str.count_blank_char)
|
|
275
308
|
end
|
|
276
|
-
|
|
277
|
-
|
|
309
|
+
|
|
310
|
+
def test_ascii_count_word
|
|
311
|
+
str = "foo bar \r\nbaz quux\r\n".extend(CharString)
|
|
278
312
|
str.encoding = "US-ASCII"
|
|
279
313
|
str.eol = "CRLF"
|
|
280
314
|
expected = 6
|
|
281
315
|
assert_equal(expected, str.count_word)
|
|
282
316
|
end
|
|
283
|
-
|
|
284
|
-
|
|
317
|
+
|
|
318
|
+
def test_ascii_count_latin_word
|
|
319
|
+
str = "foo bar \r\nbaz quux\r\n".extend(CharString)
|
|
285
320
|
str.encoding = "US-ASCII"
|
|
286
321
|
str.eol = "CRLF"
|
|
287
|
-
expected = 5
|
|
322
|
+
expected = 5 # " " is also counted as a word
|
|
288
323
|
assert_equal(expected, str.count_latin_word)
|
|
289
324
|
end
|
|
290
|
-
|
|
291
|
-
|
|
325
|
+
|
|
326
|
+
def test_ascii_count_latin_valid_word
|
|
327
|
+
str = "1 foo \r\n%%% ()\r\n".extend(CharString)
|
|
292
328
|
str.encoding = "US-ASCII"
|
|
293
329
|
str.eol = "CRLF"
|
|
294
330
|
expected = 2
|
|
295
331
|
assert_equal(expected, str.count_latin_valid_word)
|
|
296
332
|
end
|
|
297
|
-
|
|
298
|
-
|
|
333
|
+
|
|
334
|
+
def test_ascii_count_line
|
|
335
|
+
str = "foo\r\nbar".extend(CharString)
|
|
299
336
|
str.encoding = "US-ASCII"
|
|
300
337
|
str.eol = "CRLF"
|
|
301
338
|
expected = 2
|
|
302
339
|
assert_equal(expected, str.count_line)
|
|
303
340
|
end
|
|
304
|
-
|
|
305
|
-
|
|
341
|
+
|
|
342
|
+
def test_ascii_count_graph_line
|
|
343
|
+
str = "foo\r\n ".extend(CharString)
|
|
306
344
|
str.encoding = "US-ASCII"
|
|
307
345
|
str.eol = "CRLF"
|
|
308
346
|
expected = 1
|
|
309
347
|
assert_equal(expected, str.count_graph_line)
|
|
310
348
|
end
|
|
311
|
-
|
|
312
|
-
|
|
349
|
+
|
|
350
|
+
def test_ascii_count_empty_line
|
|
351
|
+
str = "foo\r\n \r\n\t\r\n\r\n".extend(CharString)
|
|
313
352
|
str.encoding = "US-ASCII"
|
|
314
353
|
str.eol = "CRLF"
|
|
315
354
|
expected = 1
|
|
316
355
|
assert_equal(expected, str.count_empty_line)
|
|
317
356
|
end
|
|
318
|
-
|
|
319
|
-
|
|
357
|
+
|
|
358
|
+
def test_ascii_count_blank_line
|
|
359
|
+
str = "\r\n \r\n\t\r\n ".extend(CharString)
|
|
320
360
|
str.encoding = "US-ASCII"
|
|
321
361
|
str.eol = "CRLF"
|
|
322
362
|
expected = 3
|
|
@@ -324,177 +364,199 @@ class TC_DocDiff_CharString < Test::Unit::TestCase
|
|
|
324
364
|
end
|
|
325
365
|
|
|
326
366
|
# test EUCJP module
|
|
327
|
-
def test_eucjp_split_to_word
|
|
328
|
-
str = NKF.nkf("--euc", "日本語の文字foo bar").extend
|
|
367
|
+
def test_eucjp_split_to_word
|
|
368
|
+
str = NKF.nkf("--euc", "日本語の文字foo bar").extend(CharString)
|
|
329
369
|
str.encoding = "EUC-JP"
|
|
330
|
-
expected = ["日本語の","文字","foo ","bar"].map{|c| NKF.nkf("--euc", c)}
|
|
370
|
+
expected = ["日本語の", "文字", "foo ", "bar"].map { |c| NKF.nkf("--euc", c) }
|
|
331
371
|
assert_equal(expected, str.split_to_word)
|
|
332
372
|
end
|
|
333
|
-
|
|
334
|
-
|
|
373
|
+
|
|
374
|
+
def test_eucjp_split_to_word_kanhira
|
|
375
|
+
str = NKF.nkf("--euc", "日本語の文字").extend(CharString)
|
|
335
376
|
str.encoding = "EUC-JP"
|
|
336
|
-
expected = ["日本語の", "文字"].map{|c| NKF.nkf("--euc", c)}
|
|
377
|
+
expected = ["日本語の", "文字"].map { |c| NKF.nkf("--euc", c) }
|
|
337
378
|
assert_equal(expected, str.split_to_word)
|
|
338
379
|
end
|
|
339
|
-
|
|
340
|
-
|
|
380
|
+
|
|
381
|
+
def test_eucjp_split_to_word_katahira
|
|
382
|
+
str = NKF.nkf("--euc", "カタカナの文字").extend(CharString)
|
|
341
383
|
str.encoding = "EUC-JP"
|
|
342
|
-
expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--euc", c)}
|
|
384
|
+
expected = ["カタカナの", "文字"].map { |c| NKF.nkf("--euc", c) }
|
|
343
385
|
assert_equal(expected, str.split_to_word)
|
|
344
386
|
end
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
str
|
|
348
|
-
expected = ["ルビー", "色の", "石"].map{|c| NKF.nkf("--euc", c)}
|
|
387
|
+
|
|
388
|
+
def test_eucjp_split_to_word_kataonbiki
|
|
389
|
+
str = NKF.nkf("--euc", "ルビー色の石").extend(CharString)
|
|
390
|
+
expected = ["ルビー", "色の", "石"].map { |c| NKF.nkf("--euc", c) }
|
|
349
391
|
assert_equal(expected, str.split_to_word)
|
|
350
392
|
end
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
str
|
|
354
|
-
expected = ["わー", "ルビーだ"].map{|c| NKF.nkf("--euc", c)}
|
|
393
|
+
|
|
394
|
+
def test_eucjp_split_to_word_hiraonbiki
|
|
395
|
+
str = NKF.nkf("--euc", "わールビーだ").extend(CharString)
|
|
396
|
+
expected = ["わー", "ルビーだ"].map { |c| NKF.nkf("--euc", c) }
|
|
355
397
|
assert_equal(expected, str.split_to_word)
|
|
356
398
|
end
|
|
357
|
-
|
|
358
|
-
|
|
399
|
+
|
|
400
|
+
def test_eucjp_split_to_word_latinmix
|
|
401
|
+
str = NKF.nkf("--euc", "日本語とLatinの文字").extend(CharString)
|
|
359
402
|
str.encoding = "EUC-JP"
|
|
360
|
-
expected = ["日本語と", "Latin", "の", "文字"].map{|c| NKF.nkf("--euc", c)}
|
|
403
|
+
expected = ["日本語と", "Latin", "の", "文字"].map { |c| NKF.nkf("--euc", c) }
|
|
361
404
|
assert_equal(expected, str.split_to_word)
|
|
362
405
|
end
|
|
363
|
-
|
|
364
|
-
|
|
406
|
+
|
|
407
|
+
def test_eucjp_split_to_char
|
|
408
|
+
str = NKF.nkf("--euc", "日本語a b").extend(CharString)
|
|
365
409
|
str.encoding = "EUC-JP"
|
|
366
|
-
|
|
367
|
-
expected = ["日","本","語","a"," ","b"].map{|c|NKF.nkf("--euc",c)}
|
|
410
|
+
expected = ["日", "本", "語", "a", " ", "b"].map { |c| NKF.nkf("--euc", c) }
|
|
368
411
|
assert_equal(expected, str.split_to_char)
|
|
369
412
|
end
|
|
370
|
-
|
|
371
|
-
|
|
413
|
+
|
|
414
|
+
def test_eucjp_split_to_char_with_cr
|
|
415
|
+
str = NKF.nkf("--euc", "日本語a b\r").extend(CharString)
|
|
372
416
|
str.encoding = "EUC-JP"
|
|
373
417
|
str.eol = "CR"
|
|
374
|
-
expected = ["日","本","語","a"," ","b","\r"].map{|c|NKF.nkf("--euc",c)}
|
|
418
|
+
expected = ["日", "本", "語", "a", " ", "b", "\r"].map { |c| NKF.nkf("--euc", c) }
|
|
375
419
|
assert_equal(expected, str.split_to_char)
|
|
376
420
|
end
|
|
377
|
-
|
|
378
|
-
|
|
421
|
+
|
|
422
|
+
def test_eucjp_split_to_char_with_lf
|
|
423
|
+
str = NKF.nkf("--euc", "日本語a b\n").extend(CharString)
|
|
379
424
|
str.encoding = "EUC-JP"
|
|
380
425
|
str.eol = "LF"
|
|
381
|
-
expected = ["日","本","語","a"," ","b","\n"].map{|c|NKF.nkf("--euc",c)}
|
|
426
|
+
expected = ["日", "本", "語", "a", " ", "b", "\n"].map { |c| NKF.nkf("--euc", c) }
|
|
382
427
|
assert_equal(expected, str.split_to_char)
|
|
383
428
|
end
|
|
384
|
-
|
|
385
|
-
|
|
429
|
+
|
|
430
|
+
def test_eucjp_split_to_char_with_crlf
|
|
431
|
+
str = NKF.nkf("--euc", "日本語a b\r\n").extend(CharString)
|
|
386
432
|
str.encoding = "EUC-JP"
|
|
387
433
|
str.eol = "CRLF"
|
|
388
|
-
expected = ["日","本","語","a"," ","b","\r\n"].map{|c|NKF.nkf("--euc",c)}
|
|
434
|
+
expected = ["日", "本", "語", "a", " ", "b", "\r\n"].map { |c| NKF.nkf("--euc", c) }
|
|
389
435
|
assert_equal(expected, str.split_to_char)
|
|
390
436
|
end
|
|
391
|
-
|
|
392
|
-
|
|
437
|
+
|
|
438
|
+
def test_eucjp_count_char
|
|
439
|
+
str = NKF.nkf("--euc", "日本語a b\r\n").extend(CharString)
|
|
393
440
|
str.encoding = "EUC-JP"
|
|
394
441
|
str.eol = "CRLF"
|
|
395
442
|
expected = 7
|
|
396
443
|
assert_equal(expected, str.count_char)
|
|
397
444
|
end
|
|
398
|
-
|
|
399
|
-
|
|
445
|
+
|
|
446
|
+
def test_eucjp_count_latin_graph_char
|
|
447
|
+
str = NKF.nkf("--euc", "日本語a b\r\n").extend(CharString)
|
|
400
448
|
str.encoding = "EUC-JP"
|
|
401
449
|
str.eol = "CRLF"
|
|
402
450
|
expected = 2
|
|
403
451
|
assert_equal(expected, str.count_latin_graph_char)
|
|
404
452
|
end
|
|
405
|
-
|
|
406
|
-
|
|
453
|
+
|
|
454
|
+
def test_eucjp_count_ja_graph_char
|
|
455
|
+
str = NKF.nkf("--euc", "日本語a b\r\n").extend(CharString)
|
|
407
456
|
str.encoding = "EUC-JP"
|
|
408
457
|
str.eol = "CRLF"
|
|
409
458
|
expected = 3
|
|
410
459
|
assert_equal(expected, str.count_ja_graph_char)
|
|
411
460
|
end
|
|
412
|
-
|
|
413
|
-
|
|
461
|
+
|
|
462
|
+
def test_eucjp_count_graph_char
|
|
463
|
+
str = NKF.nkf("--euc", "日本語a b\r\n").extend(CharString)
|
|
414
464
|
str.encoding = "EUC-JP"
|
|
415
465
|
str.eol = "CRLF"
|
|
416
466
|
expected = 5
|
|
417
467
|
assert_equal(expected, str.count_graph_char)
|
|
418
468
|
end
|
|
419
|
-
|
|
420
|
-
|
|
469
|
+
|
|
470
|
+
def test_eucjp_count_latin_blank_char
|
|
471
|
+
str = NKF.nkf("--euc", "日本語\ta b\r\n").extend(CharString)
|
|
421
472
|
str.encoding = "EUC-JP"
|
|
422
473
|
str.eol = "CRLF"
|
|
423
474
|
expected = 2
|
|
424
475
|
assert_equal(expected, str.count_latin_blank_char)
|
|
425
476
|
end
|
|
426
|
-
|
|
427
|
-
|
|
477
|
+
|
|
478
|
+
def test_eucjp_count_ja_blank_char
|
|
479
|
+
str = NKF.nkf("--euc", "日本 語\ta b\r\n").extend(CharString)
|
|
428
480
|
str.encoding = "EUC-JP"
|
|
429
481
|
str.eol = "CRLF"
|
|
430
482
|
expected = 1
|
|
431
483
|
assert_equal(expected, str.count_ja_blank_char)
|
|
432
484
|
end
|
|
433
|
-
|
|
434
|
-
|
|
485
|
+
|
|
486
|
+
def test_eucjp_count_blank_char
|
|
487
|
+
str = NKF.nkf("--euc", "日本 語\ta b\r\n").extend(CharString)
|
|
435
488
|
str.encoding = "EUC-JP"
|
|
436
489
|
str.eol = "CRLF"
|
|
437
490
|
expected = 3
|
|
438
491
|
assert_equal(expected, str.count_blank_char)
|
|
439
492
|
end
|
|
440
|
-
|
|
441
|
-
|
|
493
|
+
|
|
494
|
+
def test_eucjp_count_word
|
|
495
|
+
str = NKF.nkf("--euc", "日本 語a b --\r\n").extend(CharString)
|
|
442
496
|
str.encoding = "EUC-JP"
|
|
443
497
|
str.eol = "CRLF"
|
|
444
498
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
445
499
|
assert_equal(expected, str.count_word)
|
|
446
500
|
end
|
|
447
|
-
|
|
448
|
-
|
|
501
|
+
|
|
502
|
+
def test_eucjp_count_ja_word
|
|
503
|
+
str = NKF.nkf("--euc", "日本 語a b --\r\n").extend(CharString)
|
|
449
504
|
str.encoding = "EUC-JP"
|
|
450
505
|
str.eol = "CRLF"
|
|
451
506
|
expected = 3
|
|
452
507
|
assert_equal(expected, str.count_ja_word)
|
|
453
508
|
end
|
|
454
|
-
|
|
455
|
-
|
|
509
|
+
|
|
510
|
+
def test_eucjp_count_latin_valid_word
|
|
511
|
+
str = NKF.nkf("--euc", "日本 語a b --\r\n").extend(CharString)
|
|
456
512
|
str.encoding = "EUC-JP"
|
|
457
513
|
str.eol = "CRLF"
|
|
458
514
|
expected = 2
|
|
459
515
|
assert_equal(expected, str.count_latin_valid_word)
|
|
460
516
|
end
|
|
461
|
-
|
|
462
|
-
|
|
517
|
+
|
|
518
|
+
def test_eucjp_count_ja_valid_word
|
|
519
|
+
str = NKF.nkf("--euc", "日本 語a b --\r\n").extend(CharString)
|
|
463
520
|
str.encoding = "EUC-JP"
|
|
464
521
|
str.eol = "CRLF"
|
|
465
522
|
expected = 2
|
|
466
523
|
assert_equal(expected, str.count_ja_valid_word)
|
|
467
524
|
end
|
|
468
|
-
|
|
469
|
-
|
|
525
|
+
|
|
526
|
+
def test_eucjp_count_valid_word
|
|
527
|
+
str = NKF.nkf("--euc", "日本 語a b --\r\n").extend(CharString)
|
|
470
528
|
str.encoding = "EUC-JP"
|
|
471
529
|
str.eol = "CRLF"
|
|
472
530
|
expected = 4
|
|
473
531
|
assert_equal(expected, str.count_valid_word)
|
|
474
532
|
end
|
|
475
|
-
|
|
476
|
-
|
|
533
|
+
|
|
534
|
+
def test_eucjp_count_line
|
|
535
|
+
str = NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
477
536
|
str.encoding = "EUC-JP"
|
|
478
537
|
str.eol = "CRLF"
|
|
479
538
|
expected = 6
|
|
480
539
|
assert_equal(expected, str.count_line)
|
|
481
540
|
end
|
|
482
|
-
|
|
483
|
-
|
|
541
|
+
|
|
542
|
+
def test_eucjp_count_graph_line
|
|
543
|
+
str = NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
484
544
|
str.encoding = "EUC-JP"
|
|
485
545
|
str.eol = "CRLF"
|
|
486
546
|
expected = 3
|
|
487
547
|
assert_equal(expected, str.count_graph_line)
|
|
488
548
|
end
|
|
489
|
-
|
|
490
|
-
|
|
549
|
+
|
|
550
|
+
def test_eucjp_count_empty_line
|
|
551
|
+
str = NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
491
552
|
str.encoding = "EUC-JP"
|
|
492
553
|
str.eol = "CRLF"
|
|
493
554
|
expected = 1
|
|
494
555
|
assert_equal(expected, str.count_empty_line)
|
|
495
556
|
end
|
|
496
|
-
|
|
497
|
-
|
|
557
|
+
|
|
558
|
+
def test_eucjp_count_blank_line
|
|
559
|
+
str = NKF.nkf("--euc", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
498
560
|
str.encoding = "EUC-JP"
|
|
499
561
|
str.eol = "CRLF"
|
|
500
562
|
expected = 2
|
|
@@ -502,177 +564,201 @@ class TC_DocDiff_CharString < Test::Unit::TestCase
|
|
|
502
564
|
end
|
|
503
565
|
|
|
504
566
|
# test SJIS module
|
|
505
|
-
def test_sjis_split_to_word
|
|
506
|
-
str = NKF.nkf("--sjis", "日本語の文字foo bar").extend
|
|
567
|
+
def test_sjis_split_to_word
|
|
568
|
+
str = NKF.nkf("--sjis", "日本語の文字foo bar").extend(CharString)
|
|
507
569
|
str.encoding = "Shift_JIS"
|
|
508
|
-
expected = ["日本語の", "文字", "foo ", "bar"].map{|c|NKF.nkf("--sjis",c)}
|
|
570
|
+
expected = ["日本語の", "文字", "foo ", "bar"].map { |c| NKF.nkf("--sjis", c) }
|
|
509
571
|
assert_equal(expected, str.split_to_word)
|
|
510
572
|
end
|
|
511
|
-
|
|
512
|
-
|
|
573
|
+
|
|
574
|
+
def test_sjisplit_s_to_word_kanhira
|
|
575
|
+
str = NKF.nkf("--sjis", "日本語の文字").extend(CharString)
|
|
513
576
|
str.encoding = "Shift_JIS"
|
|
514
|
-
expected = ["日本語の", "文字"].map{|c| NKF.nkf("--sjis", c)}
|
|
577
|
+
expected = ["日本語の", "文字"].map { |c| NKF.nkf("--sjis", c) }
|
|
515
578
|
assert_equal(expected, str.split_to_word)
|
|
516
579
|
end
|
|
517
|
-
|
|
518
|
-
|
|
580
|
+
|
|
581
|
+
def test_sjis_split_to_word_katahira
|
|
582
|
+
str = NKF.nkf("--sjis", "カタカナの文字").extend(CharString)
|
|
519
583
|
str.encoding = "Shift_JIS"
|
|
520
|
-
expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--sjis", c)}
|
|
584
|
+
expected = ["カタカナの", "文字"].map { |c| NKF.nkf("--sjis", c) }
|
|
521
585
|
assert_equal(expected, str.split_to_word)
|
|
522
586
|
end
|
|
523
|
-
|
|
524
|
-
|
|
587
|
+
|
|
588
|
+
def test_sjis_split_to_word_kataonbiki
|
|
589
|
+
str = NKF.nkf("--sjis", "ルビーの指輪").extend(CharString)
|
|
525
590
|
str.encoding = "Shift_JIS"
|
|
526
|
-
expected = ["ルビーの", "指輪"].map{|c| NKF.nkf("--sjis", c)}
|
|
591
|
+
expected = ["ルビーの", "指輪"].map { |c| NKF.nkf("--sjis", c) }
|
|
527
592
|
assert_equal(expected, str.split_to_word)
|
|
528
593
|
end
|
|
529
|
-
|
|
530
|
-
|
|
594
|
+
|
|
595
|
+
def test_sjis_split_to_word_hiraonbiki
|
|
596
|
+
str = NKF.nkf("--sjis", "わールビーだ").extend(CharString)
|
|
531
597
|
str.encoding = "Shift_JIS"
|
|
532
|
-
expected = ["わー", "ルビーだ"].map{|c| NKF.nkf("--sjis", c)}
|
|
598
|
+
expected = ["わー", "ルビーだ"].map { |c| NKF.nkf("--sjis", c) }
|
|
533
599
|
assert_equal(expected, str.split_to_word)
|
|
534
600
|
end
|
|
535
|
-
|
|
536
|
-
|
|
601
|
+
|
|
602
|
+
def test_sjis_split_to_word_latinmix
|
|
603
|
+
str = NKF.nkf("--sjis", "日本語とLatinの文字").extend(CharString)
|
|
537
604
|
str.encoding = "Shift_JIS"
|
|
538
|
-
expected = ["日本語と","Latin","の","文字"].map{|c| NKF.nkf("--sjis", c)}
|
|
605
|
+
expected = ["日本語と", "Latin", "の", "文字"].map { |c| NKF.nkf("--sjis", c) }
|
|
539
606
|
assert_equal(expected, str.split_to_word)
|
|
540
607
|
end
|
|
541
|
-
|
|
542
|
-
|
|
608
|
+
|
|
609
|
+
def test_sjis_split_to_char
|
|
610
|
+
str = NKF.nkf("--sjis", "表計算a b").extend(CharString)
|
|
543
611
|
str.encoding = "Shift_JIS"
|
|
544
|
-
|
|
545
|
-
expected = ["表","計","算","a"," ","b"].map{|c|NKF.nkf("--sjis",c)}
|
|
612
|
+
expected = ["表", "計", "算", "a", " ", "b"].map { |c| NKF.nkf("--sjis", c) }
|
|
546
613
|
assert_equal(expected, str.split_to_char)
|
|
547
614
|
end
|
|
548
|
-
|
|
549
|
-
|
|
615
|
+
|
|
616
|
+
def test_sjis_split_to_char_with_cr
|
|
617
|
+
str = NKF.nkf("--sjis", "表計算a b\r").extend(CharString)
|
|
550
618
|
str.encoding = "Shift_JIS"
|
|
551
619
|
str.eol = "CR"
|
|
552
|
-
expected = ["表","計","算","a"," ","b","\r"].map{|c|NKF.nkf("--sjis",c)}
|
|
620
|
+
expected = ["表", "計", "算", "a", " ", "b", "\r"].map { |c| NKF.nkf("--sjis", c) }
|
|
553
621
|
assert_equal(expected, str.split_to_char)
|
|
554
622
|
end
|
|
555
|
-
|
|
556
|
-
|
|
623
|
+
|
|
624
|
+
def test_sjis_split_to_char_with_lf
|
|
625
|
+
str = NKF.nkf("--sjis", "表計算a b\n").extend(CharString)
|
|
557
626
|
str.encoding = "Shift_JIS"
|
|
558
627
|
str.eol = "LF"
|
|
559
|
-
expected = ["表","計","算","a"," ","b","\n"].map{|c|NKF.nkf("--sjis",c)}
|
|
628
|
+
expected = ["表", "計", "算", "a", " ", "b", "\n"].map { |c| NKF.nkf("--sjis", c) }
|
|
560
629
|
assert_equal(expected, str.split_to_char)
|
|
561
630
|
end
|
|
562
|
-
|
|
563
|
-
|
|
631
|
+
|
|
632
|
+
def test_sjis_split_to_char_with_crlf
|
|
633
|
+
str = NKF.nkf("--sjis", "表計算a b\r\n").extend(CharString)
|
|
564
634
|
str.encoding = "Shift_JIS"
|
|
565
635
|
str.eol = "CRLF"
|
|
566
|
-
expected = ["表","計","算","a"," ","b","\r\n"].map{|c|NKF.nkf("--sjis",c)}
|
|
636
|
+
expected = ["表", "計", "算", "a", " ", "b", "\r\n"].map { |c| NKF.nkf("--sjis", c) }
|
|
567
637
|
assert_equal(expected, str.split_to_char)
|
|
568
638
|
end
|
|
569
|
-
|
|
570
|
-
|
|
639
|
+
|
|
640
|
+
def test_sjis_count_char
|
|
641
|
+
str = NKF.nkf("--sjis", "日本語a b\r\n").extend(CharString)
|
|
571
642
|
str.encoding = "Shift_JIS"
|
|
572
643
|
str.eol = "CRLF"
|
|
573
644
|
expected = 7
|
|
574
645
|
assert_equal(expected, str.count_char)
|
|
575
646
|
end
|
|
576
|
-
|
|
577
|
-
|
|
647
|
+
|
|
648
|
+
def test_sjis_count_latin_graph_char
|
|
649
|
+
str = NKF.nkf("--sjis", "日本語a b\r\n").extend(CharString)
|
|
578
650
|
str.encoding = "Shift_JIS"
|
|
579
651
|
str.eol = "CRLF"
|
|
580
652
|
expected = 2
|
|
581
653
|
assert_equal(expected, str.count_latin_graph_char)
|
|
582
654
|
end
|
|
583
|
-
|
|
584
|
-
|
|
655
|
+
|
|
656
|
+
def test_sjis_count_ja_graph_char
|
|
657
|
+
str = NKF.nkf("--sjis", "日本語a b\r\n").extend(CharString)
|
|
585
658
|
str.encoding = "Shift_JIS"
|
|
586
659
|
str.eol = "CRLF"
|
|
587
660
|
expected = 3
|
|
588
661
|
assert_equal(expected, str.count_ja_graph_char)
|
|
589
662
|
end
|
|
590
|
-
|
|
591
|
-
|
|
663
|
+
|
|
664
|
+
def test_sjis_count_graph_char
|
|
665
|
+
str = NKF.nkf("--sjis", "日本語a b\r\n").extend(CharString)
|
|
592
666
|
str.encoding = "Shift_JIS"
|
|
593
667
|
str.eol = "CRLF"
|
|
594
668
|
expected = 5
|
|
595
669
|
assert_equal(expected, str.count_graph_char)
|
|
596
670
|
end
|
|
597
|
-
|
|
598
|
-
|
|
671
|
+
|
|
672
|
+
def test_sjis_count_latin_blank_char
|
|
673
|
+
str = NKF.nkf("--sjis", "日本語\ta b\r\n").extend(CharString)
|
|
599
674
|
str.encoding = "Shift_JIS"
|
|
600
675
|
str.eol = "CRLF"
|
|
601
676
|
expected = 2
|
|
602
677
|
assert_equal(expected, str.count_latin_blank_char)
|
|
603
678
|
end
|
|
604
|
-
|
|
605
|
-
|
|
679
|
+
|
|
680
|
+
def test_sjis_count_ja_blank_char
|
|
681
|
+
str = NKF.nkf("--sjis", "日本 語\ta b\r\n").extend(CharString)
|
|
606
682
|
str.encoding = "Shift_JIS"
|
|
607
683
|
str.eol = "CRLF"
|
|
608
684
|
expected = 1
|
|
609
685
|
assert_equal(expected, str.count_ja_blank_char)
|
|
610
686
|
end
|
|
611
|
-
|
|
612
|
-
|
|
687
|
+
|
|
688
|
+
def test_sjis_count_blank_char
|
|
689
|
+
str = NKF.nkf("--sjis", "日本 語\ta b\r\n").extend(CharString)
|
|
613
690
|
str.encoding = "Shift_JIS"
|
|
614
691
|
str.eol = "CRLF"
|
|
615
692
|
expected = 3
|
|
616
693
|
assert_equal(expected, str.count_blank_char)
|
|
617
694
|
end
|
|
618
|
-
|
|
619
|
-
|
|
695
|
+
|
|
696
|
+
def test_sjis_count_word
|
|
697
|
+
str = NKF.nkf("--sjis", "日本 語a b --\r\n").extend(CharString)
|
|
620
698
|
str.encoding = "Shift_JIS"
|
|
621
699
|
str.eol = "CRLF"
|
|
622
700
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
623
701
|
assert_equal(expected, str.count_word)
|
|
624
702
|
end
|
|
625
|
-
|
|
626
|
-
|
|
703
|
+
|
|
704
|
+
def test_sjis_count_ja_word
|
|
705
|
+
str = NKF.nkf("--sjis", "日本 語a b --\r\n").extend(CharString)
|
|
627
706
|
str.encoding = "Shift_JIS"
|
|
628
707
|
str.eol = "CRLF"
|
|
629
708
|
expected = 3
|
|
630
709
|
assert_equal(expected, str.count_ja_word)
|
|
631
710
|
end
|
|
632
|
-
|
|
633
|
-
|
|
711
|
+
|
|
712
|
+
def test_sjis_count_latin_valid_word
|
|
713
|
+
str = NKF.nkf("--sjis", "日本 語a b --\r\n").extend(CharString)
|
|
634
714
|
str.encoding = "Shift_JIS"
|
|
635
715
|
str.eol = "CRLF"
|
|
636
716
|
expected = 2
|
|
637
717
|
assert_equal(expected, str.count_latin_valid_word)
|
|
638
718
|
end
|
|
639
|
-
|
|
640
|
-
|
|
719
|
+
|
|
720
|
+
def test_sjis_count_ja_valid_word
|
|
721
|
+
str = NKF.nkf("--sjis", "日本 語a b --\r\n").extend(CharString)
|
|
641
722
|
str.encoding = "Shift_JIS"
|
|
642
723
|
str.eol = "CRLF"
|
|
643
724
|
expected = 2
|
|
644
725
|
assert_equal(expected, str.count_ja_valid_word)
|
|
645
726
|
end
|
|
646
|
-
|
|
647
|
-
|
|
727
|
+
|
|
728
|
+
def test_sjis_count_valid_word
|
|
729
|
+
str = NKF.nkf("--sjis", "日本 語a b --\r\n").extend(CharString)
|
|
648
730
|
str.encoding = "Shift_JIS"
|
|
649
731
|
str.eol = "CRLF"
|
|
650
732
|
expected = 4
|
|
651
733
|
assert_equal(expected, str.count_valid_word)
|
|
652
734
|
end
|
|
653
|
-
|
|
654
|
-
|
|
735
|
+
|
|
736
|
+
def test_sjis_count_line
|
|
737
|
+
str = NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
655
738
|
str.encoding = "Shift_JIS"
|
|
656
739
|
str.eol = "CRLF"
|
|
657
740
|
expected = 6
|
|
658
741
|
assert_equal(expected, str.count_line)
|
|
659
742
|
end
|
|
660
|
-
|
|
661
|
-
|
|
743
|
+
|
|
744
|
+
def test_sjis_count_graph_line
|
|
745
|
+
str = NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
662
746
|
str.encoding = "Shift_JIS"
|
|
663
747
|
str.eol = "CRLF"
|
|
664
748
|
expected = 3
|
|
665
749
|
assert_equal(expected, str.count_graph_line)
|
|
666
750
|
end
|
|
667
|
-
|
|
668
|
-
|
|
751
|
+
|
|
752
|
+
def test_sjis_count_empty_line
|
|
753
|
+
str = NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
669
754
|
str.encoding = "Shift_JIS"
|
|
670
755
|
str.eol = "CRLF"
|
|
671
756
|
expected = 1
|
|
672
757
|
assert_equal(expected, str.count_empty_line)
|
|
673
758
|
end
|
|
674
|
-
|
|
675
|
-
|
|
759
|
+
|
|
760
|
+
def test_sjis_count_blank_line
|
|
761
|
+
str = NKF.nkf("--sjis", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
676
762
|
str.encoding = "Shift_JIS"
|
|
677
763
|
str.eol = "CRLF"
|
|
678
764
|
expected = 2
|
|
@@ -680,177 +766,193 @@ class TC_DocDiff_CharString < Test::Unit::TestCase
|
|
|
680
766
|
end
|
|
681
767
|
|
|
682
768
|
# test UTF8 module
|
|
683
|
-
def test_utf8_split_to_word
|
|
684
|
-
str = NKF.nkf("--utf8", "日本語の文字foo bar").extend
|
|
769
|
+
def test_utf8_split_to_word
|
|
770
|
+
str = NKF.nkf("--utf8", "日本語の文字foo bar").extend(CharString)
|
|
685
771
|
str.encoding = "UTF-8"
|
|
686
|
-
expected = ["日本語の", "文字", "foo ", "bar"].map{|c| NKF.nkf("--utf8", c)}
|
|
772
|
+
expected = ["日本語の", "文字", "foo ", "bar"].map { |c| NKF.nkf("--utf8", c) }
|
|
687
773
|
assert_equal(expected, str.split_to_word)
|
|
688
774
|
end
|
|
689
|
-
|
|
690
|
-
|
|
775
|
+
|
|
776
|
+
def test_utf8_split_to_word_kanhira
|
|
777
|
+
str = NKF.nkf("--utf8", "日本語の文字").extend(CharString)
|
|
691
778
|
str.encoding = "UTF-8"
|
|
692
|
-
expected = ["日本語の", "文字"].map{|c| NKF.nkf("--utf8", c)}
|
|
779
|
+
expected = ["日本語の", "文字"].map { |c| NKF.nkf("--utf8", c) }
|
|
693
780
|
assert_equal(expected, str.split_to_word)
|
|
694
781
|
end
|
|
695
|
-
|
|
696
|
-
|
|
782
|
+
|
|
783
|
+
def test_utf8_split_to_word_katahira
|
|
784
|
+
str = NKF.nkf("--utf8", "カタカナの文字").extend(CharString)
|
|
697
785
|
str.encoding = "UTF-8"
|
|
698
|
-
expected = ["カタカナの", "文字"].map{|c| NKF.nkf("--utf8", c)}
|
|
786
|
+
expected = ["カタカナの", "文字"].map { |c| NKF.nkf("--utf8", c) }
|
|
699
787
|
assert_equal(expected, str.split_to_word)
|
|
700
788
|
end
|
|
701
|
-
|
|
702
|
-
|
|
789
|
+
|
|
790
|
+
def test_utf8_split_to_word_kataonbiki
|
|
791
|
+
str = NKF.nkf("--utf8", "ルビーの指輪").extend(CharString)
|
|
703
792
|
str.encoding = "UTF-8"
|
|
704
|
-
expected = ["ルビーの", "指輪"].map{|c| NKF.nkf("--utf8", c)}
|
|
793
|
+
expected = ["ルビーの", "指輪"].map { |c| NKF.nkf("--utf8", c) }
|
|
705
794
|
assert_equal(expected, str.split_to_word)
|
|
706
795
|
end
|
|
707
|
-
|
|
708
|
-
|
|
796
|
+
|
|
797
|
+
def test_utf8_split_to_word_hiraonbiki
|
|
798
|
+
str = NKF.nkf("--utf8", "わールビーだ").extend(CharString)
|
|
709
799
|
str.encoding = "UTF-8"
|
|
710
|
-
expected = ["わー", "ルビーだ"].map{|c| NKF.nkf("--utf8", c)}
|
|
800
|
+
expected = ["わー", "ルビーだ"].map { |c| NKF.nkf("--utf8", c) }
|
|
711
801
|
assert_equal(expected, str.split_to_word)
|
|
712
802
|
end
|
|
713
|
-
|
|
714
|
-
|
|
803
|
+
|
|
804
|
+
def test_utf8_split_to_word_latinmix
|
|
805
|
+
str = NKF.nkf("--utf8", "日本語とLatinの文字").extend(CharString)
|
|
715
806
|
str.encoding = "UTF-8"
|
|
716
|
-
expected = ["日本語と", "Latin", "の", "文字"].map{|c| NKF.nkf("--utf8", c)}
|
|
807
|
+
expected = ["日本語と", "Latin", "の", "文字"].map { |c| NKF.nkf("--utf8", c) }
|
|
717
808
|
assert_equal(expected, str.split_to_word)
|
|
718
809
|
end
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
str
|
|
722
|
-
|
|
723
|
-
expected = ["日", "本", "語", "a", " ", "b"].map{|c| NKF.nkf("--utf8", c)}
|
|
810
|
+
|
|
811
|
+
def test_utf8_split_to_char
|
|
812
|
+
str = NKF.nkf("--utf8", "日本語a b").extend(CharString)
|
|
813
|
+
expected = ["日", "本", "語", "a", " ", "b"].map { |c| NKF.nkf("--utf8", c) }
|
|
724
814
|
assert_equal(expected, str.split_to_char)
|
|
725
815
|
end
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
str
|
|
816
|
+
|
|
817
|
+
def test_utf8_split_to_char_with_cr
|
|
818
|
+
str = NKF.nkf("--utf8", "日本語a b\r").extend(CharString)
|
|
729
819
|
str.eol = "CR"
|
|
730
|
-
expected = ["日","本","語","a"," ","b","\r"].map{|c| NKF.nkf("--utf8", c)}
|
|
820
|
+
expected = ["日", "本", "語", "a", " ", "b", "\r"].map { |c| NKF.nkf("--utf8", c) }
|
|
731
821
|
assert_equal(expected, str.split_to_char)
|
|
732
822
|
end
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
str
|
|
823
|
+
|
|
824
|
+
def test_utf8_split_to_char_with_lf
|
|
825
|
+
str = NKF.nkf("--utf8", "日本語a b\n").extend(CharString)
|
|
736
826
|
str.eol = "LF"
|
|
737
|
-
expected = ["日","本","語","a"," ","b","\n"].map{|c| NKF.nkf("--utf8", c)}
|
|
827
|
+
expected = ["日", "本", "語", "a", " ", "b", "\n"].map { |c| NKF.nkf("--utf8", c) }
|
|
738
828
|
assert_equal(expected, str.split_to_char)
|
|
739
829
|
end
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
str
|
|
830
|
+
|
|
831
|
+
def test_utf8_split_to_char_with_crlf
|
|
832
|
+
str = NKF.nkf("--utf8", "日本語a b\r\n").extend(CharString)
|
|
743
833
|
str.eol = "CRLF"
|
|
744
|
-
expected = ["日","本","語","a"," ","b","\r\n"].map{|c| NKF.nkf("--utf8", c)}
|
|
834
|
+
expected = ["日", "本", "語", "a", " ", "b", "\r\n"].map { |c| NKF.nkf("--utf8", c) }
|
|
745
835
|
assert_equal(expected, str.split_to_char)
|
|
746
836
|
end
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
str
|
|
837
|
+
|
|
838
|
+
def test_utf8_count_char
|
|
839
|
+
str = NKF.nkf("--utf8", "日本語a b\r\n").extend(CharString)
|
|
750
840
|
str.eol = "CRLF"
|
|
751
841
|
expected = 7
|
|
752
842
|
assert_equal(expected, str.count_char)
|
|
753
843
|
end
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
str
|
|
844
|
+
|
|
845
|
+
def test_utf8_count_latin_graph_char
|
|
846
|
+
str = NKF.nkf("--utf8", "日本語a b\r\n").extend(CharString)
|
|
757
847
|
str.eol = "CRLF"
|
|
758
848
|
expected = 2
|
|
759
849
|
assert_equal(expected, str.count_latin_graph_char)
|
|
760
850
|
end
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
str
|
|
851
|
+
|
|
852
|
+
def test_utf8_count_ja_graph_char
|
|
853
|
+
str = NKF.nkf("--utf8", "日本語a b\r\n").extend(CharString)
|
|
764
854
|
str.eol = "CRLF"
|
|
765
855
|
expected = 3
|
|
766
856
|
assert_equal(expected, str.count_ja_graph_char)
|
|
767
857
|
end
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
str
|
|
858
|
+
|
|
859
|
+
def test_utf8_count_graph_char
|
|
860
|
+
str = NKF.nkf("--utf8", "日本語a b\r\n").extend(CharString)
|
|
771
861
|
str.eol = "CRLF"
|
|
772
862
|
expected = 5
|
|
773
863
|
assert_equal(expected, str.count_graph_char)
|
|
774
864
|
end
|
|
775
|
-
|
|
776
|
-
|
|
865
|
+
|
|
866
|
+
def test_utf8_count_latin_blank_char
|
|
867
|
+
str = NKF.nkf("--utf8", "日本語\ta b\r\n").extend(CharString)
|
|
777
868
|
str.encoding = "UTF-8"
|
|
778
869
|
str.eol = "CRLF"
|
|
779
870
|
expected = 2
|
|
780
871
|
assert_equal(expected, str.count_latin_blank_char)
|
|
781
872
|
end
|
|
782
|
-
|
|
783
|
-
|
|
873
|
+
|
|
874
|
+
def test_utf8_count_ja_blank_char
|
|
875
|
+
str = NKF.nkf("--utf8", "日本 語\ta b\r\n").extend(CharString)
|
|
784
876
|
str.encoding = "UTF-8"
|
|
785
877
|
str.eol = "CRLF"
|
|
786
878
|
expected = 1
|
|
787
879
|
assert_equal(expected, str.count_ja_blank_char)
|
|
788
880
|
end
|
|
789
|
-
|
|
790
|
-
|
|
881
|
+
|
|
882
|
+
def test_utf8_count_blank_char
|
|
883
|
+
str = NKF.nkf("--utf8", "日本 語\ta b\r\n").extend(CharString)
|
|
791
884
|
str.encoding = "UTF-8"
|
|
792
885
|
str.eol = "CRLF"
|
|
793
886
|
expected = 3
|
|
794
887
|
assert_equal(expected, str.count_blank_char)
|
|
795
888
|
end
|
|
796
|
-
|
|
797
|
-
|
|
889
|
+
|
|
890
|
+
def test_utf8_count_word
|
|
891
|
+
str = NKF.nkf("--utf8", "日本 語a b --\r\n").extend(CharString)
|
|
798
892
|
str.encoding = "UTF-8"
|
|
799
893
|
str.eol = "CRLF"
|
|
800
894
|
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
|
801
895
|
assert_equal(expected, str.count_word)
|
|
802
896
|
end
|
|
803
|
-
|
|
804
|
-
|
|
897
|
+
|
|
898
|
+
def test_utf8_count_ja_word
|
|
899
|
+
str = NKF.nkf("--utf8", "日本 語a b --\r\n").extend(CharString)
|
|
805
900
|
str.encoding = "UTF-8"
|
|
806
901
|
str.eol = "CRLF"
|
|
807
902
|
expected = 3
|
|
808
903
|
assert_equal(expected, str.count_ja_word)
|
|
809
904
|
end
|
|
810
|
-
|
|
811
|
-
|
|
905
|
+
|
|
906
|
+
def test_utf8_count_latin_valid_word
|
|
907
|
+
str = NKF.nkf("--utf8", "日本 語a b --\r\n").extend(CharString)
|
|
812
908
|
str.encoding = "UTF-8"
|
|
813
909
|
str.eol = "CRLF"
|
|
814
910
|
expected = 2
|
|
815
911
|
assert_equal(expected, str.count_latin_valid_word)
|
|
816
912
|
end
|
|
817
|
-
|
|
818
|
-
|
|
913
|
+
|
|
914
|
+
def test_utf8_count_ja_valid_word
|
|
915
|
+
str = NKF.nkf("--utf8", "日本 語a b --\r\n").extend(CharString)
|
|
819
916
|
str.encoding = "UTF-8"
|
|
820
917
|
str.eol = "CRLF"
|
|
821
918
|
expected = 2
|
|
822
919
|
assert_equal(expected, str.count_ja_valid_word)
|
|
823
920
|
end
|
|
824
|
-
|
|
825
|
-
|
|
921
|
+
|
|
922
|
+
def test_utf8_count_valid_word
|
|
923
|
+
str = NKF.nkf("--utf8", "日本 語a b --\r\n").extend(CharString)
|
|
826
924
|
str.encoding = "UTF-8"
|
|
827
925
|
str.eol = "CRLF"
|
|
828
926
|
expected = 4
|
|
829
927
|
assert_equal(expected, str.count_valid_word)
|
|
830
928
|
end
|
|
831
|
-
|
|
832
|
-
|
|
929
|
+
|
|
930
|
+
def test_utf8_count_line
|
|
931
|
+
str = NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
833
932
|
str.encoding = "UTF-8"
|
|
834
933
|
str.eol = "CRLF"
|
|
835
934
|
expected = 6
|
|
836
935
|
assert_equal(expected, str.count_line)
|
|
837
936
|
end
|
|
838
|
-
|
|
839
|
-
|
|
937
|
+
|
|
938
|
+
def test_utf8_count_graph_line
|
|
939
|
+
str = NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
840
940
|
str.encoding = "UTF-8"
|
|
841
941
|
str.eol = "CRLF"
|
|
842
942
|
expected = 3
|
|
843
943
|
assert_equal(expected, str.count_graph_line)
|
|
844
944
|
end
|
|
845
|
-
|
|
846
|
-
|
|
945
|
+
|
|
946
|
+
def test_utf8_count_empty_line
|
|
947
|
+
str = NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
847
948
|
str.encoding = "UTF-8"
|
|
848
949
|
str.eol = "CRLF"
|
|
849
950
|
expected = 1
|
|
850
951
|
assert_equal(expected, str.count_empty_line)
|
|
851
952
|
end
|
|
852
|
-
|
|
853
|
-
|
|
953
|
+
|
|
954
|
+
def test_utf8_count_blank_line
|
|
955
|
+
str = NKF.nkf("--utf8", "日本語\r\n \r\n \r\n\r\nfoo\r\nbar").extend(CharString)
|
|
854
956
|
str.encoding = "UTF-8"
|
|
855
957
|
str.eol = "CRLF"
|
|
856
958
|
expected = 2
|
|
@@ -863,136 +965,158 @@ class TC_DocDiff_CharString < Test::Unit::TestCase
|
|
|
863
965
|
assert_equal(expected, CharString.guess_encoding(str))
|
|
864
966
|
end
|
|
865
967
|
|
|
866
|
-
def test_guess_encoding_nil
|
|
968
|
+
def test_guess_encoding_nil
|
|
867
969
|
str = nil
|
|
868
970
|
expected = nil
|
|
869
971
|
assert_guess_encoding(expected, str)
|
|
870
972
|
end
|
|
871
|
-
|
|
872
|
-
#
|
|
873
|
-
#
|
|
874
|
-
#
|
|
875
|
-
#
|
|
876
|
-
|
|
973
|
+
|
|
974
|
+
# def test_guess_encoding_binary
|
|
975
|
+
# str = "\xFF\xFF"
|
|
976
|
+
# expected = "BINARY"
|
|
977
|
+
# assert_equal(expected, CharString.guess_encoding(str))
|
|
978
|
+
# end
|
|
979
|
+
|
|
980
|
+
def test_guess_encoding_unknown
|
|
877
981
|
str = "".encode("BINARY") # cannot put invalid string literal
|
|
878
982
|
expected = "ASCII-8BIT"
|
|
879
983
|
assert_guess_encoding(expected, str)
|
|
880
984
|
end
|
|
881
|
-
|
|
985
|
+
|
|
986
|
+
def test_guess_encoding_ascii_1
|
|
882
987
|
str = "ASCII string".encode("US-ASCII")
|
|
883
988
|
expected = "US-ASCII"
|
|
884
989
|
assert_guess_encoding(expected, str)
|
|
885
990
|
end
|
|
886
|
-
|
|
991
|
+
|
|
992
|
+
def test_guess_encoding_ascii_2
|
|
887
993
|
str = "abc\ndef\n".encode("US-ASCII")
|
|
888
994
|
expected = "US-ASCII"
|
|
889
995
|
assert_guess_encoding(expected, str)
|
|
890
996
|
end
|
|
891
|
-
|
|
892
|
-
#
|
|
893
|
-
|
|
894
|
-
#
|
|
895
|
-
#
|
|
896
|
-
#
|
|
897
|
-
#
|
|
898
|
-
#
|
|
899
|
-
|
|
900
|
-
#
|
|
901
|
-
#
|
|
902
|
-
|
|
997
|
+
|
|
998
|
+
# # CharString.guess_encoding mistakes JIS for ASCII sometimes, due to Iconv.
|
|
999
|
+
|
|
1000
|
+
# def test_guess_encoding_jis_1
|
|
1001
|
+
# str = NKF.nkf("--jis", "漢字とカタカナとひらがな\n")
|
|
1002
|
+
# expected = "JIS"
|
|
1003
|
+
# assert_guess_encoding(expected, str)
|
|
1004
|
+
# end
|
|
1005
|
+
|
|
1006
|
+
# def test_guess_encoding_jis_2
|
|
1007
|
+
# str = NKF.nkf("--jis", "漢字とカタカナとひらがなとLatinの文字と空白( )と記号@\n" * 100)
|
|
1008
|
+
# expected = "JIS"
|
|
1009
|
+
# assert_guess_encoding(expected, str)
|
|
1010
|
+
# end
|
|
1011
|
+
|
|
1012
|
+
def test_guess_encoding_eucjp_1
|
|
903
1013
|
str = NKF.nkf("--euc", "日本語とLatinの文字")
|
|
904
1014
|
expected = "EUC-JP"
|
|
905
1015
|
assert_guess_encoding(expected, str)
|
|
906
1016
|
end
|
|
907
|
-
|
|
1017
|
+
|
|
1018
|
+
def test_guess_encoding_eucjp_2
|
|
908
1019
|
str = NKF.nkf("--euc", "漢字とカタカナとひらがなとLatinの文字と空白( )\n" * 10)
|
|
909
1020
|
expected = "EUC-JP"
|
|
910
1021
|
assert_guess_encoding(expected, str)
|
|
911
1022
|
end
|
|
912
|
-
|
|
1023
|
+
|
|
1024
|
+
def test_guess_encoding_eucjp_3
|
|
913
1025
|
str = NKF.nkf("--euc", "こんばんは、私の名前はまつもとです。\nRubyを作ったのは私です。私はRuby Hackerです。\n")
|
|
914
1026
|
expected = "EUC-JP"
|
|
915
1027
|
assert_guess_encoding(expected, str)
|
|
916
1028
|
end
|
|
917
|
-
|
|
1029
|
+
|
|
1030
|
+
def test_guess_encoding_sjis_1
|
|
918
1031
|
str = NKF.nkf("--sjis", "日本語とLatinの文字")
|
|
919
1032
|
expected = "Shift_JIS"
|
|
920
1033
|
assert_guess_encoding(expected, str)
|
|
921
1034
|
end
|
|
922
|
-
|
|
1035
|
+
|
|
1036
|
+
def test_guess_encoding_sjis_2
|
|
923
1037
|
str = NKF.nkf("--sjis", "漢字と\nカタカナと\nひらがなと\nLatin")
|
|
924
1038
|
expected = "Shift_JIS"
|
|
925
1039
|
assert_guess_encoding(expected, str)
|
|
926
1040
|
end
|
|
927
|
-
|
|
1041
|
+
|
|
1042
|
+
def test_guess_encoding_cp932_1
|
|
928
1043
|
str = NKF.nkf("--oc=CP932", "\\u2460") # CIRCLED DIGIT ONE
|
|
929
1044
|
expected = "Windows-31J" # CP932 == Windows-31J in Ruby 1.9+
|
|
930
1045
|
assert_guess_encoding(expected, str)
|
|
931
1046
|
end
|
|
932
|
-
|
|
1047
|
+
|
|
1048
|
+
def test_guess_encoding_utf8_1
|
|
933
1049
|
str = NKF.nkf("--utf8", "日本語とLatinの文字")
|
|
934
1050
|
expected = "UTF-8"
|
|
935
1051
|
assert_guess_encoding(expected, str)
|
|
936
1052
|
end
|
|
937
|
-
|
|
1053
|
+
|
|
1054
|
+
def test_guess_encoding_utf8_2
|
|
938
1055
|
str = NKF.nkf("--utf8", "いろは\nにほへと\n")
|
|
939
1056
|
expected = "UTF-8"
|
|
940
1057
|
assert_guess_encoding(expected, str)
|
|
941
1058
|
end
|
|
942
1059
|
|
|
943
|
-
def test_guess_eol_nil
|
|
1060
|
+
def test_guess_eol_nil
|
|
944
1061
|
str = nil
|
|
945
1062
|
expected = nil
|
|
946
1063
|
assert_equal(expected, CharString.guess_eol(str))
|
|
947
1064
|
end
|
|
948
|
-
|
|
1065
|
+
|
|
1066
|
+
def test_guess_eol_empty
|
|
949
1067
|
str = ""
|
|
950
1068
|
expected = "NONE"
|
|
951
1069
|
assert_equal(expected, CharString.guess_eol(str))
|
|
952
1070
|
end
|
|
953
|
-
|
|
1071
|
+
|
|
1072
|
+
def test_guess_eol_none
|
|
954
1073
|
str = "foo bar"
|
|
955
1074
|
expected = "NONE"
|
|
956
1075
|
assert_equal(expected, CharString.guess_eol(str))
|
|
957
1076
|
end
|
|
958
|
-
|
|
1077
|
+
|
|
1078
|
+
def test_guess_eol_cr
|
|
959
1079
|
str = "foo bar\r"
|
|
960
1080
|
expected = "CR"
|
|
961
1081
|
assert_equal(expected, CharString.guess_eol(str))
|
|
962
1082
|
end
|
|
963
|
-
|
|
1083
|
+
|
|
1084
|
+
def test_guess_eol_lf
|
|
964
1085
|
str = "foo bar\n"
|
|
965
1086
|
expected = "LF"
|
|
966
1087
|
assert_equal(expected, CharString.guess_eol(str))
|
|
967
1088
|
end
|
|
968
|
-
|
|
1089
|
+
|
|
1090
|
+
def test_guess_eol_crlf
|
|
969
1091
|
str = "foo bar\r\n"
|
|
970
1092
|
expected = "CRLF"
|
|
971
1093
|
assert_equal(expected, CharString.guess_eol(str))
|
|
972
1094
|
end
|
|
973
|
-
|
|
1095
|
+
|
|
1096
|
+
def test_guess_eol_mixed
|
|
974
1097
|
str = "foo\rbar\nbaz\r\n"
|
|
975
1098
|
expected = "UNKNOWN"
|
|
976
1099
|
assert_equal(expected, CharString.guess_eol(str))
|
|
977
1100
|
end
|
|
978
|
-
|
|
979
|
-
|
|
1101
|
+
|
|
1102
|
+
def test_guess_eol_cr2
|
|
1103
|
+
str = "foo\rbar\rbaz\r".extend(CharString)
|
|
980
1104
|
expected = "CR"
|
|
981
1105
|
assert_equal(expected, CharString.guess_eol(str))
|
|
982
1106
|
end
|
|
983
|
-
|
|
984
|
-
|
|
1107
|
+
|
|
1108
|
+
def test_guess_eol_lf2
|
|
1109
|
+
str = "foo\nbar\nbaz\n".extend(CharString)
|
|
985
1110
|
expected = "LF"
|
|
986
1111
|
assert_equal(expected, CharString.guess_eol(str))
|
|
987
1112
|
end
|
|
988
|
-
|
|
989
|
-
|
|
1113
|
+
|
|
1114
|
+
def test_guess_eol_crlf2
|
|
1115
|
+
str = "foo\r\nbar\r\nbaz\r\n".extend(CharString)
|
|
990
1116
|
expected = "CRLF"
|
|
991
1117
|
assert_equal(expected, CharString.guess_eol(str))
|
|
992
1118
|
end
|
|
993
1119
|
|
|
994
|
-
def teardown
|
|
995
|
-
#
|
|
1120
|
+
def teardown
|
|
996
1121
|
end
|
|
997
|
-
|
|
998
1122
|
end
|