docdiff 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +6 -0
- data/.travis.yml +7 -0
- data/Gemfile +17 -0
- data/Guardfile +8 -0
- data/Makefile +108 -0
- data/Rakefile +17 -0
- data/bin/docdiff +179 -0
- data/devutil/JIS0208.TXT +6952 -0
- data/devutil/char_by_charclass.rb +23 -0
- data/devutil/charclass_by_char.rb +21 -0
- data/devutil/jis0208.rb +343 -0
- data/devutil/testjis0208.rb +38 -0
- data/docdiff.conf.example +22 -0
- data/docdiff.gemspec +23 -0
- data/docdiffwebui.cgi +176 -0
- data/docdiffwebui.html +123 -0
- data/img/docdiff-screenshot-format-html-digest-firefox.png +0 -0
- data/img/docdiff-screenshot-format-html-firefox.png +0 -0
- data/img/docdiff-screenshot-format-tty-cmdexe-en.png +0 -0
- data/img/docdiff-screenshot-format-tty-cmdexe-ja.png +0 -0
- data/img/docdiff-screenshot-format-tty-rxvtunicode-en.png +0 -0
- data/img/docdiff-screenshot-format-tty-rxvtunicode-ja.png +0 -0
- data/img/docdiff-screenshot-format-tty-xterm-en.png +0 -0
- data/img/docdiff-screenshot-format-tty-xterm-ja.png +0 -0
- data/img/docdiff-screenshot-resolution-linewordchar-xterm.png +0 -0
- data/index.html +181 -0
- data/langfilter.rb +14 -0
- data/lib/doc_diff.rb +170 -0
- data/lib/docdiff.rb +7 -0
- data/lib/docdiff/charstring.rb +579 -0
- data/lib/docdiff/diff.rb +217 -0
- data/lib/docdiff/diff/contours.rb +382 -0
- data/lib/docdiff/diff/editscript.rb +148 -0
- data/lib/docdiff/diff/rcsdiff.rb +107 -0
- data/lib/docdiff/diff/shortestpath.rb +93 -0
- data/lib/docdiff/diff/speculative.rb +40 -0
- data/lib/docdiff/diff/subsequence.rb +39 -0
- data/lib/docdiff/diff/unidiff.rb +124 -0
- data/lib/docdiff/difference.rb +92 -0
- data/lib/docdiff/document.rb +127 -0
- data/lib/docdiff/encoding/en_ascii.rb +97 -0
- data/lib/docdiff/encoding/ja_eucjp.rb +269 -0
- data/lib/docdiff/encoding/ja_sjis.rb +260 -0
- data/lib/docdiff/encoding/ja_utf8.rb +6974 -0
- data/lib/docdiff/version.rb +3 -0
- data/lib/docdiff/view.rb +476 -0
- data/lib/viewdiff.rb +375 -0
- data/readme.html +713 -0
- data/sample/01.en.ascii.cr +1 -0
- data/sample/01.en.ascii.crlf +2 -0
- data/sample/01.en.ascii.lf +2 -0
- data/sample/01.ja.eucjp.lf +2 -0
- data/sample/01.ja.sjis.cr +1 -0
- data/sample/01.ja.sjis.crlf +2 -0
- data/sample/01.ja.utf8.crlf +2 -0
- data/sample/02.en.ascii.cr +1 -0
- data/sample/02.en.ascii.crlf +2 -0
- data/sample/02.en.ascii.lf +2 -0
- data/sample/02.ja.eucjp.lf +2 -0
- data/sample/02.ja.sjis.cr +1 -0
- data/sample/02.ja.sjis.crlf +2 -0
- data/sample/02.ja.utf8.crlf +2 -0
- data/sample/humpty_dumpty01.ascii.lf +4 -0
- data/sample/humpty_dumpty02.ascii.lf +4 -0
- data/test/charstring_test.rb +1008 -0
- data/test/diff_test.rb +36 -0
- data/test/difference_test.rb +64 -0
- data/test/docdiff_test.rb +193 -0
- data/test/document_test.rb +626 -0
- data/test/test_helper.rb +7 -0
- data/test/view_test.rb +570 -0
- data/test/viewdiff_test.rb +908 -0
- metadata +129 -0
@@ -0,0 +1 @@
|
|
1
|
+
Hello, my name is Watanabe.
|
@@ -0,0 +1 @@
|
|
1
|
+
こんにちは、私の名前はわたなべです。
|
@@ -0,0 +1 @@
|
|
1
|
+
Hello, my name is matz.
|
@@ -0,0 +1 @@
|
|
1
|
+
こんばんは、私の名前はまつもとです。
|
@@ -0,0 +1,1008 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# -*- coding: euc-jp; -*-
|
3
|
+
require 'test/unit'
|
4
|
+
require 'docdiff/charstring'
|
5
|
+
require 'nkf'
|
6
|
+
|
7
|
+
class TC_CharString < Test::Unit::TestCase
|
8
|
+
|
9
|
+
def setup()
|
10
|
+
#
|
11
|
+
end
|
12
|
+
|
13
|
+
# test encoding module registration
|
14
|
+
def test_encoding_ascii()
|
15
|
+
str = "foo".extend CharString
|
16
|
+
str.encoding = "US-ASCII"
|
17
|
+
expected = CharString::ASCII
|
18
|
+
assert_equal(expected, CharString::Encodings[str.encoding])
|
19
|
+
end
|
20
|
+
def test_encoding_eucjp()
|
21
|
+
str = "foo".extend CharString
|
22
|
+
str.encoding = "EUC-JP"
|
23
|
+
expected = CharString::EUC_JP
|
24
|
+
assert_equal(expected, CharString::Encodings[str.encoding])
|
25
|
+
end
|
26
|
+
def test_encoding_sjis()
|
27
|
+
str = "foo".extend CharString
|
28
|
+
str.encoding = "Shift_JIS"
|
29
|
+
expected = CharString::Shift_JIS
|
30
|
+
assert_equal(expected, CharString::Encodings[str.encoding])
|
31
|
+
end
|
32
|
+
def test_encoding_utf8()
|
33
|
+
str = "foo".extend CharString
|
34
|
+
str.encoding = "UTF-8"
|
35
|
+
expected = CharString::UTF8
|
36
|
+
assert_equal(expected, CharString::Encodings[str.encoding])
|
37
|
+
end
|
38
|
+
|
39
|
+
# test eol module registration
|
40
|
+
def test_eol_cr()
|
41
|
+
str = "foo".extend CharString
|
42
|
+
str.eol = "CR"
|
43
|
+
expected = CharString::CR
|
44
|
+
assert_equal(expected, CharString::EOLChars[str.eol])
|
45
|
+
end
|
46
|
+
def test_eol_lf()
|
47
|
+
str = "foo".extend CharString
|
48
|
+
str.eol = "LF"
|
49
|
+
expected = CharString::LF
|
50
|
+
assert_equal(expected, CharString::EOLChars[str.eol])
|
51
|
+
end
|
52
|
+
def test_eol_crlf()
|
53
|
+
str = "foo".extend CharString
|
54
|
+
str.eol = "CRLF"
|
55
|
+
expected = CharString::CRLF
|
56
|
+
assert_equal(expected, CharString::EOLChars[str.eol])
|
57
|
+
end
|
58
|
+
|
59
|
+
# test eol eol_char method
|
60
|
+
def test_eol_char_cr()
|
61
|
+
str = "foo\rbar\r".extend CharString
|
62
|
+
str.eol = "CR"
|
63
|
+
expected = "\r"
|
64
|
+
assert_equal(expected, str.eol_char)
|
65
|
+
end
|
66
|
+
def test_eol_char_lf()
|
67
|
+
str = "foo\nbar\n".extend CharString
|
68
|
+
str.eol = "LF"
|
69
|
+
expected = "\n"
|
70
|
+
assert_equal(expected, str.eol_char)
|
71
|
+
end
|
72
|
+
def test_eol_char_crlf()
|
73
|
+
str = "foo\r\nbar\r\n".extend CharString
|
74
|
+
str.eol = "CRLF"
|
75
|
+
expected = "\r\n"
|
76
|
+
assert_equal(expected, str.eol_char)
|
77
|
+
end
|
78
|
+
def test_eol_char_none()
|
79
|
+
str = "foobar".extend CharString
|
80
|
+
expected = nil
|
81
|
+
assert_equal(expected, str.eol_char)
|
82
|
+
end
|
83
|
+
def test_eol_char_none_for_0length_string()
|
84
|
+
str = "".extend CharString
|
85
|
+
expected = nil
|
86
|
+
assert_equal(expected, str.eol_char)
|
87
|
+
end
|
88
|
+
def test_eol_char_none_eucjp()
|
89
|
+
str = NKF.nkf("-e", "���ܸ�a b").extend CharString
|
90
|
+
expected = nil
|
91
|
+
assert_equal(expected, str.eol_char)
|
92
|
+
end
|
93
|
+
def test_eol_char_none_sjis()
|
94
|
+
str = NKF.nkf("-s", "���ܸ�a b").extend CharString
|
95
|
+
expected = nil
|
96
|
+
assert_equal(expected, str.eol_char)
|
97
|
+
end
|
98
|
+
|
99
|
+
# test eol split_to_line() method
|
100
|
+
def test_cr_split_to_line()
|
101
|
+
str = "foo\rbar\r".extend CharString
|
102
|
+
encoding, eol = "US-ASCII", "CR"
|
103
|
+
str.encoding, str.eol = encoding, eol
|
104
|
+
expected = ["foo\r", "bar\r"]
|
105
|
+
assert_equal(expected, str.split_to_line)
|
106
|
+
end
|
107
|
+
def test_cr_split_to_line_chomped_lastline()
|
108
|
+
str = "foo\rbar".extend CharString
|
109
|
+
str.encoding = "US-ASCII"
|
110
|
+
str.eol = "CR"
|
111
|
+
expected = ["foo\r", "bar"]
|
112
|
+
assert_equal(expected, str.split_to_line)
|
113
|
+
end
|
114
|
+
def test_cr_split_to_line_empty_line()
|
115
|
+
str = "foo\r\rbar\r".extend CharString
|
116
|
+
str.encoding = "US-ASCII"
|
117
|
+
str.eol = "CR"
|
118
|
+
expected = ["foo\r", "\r", "bar\r"]
|
119
|
+
assert_equal(expected, str.split_to_line)
|
120
|
+
end
|
121
|
+
def test_lf_split_to_line()
|
122
|
+
str = "foo\nbar\n".extend CharString
|
123
|
+
str.encoding = "US-ASCII"
|
124
|
+
str.eol = "LF"
|
125
|
+
expected = ["foo\n", "bar\n"]
|
126
|
+
assert_equal(expected, str.split_to_line)
|
127
|
+
end
|
128
|
+
def test_lf_split_to_line_chomped_lastline()
|
129
|
+
str = "foo\nbar".extend CharString
|
130
|
+
str.encoding = "US-ASCII"
|
131
|
+
str.eol = "LF"
|
132
|
+
expected = ["foo\n", "bar"]
|
133
|
+
assert_equal(expected, str.split_to_line)
|
134
|
+
end
|
135
|
+
def test_lf_split_to_line_empty_line()
|
136
|
+
str = "foo\n\nbar\n".extend CharString
|
137
|
+
str.encoding = "US-ASCII"
|
138
|
+
str.eol = "LF"
|
139
|
+
expected = ["foo\n", "\n", "bar\n"]
|
140
|
+
assert_equal(expected, str.split_to_line)
|
141
|
+
end
|
142
|
+
def test_crlf_split_to_line()
|
143
|
+
str = "foo\r\nbar\r\n".extend CharString
|
144
|
+
str.encoding = "US-ASCII"
|
145
|
+
str.eol = "CRLF"
|
146
|
+
expected = ["foo\r\n", "bar\r\n"]
|
147
|
+
assert_equal(expected, str.split_to_line)
|
148
|
+
end
|
149
|
+
def test_crlf_split_to_line_chomped_lastline()
|
150
|
+
str = "foo\r\nbar".extend CharString
|
151
|
+
str.encoding = "US-ASCII"
|
152
|
+
str.eol = "CRLF"
|
153
|
+
expected = ["foo\r\n", "bar"]
|
154
|
+
assert_equal(expected, str.split_to_line)
|
155
|
+
end
|
156
|
+
def test_crlf_split_to_line_empty_line()
|
157
|
+
str = "foo\r\n\r\nbar\r\n".extend CharString
|
158
|
+
str.encoding = "US-ASCII"
|
159
|
+
str.eol = "CRLF"
|
160
|
+
expected = ["foo\r\n", "\r\n", "bar\r\n"]
|
161
|
+
assert_equal(expected, str.split_to_line)
|
162
|
+
end
|
163
|
+
|
164
|
+
# test ASCII module
|
165
|
+
def test_ascii_split_to_word()
|
166
|
+
str = "foo bar".extend CharString
|
167
|
+
str.encoding = "US-ASCII"
|
168
|
+
expected = ["foo ", "bar"]
|
169
|
+
assert_equal(expected, str.split_to_word)
|
170
|
+
end
|
171
|
+
def test_ascii_split_to_word_withsymbol()
|
172
|
+
str = "foo (bar) baz-baz".extend CharString
|
173
|
+
str.encoding = "US-ASCII"
|
174
|
+
expected = ["foo ", "(bar) ", "baz-baz"]
|
175
|
+
assert_equal(expected, str.split_to_word)
|
176
|
+
end
|
177
|
+
def test_ascii_split_to_word_withquote()
|
178
|
+
str = "foo's 'foo' \"bar\" 'baz.'".extend CharString
|
179
|
+
str.encoding = "US-ASCII"
|
180
|
+
expected = ["foo's ", "'foo' ", "\"bar\" ", "'baz.'"]
|
181
|
+
assert_equal(expected, str.split_to_word)
|
182
|
+
end
|
183
|
+
def test_ascii_split_to_word_withlongspace()
|
184
|
+
str = " foo bar".extend CharString
|
185
|
+
str.encoding = "US-ASCII"
|
186
|
+
expected = [" ", "foo ", " ", "bar"]
|
187
|
+
assert_equal(expected, str.split_to_word)
|
188
|
+
end
|
189
|
+
def test_ascii_split_to_word_withdash()
|
190
|
+
str = "foo -- bar, baz - quux".extend CharString
|
191
|
+
str.encoding = "US-ASCII"
|
192
|
+
expected = ["foo ", "-- ", "bar, ", "baz ", "- ", "quux"]
|
193
|
+
assert_equal(expected, str.split_to_word)
|
194
|
+
end
|
195
|
+
def test_ascii_split_to_char()
|
196
|
+
str = "foo bar".extend CharString
|
197
|
+
str.encoding = "US-ASCII"
|
198
|
+
str.eol = "LF"
|
199
|
+
expected = ["f","o","o"," ","b","a","r"]
|
200
|
+
assert_equal(expected, str.split_to_char)
|
201
|
+
end
|
202
|
+
def test_ascii_split_to_char_with_eol_cr()
|
203
|
+
str = "foo bar\r".extend CharString
|
204
|
+
str.encoding = "US-ASCII"
|
205
|
+
str.eol = "CR"
|
206
|
+
expected = ["f","o","o"," ","b","a","r","\r"]
|
207
|
+
assert_equal(expected, str.split_to_char)
|
208
|
+
end
|
209
|
+
def test_ascii_split_to_char_with_eol_lf()
|
210
|
+
str = "foo bar\n".extend CharString
|
211
|
+
str.encoding = "US-ASCII"
|
212
|
+
str.eol = "LF"
|
213
|
+
expected = ["f","o","o"," ","b","a","r","\n"]
|
214
|
+
assert_equal(expected, str.split_to_char)
|
215
|
+
end
|
216
|
+
def test_ascii_split_to_char_with_eol_crlf()
|
217
|
+
str = "foo bar\r\n".extend CharString
|
218
|
+
str.encoding = "US-ASCII"
|
219
|
+
str.eol = "CRLF"
|
220
|
+
expected = ["f","o","o"," ","b","a","r","\r\n"]
|
221
|
+
assert_equal(expected, str.split_to_char)
|
222
|
+
end
|
223
|
+
def test_ascii_split_to_byte()
|
224
|
+
str = "foo bar\r\n".extend CharString
|
225
|
+
str.encoding = "US-ASCII"
|
226
|
+
str.eol = "CRLF"
|
227
|
+
expected = ["f","o","o"," ","b","a","r","\r","\n"]
|
228
|
+
assert_equal(expected, str.split_to_byte)
|
229
|
+
end
|
230
|
+
def test_ascii_count_byte()
|
231
|
+
str = "foo bar\r\n".extend CharString
|
232
|
+
str.encoding = "US-ASCII"
|
233
|
+
str.eol = "CRLF"
|
234
|
+
expected = 9
|
235
|
+
assert_equal(expected, str.count_byte)
|
236
|
+
end
|
237
|
+
def test_ascii_count_char()
|
238
|
+
str = "foo bar\r\nbaz quux\r\n".extend CharString
|
239
|
+
str.encoding = "US-ASCII"
|
240
|
+
str.eol = "CRLF"
|
241
|
+
expected = 17
|
242
|
+
assert_equal(expected, str.count_char)
|
243
|
+
end
|
244
|
+
def test_ascii_count_latin_graph_char()
|
245
|
+
str = "foo bar\r\nbaz quux\r\n".extend CharString
|
246
|
+
str.encoding = "US-ASCII"
|
247
|
+
str.eol = "CRLF"
|
248
|
+
expected = 13
|
249
|
+
assert_equal(expected, str.count_latin_graph_char)
|
250
|
+
end
|
251
|
+
def test_ascii_count_graph_char()
|
252
|
+
str = "foo bar\r\nbaz quux\r\n".extend CharString
|
253
|
+
str.encoding = "US-ASCII"
|
254
|
+
str.eol = "CRLF"
|
255
|
+
expected = 13
|
256
|
+
assert_equal(expected, str.count_graph_char)
|
257
|
+
end
|
258
|
+
def test_ascii_count_latin_blank_char()
|
259
|
+
str = "foo bar\r\nbaz\tquux\r\n".extend CharString
|
260
|
+
str.encoding = "US-ASCII"
|
261
|
+
str.eol = "CRLF"
|
262
|
+
expected = 2
|
263
|
+
assert_equal(expected, str.count_latin_blank_char)
|
264
|
+
end
|
265
|
+
def test_ascii_count_blank_char()
|
266
|
+
str = "foo bar\r\nbaz\tquux\r\n".extend CharString
|
267
|
+
str.encoding = "US-ASCII"
|
268
|
+
str.eol = "CRLF"
|
269
|
+
expected = 2
|
270
|
+
assert_equal(expected, str.count_blank_char)
|
271
|
+
end
|
272
|
+
def test_ascii_count_word()
|
273
|
+
str = "foo bar \r\nbaz quux\r\n".extend CharString
|
274
|
+
str.encoding = "US-ASCII"
|
275
|
+
str.eol = "CRLF"
|
276
|
+
expected = 6
|
277
|
+
assert_equal(expected, str.count_word)
|
278
|
+
end
|
279
|
+
def test_ascii_count_latin_word()
|
280
|
+
str = "foo bar \r\nbaz quux\r\n".extend CharString
|
281
|
+
str.encoding = "US-ASCII"
|
282
|
+
str.eol = "CRLF"
|
283
|
+
expected = 5 # " " is also counted as a word
|
284
|
+
assert_equal(expected, str.count_latin_word)
|
285
|
+
end
|
286
|
+
def test_ascii_count_latin_valid_word()
|
287
|
+
str = "1 foo \r\n%%% ()\r\n".extend CharString
|
288
|
+
str.encoding = "US-ASCII"
|
289
|
+
str.eol = "CRLF"
|
290
|
+
expected = 2
|
291
|
+
assert_equal(expected, str.count_latin_valid_word)
|
292
|
+
end
|
293
|
+
def test_ascii_count_line()
|
294
|
+
str = "foo\r\nbar".extend CharString
|
295
|
+
str.encoding = "US-ASCII"
|
296
|
+
str.eol = "CRLF"
|
297
|
+
expected = 2
|
298
|
+
assert_equal(expected, str.count_line)
|
299
|
+
end
|
300
|
+
def test_ascii_count_graph_line()
|
301
|
+
str = "foo\r\n ".extend CharString
|
302
|
+
str.encoding = "US-ASCII"
|
303
|
+
str.eol = "CRLF"
|
304
|
+
expected = 1
|
305
|
+
assert_equal(expected, str.count_graph_line)
|
306
|
+
end
|
307
|
+
def test_ascii_count_empty_line()
|
308
|
+
str = "foo\r\n \r\n\t\r\n\r\n".extend CharString
|
309
|
+
str.encoding = "US-ASCII"
|
310
|
+
str.eol = "CRLF"
|
311
|
+
expected = 1
|
312
|
+
assert_equal(expected, str.count_empty_line)
|
313
|
+
end
|
314
|
+
def test_ascii_count_blank_line()
|
315
|
+
str = "\r\n \r\n\t\r\n ".extend CharString
|
316
|
+
str.encoding = "US-ASCII"
|
317
|
+
str.eol = "CRLF"
|
318
|
+
expected = 3
|
319
|
+
assert_equal(expected, str.count_blank_line)
|
320
|
+
end
|
321
|
+
|
322
|
+
# test EUCJP module
|
323
|
+
def test_eucjp_split_to_word()
|
324
|
+
str = NKF.nkf("-e", "���ܸ��ʸ��foo bar").extend CharString
|
325
|
+
str.encoding = "EUC-JP"
|
326
|
+
expected = ["���ܸ��","ʸ��","foo ","bar"].collect{|c| NKF.nkf("-e", c)}
|
327
|
+
assert_equal(expected, str.split_to_word)
|
328
|
+
end
|
329
|
+
def test_eucjp_split_to_word_kanhira()
|
330
|
+
str = NKF.nkf("-e", "���ܸ��ʸ��").extend CharString
|
331
|
+
str.encoding = "EUC-JP"
|
332
|
+
expected = ["���ܸ��", "ʸ��"].collect{|c| NKF.nkf("-e", c)}
|
333
|
+
assert_equal(expected, str.split_to_word)
|
334
|
+
end
|
335
|
+
def test_eucjp_split_to_word_katahira()
|
336
|
+
str = NKF.nkf("-e", "�������ʤ�ʸ��").extend CharString
|
337
|
+
str.encoding = "EUC-JP"
|
338
|
+
expected = ["�������ʤ�", "ʸ��"].collect{|c| NKF.nkf("-e", c)}
|
339
|
+
assert_equal(expected, str.split_to_word)
|
340
|
+
end
|
341
|
+
def test_eucjp_split_to_word_kataonbiki()
|
342
|
+
str = NKF.nkf("-e", "��ӡ�������").extend CharString
|
343
|
+
str.encoding = "EUC-JP" #<= needed to pass the test
|
344
|
+
expected = ["��ӡ�", "����", "��"].collect{|c| NKF.nkf("-e", c)}
|
345
|
+
assert_equal(expected, str.split_to_word)
|
346
|
+
end
|
347
|
+
def test_eucjp_split_to_word_hiraonbiki()
|
348
|
+
str = NKF.nkf("-e", "���ӡ���").extend CharString
|
349
|
+
str.encoding = "EUC-JP" #<= needed to pass the test
|
350
|
+
expected = ["�", "��ӡ���"].collect{|c| NKF.nkf("-e", c)}
|
351
|
+
assert_equal(expected, str.split_to_word)
|
352
|
+
end
|
353
|
+
def test_eucjp_split_to_word_latinmix()
|
354
|
+
str = NKF.nkf("-e", "���ܸ��Latin��ʸ��").extend CharString
|
355
|
+
str.encoding = "EUC-JP"
|
356
|
+
expected = ["���ܸ��", "Latin", "��", "ʸ��"].collect{|c| NKF.nkf("-e", c)}
|
357
|
+
assert_equal(expected, str.split_to_word)
|
358
|
+
end
|
359
|
+
def test_eucjp_split_to_char()
|
360
|
+
str = NKF.nkf("-e", "���ܸ�a b").extend CharString
|
361
|
+
str.encoding = "EUC-JP"
|
362
|
+
str.eol = "LF" #<= needed to pass the test
|
363
|
+
expected = ["��","��","��","a"," ","b"].collect{|c|NKF.nkf("-e",c)}
|
364
|
+
assert_equal(expected, str.split_to_char)
|
365
|
+
end
|
366
|
+
def test_eucjp_split_to_char_with_cr()
|
367
|
+
str = NKF.nkf("-e", "���ܸ�a b\r").extend CharString
|
368
|
+
str.encoding = "EUC-JP"
|
369
|
+
str.eol = "CR"
|
370
|
+
expected = ["��","��","��","a"," ","b","\r"].collect{|c|NKF.nkf("-e",c)}
|
371
|
+
assert_equal(expected, str.split_to_char)
|
372
|
+
end
|
373
|
+
def test_eucjp_split_to_char_with_lf()
|
374
|
+
str = NKF.nkf("-e", "���ܸ�a b\n").extend CharString
|
375
|
+
str.encoding = "EUC-JP"
|
376
|
+
str.eol = "LF"
|
377
|
+
expected = ["��","��","��","a"," ","b","\n"].collect{|c|NKF.nkf("-e",c)}
|
378
|
+
assert_equal(expected, str.split_to_char)
|
379
|
+
end
|
380
|
+
def test_eucjp_split_to_char_with_crlf()
|
381
|
+
str = NKF.nkf("-e", "���ܸ�a b\r\n").extend CharString
|
382
|
+
str.encoding = "EUC-JP"
|
383
|
+
str.eol = "CRLF"
|
384
|
+
expected = ["��","��","��","a"," ","b","\r\n"].collect{|c|NKF.nkf("-e",c)}
|
385
|
+
assert_equal(expected, str.split_to_char)
|
386
|
+
end
|
387
|
+
def test_eucjp_count_char()
|
388
|
+
str = NKF.nkf("-e", "���ܸ�a b\r\n").extend CharString
|
389
|
+
str.encoding = "EUC-JP"
|
390
|
+
str.eol = "CRLF"
|
391
|
+
expected = 7
|
392
|
+
assert_equal(expected, str.count_char)
|
393
|
+
end
|
394
|
+
def test_eucjp_count_latin_graph_char()
|
395
|
+
str = NKF.nkf("-e", "���ܸ�a b\r\n").extend CharString
|
396
|
+
str.encoding = "EUC-JP"
|
397
|
+
str.eol = "CRLF"
|
398
|
+
expected = 2
|
399
|
+
assert_equal(expected, str.count_latin_graph_char)
|
400
|
+
end
|
401
|
+
def test_eucjp_count_ja_graph_char()
|
402
|
+
str = NKF.nkf("-e", "���ܸ�a b\r\n").extend CharString
|
403
|
+
str.encoding = "EUC-JP"
|
404
|
+
str.eol = "CRLF"
|
405
|
+
expected = 3
|
406
|
+
assert_equal(expected, str.count_ja_graph_char)
|
407
|
+
end
|
408
|
+
def test_eucjp_count_graph_char()
|
409
|
+
str = NKF.nkf("-e", "���ܸ�a b\r\n").extend CharString
|
410
|
+
str.encoding = "EUC-JP"
|
411
|
+
str.eol = "CRLF"
|
412
|
+
expected = 5
|
413
|
+
assert_equal(expected, str.count_graph_char)
|
414
|
+
end
|
415
|
+
def test_eucjp_count_latin_blank_char()
|
416
|
+
str = NKF.nkf("-e", "���ܸ�\ta b\r\n").extend CharString
|
417
|
+
str.encoding = "EUC-JP"
|
418
|
+
str.eol = "CRLF"
|
419
|
+
expected = 2
|
420
|
+
assert_equal(expected, str.count_latin_blank_char)
|
421
|
+
end
|
422
|
+
def test_eucjp_count_ja_blank_char()
|
423
|
+
str = NKF.nkf("-e", "���ܡ���\ta b\r\n").extend CharString
|
424
|
+
str.encoding = "EUC-JP"
|
425
|
+
str.eol = "CRLF"
|
426
|
+
expected = 1
|
427
|
+
assert_equal(expected, str.count_ja_blank_char)
|
428
|
+
end
|
429
|
+
def test_eucjp_count_blank_char()
|
430
|
+
str = NKF.nkf("-e", "���ܡ���\ta b\r\n").extend CharString
|
431
|
+
str.encoding = "EUC-JP"
|
432
|
+
str.eol = "CRLF"
|
433
|
+
expected = 3
|
434
|
+
assert_equal(expected, str.count_blank_char)
|
435
|
+
end
|
436
|
+
def test_eucjp_count_word()
|
437
|
+
str = NKF.nkf("-e", "���ܡ���a b --\r\n").extend CharString
|
438
|
+
str.encoding = "EUC-JP"
|
439
|
+
str.eol = "CRLF"
|
440
|
+
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
441
|
+
assert_equal(expected, str.count_word)
|
442
|
+
end
|
443
|
+
def test_eucjp_count_ja_word()
|
444
|
+
str = NKF.nkf("-e", "���ܡ���a b --\r\n").extend CharString
|
445
|
+
str.encoding = "EUC-JP"
|
446
|
+
str.eol = "CRLF"
|
447
|
+
expected = 3
|
448
|
+
assert_equal(expected, str.count_ja_word)
|
449
|
+
end
|
450
|
+
def test_eucjp_count_latin_valid_word()
|
451
|
+
str = NKF.nkf("-e", "���ܡ���a b --\r\n").extend CharString
|
452
|
+
str.encoding = "EUC-JP"
|
453
|
+
str.eol = "CRLF"
|
454
|
+
expected = 2
|
455
|
+
assert_equal(expected, str.count_latin_valid_word)
|
456
|
+
end
|
457
|
+
def test_eucjp_count_ja_valid_word()
|
458
|
+
str = NKF.nkf("-e", "���ܡ���a b --\r\n").extend CharString
|
459
|
+
str.encoding = "EUC-JP"
|
460
|
+
str.eol = "CRLF"
|
461
|
+
expected = 2
|
462
|
+
assert_equal(expected, str.count_ja_valid_word)
|
463
|
+
end
|
464
|
+
def test_eucjp_count_valid_word()
|
465
|
+
str = NKF.nkf("-e", "���ܡ���a b --\r\n").extend CharString
|
466
|
+
str.encoding = "EUC-JP"
|
467
|
+
str.eol = "CRLF"
|
468
|
+
expected = 4
|
469
|
+
assert_equal(expected, str.count_valid_word)
|
470
|
+
end
|
471
|
+
def test_eucjp_count_line()
|
472
|
+
str = NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar").extend CharString
|
473
|
+
str.encoding = "EUC-JP"
|
474
|
+
str.eol = "CRLF"
|
475
|
+
expected = 6
|
476
|
+
assert_equal(expected, str.count_line)
|
477
|
+
end
|
478
|
+
def test_eucjp_count_graph_line()
|
479
|
+
str = NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar").extend CharString
|
480
|
+
str.encoding = "EUC-JP"
|
481
|
+
str.eol = "CRLF"
|
482
|
+
expected = 3
|
483
|
+
assert_equal(expected, str.count_graph_line)
|
484
|
+
end
|
485
|
+
def test_eucjp_count_empty_line()
|
486
|
+
str = NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar").extend CharString
|
487
|
+
str.encoding = "EUC-JP"
|
488
|
+
str.eol = "CRLF"
|
489
|
+
expected = 1
|
490
|
+
assert_equal(expected, str.count_empty_line)
|
491
|
+
end
|
492
|
+
def test_eucjp_count_blank_line()
|
493
|
+
str = NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar").extend CharString
|
494
|
+
str.encoding = "EUC-JP"
|
495
|
+
str.eol = "CRLF"
|
496
|
+
expected = 2
|
497
|
+
assert_equal(expected, str.count_blank_line)
|
498
|
+
end
|
499
|
+
|
500
|
+
# test SJIS module
|
501
|
+
def test_sjis_split_to_word()
|
502
|
+
str = NKF.nkf("-s", "���ܸ��ʸ��foo bar").extend CharString
|
503
|
+
str.encoding = "Shift_JIS"
|
504
|
+
expected = ["���ܸ��", "ʸ��", "foo ", "bar"].collect{|c|NKF.nkf("-s",c)}
|
505
|
+
assert_equal(expected, str.split_to_word)
|
506
|
+
end
|
507
|
+
def test_sjisplit_s_to_word_kanhira()
|
508
|
+
str = NKF.nkf("-s", "���ܸ��ʸ��").extend CharString
|
509
|
+
str.encoding = "Shift_JIS"
|
510
|
+
expected = ["���ܸ��", "ʸ��"].collect{|c| NKF.nkf("-s", c)}
|
511
|
+
assert_equal(expected, str.split_to_word)
|
512
|
+
end
|
513
|
+
def test_sjis_split_to_word_katahira()
|
514
|
+
str = NKF.nkf("-s", "�������ʤ�ʸ��").extend CharString
|
515
|
+
str.encoding = "Shift_JIS"
|
516
|
+
expected = ["�������ʤ�", "ʸ��"].collect{|c| NKF.nkf("-s", c)}
|
517
|
+
assert_equal(expected, str.split_to_word)
|
518
|
+
end
|
519
|
+
def test_sjis_split_to_word_kataonbiki()
|
520
|
+
str = NKF.nkf("-s", "��ӡ��λ���").extend CharString
|
521
|
+
str.encoding = "Shift_JIS"
|
522
|
+
expected = ["��ӡ���", "����"].collect{|c| NKF.nkf("-s", c)}
|
523
|
+
assert_equal(expected, str.split_to_word)
|
524
|
+
end
|
525
|
+
def test_sjis_split_to_word_hiraonbiki()
|
526
|
+
str = NKF.nkf("-s", "���ӡ���").extend CharString
|
527
|
+
str.encoding = "Shift_JIS"
|
528
|
+
expected = ["�", "��ӡ���"].collect{|c| NKF.nkf("-s", c)}
|
529
|
+
assert_equal(expected, str.split_to_word)
|
530
|
+
end
|
531
|
+
def test_sjis_split_to_word_latinmix()
|
532
|
+
str = NKF.nkf("-s", "���ܸ��Latin��ʸ��").extend CharString
|
533
|
+
str.encoding = "Shift_JIS"
|
534
|
+
expected = ["���ܸ��","Latin","��","ʸ��"].collect{|c| NKF.nkf("-s", c)}
|
535
|
+
assert_equal(expected, str.split_to_word)
|
536
|
+
end
|
537
|
+
def test_sjis_split_to_char()
|
538
|
+
str = NKF.nkf("-s", "ɽ��a b").extend CharString
|
539
|
+
str.encoding = "Shift_JIS"
|
540
|
+
str.eol = "LF" #<= needed to pass the test
|
541
|
+
expected = ["ɽ","��","��","a"," ","b"].collect{|c|NKF.nkf("-s",c)}
|
542
|
+
assert_equal(expected, str.split_to_char)
|
543
|
+
end
|
544
|
+
def test_sjis_split_to_char_with_cr()
|
545
|
+
str = NKF.nkf("-s", "ɽ��a b\r").extend CharString
|
546
|
+
str.encoding = "Shift_JIS"
|
547
|
+
str.eol = "CR"
|
548
|
+
expected = ["ɽ","��","��","a"," ","b","\r"].collect{|c|NKF.nkf("-s",c)}
|
549
|
+
assert_equal(expected, str.split_to_char)
|
550
|
+
end
|
551
|
+
def test_sjis_split_to_char_with_lf()
|
552
|
+
str = NKF.nkf("-s", "ɽ��a b\n").extend CharString
|
553
|
+
str.encoding = "Shift_JIS"
|
554
|
+
str.eol = "LF"
|
555
|
+
expected = ["ɽ","��","��","a"," ","b","\n"].collect{|c|NKF.nkf("-s",c)}
|
556
|
+
assert_equal(expected, str.split_to_char)
|
557
|
+
end
|
558
|
+
def test_sjis_split_to_char_with_crlf()
|
559
|
+
str = NKF.nkf("-s", "ɽ��a b\r\n").extend CharString
|
560
|
+
str.encoding = "Shift_JIS"
|
561
|
+
str.eol = "CRLF"
|
562
|
+
expected = ["ɽ","��","��","a"," ","b","\r\n"].collect{|c|NKF.nkf("-s",c)}
|
563
|
+
assert_equal(expected, str.split_to_char)
|
564
|
+
end
|
565
|
+
def test_sjis_count_char()
|
566
|
+
str = NKF.nkf("-s", "���ܸ�a b\r\n").extend CharString
|
567
|
+
str.encoding = "Shift_JIS"
|
568
|
+
str.eol = "CRLF"
|
569
|
+
expected = 7
|
570
|
+
assert_equal(expected, str.count_char)
|
571
|
+
end
|
572
|
+
def test_sjis_count_latin_graph_char()
|
573
|
+
str = NKF.nkf("-s", "���ܸ�a b\r\n").extend CharString
|
574
|
+
str.encoding = "Shift_JIS"
|
575
|
+
str.eol = "CRLF"
|
576
|
+
expected = 2
|
577
|
+
assert_equal(expected, str.count_latin_graph_char)
|
578
|
+
end
|
579
|
+
def test_sjis_count_ja_graph_char()
|
580
|
+
str = NKF.nkf("-s", "���ܸ�a b\r\n").extend CharString
|
581
|
+
str.encoding = "Shift_JIS"
|
582
|
+
str.eol = "CRLF"
|
583
|
+
expected = 3
|
584
|
+
assert_equal(expected, str.count_ja_graph_char)
|
585
|
+
end
|
586
|
+
def test_sjis_count_graph_char()
|
587
|
+
str = NKF.nkf("-s", "���ܸ�a b\r\n").extend CharString
|
588
|
+
str.encoding = "Shift_JIS"
|
589
|
+
str.eol = "CRLF"
|
590
|
+
expected = 5
|
591
|
+
assert_equal(expected, str.count_graph_char)
|
592
|
+
end
|
593
|
+
def test_sjis_count_latin_blank_char()
|
594
|
+
str = NKF.nkf("-s", "���ܸ�\ta b\r\n").extend CharString
|
595
|
+
str.encoding = "Shift_JIS"
|
596
|
+
str.eol = "CRLF"
|
597
|
+
expected = 2
|
598
|
+
assert_equal(expected, str.count_latin_blank_char)
|
599
|
+
end
|
600
|
+
def test_sjis_count_ja_blank_char()
|
601
|
+
str = NKF.nkf("-s", "���ܡ���\ta b\r\n").extend CharString
|
602
|
+
str.encoding = "Shift_JIS"
|
603
|
+
str.eol = "CRLF"
|
604
|
+
expected = 1
|
605
|
+
assert_equal(expected, str.count_ja_blank_char)
|
606
|
+
end
|
607
|
+
def test_sjis_count_blank_char()
|
608
|
+
str = NKF.nkf("-s", "���ܡ���\ta b\r\n").extend CharString
|
609
|
+
str.encoding = "Shift_JIS"
|
610
|
+
str.eol = "CRLF"
|
611
|
+
expected = 3
|
612
|
+
assert_equal(expected, str.count_blank_char)
|
613
|
+
end
|
614
|
+
def test_sjis_count_word()
|
615
|
+
str = NKF.nkf("-s", "���ܡ���a b --\r\n").extend CharString
|
616
|
+
str.encoding = "Shift_JIS"
|
617
|
+
str.eol = "CRLF"
|
618
|
+
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
619
|
+
assert_equal(expected, str.count_word)
|
620
|
+
end
|
621
|
+
def test_sjis_count_ja_word()
|
622
|
+
str = NKF.nkf("-s", "���ܡ���a b --\r\n").extend CharString
|
623
|
+
str.encoding = "Shift_JIS"
|
624
|
+
str.eol = "CRLF"
|
625
|
+
expected = 3
|
626
|
+
assert_equal(expected, str.count_ja_word)
|
627
|
+
end
|
628
|
+
def test_sjis_count_latin_valid_word()
|
629
|
+
str = NKF.nkf("-s", "���ܡ���a b --\r\n").extend CharString
|
630
|
+
str.encoding = "Shift_JIS"
|
631
|
+
str.eol = "CRLF"
|
632
|
+
expected = 2
|
633
|
+
assert_equal(expected, str.count_latin_valid_word)
|
634
|
+
end
|
635
|
+
def test_sjis_count_ja_valid_word()
|
636
|
+
str = NKF.nkf("-s", "���ܡ���a b --\r\n").extend CharString
|
637
|
+
str.encoding = "Shift_JIS"
|
638
|
+
str.eol = "CRLF"
|
639
|
+
expected = 2
|
640
|
+
assert_equal(expected, str.count_ja_valid_word)
|
641
|
+
end
|
642
|
+
def test_sjis_count_valid_word()
|
643
|
+
str = NKF.nkf("-s", "���ܡ���a b --\r\n").extend CharString
|
644
|
+
str.encoding = "Shift_JIS"
|
645
|
+
str.eol = "CRLF"
|
646
|
+
expected = 4
|
647
|
+
assert_equal(expected, str.count_valid_word)
|
648
|
+
end
|
649
|
+
def test_sjis_count_line()
|
650
|
+
str = NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar").extend CharString
|
651
|
+
str.encoding = "Shift_JIS"
|
652
|
+
str.eol = "CRLF"
|
653
|
+
expected = 6
|
654
|
+
assert_equal(expected, str.count_line)
|
655
|
+
end
|
656
|
+
def test_sjis_count_graph_line()
|
657
|
+
str = NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar").extend CharString
|
658
|
+
str.encoding = "Shift_JIS"
|
659
|
+
str.eol = "CRLF"
|
660
|
+
expected = 3
|
661
|
+
assert_equal(expected, str.count_graph_line)
|
662
|
+
end
|
663
|
+
def test_sjis_count_empty_line()
|
664
|
+
str = NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar").extend CharString
|
665
|
+
str.encoding = "Shift_JIS"
|
666
|
+
str.eol = "CRLF"
|
667
|
+
expected = 1
|
668
|
+
assert_equal(expected, str.count_empty_line)
|
669
|
+
end
|
670
|
+
def test_sjis_count_blank_line()
|
671
|
+
str = NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar").extend CharString
|
672
|
+
str.encoding = "Shift_JIS"
|
673
|
+
str.eol = "CRLF"
|
674
|
+
expected = 2
|
675
|
+
assert_equal(expected, str.count_blank_line)
|
676
|
+
end
|
677
|
+
|
678
|
+
# test UTF8 module
|
679
|
+
def test_utf8_split_to_word()
|
680
|
+
str = NKF.nkf("-E -w", "���ܸ��ʸ��foo bar").extend CharString
|
681
|
+
str.encoding = "UTF-8"
|
682
|
+
expected = ["���ܸ��", "ʸ��", "foo ", "bar"].collect{|c| NKF.nkf("-E -w", c)}
|
683
|
+
assert_equal(expected, str.split_to_word)
|
684
|
+
end
|
685
|
+
def test_utf8_split_to_word_kanhira()
|
686
|
+
str = NKF.nkf("-E -w", "���ܸ��ʸ��").extend CharString
|
687
|
+
str.encoding = "UTF-8"
|
688
|
+
expected = ["���ܸ��", "ʸ��"].collect{|c| NKF.nkf("-E -w", c)}
|
689
|
+
assert_equal(expected, str.split_to_word)
|
690
|
+
end
|
691
|
+
def test_utf8_split_to_word_katahira()
|
692
|
+
str = NKF.nkf("-E -w", "�������ʤ�ʸ��").extend CharString
|
693
|
+
str.encoding = "UTF-8"
|
694
|
+
expected = ["�������ʤ�", "ʸ��"].collect{|c| NKF.nkf("-E -w", c)}
|
695
|
+
assert_equal(expected, str.split_to_word)
|
696
|
+
end
|
697
|
+
def test_utf8_split_to_word_kataonbiki()
|
698
|
+
str = NKF.nkf("-E -w", "��ӡ��λ���").extend CharString
|
699
|
+
str.encoding = "UTF-8"
|
700
|
+
expected = ["��ӡ���", "����"].collect{|c| NKF.nkf("-E -w", c)}
|
701
|
+
assert_equal(expected, str.split_to_word)
|
702
|
+
end
|
703
|
+
def test_utf8_split_to_word_hiraonbiki()
|
704
|
+
str = NKF.nkf("-E -w", "���ӡ���").extend CharString
|
705
|
+
str.encoding = "UTF-8"
|
706
|
+
expected = ["�", "��ӡ���"].collect{|c| NKF.nkf("-E -w", c)}
|
707
|
+
assert_equal(expected, str.split_to_word)
|
708
|
+
end
|
709
|
+
def test_utf8_split_to_word_latinmix()
|
710
|
+
str = NKF.nkf("-E -w", "���ܸ��Latin��ʸ��").extend CharString
|
711
|
+
str.encoding = "UTF-8"
|
712
|
+
expected = ["���ܸ��", "Latin", "��", "ʸ��"].collect{|c| NKF.nkf("-E -w", c)}
|
713
|
+
assert_equal(expected, str.split_to_word)
|
714
|
+
end
|
715
|
+
# split_to_char on a UTF-8 sample without a line terminator.
# Encoding and EOL are assigned explicitly; the original author notes
# both assignments are needed for the test to pass.
def test_utf8_split_to_char
  chars = ["��", "��", "��", "a", " ", "b"].map { |ch| NKF.nkf("-E -w", ch) }
  text = NKF.nkf("-E -w", "���ܸ�a b")
  text.extend(CharString)
  text.encoding = "UTF-8"
  text.eol = "LF"
  assert_equal(chars, text.split_to_char)
end
|
722
|
+
# split_to_char on a UTF-8 sample ending in CR; the terminator is
# expected as the last element. The original author notes the explicit
# encoding assignment is needed for the test to pass.
def test_utf8_split_to_char_with_cr
  chars = ["��","��","��","a"," ","b","\r"].map { |ch| NKF.nkf("-E -w", ch) }
  text = NKF.nkf("-E -w", "���ܸ�a b\r")
  text.extend(CharString)
  text.encoding = "UTF-8"
  text.eol = "CR"
  assert_equal(chars, text.split_to_char)
end
|
729
|
+
# split_to_char on a UTF-8 sample ending in LF; the terminator is
# expected as the last element. The original author notes the explicit
# encoding assignment is needed for the test to pass.
def test_utf8_split_to_char_with_lf
  chars = ["��","��","��","a"," ","b","\n"].map { |ch| NKF.nkf("-E -w", ch) }
  text = NKF.nkf("-E -w", "���ܸ�a b\n")
  text.extend(CharString)
  text.encoding = "UTF-8"
  text.eol = "LF"
  assert_equal(chars, text.split_to_char)
end
|
736
|
+
# split_to_char on a UTF-8 sample ending in CRLF; "\r\n" is expected as
# one single element. The original author notes the explicit encoding
# assignment is needed for the test to pass.
def test_utf8_split_to_char_with_crlf
  chars = ["��","��","��","a"," ","b","\r\n"].map { |ch| NKF.nkf("-E -w", ch) }
  text = NKF.nkf("-E -w", "���ܸ�a b\r\n")
  text.extend(CharString)
  text.encoding = "UTF-8"
  text.eol = "CRLF"
  assert_equal(chars, text.split_to_char)
end
|
743
|
+
# count_char on a CRLF-terminated UTF-8 sample; 7 characters expected.
# The original author notes the explicit encoding assignment is needed
# for the test to pass.
def test_utf8_count_char
  sample = NKF.nkf("-E -w", "���ܸ�a b\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(7, sample.count_char)
end
|
750
|
+
# count_latin_graph_char on a CRLF-terminated UTF-8 sample; 2 expected.
# The original author notes the explicit encoding assignment is needed
# for the test to pass.
def test_utf8_count_latin_graph_char
  sample = NKF.nkf("-E -w", "���ܸ�a b\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(2, sample.count_latin_graph_char)
end
|
757
|
+
# count_ja_graph_char on a CRLF-terminated UTF-8 sample; 3 expected.
# The original author notes the explicit encoding assignment is needed
# for the test to pass.
def test_utf8_count_ja_graph_char
  sample = NKF.nkf("-E -w", "���ܸ�a b\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(3, sample.count_ja_graph_char)
end
|
764
|
+
# count_graph_char on a CRLF-terminated UTF-8 sample; 5 expected.
# The original author notes the explicit encoding assignment is needed
# for the test to pass.
def test_utf8_count_graph_char
  sample = NKF.nkf("-E -w", "���ܸ�a b\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(5, sample.count_graph_char)
end
|
771
|
+
# count_latin_blank_char on a UTF-8 sample containing a tab and a
# space; 2 expected.
def test_utf8_count_latin_blank_char
  sample = NKF.nkf("-E -w", "���ܸ�\ta b\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(2, sample.count_latin_blank_char)
end
|
778
|
+
# count_ja_blank_char on a CRLF-terminated UTF-8 sample; 1 expected.
def test_utf8_count_ja_blank_char
  sample = NKF.nkf("-E -w", "���ܡ���\ta b\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(1, sample.count_ja_blank_char)
end
|
785
|
+
# count_blank_char on a CRLF-terminated UTF-8 sample; 3 expected.
def test_utf8_count_blank_char
  sample = NKF.nkf("-E -w", "���ܡ���\ta b\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(3, sample.count_blank_char)
end
|
792
|
+
# count_word on a CRLF-terminated UTF-8 sample.
def test_utf8_count_word
  sample = NKF.nkf("-E -w", "���ܡ���a b --\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  # "--" and "\r\n" are included in this count even though they are
  # not "valid" words.
  assert_equal(7, sample.count_word)
end
|
799
|
+
# count_ja_word on a CRLF-terminated UTF-8 sample; 3 expected.
def test_utf8_count_ja_word
  sample = NKF.nkf("-E -w", "���ܡ���a b --\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(3, sample.count_ja_word)
end
|
806
|
+
# count_latin_valid_word on a CRLF-terminated UTF-8 sample; 2 expected.
def test_utf8_count_latin_valid_word
  sample = NKF.nkf("-E -w", "���ܡ���a b --\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(2, sample.count_latin_valid_word)
end
|
813
|
+
# count_ja_valid_word on a CRLF-terminated UTF-8 sample; 2 expected.
def test_utf8_count_ja_valid_word
  sample = NKF.nkf("-E -w", "���ܡ���a b --\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(2, sample.count_ja_valid_word)
end
|
820
|
+
# count_valid_word on a CRLF-terminated UTF-8 sample; 4 expected.
def test_utf8_count_valid_word
  sample = NKF.nkf("-E -w", "���ܡ���a b --\r\n")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(4, sample.count_valid_word)
end
|
827
|
+
# count_line on a multi-line CRLF UTF-8 sample whose last line has no
# terminator; 6 lines expected.
def test_utf8_count_line
  sample = NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(6, sample.count_line)
end
|
834
|
+
# count_graph_line on a multi-line CRLF UTF-8 sample; 3 expected.
def test_utf8_count_graph_line
  sample = NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(3, sample.count_graph_line)
end
|
841
|
+
# count_empty_line on a multi-line CRLF UTF-8 sample; 1 expected.
def test_utf8_count_empty_line
  sample = NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(1, sample.count_empty_line)
end
|
848
|
+
# count_blank_line on a multi-line CRLF UTF-8 sample; 2 expected.
def test_utf8_count_blank_line
  sample = NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar")
  sample.extend(CharString)
  sample.encoding = "UTF-8"
  sample.eol = "CRLF"
  assert_equal(2, sample.count_blank_line)
end
|
855
|
+
|
856
|
+
# test module functions
|
857
|
+
|
858
|
+
# Shared assertion for the encoding-guess tests.
#
# When CharString.ruby_m17n? is false, the pure-Ruby and iconv based
# guessers are checked first; CharString.guess_encoding is always checked.
#
# expected - the encoding name the guessers should return
# str      - the input handed to each guesser
def assert_guess_encoding(expected, str)
  legacy_ruby = !CharString.ruby_m17n?
  if legacy_ruby
    assert_equal(expected, CharString.guess_encoding_using_pureruby(str))
    assert_equal(expected, CharString.guess_encoding_using_iconv(str))
  end
  assert_equal(expected, CharString.guess_encoding(str))
end
|
865
|
+
|
866
|
+
# Guessing the encoding of nil is expected to give nil.
def test_guess_encoding_nil
  assert_guess_encoding(nil, nil)
end
|
871
|
+
# def test_guess_encoding_binary()
|
872
|
+
# str = "\xFF\xFF"
|
873
|
+
# expected = "BINARY"
|
874
|
+
# assert_equal(expected, CharString.guess_encoding(str))
|
875
|
+
# end
|
876
|
+
# Encoding guess for input that is not recognisable text.
# With m17n support an empty BINARY-encoded string is used (an invalid
# string literal cannot be written there) and "ASCII-8BIT" is expected;
# otherwise four 0xFF bytes are used and "UNKNOWN" is expected.
def test_guess_encoding_unknown
  input, guess =
    if CharString.ruby_m17n?
      ["".encode("BINARY"), "ASCII-8BIT"]
    else
      ["\xff\xff\xff\xff", "UNKNOWN"]
    end
  assert_guess_encoding(guess, input)
end
|
886
|
+
# A plain ASCII sentence is expected to be guessed as "US-ASCII".
# With m17n support the sample is first encoded to US-ASCII.
def test_guess_encoding_ascii_1
  input =
    if CharString.ruby_m17n?
      "ASCII string".encode("US-ASCII")
    else
      "ASCII string"
    end
  assert_guess_encoding("US-ASCII", input)
end
|
896
|
+
# Multi-line ASCII text is expected to be guessed as "US-ASCII".
# With m17n support the sample is first encoded to US-ASCII.
def test_guess_encoding_ascii_2
  input =
    if CharString.ruby_m17n?
      "abc\ndef\n".encode("US-ASCII")
    else
      "abc\ndef\n"
    end
  assert_guess_encoding("US-ASCII", input)
end
|
906
|
+
# CharString.guess_encoding mistakes JIS for ASCII sometimes, due to Iconv.
|
907
|
+
# def test_guess_encoding_jis_1()
|
908
|
+
# str = NKF.nkf("-j", "�����ȥ������ʤȤҤ餬��\n")
|
909
|
+
# expected = "JIS"
|
910
|
+
# assert_guess_encoding(expected, str)
|
911
|
+
# end
|
912
|
+
# def test_guess_encoding_jis_2()
|
913
|
+
# str = NKF.nkf("-j", "�����ȥ������ʤȤҤ餬�ʤ�Latin��ʸ���ȶ���( )�ȵ���@\n" * 100)
|
914
|
+
# expected = "JIS"
|
915
|
+
# assert_guess_encoding(expected, str)
|
916
|
+
# end
|
917
|
+
# Text produced by NKF with "-e" is expected to be guessed as "EUC-JP".
def test_guess_encoding_eucjp_1
  euc_text = NKF.nkf("-e", "���ܸ��Latin��ʸ��")
  assert_guess_encoding("EUC-JP", euc_text)
end
|
922
|
+
# A longer sample (one line repeated 10 times) produced by NKF with "-e"
# is expected to be guessed as "EUC-JP".
def test_guess_encoding_eucjp_2
  euc_text = NKF.nkf('-e', "�����ȥ������ʤȤҤ餬�ʤ�Latin��ʸ���ȶ���( )\n" * 10)
  assert_guess_encoding("EUC-JP", euc_text)
end
|
927
|
+
# A two-line sample produced by NKF with "-e" is expected to be guessed
# as "EUC-JP".
def test_guess_encoding_eucjp_3
  euc_text = NKF.nkf('-e', "����Ф�ϡ����̾���ϤޤĤ�ȤǤ���\nRuby���ä��Τϻ�Ǥ������Ruby Hacker�Ǥ���\n")
  assert_guess_encoding("EUC-JP", euc_text)
end
|
932
|
+
# Text produced by NKF with "-s" is expected to be guessed as "Shift_JIS".
def test_guess_encoding_sjis_1
  sjis_text = NKF.nkf("-s", "���ܸ��Latin��ʸ��")
  assert_guess_encoding("Shift_JIS", sjis_text)
end
|
937
|
+
# A multi-line sample produced by NKF with "-s" is expected to be
# guessed as "Shift_JIS".
def test_guess_encoding_sjis_2
  sjis_text = NKF.nkf('-s', "������\n�������ʤ�\n�Ҥ餬�ʤ�\nLatin")
  assert_guess_encoding("Shift_JIS", sjis_text)
end
|
942
|
+
# Text produced by NKF with "-E -w" is expected to be guessed as "UTF-8".
def test_guess_encoding_utf8_1
  utf8_text = NKF.nkf("-E -w", "���ܸ��Latin��ʸ��")
  assert_guess_encoding("UTF-8", utf8_text)
end
|
947
|
+
# A two-line sample produced by NKF with "-E -w" is expected to be
# guessed as "UTF-8".
def test_guess_encoding_utf8_2
  utf8_text = NKF.nkf("-E -w", "������\n�ˤۤؤ�\n")
  assert_guess_encoding("UTF-8", utf8_text)
end
|
952
|
+
|
953
|
+
# guess_eol is expected to return nil for nil input.
def test_guess_eol_nil
  assert_equal(nil, CharString.guess_eol(nil))
end
|
958
|
+
# guess_eol is expected to return "NONE" for an empty string.
def test_guess_eol_empty
  assert_equal("NONE", CharString.guess_eol(""))
end
|
963
|
+
# guess_eol is expected to return "NONE" for text with no line terminator.
def test_guess_eol_none
  assert_equal("NONE", CharString.guess_eol("foo bar"))
end
|
968
|
+
# guess_eol is expected to return "CR" for text ending in "\r".
def test_guess_eol_cr
  assert_equal("CR", CharString.guess_eol("foo bar\r"))
end
|
973
|
+
# guess_eol is expected to return "LF" for text ending in "\n".
def test_guess_eol_lf
  assert_equal("LF", CharString.guess_eol("foo bar\n"))
end
|
978
|
+
# guess_eol is expected to return "CRLF" for text ending in "\r\n".
def test_guess_eol_crlf
  assert_equal("CRLF", CharString.guess_eol("foo bar\r\n"))
end
|
983
|
+
# guess_eol is expected to return "UNKNOWN" when CR, LF and CRLF
# terminators are mixed in one string.
def test_guess_eol_mixed
  assert_equal("UNKNOWN", CharString.guess_eol("foo\rbar\nbaz\r\n"))
end
|
988
|
+
# guess_eol is expected to return "CR" for several CR-terminated lines
# (the input is a string extended with CharString).
def test_guess_eol_cr2
  text = "foo\rbar\rbaz\r"
  text.extend(CharString)
  assert_equal("CR", CharString.guess_eol(text))
end
|
993
|
+
# guess_eol is expected to return "LF" for several LF-terminated lines
# (the input is a string extended with CharString).
def test_guess_eol_lf2
  text = "foo\nbar\nbaz\n"
  text.extend(CharString)
  assert_equal("LF", CharString.guess_eol(text))
end
|
998
|
+
# guess_eol is expected to return "CRLF" for several CRLF-terminated
# lines (the input is a string extended with CharString).
def test_guess_eol_crlf2
  text = "foo\r\nbar\r\nbaz\r\n"
  text.extend(CharString)
  assert_equal("CRLF", CharString.guess_eol(text))
end
|
1003
|
+
|
1004
|
+
# Intentionally empty: this method performs no cleanup.
def teardown
end
|
1007
|
+
|
1008
|
+
end
|