icu4r 0.1.3.2006.01.26

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,381 @@
require './icu4r'
require 'test/unit'
# Unit tests for the ICU4R extension: UString, URegexp and UResourceBundle.
#
# The helpers used throughout (u(), ure(), String#u / #to_u, Regexp#U,
# Array#to_u) are not defined in this file; presumably they are provided by
# the `icu4r` extension required at the top of the file.
class UnicodeStringTest < Test::Unit::TestCase

  # Two UStrings built from the same UTF-8 text compare equal.
  def test_string
    a = u("абвг", "utf8")
    b = u("абвг", "utf8")
    assert_equal(a, b)
  end

  # Case-insensitive comparison returns 0 / -1 / 1.
  def test_casecmp
    assert_equal(0, u("Сцуко").casecmp("сЦуКо".u))
    assert_equal(-1, u("Сцук").casecmp("сЦуКо".u))
    assert_equal(1, u("Сцуко").casecmp("сЦуК".u))
  end

  # Regexp matching via assert_match, =~ and !~ in both operand orders.
  def test_match
    assert_match(ure("абвг"), u("абвг"))
    assert("аавг".u !~ ure("^$"))
    assert("авб\n".u !~ ure("^$"))
    assert("абв".u !~ ure("^г*$"))
    assert_equal("".u, ("абв".u =~ ure("г*$"))[0])
    assert("".u =~ ure("^$"))
    assert("абвабв".u =~ ure(".*а"))
    assert("абвабв".u =~ ure(".*в"))
    assert("абвабв".u =~ ure(".*?а"))
    assert("абвабв".u =~ ure(".*?в"))
    assert(ure("(.|\n)*?\n(б|\n)") =~ u("а\nб\n\n"))
  end

  # sub returns a new string; sub! replaces the first match in place.
  def test_sub
    x = "a.gif".u
    assert_equal("gif".u, x.sub(ure(".*\\.([^\\.]+)$"), "$1".u))
    assert_equal("b.gif".u, x.sub(ure(".*\\.([^\\.]+)$"), "b.$1".u))
    # The non-bang calls above must have left the receiver untouched.
    assert_equal("a.gif".u, x)
    x.sub!(/gif/.U, ''.u)
    assert_equal("a.".u, x)
    x = "1234561234".u
    x.sub!(/123/.U, "".u)
    assert_equal("4561234".u, x)
  end

  # upcase/downcase leave the receiver alone; the bang variants mutate it.
  def test_case_fold
    assert_equal("А".u, "а".u.upcase!)
    assert_equal("а".u, "А".u.downcase!)

    s = "аБв".u
    s.upcase
    assert_equal("аБв".u, s)
    s.upcase!
    assert_equal("АБВ".u, s)

    s = "аБв".u
    s.downcase
    assert_equal("аБв".u, s)
    s.downcase!
    assert_equal("абв".u, s)
  end

  # index / rindex with string and regexp needles, including negative offsets.
  def test_index
    assert_equal(1, "hello".u.rindex('e'.u))
    assert_equal(3, "hello".u.rindex('l'.u))
    assert_nil("hello".u.rindex('a'.u))
    assert_equal(1, "hello".u.index('e'.u))
    assert_equal(3, "hello".u.index('lo'.u))
    assert_nil("hello".u.index('a'.u))
    assert_equal(4, "hello".u.index(ure('[aeiou]'), -3))
    assert_equal(1, "hello".u.rindex(ure('[aeiou]'), -2))
  end

  # insert with positive and negative positions.
  def test_insert
    assert_equal("Xabcd".u, "abcd".u.insert(0, 'X'.u))
    assert_equal("abcXd".u, "abcd".u.insert(3, 'X'.u))
    assert_equal("abcdX".u, "abcd".u.insert(4, 'X'.u))
    assert_equal("abXcd".u, "abcd".u.insert(-3, 'X'.u))
    assert_equal("abcdX".u, "abcd".u.insert(-1, 'X'.u))
  end

  def test_include
    assert("hello".u.include?("lo".u))
    assert(!"hello".u.include?("ol".u))
  end

  # Construction from another UString, replace and clone.
  def test_init
    assert_equal("нах!".u, UString.new("нах!".u))
    a = "ГНУ!".u
    a.replace("ФИГНУ!".u)
    assert_equal("ФИГНУ!".u, a)
    assert_equal(a, a.clone)
  end

  # Element reference with index, (start, length), range, regexp and substring.
  def test_aref
    a = "hello there".u
    assert_equal('e'.u, a[1])
    assert_equal('ell'.u, a[1, 3])
    assert_equal('ell'.u, a[1..3])
    assert_equal('er'.u, a[-3, 2])
    assert_equal('her'.u, a[-4..-2])
    assert_nil(a[12..-1])
    assert_equal(''.u, a[-2..-4])
    assert_equal('ell'.u, a[ure('[aeiou](.)\1')])
    assert_equal('ell'.u, a[ure('[aeiou](.)\1'), 0])
    assert_equal('l'.u, a[ure('[aeiou](l)\1'), 1])
    assert_nil(a[ure('[aeiou](.)$1'), 2])
    assert_equal('lo'.u, a["lo".u])
    assert_nil(a["bye".u])
  end

  # slice! removes and returns the selected part; []= replaces a unit.
  def test_slice_bang
    string = "this is a string".u
    assert_equal('i'.u, string.slice!(2))
    assert_equal(" is ".u, string.slice!(3..6))
    assert_equal("sa st".u, string.slice!(ure("s.*t")))
    assert_equal("r".u, string.slice!("r".u))
    assert_equal("thing".u, string)
    a = "test".u
    a[0] = "BEA".u
    assert_equal("BEAest".u, a)
  end

  # gsub with replacement strings, group references and blocks.
  def test_gsub
    assert_equal("h*ll*".u, "hello".u.gsub(ure("[aeiou]"), '*'.u))
    assert_equal("h<e>ll<o>".u, "hello".u.gsub(ure("([aeiou])"), '<$1>'.u))
    i = 0
    assert_equal("12345".u, "hello".u.gsub(ure(".")) { |_match| i += 1; i.to_s })
    assert_equal("214365".u, "123456".u.gsub(ure("(.)(.)")) { |s| s[2] + s[1] })
    # Using the receiver itself as the replacement must work.
    a = "test".u
    a.gsub!(/t/.U, a)
    assert_equal("testestest".u, a)
  end

  # URegexp can be used as a `when` condition (case equality).
  def test_ure_case_eq
    a = "HELLO".u
    v = case a
        when ure("^[a-z]*$") then "Lower case"
        when ure("^[A-Z]*$") then "Upper case"
        else "Mixed case"
        end
    assert_equal('Upper case', v)
  end

  # UString::strcoll("ÆSS".u, "AEß".u, "de", 0)
  def test_empty
    assert(!"hello".u.empty?)
    # Convert to UString first so that UString#empty? (not String#empty?) is tested.
    assert("".u.empty?)
    assert("test".u.clear.empty?)
    assert(" \t\n".u.strip.empty?)
  end

  def test_clear
    a = "test".u
    a.clear
    assert_equal(0, a.length)
  end

  def test_length
    assert_equal(10, "12345АБВГД".u.length)
    assert_equal(0, "".u.length)
    assert_equal(3, "abc".u.length)
  end

  def test_replace
    s = "hello".u
    s.replace("world".u)
    assert_equal("world".u, s)
  end

  def test_cmp
    assert_equal(1, "абвгде".u <=> "абвгд".u)
    assert_equal(0, "абвгде".u <=> "абвгде".u)
    assert_equal(-1, "абвгде".u <=> "абвгдеж".u)
    assert_equal(-1, "абвгде".u <=> "АБВГДЕ".u) # UCA
  end

  def test_plus
    assert_equal("сложение".u, "сло".u + "жение".u)
  end

  def test_times
    assert_equal("ААААА".u, "А".u * 5)
  end

  def test_concat
    assert_equal("сложение".u, "сло".u << "жение".u)
    assert_equal("сложение".u, "сло".u.concat("жение".u))
    a = "сло".u
    a << "жение".u
    assert_equal("сложение".u, a)
  end

  # search returns an array of ranges, one per occurrence, and accepts
  # option keys such as :ignore_case, :whole_words, :limit and :locale.
  def test_search
    a = "A quick brown fox jumped over the lazy fox dancing foxtrote".u
    assert_equal([14..16, 39..41, 51..53], a.search("fox".u))
    assert_equal([], a.search("FoX".u))
    assert_equal([14..16, 39..41, 51..53], a.search("FoX".u, :ignore_case => true))
    assert_equal([14..16, 39..41], a.search("FoX".u, :ignore_case => true, :whole_words => true))
    assert_equal([14..16], a.search("FoX".u, :ignore_case => true, :whole_words => true, :limit => 1))

    b = "Iñtërnâtiônàlizætiøn îs cọmpłèx".u.upcase
    assert_equal("IÑTËRNÂTIÔNÀLIZÆTIØN ÎS CỌMPŁÈX".u, b)
    assert_equal([], b.search("nâtiôn".u, :locale => "en"))
    assert_equal([], b.search("nation".u))
    assert_equal([5..10], b.search("nation".u, :locale => "en", :ignore_case_accents => true))
    assert_equal([5..10], b.search("nâtiôn".u, :locale => "en", :ignore_case => true))
    assert_equal([], b.search("zaeti".u, :locale => "en"))
    assert_equal([], b.search("zaeti".u, :locale => "en", :ignore_case => true))
    assert_equal([14..17], b.search("zaeti".u, :locale => "en", :ignore_case_accents => true))
    assert_equal([0..7], "İSTANBUL".u.search("istanbul".u, :locale => 'tr', :ignore_case => true))
    assert_equal([0..4], "ёжий".u.norm_D.search("ЕЖИЙ".u, :locale => 'ru', :canonical => true, :ignore_case_accents => true))
  end

  # A `$` that is not followed by a group number stays literal in replacements.
  def test_dollar_sign_regexp
    assert_equal("te$et".u, "test".u.gsub(/s/.U, '$e'.u))
  end

  # Round trip between an array of code points and a UString.
  def test_codepoints
    # 0x74, 0x65, 0x73 are 't', 'e', 's'. Explicit code points are used instead
    # of ?t/?e/?s because character literals are Integers only on Ruby 1.8.
    a = [0x01234, 0x0434, 0x1D7D9, 0x74, 0x65, 0x73]
    b = a.pack("U*").u
    assert_equal(a, b.codepoints)
    assert_equal(b, a.to_u)
  end

  # chars on a decomposed string is compared against NFC-composed characters.
  def test_chars
    chr = ["I", "Ñ", "T", "Ë", "R", "N", "Â", "T", "I", "Ô", "N", "À", "L", "I", "Z", "Æ", "T", "I", "Ø", "N"]
    chr = chr.map { |s| s.to_u.norm_C }
    assert_equal(chr, "Iñtërnâtiônàlizætiøn".u.upcase.norm_D.chars)
  end

  # Locale-aware message formatting of strings, numbers, currency and dates.
  def test_fmt
    assert_equal("b a".u, "{1} {0}".u.fmt("en", "a".u, "b".u))
    assert_equal("12,345.56".u, "{0, number}".u.fmt("en", 12345.56))
    assert_equal("$12,345.56".u, "{0, number, currency}".u.fmt("en_US", 12345.56))
    assert_equal("20:15:01 13/01/2006".u, "{0,date,HH:mm:ss dd/MM/yyyy}".u.fmt("en", Time.local(2006, "jan", 13, 20, 15, 1)))
  end

  # NOTE(review): the expected literals below look identical in this copy of
  # the file; presumably the original used different normalization forms
  # (composed vs. decomposed) for each case -- verify against the upstream source.
  def test_norm
    v = "Iñtërnâtiônàlizætiøn".u
    assert_equal("Iñtërnâtiônàlizætiøn".u, v.norm_C)
    assert_equal("Iñtërnâtiônàlizætiøn".u, v.norm_D)
    assert_equal("Iñtërnâtiônàlizætiøn".u, v.norm_D.norm_FCD)
    assert_equal("Iñtërnâtiônàlizætiøn".u, v.norm_D.norm_KC)
  end

  # The receiver is the date pattern; the arguments are locale and text.
  def test_parse_date
    assert_equal(Time.local(2006, "jan", 13, 20, 15, 1),
                 "HH:mm:ss E dd/MM/yyyy".u.parse_date("en", "20:15:01 Fri 13/01/2006".u))
    assert_equal(Time.local(2006, "jan", 18, 0, 0),
                 "d MMMM, yyyy".u.parse_date("ar_SY", "١٨ كانون الثاني, ٢٠٠٦".u))
  end

  # scan returns flat matches, or arrays of groups when several groups exist.
  def test_scan
    a = "cruel world".u
    assert_equal(["cruel".u, "world".u], a.scan(/\w+/.U))
    assert_equal(["cru".u, "el ".u, "wor".u], a.scan(/.../.U))
    assert_equal(["cru".u, "el ".u, "wor".u], a.scan(/(...)/.U))
    assert_equal([["cr".u, "ue".u], ["l ".u, "wo".u]], a.scan(/(..)(..)/.U))
  end

  # URegexp#split with and without a limit.
  def test_split
    re = URegexp.new("[,:/]".u)
    assert_equal(["split test".u, "west".u, "best".u, "east".u], re.split("split test,west:best/east".u, nil))
    assert_equal(["split test".u, "west:best/east".u], re.split("split test,west:best/east".u, 2))
  end

  # Collation with optional locale and a further numeric argument
  # (presumably the collation strength -- confirm in the ICU4R docs).
  def test_strcoll
    assert_equal(0, UString::strcoll("a".u, "a".u))
    assert_equal(0, UString::strcoll("ой её".u, "ОИ ЕЕ".u, "ru", 0))
    assert_equal(0, UString::strcoll("ой её".u, "ОЙ ЕЁ".u, "ru", 1))
    assert_equal(-1, UString::strcoll("y".u, "k".u, "lv"))
    assert_equal(1, UString::strcoll("я".u, "а".u))
    assert_equal(1, UString::strcoll("я".u, "А".u, "ru"))
    assert_equal(0, UString::strcoll("İSTANBUL".u, "istanbul".u, "tr", 0))
  end

  # A gsub block may itself run another gsub with the same regexp.
  def test_gsub_block
    a = "АБРАКАДАБРА".u
    r = URegexp.new("(.)(.)(А)".u, URegexp::IGNORECASE)
    b = a.gsub(r) do |m|
      assert_equal("ава".u, "бравада".u.gsub(r) { |v| v[3] })
      m[3] + m[2] + m[1]
    end
    assert_equal("ААРБКАДААРБ".u, b)
  end

  # The match object exposes the matched text and its range.
  def test_match_range
    t = "test\ntext".u
    m = (t =~ /^.+$/m.U)
    assert_equal('test'.u, m[0])
    assert_equal(0..3, m.range(0))
  end

  # Localized country names are read from the resource bundles.
  def test_resbundle
    b = UResourceBundle.open(nil, "en")
    assert_equal("Russia".u, b["Countries"]["RU"])
    b = UResourceBundle.open(nil, "ru")
    assert_equal("Россия".u, b["Countries"]["RU"])
  end

  # Transliteration with custom rules and with a named transliterator.
  def test_translit
    assert_equal('zees ees A tfs t'.u, "This is A test".u.translit("null".u, "a>b;b>c;c>d;d>e;e>f;i>ee;[Tt]h>z;t>\\ t".u))
    assert_equal("まつもと ゆきひろ".u, "matsumoto yukihiro".u.translit("Latin-Hiragana".u))
  end

  # Locale-aware number parsing, optionally with an explicit pattern.
  def test_parse_double
    assert_equal(456, "456".u.to_f)
    assert_equal(123.001, "123,001".u.to_f("ru"))
    assert_equal(123001.0, "123,001".u.to_f("en"))
    assert_equal(123001, "Got 123,001".u.to_f("en", "Got ###,###".u))
    assert_equal(123.45, "١٢٣٫٤٥".u.to_f("ar_YE"))
  end

  # The single-quoted literal keeps the backslashes for unescape to process.
  def test_unescape
    a = '\u0054\u0068\u0069\u0073\u0020\u0069\u0073\u0020\u0041\u0020\u0074\u0065\u0073\u0074\n!'
    assert_equal("This is A test\n!", a.u.unescape.to_s)
  end

  # Conversion between code-unit ranges and code-point ranges on a string
  # whose length (7) differs from its point_count (4).
  def test_ranges
    v = "\\x{1D7D9}\\x{1d7da}\\x{1d7db}!".u.unescape
    assert_equal(7, v.length)
    assert_equal(4, v.point_count)
    assert_equal(0..0, v.conv_unit_range(0..1))
    assert_equal(0..1, v.conv_unit_range(0..2))
    assert_equal(0..3, v.conv_unit_range(0..-1))
    assert_equal(2..3, v.conv_unit_range(-3..-1))

    assert_equal(0..3, v.conv_point_range(0..1))
    assert_equal(0..5, v.conv_point_range(0..2))
    assert_equal(0..6, v.conv_point_range(0..-1))
    assert_equal(4..6, v.conv_point_range(-2..-1))
  end

  # char_span is compared with [] on a decomposed (norm_D) string.
  def test_char_span
    v = "ЁРШ ТВОЙУ МЕДДЬ".u.norm_D
    assert_equal("ЁРШ".u, v.char_span(0, 3))
    assert_equal('\u0415\u0308\u0420'.u.unescape, v[0, 3])
    assert_equal(v.norm_C, v.char_span(0, -1))
  end

  # Regression test: a replacement much longer than the match must not raise.
  def test_sentinel_bug
    assert_nothing_raised { ("test" * 10).u.gsub(/e/.U, 'abracadabra'.u) }
  end

  # Mutating the receiver from inside a gsub!/scan/each_char block must raise
  # RuntimeError and leave the string unchanged. A fresh clone is taken after
  # every case so that each one starts from an independent object.
  def test_string_change
    a = " 123456789Aa ".u
    assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.downcase!; m } }
    assert_equal(" 123456789Aa ".u, a)
    a = a.clone
    assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.upcase!; m } }
    assert_equal(" 123456789Aa ".u, a)
    a = a.clone
    assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.lstrip!; m } }
    assert_equal(" 123456789Aa ".u, a)
    a = a.clone
    assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.rstrip!; m } }
    assert_equal(" 123456789Aa ".u, a)
    a = a.clone
    assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.strip!; m } }
    assert_equal(" 123456789Aa ".u, a)
    a = a.clone
    assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.slice!(/Aa/.U); m } }
    assert_equal(" 123456789Aa ".u, a)
    a = a.clone
    assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.slice!("Aa".u); m } }
    assert_equal(" 123456789Aa ".u, a)
    a = a.clone
    assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.slice!(3, 5); m } }
    assert_equal(" 123456789Aa ".u, a)
    a = a.clone
    assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.sub!(/Aa/.U, "BUG!".u); m } }
    assert_equal(" 123456789Aa ".u, a)
    a = a.clone
    assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.gsub!(/Aa/.U, "BUG!".u); m } }
    assert_equal(" 123456789Aa ".u, a)
    a = a.clone
    assert_raise(RuntimeError) { a.scan(/\d/.U) { |m| a.gsub!(/Aa/.U, "BUG!".u); m } }
    assert_equal(" 123456789Aa ".u, a)
    a = a.clone
    assert_raise(RuntimeError) { a.each_char { |_m| a[2] = "BUG!".u } }
    assert_equal(" 123456789Aa ".u, a)
    a = a.clone
  end

  # Converts bytes given as "cp-1251" to a UString and back out as UTF-8.
  def test_1_to_u_to_s
    assert_equal(
      "\320\275\320\270\321\207\320\265\320\263\320\276 \320\275\320\265 \321\200\320\260\320\261\320\276\321\202\320\260\320\265\321\202 :( ?",
      "\355\350\367\345\343\356 \355\345 \360\340\341\356\362\340\345\362 :( ?".to_u("cp-1251").to_s("utf-8"))
  end

end
data/tools/doc.sh ADDED
@@ -0,0 +1,2 @@
#!/bin/sh
# Generate the API documentation with RDoc from README, the docs/ guides and
# the C sources, using ./tools/km.rb as the template (-T) and utf-8 as the
# output charset (-c).
# NOTE(review): all paths are relative, so this presumably has to be run from
# the project root -- confirm.
rdoc -T ./tools/km.rb -c utf-8 README docs/FORMATTING docs/UNICODE_REGEXPS *.c