icu4r 0.1.3.2006.01.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,381 @@
1
+ require './icu4r'
2
+ require 'test/unit'
3
# Unit tests for the icu4r extension (loaded by the `require './icu4r'` at
# the top of this file).
#
# Helpers used throughout, all supplied by icu4r:
# * `"text".u`, `u("text")`, `String#to_u`  - build a UString
# * `ure("pattern")`, `/pattern/.U`          - build a unicode regexp
#   (presumably a URegexp, like `URegexp.new` - confirm against icu4r)
#
# All `assert_equal` calls use the Test::Unit order (expected, actual) so
# that failure messages label the two values correctly.
class UnicodeStringTest < Test::Unit::TestCase

  # Two UStrings built from the same UTF-8 bytes compare equal.
  def test_string
    a = u("абвг", "utf8")
    b = u("абвг", "utf8")
    assert_equal(a, b)
  end

  # casecmp ignores case and returns 0 / -1 / 1.
  def test_casecmp
    assert_equal(0, u("Сцуко").casecmp("сЦуКо".u))
    assert_equal(-1, u("Сцук").casecmp("сЦуКо".u))
    assert_equal(1, u("Сцуко").casecmp("сЦуК".u))
  end

  # =~ / !~ between UString and unicode regexps, in both operand orders.
  def test_match
    assert_match(ure("абвг"), u("абвг"))
    assert("аавг".u !~ ure("^$"))
    assert("авб\n".u !~ ure("^$"))
    assert("абв".u !~ ure("^г*$"))
    assert_equal("".u, ("абв".u =~ ure("г*$"))[0])
    assert("".u =~ ure("^$"))
    assert("абвабв".u =~ ure(".*а"))
    assert("абвабв".u =~ ure(".*в"))
    assert("абвабв".u =~ ure(".*?а"))
    assert("абвабв".u =~ ure(".*?в"))
    assert(ure("(.|\n)*?\n(б|\n)") =~ u("а\nб\n\n"))
  end

  # sub returns a new string (with $1 group references expanded) and leaves
  # the receiver alone; sub! replaces only the first match in place.
  def test_sub
    x = "a.gif".u
    assert_equal("gif".u, x.sub(ure(".*\\.([^\\.]+)$"), "$1".u))
    assert_equal("b.gif".u, x.sub(ure(".*\\.([^\\.]+)$"), "b.$1".u))
    assert_equal("a.gif".u, x) # receiver untouched by non-bang sub
    x.sub!(/gif/.U, ''.u)
    assert_equal("a.".u, x)
    x = "1234561234".u
    x.sub!(/123/.U, "".u)
    assert_equal("4561234".u, x)
  end

  # upcase/downcase return a copy; the bang variants modify the receiver.
  def test_case_fold
    assert_equal("А".u, "а".u.upcase!)
    assert_equal("а".u, "А".u.downcase!)

    s = "аБв".u
    s.upcase
    assert_equal("аБв".u, s)
    s.upcase!
    assert_equal("АБВ".u, s)

    s = "аБв".u
    s.downcase
    assert_equal("аБв".u, s)
    s.downcase!
    assert_equal("абв".u, s)
  end

  # index / rindex with string and regexp needles, including negative
  # start offsets; a missing needle yields nil.
  def test_index
    assert_equal(1, "hello".u.rindex('e'.u))
    assert_equal(3, "hello".u.rindex('l'.u))
    assert_nil("hello".u.rindex('a'.u))
    assert_equal(1, "hello".u.index('e'.u))
    assert_equal(3, "hello".u.index('lo'.u))
    assert_nil("hello".u.index('a'.u))
    assert_equal(4, "hello".u.index(ure('[aeiou]'), -3))
    assert_equal(1, "hello".u.rindex(ure('[aeiou]'), -2))
  end

  # insert with positive and negative positions.
  def test_insert
    assert_equal("Xabcd".u, "abcd".u.insert(0, 'X'.u))
    assert_equal("abcXd".u, "abcd".u.insert(3, 'X'.u))
    assert_equal("abcdX".u, "abcd".u.insert(4, 'X'.u))
    assert_equal("abXcd".u, "abcd".u.insert(-3, 'X'.u))
    assert_equal("abcdX".u, "abcd".u.insert(-1, 'X'.u))
  end

  # include? is a substring test (order of characters matters).
  def test_include
    assert("hello".u.include?("lo".u))
    assert(!"hello".u.include?("ol".u))
  end

  # UString.new copies its argument; replace and clone keep equality.
  def test_init
    assert_equal("нах!".u, UString.new("нах!".u))
    a = "ГНУ!".u
    a.replace("ФИГНУ!".u)
    assert_equal("ФИГНУ!".u, a)
    assert_equal(a, a.clone)
  end

  # UString#[] with index, (start, length), range, regexp (+ group) and
  # substring arguments.
  def test_aref
    a = "hello there".u
    assert_equal('e'.u, a[1])                          #=> 'e'
    assert_equal('ell'.u, a[1, 3])                     #=> "ell"
    assert_equal('ell'.u, a[1..3])                     #=> "ell"
    assert_equal('er'.u, a[-3, 2])                     #=> "er"
    assert_equal('her'.u, a[-4..-2])                   #=> "her"
    assert_nil(a[12..-1])                              #=> nil
    assert_equal(''.u, a[-2..-4])                      #=> ""
    assert_equal('ell'.u, a[ure('[aeiou](.)\1')])      #=> "ell"
    assert_equal('ell'.u, a[ure('[aeiou](.)\1'), 0])   #=> "ell"
    assert_equal('l'.u, a[ure('[aeiou](l)\1'), 1])     #=> "l"
    assert_nil(a[ure('[aeiou](.)$1'), 2])              #=> nil
    assert_equal('lo'.u, a["lo".u])                    #=> "lo"
    assert_nil(a["bye".u])                             #=> nil
  end

  # slice! removes and returns the selected part; the string shrinks after
  # every call, so the statement order below matters. Also covers []=.
  def test_slice_bang
    string = "this is a string".u
    assert_equal('i'.u, string.slice!(2))
    assert_equal(" is ".u, string.slice!(3..6))
    assert_equal("sa st".u, string.slice!(ure("s.*t")))
    assert_equal("r".u, string.slice!("r".u))
    assert_equal("thing".u, string)

    a = "test".u
    a[0] = "BEA".u
    assert_equal("BEAest".u, a)
  end

  # gsub with a replacement string ($1 references) and with a block that
  # receives the match; gsub! may use the receiver itself as replacement.
  def test_gsub
    assert_equal("h*ll*".u, "hello".u.gsub(ure("[aeiou]"), '*'.u))
    assert_equal("h<e>ll<o>".u, "hello".u.gsub(ure("([aeiou])"), '<$1>'.u))
    i = 0
    assert_equal("12345".u, "hello".u.gsub(ure(".")) { |s| i += 1; i.to_s })
    assert_equal("214365".u, "123456".u.gsub(ure("(.)(.)")) { |s| s[2] + s[1] })
    a = "test".u
    a.gsub!(/t/.U, a)
    assert_equal("testestest".u, a)
  end

  # Unicode regexps can be used as `when` clauses (=== against a UString).
  def test_ure_case_eq
    a = "HELLO".u
    v = case a
        when ure("^[a-z]*$") then "Lower case"
        when ure("^[A-Z]*$") then "Upper case"
        else "Mixed case"
        end
    assert_equal('Upper case', v)
  end

  # UString::strcoll("ÆSS".u, "AEß".u, "de", 0)

  # empty? on non-empty, empty, cleared and stripped UStrings.
  def test_empty
    assert(!"hello".u.empty?)
    # Was `"".empty?`, which only exercised Ruby's own String#empty?.
    assert("".u.empty?)
    assert("test".u.clear.empty?)
    assert(" \t\n".u.strip.empty?)
  end

  # clear empties the receiver in place.
  def test_clear
    a = "test".u
    a.clear
    assert_equal(0, a.length)
  end

  # length for mixed ASCII/Cyrillic, empty and plain ASCII strings.
  def test_length
    assert_equal(10, "12345АБВГД".u.length)
    assert_equal(0, "".u.length)
    assert_equal(3, "abc".u.length)
  end

  # replace swaps the content of the receiver.
  def test_replace
    s = "hello".u
    s.replace("world".u)
    assert_equal("world".u, s)
  end

  # <=> ordering: longer/shorter prefix, equality, and lower vs upper case.
  def test_cmp
    assert_equal(1, "абвгде".u <=> "абвгд".u)
    assert_equal(0, "абвгде".u <=> "абвгде".u)
    assert_equal(-1, "абвгде".u <=> "абвгдеж".u)
    assert_equal(-1, "абвгде".u <=> "АБВГДЕ".u) # UCA
  end

  # + concatenates.
  def test_plus
    assert_equal("сложение".u, "сло".u + "жение".u)
  end

  # * repeats.
  def test_times
    assert_equal("ААААА".u, "А".u * 5)
  end

  # << and concat append and return the resulting string.
  def test_concat
    assert_equal("сложение".u, "сло".u << "жение".u)
    assert_equal("сложение".u, "сло".u.concat("жение".u))
    a = "сло".u
    a << "жение".u
    assert_equal("сложение".u, a)
  end

  # search returns an array of match ranges; options exercised here:
  # :ignore_case, :whole_words, :limit, :locale, :ignore_case_accents,
  # :canonical.
  def test_search
    a = "A quick brown fox jumped over the lazy fox dancing foxtrote".u
    assert_equal([14..16, 39..41, 51..53], a.search("fox".u))
    assert_equal([], a.search("FoX".u))
    assert_equal([14..16, 39..41, 51..53], a.search("FoX".u, :ignore_case => true))
    assert_equal([14..16, 39..41], a.search("FoX".u, :ignore_case => true, :whole_words => true))
    assert_equal([14..16], a.search("FoX".u, :ignore_case => true, :whole_words => true, :limit => 1))

    b = "Iñtërnâtiônàlizætiøn îs cọmpłèx".u.upcase
    assert_equal("IÑTËRNÂTIÔNÀLIZÆTIØN ÎS CỌMPŁÈX".u, b)
    assert_equal([], b.search("nâtiôn".u, :locale => "en"))
    assert_equal([], b.search("nation".u))
    assert_equal([5..10], b.search("nation".u, :locale => "en", :ignore_case_accents => true))
    assert_equal([5..10], b.search("nâtiôn".u, :locale => "en", :ignore_case => true))
    assert_equal([], b.search("zaeti".u, :locale => "en"))
    assert_equal([], b.search("zaeti".u, :locale => "en", :ignore_case => true))
    assert_equal([14..17], b.search("zaeti".u, :locale => "en", :ignore_case_accents => true))
    assert_equal([0..7], "İSTANBUL".u.search("istanbul".u, :locale => 'tr', :ignore_case => true))
    assert_equal([0..4], "ёжий".u.norm_D.search("ЕЖИЙ".u, :locale => 'ru', :canonical => true, :ignore_case_accents => true))
  end

  # A '$' that is not followed by a group number stays literal in the
  # replacement string.
  def test_dollar_sign_regexp
    assert_equal("te$et".u, "test".u.gsub(/s/.U, '$e'.u))
  end

  # Round trip between an array of code points and a UString.
  def test_codepoints
    # 0x74, 0x65, 0x73 are 't', 'e', 's'. The original used ?t/?e/?s, which
    # are Integers only on Ruby 1.8; on 1.9+ they are Strings and break
    # pack("U*").
    a = [0x01234, 0x0434, 0x1D7D9, 0x74, 0x65, 0x73]
    b = a.pack("U*").u
    assert_equal(a, b.codepoints)
    assert_equal(b, a.to_u)
  end

  # chars of an upcased, norm_D-normalized string equal the norm_C form of
  # each expected character.
  def test_chars
    expected = ["I", "Ñ", "T", "Ë", "R", "N", "Â", "T", "I", "Ô", "N", "À", "L", "I", "Z", "Æ", "T", "I", "Ø", "N"]
    expected = expected.map { |s| s.to_u.norm_C }
    assert_equal(expected, "Iñtërnâtiônàlizætiøn".u.upcase.norm_D.chars)
  end

  # fmt: positional arguments, number, currency and date patterns.
  def test_fmt
    assert_equal("b a".u, "{1} {0}".u.fmt("en", "a".u, "b".u))
    assert_equal("12,345.56".u, "{0, number}".u.fmt("en", 12345.56))
    assert_equal("$12,345.56".u, "{0, number, currency}".u.fmt("en_US", 12345.56))
    assert_equal("20:15:01 13/01/2006".u, "{0,date,HH:mm:ss dd/MM/yyyy}".u.fmt("en", Time.local(2006, "jan", 13, 20, 15, 1)))
  end

  # norm_C / norm_D / norm_FCD / norm_KC.
  # NOTE(review): the four expected literals look identical; presumably
  # they differ only in the normalization form of their bytes - verify
  # that the bytes of this file are preserved when editing.
  def test_norm
    v = "Iñtërnâtiônàlizætiøn".u
    assert_equal("Iñtërnâtiônàlizætiøn".u, v.norm_C)
    assert_equal("Iñtërnâtiônàlizætiøn".u, v.norm_D)
    assert_equal("Iñtërnâtiônàlizætiøn".u, v.norm_D.norm_FCD)
    assert_equal("Iñtërnâtiônàlizætiøn".u, v.norm_D.norm_KC)
  end

  # parse_date turns localized text into a Time (receiver is the pattern).
  def test_parse_date
    assert_equal(Time.local(2006, "jan", 13, 20, 15, 1),
                 "HH:mm:ss E dd/MM/yyyy".u.parse_date("en", "20:15:01 Fri 13/01/2006".u))
    assert_equal(Time.local(2006, "jan", 18, 0, 0),
                 "d MMMM, yyyy".u.parse_date("ar_SY", "١٨ كانون الثاني, ٢٠٠٦".u))
  end

  # scan without groups, with one group, and with several groups.
  def test_scan
    a = "cruel world".u
    assert_equal(["cruel".u, "world".u], a.scan(/\w+/.U))
    assert_equal(["cru".u, "el ".u, "wor".u], a.scan(/.../.U))
    assert_equal(["cru".u, "el ".u, "wor".u], a.scan(/(...)/.U))
    assert_equal([["cr".u, "ue".u], ["l ".u, "wo".u]], a.scan(/(..)(..)/.U))
  end

  # URegexp#split with no limit (nil) and with a limit of 2.
  def test_split
    re = URegexp.new("[,:/]".u)
    assert_equal(["split test".u, "west".u, "best".u, "east".u], re.split("split test,west:best/east".u, nil))
    assert_equal(["split test".u, "west:best/east".u], re.split("split test,west:best/east".u, 2))
  end

  # UString.strcoll with optional locale and numeric strength arguments.
  def test_strcoll
    assert_equal(0, UString::strcoll("a".u, "a".u))
    assert_equal(0, UString::strcoll("ой её".u, "ОИ ЕЕ".u, "ru", 0))
    assert_equal(0, UString::strcoll("ой её".u, "ОЙ ЕЁ".u, "ru", 1))
    assert_equal(-1, UString::strcoll("y".u, "k".u, "lv"))
    assert_equal(1, UString::strcoll("я".u, "а".u))
    assert_equal(1, UString::strcoll("я".u, "А".u, "ru"))
    assert_equal(0, UString::strcoll("İSTANBUL".u, "istanbul".u, "tr", 0))
  end

  # gsub with a block, reusing the same regexp for a nested gsub inside
  # the block of the outer one.
  def test_gsub_block
    a = "АБРАКАДАБРА".u
    r = URegexp.new("(.)(.)(А)".u, URegexp::IGNORECASE)
    b = a.gsub(r) do |m|
      assert_equal("ава".u, "бравада".u.gsub(r) { |v| v[3] })
      m[3] + m[2] + m[1]
    end
    assert_equal("ААРБКАДААРБ".u, b)
  end

  # The match object exposes both the matched text and its range.
  def test_match_range
    t = "test\ntext".u
    m = (t =~ /^.+$/m.U)
    assert_equal('test'.u, m[0])
    assert_equal(0..3, m.range(0))
  end

  # UResourceBundle lookup of a localized country name.
  def test_resbundle
    b = UResourceBundle.open(nil, "en")
    assert_equal("Russia".u, b["Countries"]["RU"])
    b = UResourceBundle.open(nil, "ru")
    assert_equal("Россия".u, b["Countries"]["RU"])
  end

  # translit with an ad-hoc rule set and with a named transliterator.
  def test_translit
    assert_equal('zees ees A tfs t'.u, "This is A test".u.translit("null".u, "a>b;b>c;c>d;d>e;e>f;i>ee;[Tt]h>z;t>\\ t".u))
    assert_equal("まつもと ゆきひろ".u, "matsumoto yukihiro".u.translit("Latin-Hiragana".u))
  end

  # to_f with optional locale and pattern arguments.
  def test_parse_double
    assert_equal(456, "456".u.to_f)
    assert_equal(123.001, "123,001".u.to_f("ru"))
    assert_equal(123001.0, "123,001".u.to_f("en"))
    assert_equal(123001, "Got 123,001".u.to_f("en", "Got ###,###".u))
    assert_equal(123.45, "١٢٣٫٤٥".u.to_f("ar_YE"))
  end

  # unescape expands \uXXXX and \n escapes (the literal is single-quoted,
  # so Ruby itself leaves the backslashes alone).
  def test_unescape
    a = '\u0054\u0068\u0069\u0073\u0020\u0069\u0073\u0020\u0041\u0020\u0074\u0065\u0073\u0074\n!'
    assert_equal("This is A test\n!", a.u.unescape.to_s)
  end

  # Conversion between code-unit ranges and code-point ranges for a string
  # whose length (7) differs from its point_count (4).
  def test_ranges
    v = "\\x{1D7D9}\\x{1d7da}\\x{1d7db}!".u.unescape
    assert_equal(7, v.length)
    assert_equal(4, v.point_count)
    assert_equal(0..0, v.conv_unit_range(0..1))
    assert_equal(0..1, v.conv_unit_range(0..2))
    assert_equal(0..3, v.conv_unit_range(0..-1))
    assert_equal(2..3, v.conv_unit_range(-3..-1))

    assert_equal(0..3, v.conv_point_range(0..1))
    assert_equal(0..5, v.conv_point_range(0..2))
    assert_equal(0..6, v.conv_point_range(0..-1))
    assert_equal(4..6, v.conv_point_range(-2..-1))
  end

  # char_span counts whole characters of a decomposed string, whereas
  # [start, len] counts individual units (base letter + combining mark).
  def test_char_span
    v = "ЁРШ ТВОЙУ МЕДДЬ".u.norm_D
    assert_equal("ЁРШ".u, v.char_span(0, 3))
    assert_equal('\u0415\u0308\u0420'.u.unescape, v[0, 3])
    assert_equal(v.norm_C, v.char_span(0, -1))
  end

  # Regression test: no assertion, the call just has to complete when the
  # replacement is longer than the matched text.
  def test_sentinel_bug
    ("test" * 10).u.gsub(/e/.U, 'abracadabra'.u)
  end

  # Modifying a string from inside the block of an iteration over that
  # same string must raise RuntimeError and leave the string unchanged.
  def test_string_change
    original = " 123456789Aa ".u

    # [label, in-place modification attempted from inside the gsub! block]
    mutations = [
      ["downcase!",      lambda { |s| s.downcase! }],
      ["upcase!",        lambda { |s| s.upcase! }],
      ["lstrip!",        lambda { |s| s.lstrip! }],
      ["rstrip!",        lambda { |s| s.rstrip! }],
      ["strip!",         lambda { |s| s.strip! }],
      ["slice!(regexp)", lambda { |s| s.slice!(/Aa/.U) }],
      ["slice!(string)", lambda { |s| s.slice!("Aa".u) }],
      ["slice!(3, 5)",   lambda { |s| s.slice!(3, 5) }],
      ["sub!",           lambda { |s| s.sub!(/Aa/.U, "BUG!".u) }],
      ["gsub!",          lambda { |s| s.gsub!(/Aa/.U, "BUG!".u) }]
    ]

    mutations.each do |label, mutate|
      # Fresh copy and fresh regexp for every case, so that no state left
      # over from the previous failed iteration can leak into this one.
      a = original.clone
      assert_raise(RuntimeError, "#{label} inside gsub!") do
        a.gsub!(/\d/.U) { |m| mutate.call(a); m }
      end
      assert_equal(original, a, "#{label} inside gsub!")
    end

    a = original.clone
    assert_raise(RuntimeError) { a.scan(/\d/.U) { |m| a.gsub!(/Aa/.U, "BUG!".u); m } }
    assert_equal(original, a)

    a = original.clone
    assert_raise(RuntimeError) { a.each_char { |_ch| a[2] = "BUG!".u } }
    assert_equal(original, a)
  end

  # Converts the octal-escaped bytes below from "cp-1251" to a UString and
  # back to a UTF-8 encoded Ruby String.
  def test_1_to_u_to_s
    assert_equal(
      "\320\275\320\270\321\207\320\265\320\263\320\276 \320\275\320\265 \321\200\320\260\320\261\320\276\321\202\320\260\320\265\321\202 :( ?",
      "\355\350\367\345\343\356 \355\345 \360\340\341\356\362\340\345\362 :( ?".to_u("cp-1251").to_s("utf-8"))
  end

end
data/tools/doc.sh ADDED
@@ -0,0 +1,2 @@
1
#!/bin/sh
# Generate the package documentation with rdoc from README, the two docs/
# pages and every C source in the current directory, using ./tools/km.rb as
# the template and utf-8 as the output charset.
# All paths are relative, so this is presumably meant to be run from the
# package root - confirm.
rdoc --template ./tools/km.rb --charset utf-8 README docs/FORMATTING docs/UNICODE_REGEXPS *.c