icu4r_19 1.0

Sign up to get free protection for your applications and to get access to all the features.
data/icu4r.c ADDED
@@ -0,0 +1,18 @@
1
/*
 * Component initializers. They are only declared here; their definitions
 * are not part of this file.
 */
extern void initialize_ustring(void);
extern void initialize_calendar(void);
extern void initialize_uregexp(void);
extern void initialize_ucore_ext(void);
extern void initialize_ubundle(void);
extern void initialize_converter(void);
extern void initialize_collator(void);

/*
 * Runs every component initializer once, in the fixed order given by the
 * table below.
 * NOTE(review): the Init_ prefix suggests this is the entry point Ruby calls
 * when the extension is loaded -- confirm against the build setup.
 */
void Init_icu4r(void)
{
    /* Order is significant only in that it mirrors the original sequence. */
    static void (*const setup_steps[])(void) = {
        initialize_ustring,
        initialize_uregexp,
        initialize_ucore_ext,
        initialize_ubundle,
        initialize_calendar,
        initialize_converter,
        initialize_collator
    };
    const unsigned step_count = sizeof setup_steps / sizeof setup_steps[0];
    unsigned step;

    for (step = 0; step < step_count; step++) {
        setup_steps[step]();
    }
}
@@ -0,0 +1,45 @@
1
+ #include "ruby.h"
2
+ #include <unicode/utypes.h>
3
+ #include <unicode/ustring.h>
4
+ #include <unicode/ustdio.h>
5
+ #include <unicode/uchar.h>
6
+ #include <unicode/uclean.h>
7
+ #include <unicode/uregex.h>
8
+ #include <unicode/unorm.h>
9
+ #include <unicode/ubrk.h>
10
+ #include <unicode/ucnv.h>
11
+ #include <unicode/uset.h>
12
+ #include <unicode/uenum.h>
13
+ #include <unicode/utrans.h>
14
+ #include <unicode/ucol.h>
15
+ #include <unicode/usearch.h>
16
+ #include <unicode/ures.h>
17
+ #include <unicode/unum.h>
18
+
19
+ #ifdef HAVE_UNISTD_H
20
+ #include <unistd.h>
21
+ #endif
22
/* C-side payload of a string object; USTRING(obj) casts DATA_PTR(obj) to this type. */
+ typedef struct {
23
+ long len; /* presumably the number of UChar units currently in use -- TODO confirm */
24
+ long capa; /* presumably the allocated capacity in UChar units; ICU_RESIZE does not update it */
25
+ UChar *ptr; /* UChar buffer; ICU_RESIZE reallocates it to (capacity)+1 units */
26
+ unsigned char busy; /* NOTE(review): looks like an "in use" flag guarding against modification during iteration -- confirm */
27
+ } ICUString ;
28
/*
 * Cast the data pointer of a Ruby object to the matching C payload type.
 * No type check is performed here.
 */
#define USTRING(obj) ((ICUString *)DATA_PTR(obj))
#define UREGEX(obj) ((ICURegexp *)DATA_PTR(obj))

/*
 * Field accessors for the ICUString behind `str`. The expansions are fully
 * parenthesized so they compose safely inside larger expressions; they are
 * still lvalues and can be assigned to.
 */
#define ICU_PTR(str) (USTRING(str)->ptr)
#define ICU_LEN(str) (USTRING(str)->len)
#define ICU_CAPA(str) (USTRING(str)->capa)

/*
 * Reallocate the buffer of `str` to hold (capacity)+1 UChar units.
 * Only the pointer is updated; the `capa` field is NOT touched, so callers
 * must keep it in sync themselves.
 * The expansion deliberately keeps its historical trailing `;` because
 * existing call sites may rely on it. Do not use this macro as the unbraced
 * body of an if/else.
 */
#define ICU_RESIZE(str,capacity) REALLOC_N(ICU_PTR(str), UChar, (capacity)+1);
34
+
35
/* C-side payload of a regexp object; UREGEX(obj) casts DATA_PTR(obj) to this type. */
+ typedef struct {
36
+ URegularExpression *pattern; /* ICU regular-expression handle (type from <unicode/uregex.h>) */
37
+ int options; /* presumably the flags the pattern was created with -- TODO confirm */
38
+ } ICURegexp;
39
+
40
+
41
/*
 * Raise TypeError unless CLASS_OF(obj) is `klass`.
 *
 * Both arguments are evaluated more than once, so pass side-effect-free
 * expressions. The body is wrapped in do/while(0) so that the inner `if`
 * can never capture an `else` written by the caller. The trailing `;` of
 * the original definition is kept on purpose: existing call sites may omit
 * their own semicolon.
 */
#define Check_Class(obj, klass) \
    do { \
        if (CLASS_OF(obj) != (klass)) { \
            rb_raise(rb_eTypeError, "Wrong type: expected %s, got %s", \
                     rb_class2name(klass), rb_class2name(rb_obj_class(obj))); \
        } \
    } while (0);
42
+
43
+
44
/*
 * Raise RuntimeError carrying the ICU error name when `status` is a failure
 * code; do nothing otherwise.
 *
 * The name is passed through "%s" rather than used as the format string
 * itself, because rb_raise() interprets its second argument printf-style.
 * `status` is evaluated twice. The body is wrapped in do/while(0) so the
 * inner `if` cannot capture a caller's `else`; the trailing `;` of the
 * original definition is kept because existing call sites may omit their own.
 */
#define ICU_RAISE(status) \
    do { \
        if (U_FAILURE(status)) { \
            rb_raise(rb_eRuntimeError, "%s", u_errorName(status)); \
        } \
    } while (0);
45
+
File without changes
@@ -0,0 +1,23 @@
1
+ require 'icu4r'
2
+ res = {}
3
+ src = <<-EOT
4
+ 外国語の勉強と教え
5
+ Изучение и обучение иностранных языков
6
+ Enseñanza y estudio de idiomas
7
+ 'læŋɡwidʒ 'lɘr:niŋ ænd 'ti:ʃiŋ
8
+ ‭‫ללמוד וללמד את השֵפה
9
+ L'enseignement et l'étude des langues
10
+ Γλωσσική Εκμὰθηση και Διδασκαλία
11
+ เรียนและสอนภาษา
12
+ EOT
13
+ src = src.u
14
# For every break kind and every locale: collect the pieces yielded by
# src.each_<kind>(locale), store them "|"-joined in res[kind][locale],
# and print them under a banner line.
%w[line_break char sentence word].each do |kind|
  per_locale = (res[kind] = {})
  %w[ja en th].each do |locale|
    pieces = []
    src.send("each_#{kind}".to_sym, locale) { |piece| pieces.push(piece) }
    per_locale[locale] = pieces.join("|")
    puts "---------#{kind}-------#{locale}---------"
    puts pieces.join("|")
  end
end
@@ -0,0 +1,16 @@
1
+ require 'icu4r'
2
# Prints one tab-separated row per locale returned by UString.list_locales:
# the locale id, its country/language names looked up in the "en" bundle,
# the same names looked up in the locale's own bundle, a formatted
# date/currency sample, and the bundle's "ExemplarCharacters" entry.
english = UResourceBundle.open(nil, "en")
now = Time.now
UString.list_locales.each do |locale|
  bundle = UResourceBundle.open(nil, locale)
  lang, ctry, var = locale.split('_', 3)
  # When the id has three parts, the third one is used for the country lookup.
  ctry = var || ctry
  in_english = "(" + english["Countries"][ctry].to_s + " : " + english["Languages"][lang].to_s + ")"
  in_native = "(" + bundle["Countries"][ctry].to_s + " : " + bundle["Languages"][lang].to_s + ")"
  sample = "[{0,date,long}]({1,number,currency})".u.fmt(locale, now, 123.45)
  row = [locale, in_english, in_native, sample, bundle["ExemplarCharacters"]]
  puts row.join("\t")
end
@@ -0,0 +1,11 @@
1
+ require 'icu4r'
2
+ str = " abcあいうえおアイウエオアイウエオ漢字,0123スクリプト".u
3
+ puts str.inspect_names
4
+ p str=~ ure('[\p{Script=Latin}]+')
5
+ p str=~ ure('[\p{Script=Hiragana}]+')
6
+ p str=~ ure('[\p{Script=Katakana}]+')
7
+ p str=~ ure('[\p{Script=Hiragana}\p{Script=Katakana}]+')
8
+ p str=~ ure('[\p{blk=CJKUnifiedIdeographs}]+')
9
+ p str=~ ure('[\p{L}]+')
10
+ p str=~ ure('\u3042') # あ
11
+ p str.scan(ure('[\p{N}]'))
@@ -0,0 +1,4 @@
1
+ require 'icu4r'
2
+ v = UResourceBundle.open(File.expand_path("appmsg"), "ru")
3
+ puts v["icu4r_hello"]
4
+ puts v["icu4r_classes"]
@@ -0,0 +1,4 @@
1
+ genrb root.txt
2
+ genrb -e UTF8 ru.txt
3
+ mkdir appmsg
4
+ mv *.res appmsg
@@ -0,0 +1,10 @@
1
+ root {
2
+ icu4r_hello { "Hello from ICU4R" }
3
+ icu4r_classes:array {
4
+ "UString",
5
+ "URegexp",
6
+ "UMatch",
7
+ "UResourceBundle"
8
+ }
9
+ ustring { "{0} - class to work with UTF16-encoded strings" }
10
+ }
@@ -0,0 +1,4 @@
1
+ ru {
2
+ icu4r_hello { "Привет от ICU4R" }
3
+ ustring { "{0} Класс для работы со строками в кодировке UTF16" }
4
+ }
@@ -0,0 +1,123 @@
1
+ require './icu4r'
2
+ require 'test/unit'
3
+ # these tests are ICU 3.4 dependent
4
+ class UCalendarTest < Test::Unit::TestCase
5
+
6
+ def test_time_zones
7
+ v = UCalendar.time_zones
8
+ assert_kind_of(Array, v)
9
+ assert_kind_of(UString, v[0])
10
+ assert(v.include?("Europe/Kiev".u))
11
+ end
12
+
13
+ def test_default
14
+ v = UCalendar.default_tz
15
+ UCalendar.default_tz ="Europe/Paris".u
16
+ assert_equal( "Europe/Paris".u, UCalendar.default_tz)
17
+ c = UCalendar.new
18
+ assert_equal( 3_600_000, c[:zone_offset])
19
+ # assert_equal( "GMT+01:00".u, c.time_zone("root")) # this should work also
20
+ end
21
+
22
+ def test_dst
23
+ assert_equal(UCalendar.dst_savings("America/Detroit".u), 3600000)
24
+ assert_equal(UCalendar.dst_savings("Australia/Lord_Howe".u), 1800000)
25
+ end
26
+
27
+ def test_tz_for_country
28
+ zones = %w{Europe/Kiev Europe/Simferopol Europe/Uzhgorod Europe/Zaporozhye}.collect {|s| s.to_u}
29
+ assert_equal(zones, UCalendar.tz_for_country("UA"))
30
+ end
31
+
32
+ def test_time_now
33
+ assert_equal(Time.now.to_i/100, UCalendar.now.to_i/100000)
34
+ end
35
+
36
+ def test_in_daylight
37
+ t = UCalendar.new
38
+ t.set_date(2006, 8, 22)
39
+ t.time_zone = "US/Hawaii".u
40
+ assert_equal(false, t.in_daylight_time?)
41
+ t.time_zone = "Europe/Berlin".u
42
+ assert_equal(true, t.in_daylight_time?)
43
+ end
44
+ def test_set_date
45
+ t = UCalendar.new
46
+ t.set_date(2006, 0, 22)
47
+ assert_equal(2006, t[:year])
48
+ assert_equal(0, t[:month])
49
+ assert_equal(22, t[:date])
50
+ t[:year] = 2007
51
+ t[:month] = 2
52
+ t[:date] = 23
53
+ assert_equal(2007, t[:year])
54
+ assert_equal(2, t[:month])
55
+ assert_equal(23, t[:date])
56
+
57
+ end
58
+
59
+ def test_set_date_time
60
+ t = UCalendar.new
61
+ t.set_date_time(2006, 0, 22, 11, 22, 33)
62
+ assert_equal(11, t[:hour])
63
+ assert_equal(22, t[:minute])
64
+ assert_equal(33, t[:second])
65
+ end
66
+
67
+ def test_millis
68
+ m = UCalendar.now
69
+ t = UCalendar.new
70
+ assert(m <= t.millis)
71
+ n = Time.now.to_i
72
+ t.millis = n * 1000.0
73
+ assert_equal(n*1000.0, t.millis)
74
+ end
75
+
76
+ def test_add_time
77
+ t = UCalendar.new
78
+ t.set_date_time(2006, 0, 22, 11, 22, 33)
79
+ t.add(:week_of_year, 1)
80
+ assert_equal(29, t[:date])
81
+ t.add(:hour, 48)
82
+ assert_equal(31, t[:date])
83
+ end
84
+
85
+ def test_format
86
+ t = UCalendar.new
87
+ t.set_date_time(2006, 0, 22, 11, 22, 33)
88
+ t.time_zone = "Europe/London".u
89
+ assert_equal("2006/01/22 11:22:33 GMT AD".u, t.format("yyyy/MM/dd HH:mm:ss z G".u, "en"))
90
+ end
91
+
92
+ def test_clone_and_compare
93
+ c = UCalendar.new
94
+ d = c.clone
95
+ assert(c == d)
96
+ assert(! (c < d) )
97
+ assert(! (c > d) )
98
+ assert(c.eql?(d))
99
+ c.add(:date, 1)
100
+ assert(c != d)
101
+ assert(! (c < d) )
102
+ assert( (c > d) )
103
+ assert(!c.eql?(d))
104
+ d.add(:date, 1)
105
+ assert(c.eql?(d))
106
+ d.time_zone = "Europe/Kiev".u
107
+ assert(!c.eql?(d))
108
+ assert(c == d)
109
+ end
110
+
111
+ def test_parse_date
112
+ UCalendar.default_tz="UTC".u
113
+ t1 = UCalendar.parse("HH:mm:ss E dd/MM/yyyy z".u, "en", "20:15:01 Fri 13/01/2006 GMT+00".u)
114
+ assert_equal(2006, t1[:year])
115
+ assert_equal(0, t1[:month])
116
+ assert_equal(13, t1[:date])
117
+ assert_equal(20, t1[:hour_of_day])
118
+ assert_equal(15, t1[:minute])
119
+ assert_equal(01, t1[:second])
120
+ end
121
+
122
+
123
+ end
@@ -0,0 +1,33 @@
1
+ require './icu4r'
2
+ require 'test/unit'
3
+ # these tests are ICU 3.4 dependent
4
class UCollatorTest < Test::Unit::TestCase
  # Unsorted decimal strings shared by the ordering tests.
  SAMPLES = %w(100 10 20 30 200 300)
  # Expected order with numeric collation switched on.
  ORDER_NUMERIC_ON = ["10", "20", "30", "100", "200", "300"]
  # Expected order with numeric collation switched off.
  ORDER_NUMERIC_OFF = ["10", "100", "20", "200", "30", "300"]

  # "A" sorts after "a" at the collator's initial strength and compares
  # equal once the strength is lowered to UCOL_SECONDARY.
  def test_strength
    collator = UCollator.new("root")
    assert_equal(0, collator.strcoll("a".u, "a".u))
    assert_equal(1, collator.strcoll("A".u, "a".u))
    collator.strength = UCollator::UCOL_SECONDARY
    assert_equal(0, collator.strcoll("A".u, "a".u))
  end

  # Toggling UCOL_NUMERIC_COLLATION changes the order produced by strcoll.
  def test_attrs
    collator = UCollator.new("root")
    [[UCollator::UCOL_ON, ORDER_NUMERIC_ON],
     [UCollator::UCOL_OFF, ORDER_NUMERIC_OFF]].each do |setting, expected|
      collator[UCollator::UCOL_NUMERIC_COLLATION] = setting
      ustrings = SAMPLES.map { |text| text.to_u }
      ordered = ustrings.sort { |left, right| collator.strcoll(left, right) }
      assert_equal(expected, ordered.map { |ustr| ustr.to_s })
    end
  end

  # Sorting by sort_key gives the same orders as comparing with strcoll.
  def test_sort_key
    collator = UCollator.new("root")
    [[UCollator::UCOL_ON, ORDER_NUMERIC_ON],
     [UCollator::UCOL_OFF, ORDER_NUMERIC_OFF]].each do |setting, expected|
      collator[UCollator::UCOL_NUMERIC_COLLATION] = setting
      ordered = SAMPLES.sort_by { |text| collator.sort_key(text.to_u) }
      assert_equal(expected, ordered)
    end
  end
end
@@ -0,0 +1,72 @@
1
+ require './icu4r'
2
+ require 'test/unit'
3
+ # these tests are ICU 3.4 dependent
4
+ class UConverterTest < Test::Unit::TestCase
5
+
6
+ def test_a_new_and_name
7
+ c = UConverter.new("UTF8")
8
+ assert_kind_of( UConverter, c)
9
+ assert_equal('UTF-8', c.name)
10
+ end
11
+
12
+ def test_b_list_avail
13
+ a = UConverter.list_available
14
+ assert_kind_of(Array, a)
15
+ assert(a.include?("UTF-8"))
16
+ end
17
+
18
+ def test_c_all_names
19
+ a = UConverter.all_names
20
+ assert_kind_of(Array, a)
21
+ assert(a.include?("UTF-8"))
22
+ end
23
+
24
+ def test_d_std_names
25
+ a = UConverter.std_names("koi8r", "MIME")
26
+ assert_kind_of(Array, a)
27
+ assert(a.include?("KOI8-R"))
28
+ a = UConverter.std_names("cp1251", "IANA")
29
+ assert_kind_of(Array, a)
30
+ assert(a.include?("windows-1251"))
31
+ end
32
+
33
+ def test_e_convert_class_method
34
+ a_s = "\357\360\356\342\345\360\352\340 abcd"
35
+ u_s = UConverter.convert("utf8", "cp1251", a_s)
36
+ assert_equal("проверка abcd", u_s)
37
+ r_s = UConverter.convert("cp1251", "utf8", u_s)
38
+ assert_equal(r_s, a_s)
39
+ end
40
+
41
+ def test_f_to_from_u
42
+ c = UConverter.new("cp1251")
43
+ a_s = "\357\360\356\342\345\360\352\340 abcd"
44
+ u_s = c.to_u(a_s)
45
+ assert_kind_of(UString, u_s)
46
+ r_s = c.from_u(u_s)
47
+ assert_equal(r_s, a_s)
48
+ end
49
+
50
+ def test_g_convert_instance_method
51
+ c1 = UConverter.new("EUC-JP")
52
+ c2 = UConverter.new("Cp1251")
53
+ a_s = "\247\322\247\335\247\361!"
54
+ b_s = a_s * 1000
55
+ a1 = UConverter.convert("Cp1251", "EUC-JP", b_s)
56
+ a2 = c1.convert(c2, b_s)
57
+ assert_equal(a1.size, a2.size)
58
+ assert_equal(a2.size, 4 * 1000)
59
+ assert_equal(a1, a2)
60
+ assert_equal("\341\353\377!", c1.convert(c2, a_s))
61
+ end
62
+
63
+ def test_h_subst_chars
64
+ c1 = UConverter.new("US-ASCII")
65
+ assert_kind_of(String, c1.subst_chars)
66
+ c1.subst_chars="!"
67
+ assert_equal( "I!t!rn!ti!n!liz!ti!n", c1.from_u("Iñtërnâtiônàlizætiøn".u))
68
+ c1.subst_chars=" "
69
+ assert_equal( "I t rn ti n liz ti n", c1.from_u("Iñtërnâtiônàlizætiøn".u))
70
+ end
71
+
72
+ end
@@ -0,0 +1,508 @@
1
+ require './icu4r'
2
+ require 'test/unit'
3
+ class UnicodeStringTest < Test::Unit::TestCase
4
+
5
+ def test_string
6
+ a = u("абвг", "utf8")
7
+ b = u("абвг", "utf8")
8
+ assert_equal(a,b )
9
+ end
10
+
11
+ def test_casecmp
12
+ assert_equal(0, u("Сцуко").casecmp("сЦуКо".u))
13
+ assert_equal(-1, u("Сцук").casecmp("сЦуКо".u))
14
+ assert_equal(1, u("Сцуко").casecmp("сЦуК".u))
15
+ end
16
+
17
+ def test_match
18
+ assert_match(ure("абвг"), u("абвг"))
19
+ assert("аавг".u !~ ure("^$"))
20
+ assert("авб\n".u !~ ure("^$"))
21
+ assert("абв".u !~ ure("^г*$"))
22
+ assert_equal("".u, ("абв".u =~ ure("г*$"))[0])
23
+ assert("".u =~ ure("^$"))
24
+ assert("абвабв".u =~ ure( ".*а") )
25
+ assert("абвабв".u =~ ure( ".*в") )
26
+ assert("абвабв".u =~ ure( ".*?а") )
27
+ assert("абвабв".u =~ ure( ".*?в") )
28
+ assert(ure("(.|\n)*?\n(б|\n)") =~ u("а\nб\n\n"))
29
+ end
30
+
31
+ def test_sub
32
+ x = "a.gif".u
33
+ assert_equal("gif".u, x.sub(ure(".*\\.([^\\.]+)$"), "$1".u))
34
+ assert_equal("b.gif".u, x.sub(ure(".*\\.([^\\.]+)$"), "b.$1".u))
35
+ assert_equal(x, "a.gif".u)
36
+ x.sub!(/gif/.U, ''.u)
37
+ assert_equal(x, "a.".u)
38
+ x= "1234561234".u
39
+ x.sub!(/123/.U, "".u)
40
+ assert_equal(x, "4561234".u)
41
+
42
+ end
43
+
44
+
45
+ def test_case_fold
46
+ assert_equal("А".u, "а".u.upcase!)
47
+ assert_equal("а".u, ("А".u.downcase!))
48
+
49
+ s = "аБв".u
50
+ s.upcase
51
+ assert_equal("аБв".u, s)
52
+ s.upcase!
53
+ assert_equal("АБВ".u, s)
54
+
55
+ s = "аБв".u
56
+ s.downcase
57
+ assert_equal("аБв".u, s)
58
+ s.downcase!
59
+ assert_equal("абв".u, s)
60
+ end
61
+
62
+ def test_index
63
+ assert_equal( "hello".u.rindex('e'.u), 1)
64
+ assert_equal( "hello".u.rindex('l'.u) , 3)
65
+ assert_equal( "hello".u.rindex('a'.u), nil)
66
+ assert_equal( "hello".u.index('e'.u),1)
67
+ assert_equal( "hello".u.index('lo'.u),3)
68
+ assert_equal( "hello".u.index('a'.u), nil)
69
+ assert_equal( "hello".u.index(ure('[aeiou]'), -3), 4)
70
+ assert_equal( "hello".u.rindex(ure('[aeiou]'), -2), 1)
71
+
72
+ assert_equal(1, S("hello").index(S("ell")))
73
+ assert_equal(2, S("hello").index(/ll./.U))
74
+
75
+ assert_equal(3, S("hello").index(S("l"), 3))
76
+ assert_equal(3, S("hello").index(/l./.U, 3))
77
+
78
+ assert_nil(S("hello").index(S("z"), 3))
79
+ assert_nil(S("hello").index(/z./.U, 3))
80
+
81
+ assert_nil(S("hello").index(S("z")))
82
+ assert_nil(S("hello").index(/z./.U))
83
+
84
+ end
85
+
86
+ def test_insert
87
+ assert_equal("abcd".u.insert(0, 'X'.u) , "Xabcd".u)
88
+ assert_equal("abcd".u.insert(3, 'X'.u) , "abcXd".u)
89
+ assert_equal("abcd".u.insert(4, 'X'.u) , "abcdX".u)
90
+ assert_equal("abcd".u.insert(-3, 'X'.u) , "abXcd".u)
91
+ assert_equal("abcd".u.insert(-1, 'X'.u) , "abcdX".u)
92
+ end
93
+
94
+ def test_include
95
+ assert( "hello".u.include?("lo".u))
96
+ assert(!("hello".u.include?("ol".u)))
97
+ end
98
+
99
+ def test_init
100
+ assert_equal( "нах!".u, UString.new("нах!".u))
101
+ a = "ГНУ!".u
102
+ a.replace("ФИГНУ!".u)
103
+ assert_equal(a, "ФИГНУ!".u)
104
+ assert_equal(a, a.clone)
105
+ end
106
+
107
+ def test_aref
108
+ a = "hello there".u
109
+ assert_equal('e'.u, a[1]) #=> 'e'
110
+ assert_equal('ell'.u, a[1,3]) #=> "ell"
111
+ assert_equal('ell'.u, a[1..3]) #=> "ell"
112
+ assert_equal('er'.u, a[-3,2]) #=> "er"
113
+ assert_equal('her'.u, a[-4..-2]) #=> "her"
114
+ assert_nil(a[12..-1]) #=> nil
115
+ assert_equal(''.u, a[-2..-4]) #=> ""
116
+ assert_equal('ell'.u, a[ure('[aeiou](.)\1')]) #=> "ell"
117
+ assert_equal('ell'.u, a[ure('[aeiou](.)\1'), 0]) #=> "ell"
118
+ assert_equal('l'.u, a[ure('[aeiou](l)\1'), 1]) #=> "l"
119
+ assert_nil( a[ure('[aeiou](.)$1'), 2]) #=> nil
120
+ assert_equal('lo'.u, a["lo".u]) #=> "lo"
121
+ assert_nil(a["bye".u]) #=> nil
122
+ end
123
+
124
+ def test_slice_bang
125
+ string = "this is a string".u
126
+ assert_equal(string.slice!(2) , 'i'.u)
127
+ assert_equal(string.slice!(3..6) , " is ".u)
128
+ assert_equal(string.slice!(ure("s.*t")) , "sa st".u)
129
+ assert_equal(string.slice!("r".u) , "r".u)
130
+ assert_equal(string , "thing".u)
131
+ a = "test".u
132
+ a[0] = "BEA".u
133
+ assert_equal("BEAest".u, a)
134
+ end
135
+
136
+ def test_gsub
137
+ assert_equal("hello".u.gsub(ure("[aeiou]"), '*'.u) , "h*ll*".u)
138
+ assert_equal("hello".u.gsub(ure("([aeiou])"), '<$1>'.u) , "h<e>ll<o>".u)
139
+ i = 0
140
+ assert_equal("12345".u , "hello".u.gsub(ure(".")) {|s| i+=1; i.to_s})
141
+ assert_equal("214365".u, "123456".u.gsub(ure("(.)(.)")) {|s| s[2] + s[1] })
142
+ a = "test".u
143
+ a.gsub!(/t/.U, a)
144
+ assert_equal("testestest".u, a)
145
+ end
146
+
147
+ def test_ure_case_eq
148
+ a = "HELLO".u
149
+ v = case a
150
+ when ure("^[a-z]*$"); "Lower case"
151
+ when ure("^[A-Z]*$"); "Upper case"
152
+ else; "Mixed case"
153
+ end
154
+ assert_equal('Upper case', v)
155
+ end
156
+
157
+ # UString::strcoll("ÆSS".u, "AEß".u, "de", 0)
158
+ def test_empty
159
+ assert(! "hello".u.empty?)
160
+ assert("".empty?)
161
+ assert("test".u.clear.empty?)
162
+ assert(" \t\n".u.strip.empty?)
163
+ end
164
+
165
+ def test_clear
166
+ a = "test".u
167
+ a.clear
168
+ assert_equal(0, a.length)
169
+ end
170
+
171
+ def test_length
172
+ assert_equal(10, "12345АБВГД".u.length)
173
+ assert_equal(0,"".u.length)
174
+ assert_equal(3,"abc".u.length)
175
+ end
176
+
177
+ def test_replace
178
+ s = "hello".u
179
+ s.replace("world".u)
180
+ assert_equal(s, "world".u)
181
+ end
182
+
183
+ def test_cmp
184
+ assert_equal("абвгде".u <=> "абвгд".u , 1 )
185
+ assert_equal("абвгде".u <=> "абвгде".u , 0 )
186
+ assert_equal("абвгде".u <=> "абвгдеж".u , -1 )
187
+ assert_equal("абвгде".u <=> "АБВГДЕ".u , -1 ) # UCA
188
+ end
189
+
190
+ def test_plus
191
+ assert_equal("сложение".u, "сло".u + "жение".u)
192
+ end
193
+
194
+ def test_times
195
+ assert_equal("ААААА".u, "А".u * 5)
196
+ end
197
+
198
+ def test_concat
199
+ assert_equal("сложение".u, "сло".u << "жение".u)
200
+ assert_equal("сложение".u, "сло".u.concat("жение".u))
201
+ a = "сло".u
202
+ a << "жение".u
203
+ assert_equal("сложение".u, a)
204
+ end
205
+
206
+ def test_search
207
+ a = "A quick brown fox jumped over the lazy fox dancing foxtrote".u
208
+ assert_equal(a.search("fox".u) , [14..16, 39..41, 51..53])
209
+ assert_equal(a.search("FoX".u) , [])
210
+ assert_equal(a.search("FoX".u, :ignore_case => true) , [14..16, 39..41, 51..53])
211
+ assert_equal(a.search("FoX".u, :ignore_case => true, :whole_words => true) , [14..16, 39..41])
212
+ assert_equal(a.search("FoX".u, :ignore_case => true, :whole_words => true, :limit => 1) , [14..16])
213
+
214
+ b = "Iñtërnâtiônàlizætiøn îs cọmpłèx".u.upcase
215
+ assert_equal(b, "IÑTËRNÂTIÔNÀLIZÆTIØN ÎS CỌMPŁÈX".u)
216
+ assert_equal(b.search("nâtiôn".u, :locale => "en") , [])
217
+ assert_equal(b.search("nation".u) , [])
218
+ assert_equal(b.search("nation".u, :locale => "en", :ignore_case_accents => true) , [5..10])
219
+ assert_equal(b.search("nâtiôn".u, :locale => "en", :ignore_case => true) , [5..10])
220
+ assert_equal(b.search("zaeti".u, :locale => "en" ) , [])
221
+ assert_equal(b.search("zaeti".u, :locale => "en", :ignore_case => true) , [])
222
+ assert_equal(b.search("zaeti".u, :locale => "en", :ignore_case_accents => true) , [14..17])
223
+ assert_equal("İSTANBUL".u.search("istanbul".u, :locale => 'tr', :ignore_case => true), [0..7])
224
+ assert_equal("ёжий".u.norm_D.search("ЕЖИЙ".u, :locale => 'ru', :canonical => true, :ignore_case_accents => true), [0..4])
225
+ end
226
+
227
+ def test_dollar_sign_regexp
228
+ assert_equal("te$et".u, "test".u.gsub(/s/.U, '$e'.u))
229
+ end
230
+
231
+ def test_codepoints
232
+ a=[0x01234, 0x0434, 0x1D7D9, ?t, ?e, ?s]
233
+ b=a.pack("U*").u
234
+ assert_equal(a, b.codepoints)
235
+ assert_equal(b, a.to_u)
236
+ end
237
+
238
+ def test_chars
239
+ chr = ["I", "Ñ", "T", "Ë", "R", "N", "Â", "T", "I", "Ô", "N", "À", "L", "I", "Z", "Æ", "T", "I", "Ø", "N" ]
240
+ chr = chr.collect {|s| s.to_u.norm_C}
241
+ assert_equal(chr, "Iñtërnâtiônàlizætiøn".u.upcase.norm_D.chars)
242
+
243
+ end
244
+
245
+
246
+ def test_fmt
247
+ assert_equal("b a".u, "{1} {0}".u.fmt("en", "a".u, "b".u))
248
+ assert_equal("12,345.56".u, "{0, number}".u.fmt("en", 12345.56))
249
+ assert_equal("$12,345.56".u, "{0, number, currency}".u.fmt("en_US", 12345.56))
250
+ assert_equal("20:15:01 13/01/2006".u, "{0,date,HH:mm:ss dd/MM/yyyy}".u.fmt("en", Time.local(2006,"jan",13,20,15,1)))
251
+ end
252
+
253
+ def test_norm
254
+ v="Iñtërnâtiônàlizætiøn".u
255
+ assert_equal("Iñtërnâtiônàlizætiøn".u, v.norm_C)
256
+ assert_equal("Iñtërnâtiônàlizætiøn".u, v.norm_D)
257
+ assert_equal("Iñtërnâtiônàlizætiøn".u, v.norm_D.norm_FCD)
258
+ assert_equal("Iñtërnâtiônàlizætiøn".u,v.norm_D.norm_KC)
259
+ end
260
+
261
+ def test_scan
262
+ a = "cruel world".u
263
+ assert_equal(a.scan(/\w+/.U) ,["cruel".u , "world".u ])
264
+ assert_equal(a.scan(/.../.U) ,["cru".u , "el ".u , "wor".u ])
265
+ assert_equal(a.scan(/(...)/.U) ,["cru".u , "el ".u , "wor".u ])
266
+ assert_equal(a.scan(/(..)(..)/.U) ,[["cr".u , "ue".u ], ["l ".u , "wo".u ]] )
267
+ end
268
+ def S(str)
269
+ str.to_u
270
+ end
271
+ def test_split
272
+ re = URegexp.new("[,:/]".u)
273
+ assert_equal(["split test".u , "west".u , "best".u , "east".u ], re.split("split test,west:best/east".u, nil))
274
+ assert_equal(["split test".u, "west:best/east".u], re.split("split test,west:best/east".u, 2))
275
+ assert_equal([S("a"), S("b"), S("c")], S("a b\t c").split(S("\\s+")))
276
+ assert_equal([S(" a "), S(" b "), S(" c ")], S(" a | b | c ").split(S("\\|")))
277
+ assert_equal([S("a"), S("b"), S("c")], S("aXXbXXcXX").split(/X./.U))
278
+ assert_equal([S("a|b|c")], S("a|b|c").split(S('\|'), 1))
279
+ assert_equal([S("a"), S("b|c")], S("a|b|c").split(S('\|'), 2))
280
+ assert_equal([S("a"), S("b"), S("c")], S("a|b|c").split(S('\|'), 3))
281
+ assert_equal([S("a"), S("b"), S("c")], S("a|b|c|").split(S('\|'), -1))
282
+ assert_equal([S("a"), S("b"), S("c"), S("") ], S("a|b|c||").split(S('\|'), -1))
283
+ assert_equal([S("a"), S(""), S("b"), S("c")], S("a||b|c|").split(S('\|'), -1))
284
+ end
285
+
286
+
287
+ def test_strcoll
288
+ assert_equal(0, UString::strcoll("a".u, "a".u))
289
+ assert_equal(-1, UString::strcoll("y".u, "k".u, "lv"))
290
+ assert_equal(1, UString::strcoll("я".u, "а".u))
291
+ assert_equal(1, UString::strcoll("я".u, "А".u, "ru"))
292
+ assert_equal(0, UString::strcoll("İSTANBUL".u, "istanbul".u, "tr", 0))
293
+ assert_equal(0, UString::strcoll("ой её".u, "ОЙ ЕЁ".u, "ru", 1))
294
+ end
295
+
296
+ def test_gsub_block
297
+ a = "АБРАКАДАБРА".u
298
+ r = URegexp.new("(.)(.)(А)".u, URegexp::IGNORECASE)
299
+ b = a.gsub(r) do |m|
300
+ assert_equal("ава".u, "бравада".u.gsub(r) {|v| v[3]} )
301
+ m[3] + m[2] + m[1]
302
+ end
303
+ assert_equal("ААРБКАДААРБ".u, b)
304
+ end
305
+
306
+ def test_match_range
307
+ t = "test\ntext".u
308
+ m = (t =~ /^.+$/m.U)
309
+ assert_equal('test'.u, m[0])
310
+ assert_equal(0..3, m.range(0))
311
+ end
312
+
313
+ def test_resbundle
314
+ b = UResourceBundle.open(nil, "en")
315
+ assert_equal("Russia".u, b["Countries"]["RU"])
316
+ b = UResourceBundle.open(nil, "ru")
317
+ assert_equal("Россия".u, b["Countries"]["RU"])
318
+
319
+ end
320
+
321
+ def test_translit
322
+ assert_equal('zees ees A tfs t'.u, "This is A test".u.translit("null".u, "a>b;b>c;c>d;d>e;e>f;i>ee;[Tt]h>z;t>\\ t".u))
323
+ assert_equal("matsumoto yukihiro".u.translit("Latin-Hiragana".u), "まつもと ゆきひろ".u)
324
+ end
325
+
326
+ def test_parse_double
327
+ assert_equal(456, "456".u.to_f)
328
+ assert_equal("123,001".u.to_f("ru"), 123.001)
329
+ assert_equal("123,001".u.to_f("en"), 123001.0)
330
+ assert_equal("Got 123,001".u.to_f("en", "Got ###,###".u), 123001)
331
+ assert_equal(123.45, "١٢٣٫٤٥".u.to_f("ar_YE"))
332
+ end
333
+
334
+ def test_unescape
335
+ a = '\u0054\u0068\u0069\u0073\u0020\u0069\u0073\u0020\u0041\u0020\u0074\u0065\u0073\u0074\n!'
336
+ assert_equal("This is A test\n!", a.u.unescape.to_s)
337
+ end
338
+
339
+ def test_ranges
340
+ v = "\\x{1D7D9}\\x{1d7da}\\x{1d7db}!".u.unescape
341
+ assert_equal(7, v.length)
342
+ assert_equal(4, v.point_count)
343
+ assert_equal(0..0, v.conv_unit_range(0..1))
344
+ assert_equal(0..1, v.conv_unit_range(0..2))
345
+ assert_equal(0..3, v.conv_unit_range(0..-1))
346
+ assert_equal(2..3, v.conv_unit_range(-3..-1))
347
+
348
+ assert_equal(0..3, v.conv_point_range(0..1))
349
+ assert_equal(0..5, v.conv_point_range(0..2))
350
+ assert_equal(0..6, v.conv_point_range(0..-1))
351
+ assert_equal(4..6, v.conv_point_range(-2..-1))
352
+ end
353
+
354
+ def test_char_span
355
+ v = "ЁРШ ТВОЙУ МЕДДЬ".u.norm_D
356
+ assert_equal("ЁРШ".u, v.char_span(0,3))
357
+ assert_equal('\u0415\u0308\u0420'.u.unescape, v[0,3])
358
+ assert_equal(v.norm_C, v.char_span(0,-1))
359
+ end
360
+
361
+ def test_sentinel_bug
362
+ ("test" * 10).u.gsub(/e/.U, 'abracadabra'.u)
363
+ end
364
+
365
+ def test_string_change
366
+ a = " 123456789Aa ".u
367
+ assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.downcase!; m} };
368
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
369
+ assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.upcase!; m} }
370
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
371
+ assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.lstrip!; m} }
372
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
373
+ assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.rstrip!; m} }
374
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
375
+ assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.strip!; m} }
376
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
377
+ assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.slice!(/Aa/.U); m} }
378
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
379
+ assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.slice!("Aa".u); m} }
380
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
381
+ assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.slice!(3,5); m} }
382
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
383
+ assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.sub!(/Aa/.U, "BUG!".u); m} }
384
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
385
+ assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.gsub!(/Aa/.U, "BUG!".u); m} }
386
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
387
+ assert_raise(RuntimeError) { a.scan(/\d/.U) { |m| a.gsub!(/Aa/.U, "BUG!".u); m} }
388
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
389
+ assert_raise(RuntimeError) { a.each_char { |m| a[2]= "BUG!".u } }
390
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
391
+
392
+ end
393
+ def test_1_to_u_to_s
394
+ assert_equal(
395
+ "\355\350\367\345\343\356 \355\345 \360\340\341\356\362\340\345\362 :( ?".to_u("cp-1251").to_s("utf-8"),
396
+ "\320\275\320\270\321\207\320\265\320\263\320\276 \320\275\320\265 \321\200\320\260\320\261\320\276\321\202\320\260\320\265\321\202 :( ?")
397
+ end
398
+
399
+ def test_nested_blocks
400
+ a = "Модифицируемые строки иногда напрягают :)".u
401
+ b = "".u
402
+ assert_nothing_raised {
403
+ a.scan(/./.U) { |s|
404
+ b = a.gsub(ure('и')) { |m|
405
+ t = m[0] + "".u
406
+ a.each_char { |c|
407
+ t << c if c == 'о' .u
408
+ }
409
+ t
410
+ }
411
+ }
412
+ }
413
+ assert_equal("Модиооофиоооциоооруемые строкиооо иоооногда напрягают :)".u, b)
414
+ end
415
+
416
+ def test_AREF # '[]'
417
+ assert_equal(S("A"), S("AooBar")[0])
418
+ assert_equal(S("B"), S("FooBaB")[-1])
419
+ assert_equal(nil, S("FooBar")[6])
420
+ assert_equal(nil, S("FooBar")[-7])
421
+
422
+ assert_equal(S("Foo"), S("FooBar")[0,3])
423
+ assert_equal(S("Bar"), S("FooBar")[-3,3])
424
+ assert_equal(S(""), S("FooBar")[6,2])
425
+ assert_equal(nil, S("FooBar")[-7,10])
426
+
427
+ assert_equal(S("Foo"), S("FooBar")[0..2])
428
+ assert_equal(S("Foo"), S("FooBar")[0...3])
429
+ assert_equal(S("Bar"), S("FooBar")[-3..-1])
430
+ assert_equal(S(""), S("FooBar")[6..2])
431
+ assert_equal(nil, S("FooBar")[-10..-7])
432
+
433
+ assert_equal(S("Foo"), S("FooBar")[/^F../.U])
434
+ assert_equal(S("Bar"), S("FooBar")[/..r$/.U])
435
+ assert_equal(nil, S("FooBar")[/xyzzy/.U])
436
+ assert_equal(nil, S("FooBar")[/plugh/.U])
437
+
438
+ assert_equal(S("Foo"), S("FooBar")[S("Foo")])
439
+ assert_equal(S("Bar"), S("FooBar")[S("Bar")])
440
+ assert_equal(nil, S("FooBar")[S("xyzzy")])
441
+ assert_equal(nil, S("FooBar")[S("plugh")])
442
+
443
+ assert_equal(S("Foo"), S("FooBar")[/([A-Z]..)([A-Z]..)/.U, 1])
444
+ assert_equal(S("Bar"), S("FooBar")[/([A-Z]..)([A-Z]..)/.U, 2])
445
+ assert_equal(nil, S("FooBar")[/([A-Z]..)([A-Z]..)/.U, 3])
446
+ assert_equal(S("Bar"), S("FooBar")[/([A-Z]..)([A-Z]..)/.U, -1])
447
+ assert_equal(S("Foo"), S("FooBar")[/([A-Z]..)([A-Z]..)/.U, -2])
448
+ assert_equal(nil, S("FooBar")[ure("([A-Z]..)([A-Z]..)"), -3])
449
+ end
450
+
451
+ def test_ASET # '[]='
452
+ s = S("FooBar")
453
+ s[0] = S('A')
454
+ assert_equal(S("AooBar"), s)
455
+
456
+ s[-1]= S('B')
457
+ assert_equal(S("AooBaB"), s)
458
+ assert_raise(IndexError) { s[-7] = S("xyz") }
459
+ assert_equal(S("AooBaB"), s)
460
+ s[0] = S("ABC")
461
+ assert_equal(S("ABCooBaB"), s)
462
+
463
+ s = S("FooBar")
464
+ s[0,3] = S("A")
465
+ assert_equal(S("ABar"),s)
466
+ s[0] = S("Foo")
467
+ assert_equal(S("FooBar"), s)
468
+ s[-3,3] = S("Foo")
469
+ assert_equal(S("FooFoo"), s)
470
+ assert_raise (IndexError) { s[7,3] = S("Bar") }
471
+ assert_raise (IndexError) { s[-7,3] = S("Bar") }
472
+
473
+ s = S("FooBar")
474
+ s[0..2] = S("A")
475
+ assert_equal(S("ABar"), s)
476
+ s[1..3] = S("Foo")
477
+ assert_equal(S("AFoo"), s)
478
+ s[-4..-4] = S("Foo")
479
+ assert_equal(S("FooFoo"), s)
480
+ assert_raise (RangeError) { s[7..10] = S("Bar") }
481
+ assert_raise (RangeError) { s[-7..-10] = S("Bar") }
482
+
483
+ s = S("FooBar")
484
+ s[/^F../.U]= S("Bar")
485
+ assert_equal(S("BarBar"), s)
486
+ s[/..r$/.U] = S("Foo")
487
+ assert_equal(S("BarFoo"), s)
488
+
489
+ s[/([A-Z]..)([A-Z]..)/.U, 1] = S("Foo")
490
+ assert_equal(S("FooFoo"), s)
491
+ s[/([A-Z]..)([A-Z]..)/.U, 2] = S("Bar")
492
+ assert_equal(S("FooBar"), s)
493
+ assert_raise (IndexError) { s[/([A-Z]..)([A-Z]..)/.U, 3] = "None" }
494
+ s[ure("([A-Z]..)([A-Z]..)"), -1] = S("Foo")
495
+ assert_equal(S("FooFoo"), s)
496
+ s[/([A-Z]..)([A-Z]..)/.U, -2] = S("Bar")
497
+ assert_equal(S("BarFoo"), s)
498
+ # assert_raise (IndexError) { s[/([A-Z]..)([A-Z]..)/.U, -3] = "None" }
499
+
500
+ s = S("FooBar")
501
+ s[S("Foo")] = S("Bar")
502
+ assert_equal(S("BarBar"), s)
503
+
504
+ s = S("a string")
505
+ s[0..s.size] = S("another string")
506
+ assert_equal(S("another string"), s)
507
+ end
508
+ end