icu4r_19 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/icu4r.c ADDED
@@ -0,0 +1,18 @@
1
+ extern void initialize_ustring(void);
2
+ extern void initialize_calendar(void);
3
+ extern void initialize_uregexp(void);
4
+ extern void initialize_ucore_ext(void);
5
+ extern void initialize_ubundle(void);
6
+ extern void initialize_converter(void);
7
+ extern void initialize_collator(void);
8
+ void Init_icu4r (void) {
9
+
10
+ initialize_ustring();
11
+ initialize_uregexp();
12
+ initialize_ucore_ext();
13
+ initialize_ubundle();
14
+ initialize_calendar();
15
+ initialize_converter();
16
+ initialize_collator();
17
+
18
+ }
@@ -0,0 +1,45 @@
1
+ #include "ruby.h"
2
+ #include <unicode/utypes.h>
3
+ #include <unicode/ustring.h>
4
+ #include <unicode/ustdio.h>
5
+ #include <unicode/uchar.h>
6
+ #include <unicode/uclean.h>
7
+ #include <unicode/uregex.h>
8
+ #include <unicode/unorm.h>
9
+ #include <unicode/ubrk.h>
10
+ #include <unicode/ucnv.h>
11
+ #include <unicode/uset.h>
12
+ #include <unicode/uenum.h>
13
+ #include <unicode/utrans.h>
14
+ #include <unicode/ucol.h>
15
+ #include <unicode/usearch.h>
16
+ #include <unicode/ures.h>
17
+ #include <unicode/unum.h>
18
+
19
+ #ifdef HAVE_UNISTD_H
20
+ #include <unistd.h>
21
+ #endif
22
+ typedef struct {
23
+ long len;
24
+ long capa;
25
+ UChar *ptr;
26
+ unsigned char busy;
27
+ } ICUString ;
28
+ #define USTRING(obj) ((ICUString *)DATA_PTR(obj))
29
+ #define UREGEX(obj) ((ICURegexp *)DATA_PTR(obj))
30
+ #define ICU_PTR(str) USTRING(str)->ptr
31
+ #define ICU_LEN(str) USTRING(str)->len
32
+ #define ICU_CAPA(str) USTRING(str)->capa
33
+ #define ICU_RESIZE(str,capacity) REALLOC_N(ICU_PTR(str), UChar, (capacity)+1);
34
+
35
+ typedef struct {
36
+ URegularExpression *pattern;
37
+ int options;
38
+ } ICURegexp;
39
+
40
+
41
+ #define Check_Class(obj, klass) if(CLASS_OF(obj) != klass) rb_raise(rb_eTypeError, "Wrong type: expected %s, got %s", rb_class2name(klass), rb_class2name(rb_obj_class(obj)));
42
+
43
+
44
+ #define ICU_RAISE(status) if(U_FAILURE(status)) rb_raise(rb_eRuntimeError, u_errorName(status));
45
+
File without changes
@@ -0,0 +1,23 @@
1
+ require 'icu4r'
2
+ res = {}
3
+ src = <<-EOT
4
+ 外国語の勉強と教え
5
+ Изучение и обучение иностранных языков
6
+ Enseñanza y estudio de idiomas
7
+ 'læŋɡwidʒ 'lɘr:niŋ ænd 'ti:ʃiŋ
8
+ ‭‫ללמוד וללמד את השֵפה
9
+ L'enseignement et l'étude des langues
10
+ Γλωσσική Εκμὰθηση και Διδασκαλία
11
+ เรียนและสอนภาษา
12
+ EOT
13
+ src = src.u
14
+ ["line_break", "char", "sentence", "word"].each do |brk|
15
+ res[brk] = {}
16
+ ["ja", "en", "th"].each do |loc|
17
+ out = []
18
+ src.send("each_#{brk}".to_sym, loc) { |s| out << s }
19
+ res[brk][loc] = out.join("|")
20
+ puts "---------#{brk}-------#{loc}---------"
21
+ puts out.join("|")
22
+ end
23
+ end
@@ -0,0 +1,16 @@
1
+ require 'icu4r'
2
+ root = UResourceBundle.open(nil, "en")
3
+ today = Time.now
4
+ UString::list_locales.each do |locale|
5
+ b = UResourceBundle.open(nil, locale)
6
+ lang, ctry, var = locale.split '_', 3
7
+ ctry = var ? var : ctry
8
+ puts [
9
+ locale,
10
+ "("+root["Countries"][ctry].to_s + " : " + root["Languages"][lang].to_s+")",
11
+ "("+b["Countries"][ctry].to_s + " : " + b["Languages"][lang].to_s+")",
12
+ "[{0,date,long}]({1,number,currency})".u.fmt(locale, today, 123.45),
13
+ b["ExemplarCharacters"]
14
+ ].join("\t")
15
+
16
+ end
@@ -0,0 +1,11 @@
1
+ require 'icu4r'
2
+ str = " abcあいうえおアイウエオアイウエオ漢字,0123スクリプト".u
3
+ puts str.inspect_names
4
+ p str=~ ure('[\p{Script=Latin}]+')
5
+ p str=~ ure('[\p{Script=Hiragana}]+')
6
+ p str=~ ure('[\p{Script=Katakana}]+')
7
+ p str=~ ure('[\p{Script=Hiragana}\p{Script=Katakana}]+')
8
+ p str=~ ure('[\p{blk=CJKUnifiedIdeographs}]+')
9
+ p str=~ ure('[\p{L}]+')
10
+ p str=~ ure('\u3042') # あ
11
+ p str.scan(ure('[\p{N}]'))
@@ -0,0 +1,4 @@
1
+ require 'icu4r'
2
+ v = UResourceBundle.open(File.expand_path("appmsg"), "ru")
3
+ puts v["icu4r_hello"]
4
+ puts v["icu4r_classes"]
@@ -0,0 +1,4 @@
1
+ genrb root.txt
2
+ genrb -e UTF8 ru.txt
3
+ mkdir appmsg
4
+ mv *.res appmsg
@@ -0,0 +1,10 @@
1
+ root {
2
+ icu4r_hello { "Hello from ICU4R" }
3
+ icu4r_classes:array {
4
+ "UString",
5
+ "URegexp",
6
+ "UMatch",
7
+ "UResourceBundle"
8
+ }
9
+ ustring { "{0} - class to work with UTF16-encoded strings" }
10
+ }
@@ -0,0 +1,4 @@
1
+ ru {
2
+ icu4r_hello { "Привет от ICU4R" }
3
+ ustring { "{0} Класс для работы со строками в кодировке UTF16" }
4
+ }
@@ -0,0 +1,123 @@
1
+ require './icu4r'
2
+ require 'test/unit'
3
+ # these tests are ICU 3.4 dependent
4
+ class UCalendarTest < Test::Unit::TestCase
5
+
6
+ def test_time_zones
7
+ v = UCalendar.time_zones
8
+ assert_kind_of(Array, v)
9
+ assert_kind_of(UString, v[0])
10
+ assert(v.include?("Europe/Kiev".u))
11
+ end
12
+
13
+ def test_default
14
+ v = UCalendar.default_tz
15
+ UCalendar.default_tz ="Europe/Paris".u
16
+ assert_equal( "Europe/Paris".u, UCalendar.default_tz)
17
+ c = UCalendar.new
18
+ assert_equal( 3_600_000, c[:zone_offset])
19
+ # assert_equal( "GMT+01:00".u, c.time_zone("root")) # this should work also
20
+ end
21
+
22
+ def test_dst
23
+ assert_equal(UCalendar.dst_savings("America/Detroit".u), 3600000)
24
+ assert_equal(UCalendar.dst_savings("Australia/Lord_Howe".u), 1800000)
25
+ end
26
+
27
+ def test_tz_for_country
28
+ zones = %w{Europe/Kiev Europe/Simferopol Europe/Uzhgorod Europe/Zaporozhye}.collect {|s| s.to_u}
29
+ assert_equal(zones, UCalendar.tz_for_country("UA"))
30
+ end
31
+
32
+ def test_time_now
33
+ assert_equal(Time.now.to_i/100, UCalendar.now.to_i/100000)
34
+ end
35
+
36
+ def test_in_daylight
37
+ t = UCalendar.new
38
+ t.set_date(2006, 8, 22)
39
+ t.time_zone = "US/Hawaii".u
40
+ assert_equal(false, t.in_daylight_time?)
41
+ t.time_zone = "Europe/Berlin".u
42
+ assert_equal(true, t.in_daylight_time?)
43
+ end
44
+ def test_set_date
45
+ t = UCalendar.new
46
+ t.set_date(2006, 0, 22)
47
+ assert_equal(2006, t[:year])
48
+ assert_equal(0, t[:month])
49
+ assert_equal(22, t[:date])
50
+ t[:year] = 2007
51
+ t[:month] = 2
52
+ t[:date] = 23
53
+ assert_equal(2007, t[:year])
54
+ assert_equal(2, t[:month])
55
+ assert_equal(23, t[:date])
56
+
57
+ end
58
+
59
+ def test_set_date_time
60
+ t = UCalendar.new
61
+ t.set_date_time(2006, 0, 22, 11, 22, 33)
62
+ assert_equal(11, t[:hour])
63
+ assert_equal(22, t[:minute])
64
+ assert_equal(33, t[:second])
65
+ end
66
+
67
+ def test_millis
68
+ m = UCalendar.now
69
+ t = UCalendar.new
70
+ assert(m <= t.millis)
71
+ n = Time.now.to_i
72
+ t.millis = n * 1000.0
73
+ assert_equal(n*1000.0, t.millis)
74
+ end
75
+
76
+ def test_add_time
77
+ t = UCalendar.new
78
+ t.set_date_time(2006, 0, 22, 11, 22, 33)
79
+ t.add(:week_of_year, 1)
80
+ assert_equal(29, t[:date])
81
+ t.add(:hour, 48)
82
+ assert_equal(31, t[:date])
83
+ end
84
+
85
+ def test_format
86
+ t = UCalendar.new
87
+ t.set_date_time(2006, 0, 22, 11, 22, 33)
88
+ t.time_zone = "Europe/London".u
89
+ assert_equal("2006/01/22 11:22:33 GMT AD".u, t.format("yyyy/MM/dd HH:mm:ss z G".u, "en"))
90
+ end
91
+
92
+ def test_clone_and_compare
93
+ c = UCalendar.new
94
+ d = c.clone
95
+ assert(c == d)
96
+ assert(! (c < d) )
97
+ assert(! (c > d) )
98
+ assert(c.eql?(d))
99
+ c.add(:date, 1)
100
+ assert(c != d)
101
+ assert(! (c < d) )
102
+ assert( (c > d) )
103
+ assert(!c.eql?(d))
104
+ d.add(:date, 1)
105
+ assert(c.eql?(d))
106
+ d.time_zone = "Europe/Kiev".u
107
+ assert(!c.eql?(d))
108
+ assert(c == d)
109
+ end
110
+
111
+ def test_parse_date
112
+ UCalendar.default_tz="UTC".u
113
+ t1 = UCalendar.parse("HH:mm:ss E dd/MM/yyyy z".u, "en", "20:15:01 Fri 13/01/2006 GMT+00".u)
114
+ assert_equal(2006, t1[:year])
115
+ assert_equal(0, t1[:month])
116
+ assert_equal(13, t1[:date])
117
+ assert_equal(20, t1[:hour_of_day])
118
+ assert_equal(15, t1[:minute])
119
+ assert_equal(01, t1[:second])
120
+ end
121
+
122
+
123
+ end
@@ -0,0 +1,33 @@
1
+ require './icu4r'
2
+ require 'test/unit'
3
+ # these tests are ICU 3.4 dependent
4
+ class UCollatorTest < Test::Unit::TestCase
5
+ def test_strength
6
+ c = UCollator.new("root")
7
+ assert_equal(0, c.strcoll("a".u, "a".u))
8
+ assert_equal(1, c.strcoll("A".u, "a".u))
9
+ c.strength = UCollator::UCOL_SECONDARY
10
+ assert_equal(0, c.strcoll("A".u, "a".u))
11
+ end
12
+
13
+ def test_attrs
14
+ c = UCollator.new("root")
15
+ c[UCollator::UCOL_NUMERIC_COLLATION]= UCollator::UCOL_ON
16
+ ar = %w(100 10 20 30 200 300).map {|a| a.to_u }.sort {|a,b| c.strcoll(a,b)}.map {|s| s.to_s }
17
+ assert_equal(["10", "20", "30", "100", "200", "300"], ar)
18
+ c[UCollator::UCOL_NUMERIC_COLLATION]= UCollator::UCOL_OFF
19
+ ar = %w(100 10 20 30 200 300).map {|a| a.to_u }.sort {|a,b| c.strcoll(a,b)}.map {|s| s.to_s }
20
+ assert_equal( ["10", "100", "20", "200", "30", "300"], ar)
21
+ end
22
+
23
+ def test_sort_key
24
+ c = UCollator.new("root")
25
+ c[UCollator::UCOL_NUMERIC_COLLATION]= UCollator::UCOL_ON
26
+ ar = %w(100 10 20 30 200 300).sort_by {|a| c.sort_key(a.to_u) }
27
+ assert_equal(["10", "20", "30", "100", "200", "300"], ar)
28
+ c[UCollator::UCOL_NUMERIC_COLLATION]= UCollator::UCOL_OFF
29
+ ar = %w(100 10 20 30 200 300).sort_by {|a| c.sort_key(a.to_u) }
30
+ assert_equal( ["10", "100", "20", "200", "30", "300"], ar)
31
+ end
32
+
33
+ end
@@ -0,0 +1,72 @@
1
+ require './icu4r'
2
+ require 'test/unit'
3
+ # these tests are ICU 3.4 dependent
4
+ class UConverterTest < Test::Unit::TestCase
5
+
6
+ def test_a_new_and_name
7
+ c = UConverter.new("UTF8")
8
+ assert_kind_of( UConverter, c)
9
+ assert_equal('UTF-8', c.name)
10
+ end
11
+
12
+ def test_b_list_avail
13
+ a = UConverter.list_available
14
+ assert_kind_of(Array, a)
15
+ assert(a.include?("UTF-8"))
16
+ end
17
+
18
+ def test_c_all_names
19
+ a = UConverter.all_names
20
+ assert_kind_of(Array, a)
21
+ assert(a.include?("UTF-8"))
22
+ end
23
+
24
+ def test_d_std_names
25
+ a = UConverter.std_names("koi8r", "MIME")
26
+ assert_kind_of(Array, a)
27
+ assert(a.include?("KOI8-R"))
28
+ a = UConverter.std_names("cp1251", "IANA")
29
+ assert_kind_of(Array, a)
30
+ assert(a.include?("windows-1251"))
31
+ end
32
+
33
+ def test_e_convert_class_method
34
+ a_s = "\357\360\356\342\345\360\352\340 abcd"
35
+ u_s = UConverter.convert("utf8", "cp1251", a_s)
36
+ assert_equal("проверка abcd", u_s)
37
+ r_s = UConverter.convert("cp1251", "utf8", u_s)
38
+ assert_equal(r_s, a_s)
39
+ end
40
+
41
+ def test_f_to_from_u
42
+ c = UConverter.new("cp1251")
43
+ a_s = "\357\360\356\342\345\360\352\340 abcd"
44
+ u_s = c.to_u(a_s)
45
+ assert_kind_of(UString, u_s)
46
+ r_s = c.from_u(u_s)
47
+ assert_equal(r_s, a_s)
48
+ end
49
+
50
+ def test_g_convert_instance_method
51
+ c1 = UConverter.new("EUC-JP")
52
+ c2 = UConverter.new("Cp1251")
53
+ a_s = "\247\322\247\335\247\361!"
54
+ b_s = a_s * 1000
55
+ a1 = UConverter.convert("Cp1251", "EUC-JP", b_s)
56
+ a2 = c1.convert(c2, b_s)
57
+ assert_equal(a1.size, a2.size)
58
+ assert_equal(a2.size, 4 * 1000)
59
+ assert_equal(a1, a2)
60
+ assert_equal("\341\353\377!", c1.convert(c2, a_s))
61
+ end
62
+
63
+ def test_h_subst_chars
64
+ c1 = UConverter.new("US-ASCII")
65
+ assert_kind_of(String, c1.subst_chars)
66
+ c1.subst_chars="!"
67
+ assert_equal( "I!t!rn!ti!n!liz!ti!n", c1.from_u("Iñtërnâtiônàlizætiøn".u))
68
+ c1.subst_chars=" "
69
+ assert_equal( "I t rn ti n liz ti n", c1.from_u("Iñtërnâtiônàlizætiøn".u))
70
+ end
71
+
72
+ end
@@ -0,0 +1,508 @@
1
+ require './icu4r'
2
+ require 'test/unit'
3
+ class UnicodeStringTest < Test::Unit::TestCase
4
+
5
+ def test_string
6
+ a = u("абвг", "utf8")
7
+ b = u("абвг", "utf8")
8
+ assert_equal(a,b )
9
+ end
10
+
11
+ def test_casecmp
12
+ assert_equal(0, u("Сцуко").casecmp("сЦуКо".u))
13
+ assert_equal(-1, u("Сцук").casecmp("сЦуКо".u))
14
+ assert_equal(1, u("Сцуко").casecmp("сЦуК".u))
15
+ end
16
+
17
+ def test_match
18
+ assert_match(ure("абвг"), u("абвг"))
19
+ assert("аавг".u !~ ure("^$"))
20
+ assert("авб\n".u !~ ure("^$"))
21
+ assert("абв".u !~ ure("^г*$"))
22
+ assert_equal("".u, ("абв".u =~ ure("г*$"))[0])
23
+ assert("".u =~ ure("^$"))
24
+ assert("абвабв".u =~ ure( ".*а") )
25
+ assert("абвабв".u =~ ure( ".*в") )
26
+ assert("абвабв".u =~ ure( ".*?а") )
27
+ assert("абвабв".u =~ ure( ".*?в") )
28
+ assert(ure("(.|\n)*?\n(б|\n)") =~ u("а\nб\n\n"))
29
+ end
30
+
31
+ def test_sub
32
+ x = "a.gif".u
33
+ assert_equal("gif".u, x.sub(ure(".*\\.([^\\.]+)$"), "$1".u))
34
+ assert_equal("b.gif".u, x.sub(ure(".*\\.([^\\.]+)$"), "b.$1".u))
35
+ assert_equal(x, "a.gif".u)
36
+ x.sub!(/gif/.U, ''.u)
37
+ assert_equal(x, "a.".u)
38
+ x= "1234561234".u
39
+ x.sub!(/123/.U, "".u)
40
+ assert_equal(x, "4561234".u)
41
+
42
+ end
43
+
44
+
45
+ def test_case_fold
46
+ assert_equal("А".u, "а".u.upcase!)
47
+ assert_equal("а".u, ("А".u.downcase!))
48
+
49
+ s = "аБв".u
50
+ s.upcase
51
+ assert_equal("аБв".u, s)
52
+ s.upcase!
53
+ assert_equal("АБВ".u, s)
54
+
55
+ s = "аБв".u
56
+ s.downcase
57
+ assert_equal("аБв".u, s)
58
+ s.downcase!
59
+ assert_equal("абв".u, s)
60
+ end
61
+
62
+ def test_index
63
+ assert_equal( "hello".u.rindex('e'.u), 1)
64
+ assert_equal( "hello".u.rindex('l'.u) , 3)
65
+ assert_equal( "hello".u.rindex('a'.u), nil)
66
+ assert_equal( "hello".u.index('e'.u),1)
67
+ assert_equal( "hello".u.index('lo'.u),3)
68
+ assert_equal( "hello".u.index('a'.u), nil)
69
+ assert_equal( "hello".u.index(ure('[aeiou]'), -3), 4)
70
+ assert_equal( "hello".u.rindex(ure('[aeiou]'), -2), 1)
71
+
72
+ assert_equal(1, S("hello").index(S("ell")))
73
+ assert_equal(2, S("hello").index(/ll./.U))
74
+
75
+ assert_equal(3, S("hello").index(S("l"), 3))
76
+ assert_equal(3, S("hello").index(/l./.U, 3))
77
+
78
+ assert_nil(S("hello").index(S("z"), 3))
79
+ assert_nil(S("hello").index(/z./.U, 3))
80
+
81
+ assert_nil(S("hello").index(S("z")))
82
+ assert_nil(S("hello").index(/z./.U))
83
+
84
+ end
85
+
86
+ def test_insert
87
+ assert_equal("abcd".u.insert(0, 'X'.u) , "Xabcd".u)
88
+ assert_equal("abcd".u.insert(3, 'X'.u) , "abcXd".u)
89
+ assert_equal("abcd".u.insert(4, 'X'.u) , "abcdX".u)
90
+ assert_equal("abcd".u.insert(-3, 'X'.u) , "abXcd".u)
91
+ assert_equal("abcd".u.insert(-1, 'X'.u) , "abcdX".u)
92
+ end
93
+
94
+ def test_include
95
+ assert( "hello".u.include?("lo".u))
96
+ assert(!("hello".u.include?("ol".u)))
97
+ end
98
+
99
+ def test_init
100
+ assert_equal( "нах!".u, UString.new("нах!".u))
101
+ a = "ГНУ!".u
102
+ a.replace("ФИГНУ!".u)
103
+ assert_equal(a, "ФИГНУ!".u)
104
+ assert_equal(a, a.clone)
105
+ end
106
+
107
+ def test_aref
108
+ a = "hello there".u
109
+ assert_equal('e'.u, a[1]) #=> 'e'
110
+ assert_equal('ell'.u, a[1,3]) #=> "ell"
111
+ assert_equal('ell'.u, a[1..3]) #=> "ell"
112
+ assert_equal('er'.u, a[-3,2]) #=> "er"
113
+ assert_equal('her'.u, a[-4..-2]) #=> "her"
114
+ assert_nil(a[12..-1]) #=> nil
115
+ assert_equal(''.u, a[-2..-4]) #=> ""
116
+ assert_equal('ell'.u, a[ure('[aeiou](.)\1')]) #=> "ell"
117
+ assert_equal('ell'.u, a[ure('[aeiou](.)\1'), 0]) #=> "ell"
118
+ assert_equal('l'.u, a[ure('[aeiou](l)\1'), 1]) #=> "l"
119
+ assert_nil( a[ure('[aeiou](.)$1'), 2]) #=> nil
120
+ assert_equal('lo'.u, a["lo".u]) #=> "lo"
121
+ assert_nil(a["bye".u]) #=> nil
122
+ end
123
+
124
+ def test_slice_bang
125
+ string = "this is a string".u
126
+ assert_equal(string.slice!(2) , 'i'.u)
127
+ assert_equal(string.slice!(3..6) , " is ".u)
128
+ assert_equal(string.slice!(ure("s.*t")) , "sa st".u)
129
+ assert_equal(string.slice!("r".u) , "r".u)
130
+ assert_equal(string , "thing".u)
131
+ a = "test".u
132
+ a[0] = "BEA".u
133
+ assert_equal("BEAest".u, a)
134
+ end
135
+
136
+ def test_gsub
137
+ assert_equal("hello".u.gsub(ure("[aeiou]"), '*'.u) , "h*ll*".u)
138
+ assert_equal("hello".u.gsub(ure("([aeiou])"), '<$1>'.u) , "h<e>ll<o>".u)
139
+ i = 0
140
+ assert_equal("12345".u , "hello".u.gsub(ure(".")) {|s| i+=1; i.to_s})
141
+ assert_equal("214365".u, "123456".u.gsub(ure("(.)(.)")) {|s| s[2] + s[1] })
142
+ a = "test".u
143
+ a.gsub!(/t/.U, a)
144
+ assert_equal("testestest".u, a)
145
+ end
146
+
147
+ def test_ure_case_eq
148
+ a = "HELLO".u
149
+ v = case a
150
+ when ure("^[a-z]*$"); "Lower case"
151
+ when ure("^[A-Z]*$"); "Upper case"
152
+ else; "Mixed case"
153
+ end
154
+ assert_equal('Upper case', v)
155
+ end
156
+
157
+ # UString::strcoll("ÆSS".u, "AEß".u, "de", 0)
158
+ def test_empty
159
+ assert(! "hello".u.empty?)
160
+ assert("".empty?)
161
+ assert("test".u.clear.empty?)
162
+ assert(" \t\n".u.strip.empty?)
163
+ end
164
+
165
+ def test_clear
166
+ a = "test".u
167
+ a.clear
168
+ assert_equal(0, a.length)
169
+ end
170
+
171
+ def test_length
172
+ assert_equal(10, "12345АБВГД".u.length)
173
+ assert_equal(0,"".u.length)
174
+ assert_equal(3,"abc".u.length)
175
+ end
176
+
177
+ def test_replace
178
+ s = "hello".u
179
+ s.replace("world".u)
180
+ assert_equal(s, "world".u)
181
+ end
182
+
183
+ def test_cmp
184
+ assert_equal("абвгде".u <=> "абвгд".u , 1 )
185
+ assert_equal("абвгде".u <=> "абвгде".u , 0 )
186
+ assert_equal("абвгде".u <=> "абвгдеж".u , -1 )
187
+ assert_equal("абвгде".u <=> "АБВГДЕ".u , -1 ) # UCA
188
+ end
189
+
190
+ def test_plus
191
+ assert_equal("сложение".u, "сло".u + "жение".u)
192
+ end
193
+
194
+ def test_times
195
+ assert_equal("ААААА".u, "А".u * 5)
196
+ end
197
+
198
+ def test_concat
199
+ assert_equal("сложение".u, "сло".u << "жение".u)
200
+ assert_equal("сложение".u, "сло".u.concat("жение".u))
201
+ a = "сло".u
202
+ a << "жение".u
203
+ assert_equal("сложение".u, a)
204
+ end
205
+
206
+ def test_search
207
+ a = "A quick brown fox jumped over the lazy fox dancing foxtrote".u
208
+ assert_equal(a.search("fox".u) , [14..16, 39..41, 51..53])
209
+ assert_equal(a.search("FoX".u) , [])
210
+ assert_equal(a.search("FoX".u, :ignore_case => true) , [14..16, 39..41, 51..53])
211
+ assert_equal(a.search("FoX".u, :ignore_case => true, :whole_words => true) , [14..16, 39..41])
212
+ assert_equal(a.search("FoX".u, :ignore_case => true, :whole_words => true, :limit => 1) , [14..16])
213
+
214
+ b = "Iñtërnâtiônàlizætiøn îs cọmpłèx".u.upcase
215
+ assert_equal(b, "IÑTËRNÂTIÔNÀLIZÆTIØN ÎS CỌMPŁÈX".u)
216
+ assert_equal(b.search("nâtiôn".u, :locale => "en") , [])
217
+ assert_equal(b.search("nation".u) , [])
218
+ assert_equal(b.search("nation".u, :locale => "en", :ignore_case_accents => true) , [5..10])
219
+ assert_equal(b.search("nâtiôn".u, :locale => "en", :ignore_case => true) , [5..10])
220
+ assert_equal(b.search("zaeti".u, :locale => "en" ) , [])
221
+ assert_equal(b.search("zaeti".u, :locale => "en", :ignore_case => true) , [])
222
+ assert_equal(b.search("zaeti".u, :locale => "en", :ignore_case_accents => true) , [14..17])
223
+ assert_equal("İSTANBUL".u.search("istanbul".u, :locale => 'tr', :ignore_case => true), [0..7])
224
+ assert_equal("ёжий".u.norm_D.search("ЕЖИЙ".u, :locale => 'ru', :canonical => true, :ignore_case_accents => true), [0..4])
225
+ end
226
+
227
+ def test_dollar_sign_regexp
228
+ assert_equal("te$et".u, "test".u.gsub(/s/.U, '$e'.u))
229
+ end
230
+
231
+ def test_codepoints
232
+ a=[0x01234, 0x0434, 0x1D7D9, ?t, ?e, ?s]
233
+ b=a.pack("U*").u
234
+ assert_equal(a, b.codepoints)
235
+ assert_equal(b, a.to_u)
236
+ end
237
+
238
+ def test_chars
239
+ chr = ["I", "Ñ", "T", "Ë", "R", "N", "Â", "T", "I", "Ô", "N", "À", "L", "I", "Z", "Æ", "T", "I", "Ø", "N" ]
240
+ chr = chr.collect {|s| s.to_u.norm_C}
241
+ assert_equal(chr, "Iñtërnâtiônàlizætiøn".u.upcase.norm_D.chars)
242
+
243
+ end
244
+
245
+
246
+ def test_fmt
247
+ assert_equal("b a".u, "{1} {0}".u.fmt("en", "a".u, "b".u))
248
+ assert_equal("12,345.56".u, "{0, number}".u.fmt("en", 12345.56))
249
+ assert_equal("$12,345.56".u, "{0, number, currency}".u.fmt("en_US", 12345.56))
250
+ assert_equal("20:15:01 13/01/2006".u, "{0,date,HH:mm:ss dd/MM/yyyy}".u.fmt("en", Time.local(2006,"jan",13,20,15,1)))
251
+ end
252
+
253
+ def test_norm
254
+ v="Iñtërnâtiônàlizætiøn".u
255
+ assert_equal("Iñtërnâtiônàlizætiøn".u, v.norm_C)
256
+ assert_equal("Iñtërnâtiônàlizætiøn".u, v.norm_D)
257
+ assert_equal("Iñtërnâtiônàlizætiøn".u, v.norm_D.norm_FCD)
258
+ assert_equal("Iñtërnâtiônàlizætiøn".u,v.norm_D.norm_KC)
259
+ end
260
+
261
+ def test_scan
262
+ a = "cruel world".u
263
+ assert_equal(a.scan(/\w+/.U) ,["cruel".u , "world".u ])
264
+ assert_equal(a.scan(/.../.U) ,["cru".u , "el ".u , "wor".u ])
265
+ assert_equal(a.scan(/(...)/.U) ,["cru".u , "el ".u , "wor".u ])
266
+ assert_equal(a.scan(/(..)(..)/.U) ,[["cr".u , "ue".u ], ["l ".u , "wo".u ]] )
267
+ end
268
+ def S(str)
269
+ str.to_u
270
+ end
271
+ def test_split
272
+ re = URegexp.new("[,:/]".u)
273
+ assert_equal(["split test".u , "west".u , "best".u , "east".u ], re.split("split test,west:best/east".u, nil))
274
+ assert_equal(["split test".u, "west:best/east".u], re.split("split test,west:best/east".u, 2))
275
+ assert_equal([S("a"), S("b"), S("c")], S("a b\t c").split(S("\\s+")))
276
+ assert_equal([S(" a "), S(" b "), S(" c ")], S(" a | b | c ").split(S("\\|")))
277
+ assert_equal([S("a"), S("b"), S("c")], S("aXXbXXcXX").split(/X./.U))
278
+ assert_equal([S("a|b|c")], S("a|b|c").split(S('\|'), 1))
279
+ assert_equal([S("a"), S("b|c")], S("a|b|c").split(S('\|'), 2))
280
+ assert_equal([S("a"), S("b"), S("c")], S("a|b|c").split(S('\|'), 3))
281
+ assert_equal([S("a"), S("b"), S("c")], S("a|b|c|").split(S('\|'), -1))
282
+ assert_equal([S("a"), S("b"), S("c"), S("") ], S("a|b|c||").split(S('\|'), -1))
283
+ assert_equal([S("a"), S(""), S("b"), S("c")], S("a||b|c|").split(S('\|'), -1))
284
+ end
285
+
286
+
287
+ def test_strcoll
288
+ assert_equal(0, UString::strcoll("a".u, "a".u))
289
+ assert_equal(-1, UString::strcoll("y".u, "k".u, "lv"))
290
+ assert_equal(1, UString::strcoll("я".u, "а".u))
291
+ assert_equal(1, UString::strcoll("я".u, "А".u, "ru"))
292
+ assert_equal(0, UString::strcoll("İSTANBUL".u, "istanbul".u, "tr", 0))
293
+ assert_equal(0, UString::strcoll("ой её".u, "ОЙ ЕЁ".u, "ru", 1))
294
+ end
295
+
296
+ def test_gsub_block
297
+ a = "АБРАКАДАБРА".u
298
+ r = URegexp.new("(.)(.)(А)".u, URegexp::IGNORECASE)
299
+ b = a.gsub(r) do |m|
300
+ assert_equal("ава".u, "бравада".u.gsub(r) {|v| v[3]} )
301
+ m[3] + m[2] + m[1]
302
+ end
303
+ assert_equal("ААРБКАДААРБ".u, b)
304
+ end
305
+
306
+ def test_match_range
307
+ t = "test\ntext".u
308
+ m = (t =~ /^.+$/m.U)
309
+ assert_equal('test'.u, m[0])
310
+ assert_equal(0..3, m.range(0))
311
+ end
312
+
313
+ def test_resbundle
314
+ b = UResourceBundle.open(nil, "en")
315
+ assert_equal("Russia".u, b["Countries"]["RU"])
316
+ b = UResourceBundle.open(nil, "ru")
317
+ assert_equal("Россия".u, b["Countries"]["RU"])
318
+
319
+ end
320
+
321
+ def test_translit
322
+ assert_equal('zees ees A tfs t'.u, "This is A test".u.translit("null".u, "a>b;b>c;c>d;d>e;e>f;i>ee;[Tt]h>z;t>\\ t".u))
323
+ assert_equal("matsumoto yukihiro".u.translit("Latin-Hiragana".u), "まつもと ゆきひろ".u)
324
+ end
325
+
326
+ def test_parse_double
327
+ assert_equal(456, "456".u.to_f)
328
+ assert_equal("123,001".u.to_f("ru"), 123.001)
329
+ assert_equal("123,001".u.to_f("en"), 123001.0)
330
+ assert_equal("Got 123,001".u.to_f("en", "Got ###,###".u), 123001)
331
+ assert_equal(123.45, "١٢٣٫٤٥".u.to_f("ar_YE"))
332
+ end
333
+
334
+ def test_unescape
335
+ a = '\u0054\u0068\u0069\u0073\u0020\u0069\u0073\u0020\u0041\u0020\u0074\u0065\u0073\u0074\n!'
336
+ assert_equal("This is A test\n!", a.u.unescape.to_s)
337
+ end
338
+
339
+ def test_ranges
340
+ v = "\\x{1D7D9}\\x{1d7da}\\x{1d7db}!".u.unescape
341
+ assert_equal(7, v.length)
342
+ assert_equal(4, v.point_count)
343
+ assert_equal(0..0, v.conv_unit_range(0..1))
344
+ assert_equal(0..1, v.conv_unit_range(0..2))
345
+ assert_equal(0..3, v.conv_unit_range(0..-1))
346
+ assert_equal(2..3, v.conv_unit_range(-3..-1))
347
+
348
+ assert_equal(0..3, v.conv_point_range(0..1))
349
+ assert_equal(0..5, v.conv_point_range(0..2))
350
+ assert_equal(0..6, v.conv_point_range(0..-1))
351
+ assert_equal(4..6, v.conv_point_range(-2..-1))
352
+ end
353
+
354
+ def test_char_span
355
+ v = "ЁРШ ТВОЙУ МЕДДЬ".u.norm_D
356
+ assert_equal("ЁРШ".u, v.char_span(0,3))
357
+ assert_equal('\u0415\u0308\u0420'.u.unescape, v[0,3])
358
+ assert_equal(v.norm_C, v.char_span(0,-1))
359
+ end
360
+
361
+ def test_sentinel_bug
362
+ ("test" * 10).u.gsub(/e/.U, 'abracadabra'.u)
363
+ end
364
+
365
+ def test_string_change
366
+ a = " 123456789Aa ".u
367
+ assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.downcase!; m} };
368
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
369
+ assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.upcase!; m} }
370
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
371
+ assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.lstrip!; m} }
372
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
373
+ assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.rstrip!; m} }
374
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
375
+ assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.strip!; m} }
376
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
377
+ assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.slice!(/Aa/.U); m} }
378
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
379
+ assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.slice!("Aa".u); m} }
380
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
381
+ assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.slice!(3,5); m} }
382
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
383
+ assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.sub!(/Aa/.U, "BUG!".u); m} }
384
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
385
+ assert_raise(RuntimeError) { a.gsub!(/\d/.U) { |m| a.gsub!(/Aa/.U, "BUG!".u); m} }
386
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
387
+ assert_raise(RuntimeError) { a.scan(/\d/.U) { |m| a.gsub!(/Aa/.U, "BUG!".u); m} }
388
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
389
+ assert_raise(RuntimeError) { a.each_char { |m| a[2]= "BUG!".u } }
390
+ assert_equal(" 123456789Aa ".u , a); a = a.clone
391
+
392
+ end
393
+ def test_1_to_u_to_s
394
+ assert_equal(
395
+ "\355\350\367\345\343\356 \355\345 \360\340\341\356\362\340\345\362 :( ?".to_u("cp-1251").to_s("utf-8"),
396
+ "\320\275\320\270\321\207\320\265\320\263\320\276 \320\275\320\265 \321\200\320\260\320\261\320\276\321\202\320\260\320\265\321\202 :( ?")
397
+ end
398
+
399
+ def test_nested_blocks
400
+ a = "Модифицируемые строки иногда напрягают :)".u
401
+ b = "".u
402
+ assert_nothing_raised {
403
+ a.scan(/./.U) { |s|
404
+ b = a.gsub(ure('и')) { |m|
405
+ t = m[0] + "".u
406
+ a.each_char { |c|
407
+ t << c if c == 'о' .u
408
+ }
409
+ t
410
+ }
411
+ }
412
+ }
413
+ assert_equal("Модиооофиоооциоооруемые строкиооо иоооногда напрягают :)".u, b)
414
+ end
415
+
416
+ def test_AREF # '[]'
417
+ assert_equal(S("A"), S("AooBar")[0])
418
+ assert_equal(S("B"), S("FooBaB")[-1])
419
+ assert_equal(nil, S("FooBar")[6])
420
+ assert_equal(nil, S("FooBar")[-7])
421
+
422
+ assert_equal(S("Foo"), S("FooBar")[0,3])
423
+ assert_equal(S("Bar"), S("FooBar")[-3,3])
424
+ assert_equal(S(""), S("FooBar")[6,2])
425
+ assert_equal(nil, S("FooBar")[-7,10])
426
+
427
+ assert_equal(S("Foo"), S("FooBar")[0..2])
428
+ assert_equal(S("Foo"), S("FooBar")[0...3])
429
+ assert_equal(S("Bar"), S("FooBar")[-3..-1])
430
+ assert_equal(S(""), S("FooBar")[6..2])
431
+ assert_equal(nil, S("FooBar")[-10..-7])
432
+
433
+ assert_equal(S("Foo"), S("FooBar")[/^F../.U])
434
+ assert_equal(S("Bar"), S("FooBar")[/..r$/.U])
435
+ assert_equal(nil, S("FooBar")[/xyzzy/.U])
436
+ assert_equal(nil, S("FooBar")[/plugh/.U])
437
+
438
+ assert_equal(S("Foo"), S("FooBar")[S("Foo")])
439
+ assert_equal(S("Bar"), S("FooBar")[S("Bar")])
440
+ assert_equal(nil, S("FooBar")[S("xyzzy")])
441
+ assert_equal(nil, S("FooBar")[S("plugh")])
442
+
443
+ assert_equal(S("Foo"), S("FooBar")[/([A-Z]..)([A-Z]..)/.U, 1])
444
+ assert_equal(S("Bar"), S("FooBar")[/([A-Z]..)([A-Z]..)/.U, 2])
445
+ assert_equal(nil, S("FooBar")[/([A-Z]..)([A-Z]..)/.U, 3])
446
+ assert_equal(S("Bar"), S("FooBar")[/([A-Z]..)([A-Z]..)/.U, -1])
447
+ assert_equal(S("Foo"), S("FooBar")[/([A-Z]..)([A-Z]..)/.U, -2])
448
+ assert_equal(nil, S("FooBar")[ure("([A-Z]..)([A-Z]..)"), -3])
449
+ end
450
+
451
+ def test_ASET # '[]='
452
+ s = S("FooBar")
453
+ s[0] = S('A')
454
+ assert_equal(S("AooBar"), s)
455
+
456
+ s[-1]= S('B')
457
+ assert_equal(S("AooBaB"), s)
458
+ assert_raise(IndexError) { s[-7] = S("xyz") }
459
+ assert_equal(S("AooBaB"), s)
460
+ s[0] = S("ABC")
461
+ assert_equal(S("ABCooBaB"), s)
462
+
463
+ s = S("FooBar")
464
+ s[0,3] = S("A")
465
+ assert_equal(S("ABar"),s)
466
+ s[0] = S("Foo")
467
+ assert_equal(S("FooBar"), s)
468
+ s[-3,3] = S("Foo")
469
+ assert_equal(S("FooFoo"), s)
470
+ assert_raise (IndexError) { s[7,3] = S("Bar") }
471
+ assert_raise (IndexError) { s[-7,3] = S("Bar") }
472
+
473
+ s = S("FooBar")
474
+ s[0..2] = S("A")
475
+ assert_equal(S("ABar"), s)
476
+ s[1..3] = S("Foo")
477
+ assert_equal(S("AFoo"), s)
478
+ s[-4..-4] = S("Foo")
479
+ assert_equal(S("FooFoo"), s)
480
+ assert_raise (RangeError) { s[7..10] = S("Bar") }
481
+ assert_raise (RangeError) { s[-7..-10] = S("Bar") }
482
+
483
+ s = S("FooBar")
484
+ s[/^F../.U]= S("Bar")
485
+ assert_equal(S("BarBar"), s)
486
+ s[/..r$/.U] = S("Foo")
487
+ assert_equal(S("BarFoo"), s)
488
+
489
+ s[/([A-Z]..)([A-Z]..)/.U, 1] = S("Foo")
490
+ assert_equal(S("FooFoo"), s)
491
+ s[/([A-Z]..)([A-Z]..)/.U, 2] = S("Bar")
492
+ assert_equal(S("FooBar"), s)
493
+ assert_raise (IndexError) { s[/([A-Z]..)([A-Z]..)/.U, 3] = "None" }
494
+ s[ure("([A-Z]..)([A-Z]..)"), -1] = S("Foo")
495
+ assert_equal(S("FooFoo"), s)
496
+ s[/([A-Z]..)([A-Z]..)/.U, -2] = S("Bar")
497
+ assert_equal(S("BarFoo"), s)
498
+ # assert_raise (IndexError) { s[/([A-Z]..)([A-Z]..)/.U, -3] = "None" }
499
+
500
+ s = S("FooBar")
501
+ s[S("Foo")] = S("Bar")
502
+ assert_equal(S("BarBar"), s)
503
+
504
+ s = S("a string")
505
+ s[0..s.size] = S("another string")
506
+ assert_equal(S("another string"), s)
507
+ end
508
+ end