u 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. data/README +38 -0
  2. data/Rakefile +64 -0
  3. data/ext/encoding/character/utf-8/break.c +25 -0
  4. data/ext/encoding/character/utf-8/data/break.h +22931 -0
  5. data/ext/encoding/character/utf-8/data/character-tables.h +14358 -0
  6. data/ext/encoding/character/utf-8/data/compose.h +1607 -0
  7. data/ext/encoding/character/utf-8/data/decompose.h +10926 -0
  8. data/ext/encoding/character/utf-8/data/generate-unicode-data.rb +1070 -0
  9. data/ext/encoding/character/utf-8/decompose.c +444 -0
  10. data/ext/encoding/character/utf-8/depend +65 -0
  11. data/ext/encoding/character/utf-8/extconf.rb +67 -0
  12. data/ext/encoding/character/utf-8/private.c +62 -0
  13. data/ext/encoding/character/utf-8/private.h +51 -0
  14. data/ext/encoding/character/utf-8/properties.c +1056 -0
  15. data/ext/encoding/character/utf-8/rb_includes.h +19 -0
  16. data/ext/encoding/character/utf-8/rb_methods.h +49 -0
  17. data/ext/encoding/character/utf-8/rb_private.h +52 -0
  18. data/ext/encoding/character/utf-8/rb_utf_aref.c +111 -0
  19. data/ext/encoding/character/utf-8/rb_utf_aset.c +105 -0
  20. data/ext/encoding/character/utf-8/rb_utf_casecmp.c +24 -0
  21. data/ext/encoding/character/utf-8/rb_utf_chomp.c +114 -0
  22. data/ext/encoding/character/utf-8/rb_utf_chop.c +44 -0
  23. data/ext/encoding/character/utf-8/rb_utf_collate.c +13 -0
  24. data/ext/encoding/character/utf-8/rb_utf_count.c +30 -0
  25. data/ext/encoding/character/utf-8/rb_utf_delete.c +60 -0
  26. data/ext/encoding/character/utf-8/rb_utf_downcase.c +13 -0
  27. data/ext/encoding/character/utf-8/rb_utf_each_char.c +27 -0
  28. data/ext/encoding/character/utf-8/rb_utf_foldcase.c +13 -0
  29. data/ext/encoding/character/utf-8/rb_utf_hex.c +14 -0
  30. data/ext/encoding/character/utf-8/rb_utf_index.c +50 -0
  31. data/ext/encoding/character/utf-8/rb_utf_insert.c +48 -0
  32. data/ext/encoding/character/utf-8/rb_utf_internal_bignum.c +332 -0
  33. data/ext/encoding/character/utf-8/rb_utf_internal_bignum.h +12 -0
  34. data/ext/encoding/character/utf-8/rb_utf_internal_tr.c +142 -0
  35. data/ext/encoding/character/utf-8/rb_utf_internal_tr.h +41 -0
  36. data/ext/encoding/character/utf-8/rb_utf_justify.c +96 -0
  37. data/ext/encoding/character/utf-8/rb_utf_length.c +14 -0
  38. data/ext/encoding/character/utf-8/rb_utf_lstrip.c +41 -0
  39. data/ext/encoding/character/utf-8/rb_utf_normalize.c +51 -0
  40. data/ext/encoding/character/utf-8/rb_utf_oct.c +14 -0
  41. data/ext/encoding/character/utf-8/rb_utf_reverse.c +13 -0
  42. data/ext/encoding/character/utf-8/rb_utf_rindex.c +88 -0
  43. data/ext/encoding/character/utf-8/rb_utf_rstrip.c +51 -0
  44. data/ext/encoding/character/utf-8/rb_utf_squeeze.c +70 -0
  45. data/ext/encoding/character/utf-8/rb_utf_strip.c +27 -0
  46. data/ext/encoding/character/utf-8/rb_utf_to_i.c +25 -0
  47. data/ext/encoding/character/utf-8/rb_utf_tr.c +250 -0
  48. data/ext/encoding/character/utf-8/rb_utf_upcase.c +13 -0
  49. data/ext/encoding/character/utf-8/tables.h +38 -0
  50. data/ext/encoding/character/utf-8/unicode.c +319 -0
  51. data/ext/encoding/character/utf-8/unicode.h +216 -0
  52. data/ext/encoding/character/utf-8/utf.c +1334 -0
  53. data/lib/encoding/character/utf-8.rb +201 -0
  54. data/lib/u.rb +16 -0
  55. data/lib/u/string.rb +185 -0
  56. data/lib/u/version.rb +5 -0
  57. data/test/unit/u.rb +5 -0
  58. data/test/unit/u/string.rb +91 -0
  59. metadata +174 -0
@@ -0,0 +1,201 @@
1
+ # contents: UTF-8 String methods.
2
+ #
3
+ # Copyright © 2006 Nikolai Weibull <now@bitwi.se>
4
+
5
+ require 'encoding/character/utf-8/utf8'
6
+
7
+ # TODO: Rework this to use a dispatch object instead, so that the encoding can
8
+ # be changed on the fly.
9
+ # TODO: Add String#encoding.
10
# UTF-8 String methods, meant to be mixed into individual String instances
# with Object#extend.  Apart from the few methods built on top of others
# (#tr!, #tr_s!, #inspect, #capitalize and the generated bang variants), every
# method forwards to the module function of the same name on
# Encoding::Character::UTF8, passing the receiver as the first argument.
module U::String
  # Defines `<method>!`, which overwrites the receiver with the result of
  # `<method>`.  The generated method ends in #replace, so it returns the
  # receiver whether or not the content changed.
  def self.def_thunk_replacing_variant(method)
    define_method(:"#{method}!") do
      replace(send(method))
    end
  end

  def <=>(other)
    Encoding::Character::UTF8.collate(self, other)
  end

  def [](*args)
    Encoding::Character::UTF8.aref(self, *args)
  end

  # Same operation as #[].
  def slice(*args)
    Encoding::Character::UTF8.aref(self, *args)
  end

  def []=(*args)
    Encoding::Character::UTF8.aset(self, *args)
  end

  def casecmp(other)
    Encoding::Character::UTF8.casecmp(self, other)
  end

  def center(*args)
    Encoding::Character::UTF8.center(self, *args)
  end

  def chomp(*args)
    Encoding::Character::UTF8.chomp(self, *args)
  end

  def chomp!(*args)
    Encoding::Character::UTF8.chomp!(self, *args)
  end

  def chop
    Encoding::Character::UTF8.chop(self)
  end

  def chop!
    Encoding::Character::UTF8.chop!(self)
  end

  def count(*args)
    Encoding::Character::UTF8.count(self, *args)
  end

  def delete(*args)
    Encoding::Character::UTF8.delete(self, *args)
  end

  def delete!(*args)
    Encoding::Character::UTF8.delete!(self, *args)
  end

  def downcase
    Encoding::Character::UTF8.downcase(self)
  end
  def_thunk_replacing_variant :downcase

  def each_char(&block)
    Encoding::Character::UTF8.each_char(self, &block)
  end

  def index(*args)
    Encoding::Character::UTF8.index(self, *args)
  end

  def insert(index, other)
    Encoding::Character::UTF8.insert(self, index, other)
  end

  def length
    Encoding::Character::UTF8.length(self)
  end

  def lstrip
    Encoding::Character::UTF8.lstrip(self)
  end

  def lstrip!
    Encoding::Character::UTF8.lstrip!(self)
  end

  def normalize(*args)
    Encoding::Character::UTF8.normalize(self, *args)
  end

  def rindex(*args)
    Encoding::Character::UTF8.rindex(self, *args)
  end

  def rstrip
    Encoding::Character::UTF8.rstrip(self)
  end

  def rstrip!
    Encoding::Character::UTF8.rstrip!(self)
  end

  def reverse
    Encoding::Character::UTF8.reverse(self)
  end
  def_thunk_replacing_variant :reverse

  def squeeze
    Encoding::Character::UTF8.squeeze(self)
  end

  def squeeze!
    Encoding::Character::UTF8.squeeze!(self)
  end

  def strip
    Encoding::Character::UTF8.strip(self)
  end

  def strip!
    Encoding::Character::UTF8.strip!(self)
  end

  def to_i(*args)
    Encoding::Character::UTF8.to_i(self, *args)
  end

  def tr(from, to)
    Encoding::Character::UTF8.tr(self, from, to)
  end

  # Overwrites the receiver with the result of #tr and returns the receiver.
  def tr!(from, to)
    replace(tr(from, to))
  end

  def tr_s(from, to)
    Encoding::Character::UTF8.tr_s(self, from, to)
  end

  # Overwrites the receiver with the result of #tr_s and returns the receiver.
  def tr_s!(from, to)
    replace(tr_s(from, to))
  end

  # The core String#inspect output with a leading "u".
  def inspect
    "u#{_inspect}"
  end

  def ljust(*args)
    Encoding::Character::UTF8.ljust(self, *args)
  end

  def rjust(*args)
    Encoding::Character::UTF8.rjust(self, *args)
  end

  def upcase
    Encoding::Character::UTF8.upcase(self)
  end
  def_thunk_replacing_variant :upcase

  # Returns a new string whose first character is upcased and whose remaining
  # characters are downcased.  An empty receiver yields an empty copy.
  def capitalize
    # With core String#[] semantics both slices below are nil for an empty
    # string, which used to end in a NoMethodError.
    return dup if empty?

    # Call the UTF-8 functions directly: nothing visible here guarantees that
    # the slices returned by #[] are themselves extended with this module, so
    # calling #upcase/#downcase on them could bypass the UTF-8 versions.
    utf8 = Encoding::Character::UTF8
    utf8.upcase(self[0, 1]) + utf8.downcase(self[1..-1])
  end
  def_thunk_replacing_variant :capitalize

  def foldcase
    Encoding::Character::UTF8.foldcase(self)
  end
  def_thunk_replacing_variant :foldcase

  private

  # Core String#inspect, captured as an UnboundMethod so that #inspect above can
  # reach it even though this module overrides #inspect.
  Inspect = String.instance_method(:inspect)

  def _inspect
    Inspect.bind(self).call
  end
end
190
+
191
class String
  # Unary plus.  Mixes Encoding::Character::UTF8::Methods into this very string
  # (no copy is made) and returns it.
  #
  # NOTE(review): the module defined in the visible part of this file is
  # U::String; confirm that Encoding::Character::UTF8::Methods is defined
  # somewhere else (for instance by the required extension).
  def +@
    extend Encoding::Character::UTF8::Methods
  end
end
196
+
197
module Kernel
  # Mixes Encoding::Character::UTF8::Methods into +str+ itself (no copy is
  # made) and returns +str+.
  #
  # NOTE(review): the module defined in the visible part of this file is
  # U::String; confirm that Encoding::Character::UTF8::Methods is defined
  # somewhere else (for instance by the required extension).
  def u(str)
    utf8_methods = Encoding::Character::UTF8::Methods
    str.extend(utf8_methods)
  end
end
data/lib/u.rb ADDED
@@ -0,0 +1,16 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# Namespace of the u library.  It is declared before anything is required
# because u/string.rb opens `module U::String`, which needs U to exist already.
module U
end

require 'u/string'
require 'u/version'
7
+
8
class String
  # Returns a duplicate of this string extended with U::String.  The receiver
  # itself is left untouched.
  def u
    copy = dup
    copy.u!
  end

  # Extends this very string with U::String and returns it.
  def u!
    extend U::String
  end
end
data/lib/u/string.rb ADDED
@@ -0,0 +1,185 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'encoding/character/utf-8/utf8'
4
+
5
# UTF-8 String methods, mixed into individual String instances with
# Object#extend.  Apart from the few methods built on top of others (#inspect,
# #capitalize and the bang variants that end in #replace), every method
# forwards to the module function of the same name on
# Encoding::Character::UTF8, passing the receiver as the first argument.
#
# The bang variants written with #replace (#downcase!, #reverse!, #tr!, #tr_s!,
# #upcase!, #capitalize!, #foldcase!) return the receiver whether or not the
# content changed, because that is what #replace returns.
module U::String
  def <=>(other)
    Encoding::Character::UTF8.collate(self, other)
  end

  def [](*args)
    Encoding::Character::UTF8.aref(self, *args)
  end

  # Same operation as #[].
  def slice(*args)
    Encoding::Character::UTF8.aref(self, *args)
  end

  def []=(*args)
    Encoding::Character::UTF8.aset(self, *args)
  end

  def casecmp(other)
    Encoding::Character::UTF8.casecmp(self, other)
  end

  def center(*args)
    Encoding::Character::UTF8.center(self, *args)
  end

  def chomp(*args)
    Encoding::Character::UTF8.chomp(self, *args)
  end

  def chomp!(*args)
    Encoding::Character::UTF8.chomp!(self, *args)
  end

  def chop
    Encoding::Character::UTF8.chop(self)
  end

  def chop!
    Encoding::Character::UTF8.chop!(self)
  end

  def count(*args)
    Encoding::Character::UTF8.count(self, *args)
  end

  def delete(*args)
    Encoding::Character::UTF8.delete(self, *args)
  end

  def delete!(*args)
    Encoding::Character::UTF8.delete!(self, *args)
  end

  def downcase
    Encoding::Character::UTF8.downcase(self)
  end

  def downcase!
    replace(downcase)
  end

  def each_char(&block)
    Encoding::Character::UTF8.each_char(self, &block)
  end

  def index(*args)
    Encoding::Character::UTF8.index(self, *args)
  end

  def insert(index, other)
    Encoding::Character::UTF8.insert(self, index, other)
  end

  def length
    Encoding::Character::UTF8.length(self)
  end

  def lstrip
    Encoding::Character::UTF8.lstrip(self)
  end

  def lstrip!
    Encoding::Character::UTF8.lstrip!(self)
  end

  def normalize(*args)
    Encoding::Character::UTF8.normalize(self, *args)
  end

  def rindex(*args)
    Encoding::Character::UTF8.rindex(self, *args)
  end

  def rstrip
    Encoding::Character::UTF8.rstrip(self)
  end

  def rstrip!
    Encoding::Character::UTF8.rstrip!(self)
  end

  def reverse
    Encoding::Character::UTF8.reverse(self)
  end

  def reverse!
    replace(reverse)
  end

  def squeeze
    Encoding::Character::UTF8.squeeze(self)
  end

  def squeeze!
    Encoding::Character::UTF8.squeeze!(self)
  end

  def strip
    Encoding::Character::UTF8.strip(self)
  end

  def strip!
    Encoding::Character::UTF8.strip!(self)
  end

  def to_i(*args)
    Encoding::Character::UTF8.to_i(self, *args)
  end

  def tr(from, to)
    Encoding::Character::UTF8.tr(self, from, to)
  end

  def tr!(from, to)
    replace(tr(from, to))
  end

  def tr_s(from, to)
    Encoding::Character::UTF8.tr_s(self, from, to)
  end

  def tr_s!(from, to)
    replace(tr_s(from, to))
  end

  # The inherited #inspect output with a leading "u".
  def inspect
    'u%s' % super
  end

  def ljust(*args)
    Encoding::Character::UTF8.ljust(self, *args)
  end

  def rjust(*args)
    Encoding::Character::UTF8.rjust(self, *args)
  end

  def upcase
    Encoding::Character::UTF8.upcase(self)
  end

  def upcase!
    replace(upcase)
  end

  # Returns a new string whose first character is upcased and whose remaining
  # characters are downcased.  An empty receiver yields an empty copy.
  def capitalize
    # With core String#[] semantics both slices below are nil for an empty
    # string, which used to end in a NoMethodError.
    return dup if empty?

    # Call the UTF-8 functions directly: nothing visible here guarantees that
    # the slices returned by #[] are themselves extended with this module, so
    # calling #upcase/#downcase on them could bypass the UTF-8 versions.
    utf8 = Encoding::Character::UTF8
    utf8.upcase(self[0, 1]) + utf8.downcase(self[1..-1])
  end

  def capitalize!
    replace(capitalize)
  end

  def foldcase
    Encoding::Character::UTF8.foldcase(self)
  end

  def foldcase!
    replace(foldcase)
  end
end
data/lib/u/version.rb ADDED
@@ -0,0 +1,5 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
module U
  # Release version of the u library.  Frozen so that no caller can mutate the
  # shared constant in place.
  Version = '0.5.0'.freeze
end
data/test/unit/u.rb ADDED
@@ -0,0 +1,5 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ Expectations do
4
+ # Placeholder: this file defines no expectations yet.
5
+ end
@@ -0,0 +1,91 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ Expectations do
4
+ expect ''.u[0, -2].to.be.nil?
5
+ expect ''.u[0, -1].to.be.nil?
6
+ # [] above: a negative length yields nil. Below: offset 0 of an empty string with a length >= 0 yields ''.
7
+ expect '' do ''.u[0, 0] end
8
+ expect '' do ''.u[0, 1] end
9
+ expect '' do ''.u[0, 2] end
10
+ # []: any non-zero offset into an empty string yields nil, whatever the length.
11
+ expect ''.u[-1, -2].to.be.nil?
12
+ expect ''.u[-1, -1].to.be.nil?
13
+ expect ''.u[-1, 0].to.be.nil?
14
+ expect ''.u[-1, 1].to.be.nil?
15
+ expect ''.u[-1, 2].to.be.nil?
16
+ expect ''.u[1, -2].to.be.nil?
17
+ expect ''.u[1, -1].to.be.nil?
18
+ expect ''.u[1, 0].to.be.nil?
19
+ expect ''.u[1, 1].to.be.nil?
20
+ expect ''.u[1, 2].to.be.nil?
21
+ # []: offset and length are counted in characters, not bytes (ë and ö are multi-byte in UTF-8).
22
+ expect 'hë' do 'hëllö'.u[0, 2] end
23
+ expect 'lö' do 'hëllö'.u[3, 2] end
24
+ # count: how many characters of the receiver occur in the given set.
25
+ expect 0 do ''.u.count('whatever') end
26
+ expect 1 do 'helo'.u.count('l') end
27
+ expect 1 do 'helo'.u.count('wrld') end
28
+ expect 2 do 'helo'.u.count('world') end
29
+ expect 3 do 'hello'.u.count('world') end
30
+ # delete: removes every occurrence of the given characters.
31
+ expect ''.u.delete('whatever').to.be.empty?
32
+ expect 'hëll' do 'hëllö'.u.delete('ö') end
33
+ # each_char: the block is never called for an empty string.
34
+ expect 0 do
35
+ i = 0
36
+ ''.u.each_char{ i += 1 }
37
+ i
38
+ end
39
+ # each_char: yields each character as its own string, in order.
40
+ expect %w[h ë l l ö] do
41
+ [].tap{ |cs| 'hëllö'.u.each_char{ |c| cs << c } }
42
+ end
43
+ # index: results and the optional start offset are character positions.
44
+ expect 0 do ''.u.index('') end
45
+ expect nil do ''.u.index('', 1) end
46
+ expect nil do ''.u.index('', -1) end
47
+ expect 0 do 'hëllö'.u.index('hë') end
48
+ expect 3 do 'hëllö'.u.index('lö') end
49
+ expect 3 do 'hëllö'.u.index('lö', 3) end
50
+ # insert: negative indexes count from the end and insert after that character.
51
+ expect '' do ''.u.insert(0, '') end
52
+ expect IndexError do ''.u.insert(1, '') end
53
+ expect 'äbc' do ''.u.insert(0, 'äbc') end
54
+ expect 'hëöll' do 'hëö'.u.insert(3, 'll') end
55
+ expect 'hëöll' do 'hëö'.u.insert(-1, 'll') end
56
+ expect 'hëllö' do 'hëö'.u.insert(2, 'll') end
57
+ expect 'hëllö' do 'hëö'.u.insert(-2, 'll') end
58
+ expect 'llhëö' do 'hëö'.u.insert(0, 'll') end
59
+ #expect 'llhëö' do 'hëö'.u.insert(-4, 'll') end
60
+ # length: counts characters; an embedded NUL counts as one character.
61
+ expect 0 do ''.u.length end
62
+ expect 5 do 'hëllö'.u.length end
63
+ expect 11 do "hëllö\0agäin".u.length end
64
+ # TODO: Why not throw an error here?
65
+ expect 11 do "hëllö\0agäin\303".u.length end
66
+ # rindex: like index, but searches backwards from the optional start offset.
67
+ expect 0 do ''.u.rindex('') end
68
+ expect 0 do 'hëllö'.u.rindex('hë') end
69
+ expect 3 do 'hëllö'.u.rindex('lö') end
70
+ expect 3 do 'hëllö'.u.rindex('lö', 3) end
71
+ expect 5 do 'hëllölö'.u.rindex('lö') end
72
+ expect 5 do 'hëllölö'.u.rindex('lö', 5) end
73
+ expect 3 do 'hëllölö'.u.rindex('lö', 4) end
74
+ # squeeze: collapses a run of the same character into a single one.
75
+ expect ''.u.squeeze.to.be.empty?
76
+ expect 'hëlö' do 'hëllö'.u.squeeze end
77
+ # to_i: base 0 is accepted; bases -1, 1 and 37 raise ArgumentError; the Arabic-Indic digit ١ parses as 1.
78
+ expect 0 do ''.u.to_i end
79
+ expect 0 do ''.u.to_i(0) end
80
+ expect ArgumentError do ''.u.to_i(-1) end
81
+ expect ArgumentError do ''.u.to_i(1) end
82
+ expect ArgumentError do ''.u.to_i(37) end
83
+ expect 1 do '1'.u.to_i end
84
+ expect 1 do '1'.u.to_i(2) end
85
+ expect 1 do '١'.u.to_i end
86
+ # tr: a replacement set shorter than the source set is padded with its last character.
87
+ expect ''.u.tr('abc', 'def').to.be.empty?
88
+ expect 'abcde' do 'äbcdë'.u.tr('äë', 'ae') end
89
+ expect 'ëëëëë' do 'äbcdë'.u.tr('a-zäë', 'ë') end
90
+ expect 'ëëëëë' do 'aaaaa'.u.tr('a', 'ä-ë') end # NOTE(review): core String#tr would map 'a' to 'ä' (the first character of the range) here; confirm 'ë' is intended.
91
+ end