u 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. data/README +38 -0
  2. data/Rakefile +64 -0
  3. data/ext/encoding/character/utf-8/break.c +25 -0
  4. data/ext/encoding/character/utf-8/data/break.h +22931 -0
  5. data/ext/encoding/character/utf-8/data/character-tables.h +14358 -0
  6. data/ext/encoding/character/utf-8/data/compose.h +1607 -0
  7. data/ext/encoding/character/utf-8/data/decompose.h +10926 -0
  8. data/ext/encoding/character/utf-8/data/generate-unicode-data.rb +1070 -0
  9. data/ext/encoding/character/utf-8/decompose.c +444 -0
  10. data/ext/encoding/character/utf-8/depend +65 -0
  11. data/ext/encoding/character/utf-8/extconf.rb +67 -0
  12. data/ext/encoding/character/utf-8/private.c +62 -0
  13. data/ext/encoding/character/utf-8/private.h +51 -0
  14. data/ext/encoding/character/utf-8/properties.c +1056 -0
  15. data/ext/encoding/character/utf-8/rb_includes.h +19 -0
  16. data/ext/encoding/character/utf-8/rb_methods.h +49 -0
  17. data/ext/encoding/character/utf-8/rb_private.h +52 -0
  18. data/ext/encoding/character/utf-8/rb_utf_aref.c +111 -0
  19. data/ext/encoding/character/utf-8/rb_utf_aset.c +105 -0
  20. data/ext/encoding/character/utf-8/rb_utf_casecmp.c +24 -0
  21. data/ext/encoding/character/utf-8/rb_utf_chomp.c +114 -0
  22. data/ext/encoding/character/utf-8/rb_utf_chop.c +44 -0
  23. data/ext/encoding/character/utf-8/rb_utf_collate.c +13 -0
  24. data/ext/encoding/character/utf-8/rb_utf_count.c +30 -0
  25. data/ext/encoding/character/utf-8/rb_utf_delete.c +60 -0
  26. data/ext/encoding/character/utf-8/rb_utf_downcase.c +13 -0
  27. data/ext/encoding/character/utf-8/rb_utf_each_char.c +27 -0
  28. data/ext/encoding/character/utf-8/rb_utf_foldcase.c +13 -0
  29. data/ext/encoding/character/utf-8/rb_utf_hex.c +14 -0
  30. data/ext/encoding/character/utf-8/rb_utf_index.c +50 -0
  31. data/ext/encoding/character/utf-8/rb_utf_insert.c +48 -0
  32. data/ext/encoding/character/utf-8/rb_utf_internal_bignum.c +332 -0
  33. data/ext/encoding/character/utf-8/rb_utf_internal_bignum.h +12 -0
  34. data/ext/encoding/character/utf-8/rb_utf_internal_tr.c +142 -0
  35. data/ext/encoding/character/utf-8/rb_utf_internal_tr.h +41 -0
  36. data/ext/encoding/character/utf-8/rb_utf_justify.c +96 -0
  37. data/ext/encoding/character/utf-8/rb_utf_length.c +14 -0
  38. data/ext/encoding/character/utf-8/rb_utf_lstrip.c +41 -0
  39. data/ext/encoding/character/utf-8/rb_utf_normalize.c +51 -0
  40. data/ext/encoding/character/utf-8/rb_utf_oct.c +14 -0
  41. data/ext/encoding/character/utf-8/rb_utf_reverse.c +13 -0
  42. data/ext/encoding/character/utf-8/rb_utf_rindex.c +88 -0
  43. data/ext/encoding/character/utf-8/rb_utf_rstrip.c +51 -0
  44. data/ext/encoding/character/utf-8/rb_utf_squeeze.c +70 -0
  45. data/ext/encoding/character/utf-8/rb_utf_strip.c +27 -0
  46. data/ext/encoding/character/utf-8/rb_utf_to_i.c +25 -0
  47. data/ext/encoding/character/utf-8/rb_utf_tr.c +250 -0
  48. data/ext/encoding/character/utf-8/rb_utf_upcase.c +13 -0
  49. data/ext/encoding/character/utf-8/tables.h +38 -0
  50. data/ext/encoding/character/utf-8/unicode.c +319 -0
  51. data/ext/encoding/character/utf-8/unicode.h +216 -0
  52. data/ext/encoding/character/utf-8/utf.c +1334 -0
  53. data/lib/encoding/character/utf-8.rb +201 -0
  54. data/lib/u.rb +16 -0
  55. data/lib/u/string.rb +185 -0
  56. data/lib/u/version.rb +5 -0
  57. data/test/unit/u.rb +5 -0
  58. data/test/unit/u/string.rb +91 -0
  59. metadata +174 -0
data/lib/encoding/character/utf-8.rb ADDED
@@ -0,0 +1,201 @@
1
+ # contents: UTF-8 String methods.
2
+ #
3
+ # Copyright © 2006 Nikolai Weibull <now@bitwi.se>
4
+
5
+ require 'encoding/character/utf-8/utf8'
6
+
7
+ # TODO: Rework this to use a dispatch object instead, so that the encoding can
8
+ # be changed on the fly.
9
+ # TODO: Add String#encoding.
# String methods that work on UTF-8 characters by delegating to the module
# functions of Encoding::Character::UTF8 (made available by the require of
# 'encoding/character/utf-8/utf8' at the top of this file).
#
# Every method passes +self+ as the string to operate on, so the module is
# meant to be mixed into String instances.
module U::String
  # Defines "<method>!" as an in-place variant of +method+: it computes
  # send(method) and replaces the receiver's contents with the result.
  # Because it goes through String#replace, the generated method returns the
  # receiver.
  def self.def_thunk_replacing_variant(method)
    define_method(:"#{method}!") do
      replace(send(method))
    end
  end

  # Comparison via Encoding::Character::UTF8.collate.
  def <=>(other)
    Encoding::Character::UTF8.collate(self, other)
  end

  # Element reference; arguments are forwarded unchanged to UTF8.aref.
  def [](*args)
    Encoding::Character::UTF8.aref(self, *args)
  end

  # Same delegation as #[].
  def slice(*args)
    Encoding::Character::UTF8.aref(self, *args)
  end

  # Element assignment; arguments are forwarded unchanged to UTF8.aset.
  def []=(*args)
    Encoding::Character::UTF8.aset(self, *args)
  end

  # Delegates to UTF8.casecmp.
  def casecmp(other)
    Encoding::Character::UTF8.casecmp(self, other)
  end

  # Delegates to UTF8.center.
  def center(*args)
    Encoding::Character::UTF8.center(self, *args)
  end

  # Delegates to UTF8.chomp.
  def chomp(*args)
    Encoding::Character::UTF8.chomp(self, *args)
  end

  # Delegates to UTF8.chomp!.
  def chomp!(*args)
    Encoding::Character::UTF8.chomp!(self, *args)
  end

  # Delegates to UTF8.chop.
  def chop
    Encoding::Character::UTF8.chop(self)
  end

  # Delegates to UTF8.chop!.
  def chop!
    Encoding::Character::UTF8.chop!(self)
  end

  # Delegates to UTF8.count.
  def count(*args)
    Encoding::Character::UTF8.count(self, *args)
  end

  # Delegates to UTF8.delete.
  def delete(*args)
    Encoding::Character::UTF8.delete(self, *args)
  end

  # Delegates to UTF8.delete!.
  def delete!(*args)
    Encoding::Character::UTF8.delete!(self, *args)
  end

  # Delegates to UTF8.downcase; #downcase! is generated below.
  def downcase
    Encoding::Character::UTF8.downcase(self)
  end
  def_thunk_replacing_variant :downcase

  # Delegates to UTF8.each_char, forwarding the block.
  def each_char(&block)
    Encoding::Character::UTF8.each_char(self, &block)
  end

  # Delegates to UTF8.index.
  def index(*args)
    Encoding::Character::UTF8.index(self, *args)
  end

  # Delegates to UTF8.insert.
  def insert(index, other)
    Encoding::Character::UTF8.insert(self, index, other)
  end

  # Delegates to UTF8.length.
  def length
    Encoding::Character::UTF8.length(self)
  end

  # Delegates to UTF8.lstrip.
  def lstrip
    Encoding::Character::UTF8.lstrip(self)
  end

  # Delegates to UTF8.lstrip!.
  def lstrip!
    Encoding::Character::UTF8.lstrip!(self)
  end

  # Delegates to UTF8.normalize.
  def normalize(*args)
    Encoding::Character::UTF8.normalize(self, *args)
  end

  # Delegates to UTF8.rindex.
  def rindex(*args)
    Encoding::Character::UTF8.rindex(self, *args)
  end

  # Delegates to UTF8.rstrip.
  def rstrip
    Encoding::Character::UTF8.rstrip(self)
  end

  # Delegates to UTF8.rstrip!.
  def rstrip!
    Encoding::Character::UTF8.rstrip!(self)
  end

  # Delegates to UTF8.reverse; #reverse! is generated below.
  def reverse
    Encoding::Character::UTF8.reverse(self)
  end
  def_thunk_replacing_variant :reverse

  # Delegates to UTF8.squeeze.
  def squeeze
    Encoding::Character::UTF8.squeeze(self)
  end

  # Delegates to UTF8.squeeze!.
  def squeeze!
    Encoding::Character::UTF8.squeeze!(self)
  end

  # Delegates to UTF8.strip.
  def strip
    Encoding::Character::UTF8.strip(self)
  end

  # Delegates to UTF8.strip!.
  def strip!
    Encoding::Character::UTF8.strip!(self)
  end

  # Delegates to UTF8.to_i.
  def to_i(*args)
    Encoding::Character::UTF8.to_i(self, *args)
  end

  # Delegates to UTF8.tr.
  def tr(from, to)
    Encoding::Character::UTF8.tr(self, from, to)
  end

  # In-place #tr. Goes through String#replace, so it returns the receiver.
  def tr!(from, to)
    replace(tr(from, to))
  end

  # Delegates to UTF8.tr_s.
  def tr_s(from, to)
    Encoding::Character::UTF8.tr_s(self, from, to)
  end

  # In-place #tr_s. Goes through String#replace, so it returns the receiver.
  def tr_s!(from, to)
    replace(tr_s(from, to))
  end

  # String#inspect output prefixed with the letter "u".
  def inspect
    "u#{_inspect}"
  end

  # Delegates to UTF8.ljust.
  def ljust(*args)
    Encoding::Character::UTF8.ljust(self, *args)
  end

  # Delegates to UTF8.rjust.
  def rjust(*args)
    Encoding::Character::UTF8.rjust(self, *args)
  end

  # Delegates to UTF8.upcase; #upcase! is generated below.
  def upcase
    Encoding::Character::UTF8.upcase(self)
  end
  def_thunk_replacing_variant :upcase

  # Returns a new string: the first character upcased, the rest downcased.
  # #capitalize! is generated below.
  #
  # An empty receiver yields an empty copy; slicing it would otherwise hand
  # nil to the case conversion. The conversions are called on
  # Encoding::Character::UTF8 directly so the result does not depend on
  # whether the slices returned by #[] carry this mixin.
  def capitalize
    return dup if empty?

    Encoding::Character::UTF8.upcase(self[0]) +
      Encoding::Character::UTF8.downcase(self[1..-1])
  end
  def_thunk_replacing_variant :capitalize

  # Delegates to UTF8.foldcase; #foldcase! is generated below.
  def foldcase
    Encoding::Character::UTF8.foldcase(self)
  end
  def_thunk_replacing_variant :foldcase

  private

  # The stock String#inspect, captured so #inspect above can still reach it.
  Inspect = String.instance_method(:inspect)

  # Calls the captured String#inspect on the receiver.
  def _inspect
    Inspect.bind(self).call
  end
end
190
+
class String
  # Unary plus: mixes Encoding::Character::UTF8::Methods into the receiver
  # itself (no copy is made) and returns the receiver.
  # NOTE(review): Encoding::Character::UTF8::Methods is not defined in this
  # file, which defines U::String instead -- confirm it is provided elsewhere.
  def +@
    utf8_methods = Encoding::Character::UTF8::Methods
    extend(utf8_methods)
  end
end
196
+
module Kernel
  # Mixes Encoding::Character::UTF8::Methods into +str+ itself (no copy is
  # made) and returns +str+.
  # NOTE(review): Encoding::Character::UTF8::Methods is not defined in this
  # file -- confirm it is provided elsewhere.
  def u(str)
    utf8_methods = Encoding::Character::UTF8::Methods
    str.extend(utf8_methods)
  end
end
data/lib/u.rb ADDED
@@ -0,0 +1,16 @@
1
+ # -*- coding: utf-8 -*-
2
+
# Top-level namespace of the library.
module U
  # The requires run after the module is opened: u/string.rb starts with
  # "module U::String", which needs U to exist already.
  %w[u/string u/version].each { |feature| require feature }
end
7
+
class String
  # Returns a copy of the receiver with U::String mixed in; the receiver
  # itself is left untouched.
  def u
    copy = dup
    copy.u!
  end

  # Mixes U::String into the receiver itself and returns the receiver.
  def u!
    extend(U::String)
  end
end
data/lib/u/string.rb ADDED
@@ -0,0 +1,185 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'encoding/character/utf-8/utf8'
4
+
# String methods that work on UTF-8 characters by delegating to the module
# functions of Encoding::Character::UTF8 (made available by the require of
# 'encoding/character/utf-8/utf8' at the top of this file).
#
# Every method passes +self+ as the string to operate on, so the module is
# meant to be mixed into String instances.
module U::String
  # Comparison via Encoding::Character::UTF8.collate.
  def <=>(other)
    Encoding::Character::UTF8.collate(self, other)
  end

  # Element reference; arguments are forwarded unchanged to UTF8.aref.
  def [](*args)
    Encoding::Character::UTF8.aref(self, *args)
  end

  # Same delegation as #[].
  def slice(*args)
    Encoding::Character::UTF8.aref(self, *args)
  end

  # Element assignment; arguments are forwarded unchanged to UTF8.aset.
  def []=(*args)
    Encoding::Character::UTF8.aset(self, *args)
  end

  # Delegates to UTF8.casecmp.
  def casecmp(other)
    Encoding::Character::UTF8.casecmp(self, other)
  end

  # Delegates to UTF8.center.
  def center(*args)
    Encoding::Character::UTF8.center(self, *args)
  end

  # Delegates to UTF8.chomp.
  def chomp(*args)
    Encoding::Character::UTF8.chomp(self, *args)
  end

  # Delegates to UTF8.chomp!.
  def chomp!(*args)
    Encoding::Character::UTF8.chomp!(self, *args)
  end

  # Delegates to UTF8.chop.
  def chop
    Encoding::Character::UTF8.chop(self)
  end

  # Delegates to UTF8.chop!.
  def chop!
    Encoding::Character::UTF8.chop!(self)
  end

  # Delegates to UTF8.count.
  def count(*args)
    Encoding::Character::UTF8.count(self, *args)
  end

  # Delegates to UTF8.delete.
  def delete(*args)
    Encoding::Character::UTF8.delete(self, *args)
  end

  # Delegates to UTF8.delete!.
  def delete!(*args)
    Encoding::Character::UTF8.delete!(self, *args)
  end

  # Delegates to UTF8.downcase.
  def downcase
    Encoding::Character::UTF8.downcase(self)
  end

  # In-place #downcase. Goes through String#replace, so it returns the
  # receiver.
  def downcase!
    replace(downcase)
  end

  # Delegates to UTF8.each_char, forwarding the block.
  def each_char(&block)
    Encoding::Character::UTF8.each_char(self, &block)
  end

  # Delegates to UTF8.index.
  def index(*args)
    Encoding::Character::UTF8.index(self, *args)
  end

  # Delegates to UTF8.insert.
  def insert(index, other)
    Encoding::Character::UTF8.insert(self, index, other)
  end

  # Delegates to UTF8.length.
  def length
    Encoding::Character::UTF8.length(self)
  end

  # Delegates to UTF8.lstrip.
  def lstrip
    Encoding::Character::UTF8.lstrip(self)
  end

  # Delegates to UTF8.lstrip!.
  def lstrip!
    Encoding::Character::UTF8.lstrip!(self)
  end

  # Delegates to UTF8.normalize.
  def normalize(*args)
    Encoding::Character::UTF8.normalize(self, *args)
  end

  # Delegates to UTF8.rindex.
  def rindex(*args)
    Encoding::Character::UTF8.rindex(self, *args)
  end

  # Delegates to UTF8.rstrip.
  def rstrip
    Encoding::Character::UTF8.rstrip(self)
  end

  # Delegates to UTF8.rstrip!.
  def rstrip!
    Encoding::Character::UTF8.rstrip!(self)
  end

  # Delegates to UTF8.reverse.
  def reverse
    Encoding::Character::UTF8.reverse(self)
  end

  # In-place #reverse. Goes through String#replace, so it returns the
  # receiver.
  def reverse!
    replace(reverse)
  end

  # Delegates to UTF8.squeeze.
  def squeeze
    Encoding::Character::UTF8.squeeze(self)
  end

  # Delegates to UTF8.squeeze!.
  def squeeze!
    Encoding::Character::UTF8.squeeze!(self)
  end

  # Delegates to UTF8.strip.
  def strip
    Encoding::Character::UTF8.strip(self)
  end

  # Delegates to UTF8.strip!.
  def strip!
    Encoding::Character::UTF8.strip!(self)
  end

  # Delegates to UTF8.to_i.
  def to_i(*args)
    Encoding::Character::UTF8.to_i(self, *args)
  end

  # Delegates to UTF8.tr.
  def tr(from, to)
    Encoding::Character::UTF8.tr(self, from, to)
  end

  # In-place #tr. Goes through String#replace, so it returns the receiver.
  def tr!(from, to)
    replace(tr(from, to))
  end

  # Delegates to UTF8.tr_s.
  def tr_s(from, to)
    Encoding::Character::UTF8.tr_s(self, from, to)
  end

  # In-place #tr_s. Goes through String#replace, so it returns the receiver.
  def tr_s!(from, to)
    replace(tr_s(from, to))
  end

  # The inherited #inspect output prefixed with the letter "u".
  def inspect
    'u%s' % super
  end

  # Delegates to UTF8.ljust.
  def ljust(*args)
    Encoding::Character::UTF8.ljust(self, *args)
  end

  # Delegates to UTF8.rjust.
  def rjust(*args)
    Encoding::Character::UTF8.rjust(self, *args)
  end

  # Delegates to UTF8.upcase.
  def upcase
    Encoding::Character::UTF8.upcase(self)
  end

  # In-place #upcase. Goes through String#replace, so it returns the
  # receiver.
  def upcase!
    replace(upcase)
  end

  # Returns a new string: the first character upcased, the rest downcased.
  #
  # An empty receiver yields an empty copy; slicing it would otherwise hand
  # nil to the case conversion. The conversions are called on
  # Encoding::Character::UTF8 directly so the result does not depend on
  # whether the slices returned by #[] carry this mixin.
  def capitalize
    return dup if empty?

    Encoding::Character::UTF8.upcase(self[0]) +
      Encoding::Character::UTF8.downcase(self[1..-1])
  end

  # In-place #capitalize. Goes through String#replace, so it returns the
  # receiver.
  def capitalize!
    replace(capitalize)
  end

  # Delegates to UTF8.foldcase.
  def foldcase
    Encoding::Character::UTF8.foldcase(self)
  end

  # In-place #foldcase. Goes through String#replace, so it returns the
  # receiver.
  def foldcase!
    replace(foldcase)
  end
end
data/lib/u/version.rb ADDED
@@ -0,0 +1,5 @@
1
+ # -*- coding: utf-8 -*-
2
+
module U
  # Version string of the library. Frozen so the shared constant cannot be
  # mutated in place by callers.
  Version = '0.5.0'.freeze
end
data/test/unit/u.rb ADDED
@@ -0,0 +1,5 @@
1
+ # -*- coding: utf-8 -*-
2
+
# Placeholder suite: no expectations are defined in this file yet.
Expectations do

end
data/test/unit/u/string.rb ADDED
@@ -0,0 +1,91 @@
1
+ # -*- coding: utf-8 -*-
2
+
# Expectations for strings returned by String#u. Each group below exercises
# one method, using both empty strings and strings with non-ASCII characters.
Expectations do
  # [] with (start, length) on an empty string: a negative length gives nil.
  expect ''.u[0, -2].to.be.nil?
  expect ''.u[0, -1].to.be.nil?

  # Start 0 with a non-negative length gives an empty string.
  expect '' do ''.u[0, 0] end
  expect '' do ''.u[0, 1] end
  expect '' do ''.u[0, 2] end

  # Any start other than 0 is out of range for an empty string and gives nil.
  expect ''.u[-1, -2].to.be.nil?
  expect ''.u[-1, -1].to.be.nil?
  expect ''.u[-1, 0].to.be.nil?
  expect ''.u[-1, 1].to.be.nil?
  expect ''.u[-1, 2].to.be.nil?
  expect ''.u[1, -2].to.be.nil?
  expect ''.u[1, -1].to.be.nil?
  expect ''.u[1, 0].to.be.nil?
  expect ''.u[1, 1].to.be.nil?
  expect ''.u[1, 2].to.be.nil?

  # Start and length are counted in characters, not bytes.
  expect 'hë' do 'hëllö'.u[0, 2] end
  expect 'lö' do 'hëllö'.u[3, 2] end

  # count
  expect 0 do ''.u.count('whatever') end
  expect 1 do 'helo'.u.count('l') end
  expect 1 do 'helo'.u.count('wrld') end
  expect 2 do 'helo'.u.count('world') end
  expect 3 do 'hello'.u.count('world') end

  # delete
  expect ''.u.delete('whatever').to.be.empty?
  expect 'hëll' do 'hëllö'.u.delete('ö') end

  # each_char: the block is never called for an empty string.
  expect 0 do
    i = 0
    ''.u.each_char{ i += 1 }
    i
  end

  # each_char yields one string per character.
  expect %w[h ë l l ö] do
    [].tap{ |cs| 'hëllö'.u.each_char{ |c| cs << c } }
  end

  # index: results and the optional start offset are character positions.
  expect 0 do ''.u.index('') end
  expect nil do ''.u.index('', 1) end
  expect nil do ''.u.index('', -1) end
  expect 0 do 'hëllö'.u.index('hë') end
  expect 3 do 'hëllö'.u.index('lö') end
  expect 3 do 'hëllö'.u.index('lö', 3) end

  # insert: positions are character positions; an out-of-range one raises.
  expect '' do ''.u.insert(0, '') end
  expect IndexError do ''.u.insert(1, '') end
  expect 'äbc' do ''.u.insert(0, 'äbc') end
  expect 'hëöll' do 'hëö'.u.insert(3, 'll') end
  expect 'hëöll' do 'hëö'.u.insert(-1, 'll') end
  expect 'hëllö' do 'hëö'.u.insert(2, 'll') end
  expect 'hëllö' do 'hëö'.u.insert(-2, 'll') end
  expect 'llhëö' do 'hëö'.u.insert(0, 'll') end
  # NOTE(review): the next expectation is disabled and the reason is not
  # recorded here -- confirm whether insert(-4, ...) is meant to work.
  #expect 'llhëö' do 'hëö'.u.insert(-4, 'll') end

  # length: counts characters; an embedded NUL counts as one character.
  expect 0 do ''.u.length end
  expect 5 do 'hëllö'.u.length end
  expect 11 do "hëllö\0agäin".u.length end
  # "\303" is a lone UTF-8 lead byte (an incomplete sequence); the expected
  # length shows it is not counted.
  # TODO: Why not throw an error here?
  expect 11 do "hëllö\0agäin\303".u.length end

  # rindex: results and the optional start offset are character positions.
  expect 0 do ''.u.rindex('') end
  expect 0 do 'hëllö'.u.rindex('hë') end
  expect 3 do 'hëllö'.u.rindex('lö') end
  expect 3 do 'hëllö'.u.rindex('lö', 3) end
  expect 5 do 'hëllölö'.u.rindex('lö') end
  expect 5 do 'hëllölö'.u.rindex('lö', 5) end
  expect 3 do 'hëllölö'.u.rindex('lö', 4) end

  # squeeze
  expect ''.u.squeeze.to.be.empty?
  expect 'hëlö' do 'hëllö'.u.squeeze end

  # to_i: bases -1, 1 and 37 are rejected; a non-ASCII digit is parsed.
  expect 0 do ''.u.to_i end
  expect 0 do ''.u.to_i(0) end
  expect ArgumentError do ''.u.to_i(-1) end
  expect ArgumentError do ''.u.to_i(1) end
  expect ArgumentError do ''.u.to_i(37) end
  expect 1 do '1'.u.to_i end
  expect 1 do '1'.u.to_i(2) end
  expect 1 do '١'.u.to_i end

  # tr: the from/to sets, including ranges, may contain non-ASCII characters.
  expect ''.u.tr('abc', 'def').to.be.empty?
  expect 'abcde' do 'äbcdë'.u.tr('äë', 'ae') end
  expect 'ëëëëë' do 'äbcdë'.u.tr('a-zäë', 'ë') end
  expect 'ëëëëë' do 'aaaaa'.u.tr('a', 'ä-ë') end
end