unicode 0.4.2 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +29 -7
- data/tools/README +3 -2
- data/tools/mkunidata.rb +136 -12
- data/unicode.c +379 -16
- data/unidata.map +24536 -24435
- data/wstring.c +69 -1
- data/wstring.h +2 -0
- metadata +20 -38
data/wstring.c
CHANGED
@@ -43,7 +43,10 @@ WStr_free(WString* str)
|
|
43
43
|
{
|
44
44
|
str->size = 0;
|
45
45
|
str->len = 0;
|
46
|
-
|
46
|
+
if (str->str) {
|
47
|
+
free(str->str);
|
48
|
+
str->str = NULL;
|
49
|
+
}
|
47
50
|
}
|
48
51
|
|
49
52
|
int
|
@@ -164,6 +167,59 @@ WStr_allocWithUTF8(WString* s, const char* in)
|
|
164
167
|
return s;
|
165
168
|
}
|
166
169
|
|
170
|
+
WString*
|
171
|
+
WStr_allocWithUTF8L(WString* s, const char* in, int len)
|
172
|
+
{
|
173
|
+
int i;
|
174
|
+
int u = 0;
|
175
|
+
int rest = 0;
|
176
|
+
|
177
|
+
WStr_alloc(s);
|
178
|
+
if (in == NULL)
|
179
|
+
return s;
|
180
|
+
for (i = 0; i < len; i++) {
|
181
|
+
unsigned char c = in[i];
|
182
|
+
if ((c & 0xc0) == 0x80) {
|
183
|
+
if (rest == 0)
|
184
|
+
return NULL;
|
185
|
+
u = (u << 6) | (c & 63);
|
186
|
+
rest--;
|
187
|
+
if (rest == 0) {
|
188
|
+
WStr_addWChar(s, u);
|
189
|
+
}
|
190
|
+
}
|
191
|
+
else if ((c & 0x80) == 0) { /* 0b0nnnnnnn (7bit) */
|
192
|
+
WStr_addWChar(s, c);
|
193
|
+
rest = 0;
|
194
|
+
}
|
195
|
+
else if ((c & 0xe0) == 0xc0) { /* 0b110nnnnn (11bit) */
|
196
|
+
rest = 1;
|
197
|
+
u = c & 31;
|
198
|
+
}
|
199
|
+
else if ((c & 0xf0) == 0xe0) { /* 0b1110nnnn (16bit) */
|
200
|
+
rest = 2;
|
201
|
+
u = c & 15;
|
202
|
+
}
|
203
|
+
else if ((c & 0xf8) == 0xf0) { /* 0b11110nnn (21bit) */
|
204
|
+
rest = 3;
|
205
|
+
u = c & 7;
|
206
|
+
}
|
207
|
+
else if ((c & 0xfc) == 0xf8) { /* 0b111110nn (26bit) */
|
208
|
+
rest = 4;
|
209
|
+
u = c & 3;
|
210
|
+
}
|
211
|
+
else if ((c & 0xfe) == 0xfc) { /* 0b1111110n (31bit) */
|
212
|
+
rest = 5;
|
213
|
+
u = c & 1;
|
214
|
+
}
|
215
|
+
else {
|
216
|
+
return NULL;
|
217
|
+
}
|
218
|
+
}
|
219
|
+
|
220
|
+
return s;
|
221
|
+
}
|
222
|
+
|
167
223
|
UString*
|
168
224
|
WStr_convertIntoUString(WString* wstr, UString* ustr)
|
169
225
|
{
|
@@ -176,6 +232,18 @@ WStr_convertIntoUString(WString* wstr, UString* ustr)
|
|
176
232
|
return ustr;
|
177
233
|
}
|
178
234
|
|
235
|
+
UString*
|
236
|
+
WStr_convertIntoUString2(WString* wstr, int start, int len, UString* ustr)
|
237
|
+
{
|
238
|
+
int i;
|
239
|
+
|
240
|
+
for (i = start; i < wstr->len && i < start + len; i++) {
|
241
|
+
UniStr_addWChar(ustr, wstr->str[i]);
|
242
|
+
}
|
243
|
+
|
244
|
+
return ustr;
|
245
|
+
}
|
246
|
+
|
179
247
|
void
|
180
248
|
WStr_dump(WString* s)
|
181
249
|
{
|
data/wstring.h
CHANGED
@@ -24,6 +24,7 @@ typedef struct _WString {
|
|
24
24
|
|
25
25
|
WString* WStr_alloc(WString* str);
|
26
26
|
WString* WStr_allocWithUTF8(WString* s, const char* u);
|
27
|
+
WString* WStr_allocWithUTF8L(WString* s, const char* u, int len);
|
27
28
|
WString* WStr_enlarge(WString* str, int size);
|
28
29
|
void WStr_free(WString* str);
|
29
30
|
int WStr_addWChars(WString* s, const int* a, int len);
|
@@ -32,6 +33,7 @@ int WStr_pushWString(WString* s, const WString* add);
|
|
32
33
|
int WStr_addWChar2(WString* s, int a1, int a2);
|
33
34
|
int WStr_addWChar3(WString* s, int a1, int a2, int a3);
|
34
35
|
UString* WStr_convertIntoUString(WString* wstr, UString* ustr);
|
36
|
+
UString* WStr_convertIntoUString2(WString* wstr, int start, int len, UString* ustr);
|
35
37
|
void WStr_dump(WString* s);
|
36
38
|
|
37
39
|
#ifdef __cplusplus
|
metadata
CHANGED
@@ -1,32 +1,24 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: unicode
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.4.3
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 4
|
9
|
-
- 2
|
10
|
-
version: 0.4.2
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Yoshida Masato
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
date: 2011-02-03 00:00:00 Z
|
12
|
+
date: 2012-08-07 00:00:00.000000000 Z
|
19
13
|
dependencies: []
|
20
|
-
|
21
14
|
description: Unicode normalization library.
|
22
15
|
email: yoshidam@yoshidam.net
|
23
16
|
executables: []
|
24
|
-
|
25
|
-
extensions:
|
17
|
+
extensions:
|
26
18
|
- extconf.rb
|
27
|
-
extra_rdoc_files:
|
19
|
+
extra_rdoc_files:
|
28
20
|
- README
|
29
|
-
files:
|
21
|
+
files:
|
30
22
|
- extconf.rb
|
31
23
|
- unicode.c
|
32
24
|
- ustring.c
|
@@ -40,36 +32,26 @@ files:
|
|
40
32
|
- unidata.map
|
41
33
|
homepage: http://www.yoshidam.net/Ruby.html#unicode
|
42
34
|
licenses: []
|
43
|
-
|
44
35
|
post_install_message:
|
45
36
|
rdoc_options: []
|
46
|
-
|
47
|
-
require_paths:
|
37
|
+
require_paths:
|
48
38
|
- .
|
49
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
39
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
50
40
|
none: false
|
51
|
-
requirements:
|
52
|
-
- -
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
|
55
|
-
|
56
|
-
- 0
|
57
|
-
version: "0"
|
58
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ! '>='
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
59
46
|
none: false
|
60
|
-
requirements:
|
61
|
-
- -
|
62
|
-
- !ruby/object:Gem::Version
|
63
|
-
|
64
|
-
segments:
|
65
|
-
- 0
|
66
|
-
version: "0"
|
47
|
+
requirements:
|
48
|
+
- - ! '>='
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
67
51
|
requirements: []
|
68
|
-
|
69
52
|
rubyforge_project:
|
70
|
-
rubygems_version: 1.8.
|
53
|
+
rubygems_version: 1.8.24
|
71
54
|
signing_key:
|
72
55
|
specification_version: 3
|
73
56
|
summary: Unicode normalization library.
|
74
57
|
test_files: []
|
75
|
-
|