unicode 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +16 -11
- data/test.rb +9 -8
- data/tools/README +2 -2
- data/tools/mkunidata.rb +20 -10
- data/unicode.c +132 -54
- data/unidata.map +12976 -1764
- data/ustring.c +27 -25
- data/ustring.h +12 -12
- data/wstring.c +11 -11
- metadata +4 -4
data/ustring.c
CHANGED
@@ -5,10 +5,12 @@
|
|
5
5
|
*/
|
6
6
|
|
7
7
|
#include <stdio.h>
|
8
|
+
#include <stdlib.h>
|
9
|
+
#include <string.h>
|
8
10
|
#include "ustring.h"
|
9
11
|
|
10
12
|
UString*
|
11
|
-
|
13
|
+
UniStr_alloc(UString* str)
|
12
14
|
{
|
13
15
|
str->size = USTR_INITIAL_STRING_LEN;
|
14
16
|
str->len = 0;
|
@@ -21,7 +23,7 @@ UStr_alloc(UString* str)
|
|
21
23
|
}
|
22
24
|
|
23
25
|
UString*
|
24
|
-
|
26
|
+
UniStr_enlarge(UString* str, int size)
|
25
27
|
{
|
26
28
|
unsigned char* newptr;
|
27
29
|
|
@@ -36,7 +38,7 @@ UStr_enlarge(UString* str, int size)
|
|
36
38
|
}
|
37
39
|
|
38
40
|
void
|
39
|
-
|
41
|
+
UniStr_free(UString* str)
|
40
42
|
{
|
41
43
|
str->size = 0;
|
42
44
|
str->len = 0;
|
@@ -44,10 +46,10 @@ UStr_free(UString* str)
|
|
44
46
|
}
|
45
47
|
|
46
48
|
int
|
47
|
-
|
49
|
+
UniStr_addChars(UString* s, const unsigned char* a, int len)
|
48
50
|
{
|
49
51
|
if (s->len + len >= s->size) {
|
50
|
-
|
52
|
+
UniStr_enlarge(s, len + USTR_STRING_EXTEND_LEN);
|
51
53
|
}
|
52
54
|
memcpy(s->str + s->len, a, len);
|
53
55
|
s->len += len;
|
@@ -56,10 +58,10 @@ UStr_addChars(UString* s, const unsigned char* a, int len)
|
|
56
58
|
}
|
57
59
|
|
58
60
|
int
|
59
|
-
|
61
|
+
UniStr_addChar(UString* s, unsigned char a)
|
60
62
|
{
|
61
63
|
if (s->len + 1 >= s->size) {
|
62
|
-
|
64
|
+
UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
|
63
65
|
}
|
64
66
|
*(s->str + s->len) = a;
|
65
67
|
(s->len)++;
|
@@ -68,10 +70,10 @@ UStr_addChar(UString* s, unsigned char a)
|
|
68
70
|
}
|
69
71
|
|
70
72
|
int
|
71
|
-
|
73
|
+
UniStr_addChar2(UString* s, unsigned char a1, unsigned char a2)
|
72
74
|
{
|
73
75
|
if (s->len + 2 >= s->size) {
|
74
|
-
|
76
|
+
UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
|
75
77
|
}
|
76
78
|
*(s->str + s->len) = a1;
|
77
79
|
*(s->str + s->len + 1) = a2;
|
@@ -81,10 +83,10 @@ UStr_addChar2(UString* s, unsigned char a1, unsigned char a2)
|
|
81
83
|
}
|
82
84
|
|
83
85
|
int
|
84
|
-
|
86
|
+
UniStr_addChar3(UString* s, unsigned char a1, unsigned char a2, unsigned char a3)
|
85
87
|
{
|
86
88
|
if (s->len + 3 >= s->size) {
|
87
|
-
|
89
|
+
UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
|
88
90
|
}
|
89
91
|
*(s->str + s->len) = a1;
|
90
92
|
*(s->str + s->len + 1) = a2;
|
@@ -95,11 +97,11 @@ UStr_addChar3(UString* s, unsigned char a1, unsigned char a2, unsigned char a3)
|
|
95
97
|
}
|
96
98
|
|
97
99
|
int
|
98
|
-
|
100
|
+
UniStr_addChar4(UString* s, unsigned char a1, unsigned char a2,
|
99
101
|
unsigned char a3, unsigned char a4)
|
100
102
|
{
|
101
103
|
if (s->len + 4 >= s->size) {
|
102
|
-
|
104
|
+
UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
|
103
105
|
}
|
104
106
|
*(s->str + s->len) = a1;
|
105
107
|
*(s->str + s->len + 1) = a2;
|
@@ -111,11 +113,11 @@ UStr_addChar4(UString* s, unsigned char a1, unsigned char a2,
|
|
111
113
|
}
|
112
114
|
|
113
115
|
int
|
114
|
-
|
116
|
+
UniStr_addChar5(UString* s, unsigned char a1, unsigned char a2,
|
115
117
|
unsigned char a3, unsigned char a4, unsigned char a5)
|
116
118
|
{
|
117
119
|
if (s->len + 5 >= s->size) {
|
118
|
-
|
120
|
+
UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
|
119
121
|
}
|
120
122
|
*(s->str + s->len) = a1;
|
121
123
|
*(s->str + s->len + 1) = a2;
|
@@ -128,12 +130,12 @@ UStr_addChar5(UString* s, unsigned char a1, unsigned char a2,
|
|
128
130
|
}
|
129
131
|
|
130
132
|
int
|
131
|
-
|
133
|
+
UniStr_addChar6(UString* s, unsigned char a1, unsigned char a2,
|
132
134
|
unsigned char a3, unsigned char a4,
|
133
135
|
unsigned char a5, unsigned char a6)
|
134
136
|
{
|
135
137
|
if (s->len + 6 >= s->size) {
|
136
|
-
|
138
|
+
UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
|
137
139
|
}
|
138
140
|
*(s->str + s->len) = a1;
|
139
141
|
*(s->str + s->len + 1) = a2;
|
@@ -147,29 +149,29 @@ UStr_addChar6(UString* s, unsigned char a1, unsigned char a2,
|
|
147
149
|
}
|
148
150
|
|
149
151
|
int
|
150
|
-
|
152
|
+
UniStr_addWChar(UString* ustr, unsigned int c)
|
151
153
|
{
|
152
154
|
if (c < 128) { /* 0x0000-0x007F */
|
153
|
-
|
155
|
+
UniStr_addChar(ustr, c);
|
154
156
|
}
|
155
157
|
else if (c < 2048) { /* 0x0080-0x07FF */
|
156
158
|
unsigned char b2 = c & 63;
|
157
159
|
unsigned char b1 = c >> 6;
|
158
|
-
|
160
|
+
UniStr_addChar2(ustr, b1 | 192, b2 | 128);
|
159
161
|
|
160
162
|
}
|
161
163
|
else if (c < 0x10000) { /* 0x0800-0xFFFF */
|
162
164
|
unsigned char b3 = c & 63;
|
163
165
|
unsigned char b2 = (c >> 6) & 63;
|
164
166
|
unsigned char b1 = c >> 12;
|
165
|
-
|
167
|
+
UniStr_addChar3(ustr, b1 | 224, b2 | 128, b3 | 128);
|
166
168
|
}
|
167
169
|
else if (c < 0x200000) { /* 0x00010000-0x001FFFFF */
|
168
170
|
unsigned char b4 = c & 63;
|
169
171
|
unsigned char b3 = (c >> 6) & 63;
|
170
172
|
unsigned char b2 = (c >> 12) & 63;
|
171
173
|
unsigned char b1 = c >> 18;
|
172
|
-
|
174
|
+
UniStr_addChar4(ustr, b1 | 240, b2 | 128, b3 | 128, b4 | 128);
|
173
175
|
}
|
174
176
|
else if (c < 0x4000000) { /* 0x00200000-0x03FFFFFF */
|
175
177
|
unsigned char b5 = c & 63;
|
@@ -177,7 +179,7 @@ UStr_addWChar(UString* ustr, int c)
|
|
177
179
|
unsigned char b3 = (c >> 12) & 63;
|
178
180
|
unsigned char b2 = (c >> 18) & 63;
|
179
181
|
unsigned char b1 = c >> 24;
|
180
|
-
|
182
|
+
UniStr_addChar5(ustr, b1 | 248, b2 | 128, b3 | 128, b4 | 128, b5 | 128);
|
181
183
|
}
|
182
184
|
else if (c < 0x80000000) { /* 0x04000000-0x7FFFFFFF */
|
183
185
|
unsigned char b6 = c & 63;
|
@@ -186,7 +188,7 @@ UStr_addWChar(UString* ustr, int c)
|
|
186
188
|
unsigned char b3 = (c >> 18) & 63;
|
187
189
|
unsigned char b2 = (c >> 24) & 63;
|
188
190
|
unsigned char b1 = (c >> 30) & 63;
|
189
|
-
|
191
|
+
UniStr_addChar6(ustr, b1 | 252, b2 | 128, b3 | 128,
|
190
192
|
b4 | 128, b5 | 128, b6 | 128);
|
191
193
|
}
|
192
194
|
|
@@ -194,7 +196,7 @@ UStr_addWChar(UString* ustr, int c)
|
|
194
196
|
}
|
195
197
|
|
196
198
|
void
|
197
|
-
|
199
|
+
UniStr_dump(UString* s)
|
198
200
|
{
|
199
201
|
int i;
|
200
202
|
|
data/ustring.h
CHANGED
@@ -23,23 +23,23 @@ typedef struct _UString {
|
|
23
23
|
int size;
|
24
24
|
} UString;
|
25
25
|
|
26
|
-
UString*
|
27
|
-
UString*
|
28
|
-
void
|
29
|
-
int
|
30
|
-
int
|
31
|
-
int
|
32
|
-
int
|
26
|
+
UString* UniStr_alloc(UString* str);
|
27
|
+
UString* UniStr_enlarge(UString* str, int size);
|
28
|
+
void UniStr_free(UString* str);
|
29
|
+
int UniStr_addChars(UString* s, const unsigned char* a, int len);
|
30
|
+
int UniStr_addChar(UString* s, unsigned char a);
|
31
|
+
int UniStr_addChar2(UString* s, unsigned char a1, unsigned char a2);
|
32
|
+
int UniStr_addChar3(UString* s, unsigned char a1, unsigned char a2,
|
33
33
|
unsigned char a3);
|
34
|
-
int
|
34
|
+
int UniStr_addChar4(UString* s, unsigned char a1, unsigned char a2,
|
35
35
|
unsigned char a3, unsigned char a4);
|
36
|
-
int
|
36
|
+
int UniStr_addChar5(UString* s, unsigned char a1, unsigned char a2,
|
37
37
|
unsigned char a3, unsigned char a4, unsigned char a5);
|
38
|
-
int
|
38
|
+
int UniStr_addChar6(UString* s, unsigned char a1, unsigned char a2,
|
39
39
|
unsigned char a3, unsigned char a4,
|
40
40
|
unsigned char a5, unsigned char a6);
|
41
|
-
int
|
42
|
-
void
|
41
|
+
int UniStr_addWChar(UString* s, unsigned int c);
|
42
|
+
void UniStr_dump(UString* s);
|
43
43
|
|
44
44
|
#ifdef __cplusplus
|
45
45
|
}
|
data/wstring.c
CHANGED
@@ -5,6 +5,8 @@
|
|
5
5
|
*/
|
6
6
|
|
7
7
|
#include <stdio.h>
|
8
|
+
#include <stdlib.h>
|
9
|
+
#include <string.h>
|
8
10
|
#include "wstring.h"
|
9
11
|
|
10
12
|
WString*
|
@@ -115,13 +117,11 @@ WStr_allocWithUTF8(WString* s, const char* in)
|
|
115
117
|
int rest = 0;
|
116
118
|
|
117
119
|
WStr_alloc(s);
|
118
|
-
|
119
|
-
if (in == 0)
|
120
|
+
if (in == NULL)
|
120
121
|
return s;
|
121
|
-
|
122
122
|
for (i = 0; in[i] != '\0'; i++) {
|
123
123
|
unsigned char c = in[i];
|
124
|
-
if (c
|
124
|
+
if ((c & 0xc0) == 0x80) {
|
125
125
|
if (rest == 0)
|
126
126
|
return NULL;
|
127
127
|
u = (u << 6) | (c & 63);
|
@@ -130,29 +130,29 @@ WStr_allocWithUTF8(WString* s, const char* in)
|
|
130
130
|
WStr_addWChar(s, u);
|
131
131
|
}
|
132
132
|
}
|
133
|
-
else if (c
|
133
|
+
else if ((c & 0x80) == 0) { /* 0b0nnnnnnn (7bit) */
|
134
134
|
if (c == 0)
|
135
135
|
return NULL;
|
136
136
|
WStr_addWChar(s, c);
|
137
137
|
rest = 0;
|
138
138
|
}
|
139
|
-
else if (c
|
139
|
+
else if ((c & 0xe0) == 0xc0) { /* 0b110nnnnn (11bit) */
|
140
140
|
rest = 1;
|
141
141
|
u = c & 31;
|
142
142
|
}
|
143
|
-
else if (c
|
143
|
+
else if ((c & 0xf0) == 0xe0) { /* 0b1110nnnn (16bit) */
|
144
144
|
rest = 2;
|
145
145
|
u = c & 15;
|
146
146
|
}
|
147
|
-
else if (c
|
147
|
+
else if ((c & 0xf8) == 0xf0) { /* 0b11110nnn (21bit) */
|
148
148
|
rest = 3;
|
149
149
|
u = c & 7;
|
150
150
|
}
|
151
|
-
else if (c
|
151
|
+
else if ((c & 0xfc) == 0xf8) { /* 0b111110nn (26bit) */
|
152
152
|
rest = 4;
|
153
153
|
u = c & 3;
|
154
154
|
}
|
155
|
-
else if (c
|
155
|
+
else if ((c & 0xfe) == 0xfc) { /* 0b1111110n (31bit) */
|
156
156
|
rest = 5;
|
157
157
|
u = c & 1;
|
158
158
|
}
|
@@ -170,7 +170,7 @@ WStr_convertIntoUString(WString* wstr, UString* ustr)
|
|
170
170
|
int i;
|
171
171
|
|
172
172
|
for (i = 0; i < wstr->len; i++) {
|
173
|
-
|
173
|
+
UniStr_addWChar(ustr, wstr->str[i]);
|
174
174
|
}
|
175
175
|
|
176
176
|
return ustr;
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
- 1
|
8
|
-
- 1
|
9
|
-
version: 0.1.1
|
7
|
+
- 2
|
8
|
+
- 0
|
9
|
+
version: 0.2.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Yoshida Masato
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date:
|
17
|
+
date: 2009-12-29 00:00:00 +01:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|