unicode 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +16 -11
- data/test.rb +9 -8
- data/tools/README +2 -2
- data/tools/mkunidata.rb +20 -10
- data/unicode.c +132 -54
- data/unidata.map +12976 -1764
- data/ustring.c +27 -25
- data/ustring.h +12 -12
- data/wstring.c +11 -11
- metadata +4 -4
data/ustring.c
CHANGED
@@ -5,10 +5,12 @@
|
|
5
5
|
*/
|
6
6
|
|
7
7
|
#include <stdio.h>
|
8
|
+
#include <stdlib.h>
|
9
|
+
#include <string.h>
|
8
10
|
#include "ustring.h"
|
9
11
|
|
10
12
|
UString*
|
11
|
-
|
13
|
+
UniStr_alloc(UString* str)
|
12
14
|
{
|
13
15
|
str->size = USTR_INITIAL_STRING_LEN;
|
14
16
|
str->len = 0;
|
@@ -21,7 +23,7 @@ UStr_alloc(UString* str)
|
|
21
23
|
}
|
22
24
|
|
23
25
|
UString*
|
24
|
-
|
26
|
+
UniStr_enlarge(UString* str, int size)
|
25
27
|
{
|
26
28
|
unsigned char* newptr;
|
27
29
|
|
@@ -36,7 +38,7 @@ UStr_enlarge(UString* str, int size)
|
|
36
38
|
}
|
37
39
|
|
38
40
|
void
|
39
|
-
|
41
|
+
UniStr_free(UString* str)
|
40
42
|
{
|
41
43
|
str->size = 0;
|
42
44
|
str->len = 0;
|
@@ -44,10 +46,10 @@ UStr_free(UString* str)
|
|
44
46
|
}
|
45
47
|
|
46
48
|
int
|
47
|
-
|
49
|
+
UniStr_addChars(UString* s, const unsigned char* a, int len)
|
48
50
|
{
|
49
51
|
if (s->len + len >= s->size) {
|
50
|
-
|
52
|
+
UniStr_enlarge(s, len + USTR_STRING_EXTEND_LEN);
|
51
53
|
}
|
52
54
|
memcpy(s->str + s->len, a, len);
|
53
55
|
s->len += len;
|
@@ -56,10 +58,10 @@ UStr_addChars(UString* s, const unsigned char* a, int len)
|
|
56
58
|
}
|
57
59
|
|
58
60
|
int
|
59
|
-
|
61
|
+
UniStr_addChar(UString* s, unsigned char a)
|
60
62
|
{
|
61
63
|
if (s->len + 1 >= s->size) {
|
62
|
-
|
64
|
+
UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
|
63
65
|
}
|
64
66
|
*(s->str + s->len) = a;
|
65
67
|
(s->len)++;
|
@@ -68,10 +70,10 @@ UStr_addChar(UString* s, unsigned char a)
|
|
68
70
|
}
|
69
71
|
|
70
72
|
int
|
71
|
-
|
73
|
+
UniStr_addChar2(UString* s, unsigned char a1, unsigned char a2)
|
72
74
|
{
|
73
75
|
if (s->len + 2 >= s->size) {
|
74
|
-
|
76
|
+
UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
|
75
77
|
}
|
76
78
|
*(s->str + s->len) = a1;
|
77
79
|
*(s->str + s->len + 1) = a2;
|
@@ -81,10 +83,10 @@ UStr_addChar2(UString* s, unsigned char a1, unsigned char a2)
|
|
81
83
|
}
|
82
84
|
|
83
85
|
int
|
84
|
-
|
86
|
+
UniStr_addChar3(UString* s, unsigned char a1, unsigned char a2, unsigned char a3)
|
85
87
|
{
|
86
88
|
if (s->len + 3 >= s->size) {
|
87
|
-
|
89
|
+
UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
|
88
90
|
}
|
89
91
|
*(s->str + s->len) = a1;
|
90
92
|
*(s->str + s->len + 1) = a2;
|
@@ -95,11 +97,11 @@ UStr_addChar3(UString* s, unsigned char a1, unsigned char a2, unsigned char a3)
|
|
95
97
|
}
|
96
98
|
|
97
99
|
int
|
98
|
-
|
100
|
+
UniStr_addChar4(UString* s, unsigned char a1, unsigned char a2,
|
99
101
|
unsigned char a3, unsigned char a4)
|
100
102
|
{
|
101
103
|
if (s->len + 4 >= s->size) {
|
102
|
-
|
104
|
+
UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
|
103
105
|
}
|
104
106
|
*(s->str + s->len) = a1;
|
105
107
|
*(s->str + s->len + 1) = a2;
|
@@ -111,11 +113,11 @@ UStr_addChar4(UString* s, unsigned char a1, unsigned char a2,
|
|
111
113
|
}
|
112
114
|
|
113
115
|
int
|
114
|
-
|
116
|
+
UniStr_addChar5(UString* s, unsigned char a1, unsigned char a2,
|
115
117
|
unsigned char a3, unsigned char a4, unsigned char a5)
|
116
118
|
{
|
117
119
|
if (s->len + 5 >= s->size) {
|
118
|
-
|
120
|
+
UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
|
119
121
|
}
|
120
122
|
*(s->str + s->len) = a1;
|
121
123
|
*(s->str + s->len + 1) = a2;
|
@@ -128,12 +130,12 @@ UStr_addChar5(UString* s, unsigned char a1, unsigned char a2,
|
|
128
130
|
}
|
129
131
|
|
130
132
|
int
|
131
|
-
|
133
|
+
UniStr_addChar6(UString* s, unsigned char a1, unsigned char a2,
|
132
134
|
unsigned char a3, unsigned char a4,
|
133
135
|
unsigned char a5, unsigned char a6)
|
134
136
|
{
|
135
137
|
if (s->len + 6 >= s->size) {
|
136
|
-
|
138
|
+
UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
|
137
139
|
}
|
138
140
|
*(s->str + s->len) = a1;
|
139
141
|
*(s->str + s->len + 1) = a2;
|
@@ -147,29 +149,29 @@ UStr_addChar6(UString* s, unsigned char a1, unsigned char a2,
|
|
147
149
|
}
|
148
150
|
|
149
151
|
int
|
150
|
-
|
152
|
+
UniStr_addWChar(UString* ustr, unsigned int c)
|
151
153
|
{
|
152
154
|
if (c < 128) { /* 0x0000-0x00FF */
|
153
|
-
|
155
|
+
UniStr_addChar(ustr, c);
|
154
156
|
}
|
155
157
|
else if (c < 2048) { /* 0x0100-0x07FF */
|
156
158
|
unsigned char b2 = c & 63;
|
157
159
|
unsigned char b1 = c >> 6;
|
158
|
-
|
160
|
+
UniStr_addChar2(ustr, b1 | 192, b2 | 128);
|
159
161
|
|
160
162
|
}
|
161
163
|
else if (c < 0x10000) { /* 0x0800-0xFFFF */
|
162
164
|
unsigned char b3 = c & 63;
|
163
165
|
unsigned char b2 = (c >> 6) & 63;
|
164
166
|
unsigned char b1 = c >> 12;
|
165
|
-
|
167
|
+
UniStr_addChar3(ustr, b1 | 224, b2 | 128, b3 | 128);
|
166
168
|
}
|
167
169
|
else if (c < 0x200000) { /* 0x00010000-0x001FFFFF */
|
168
170
|
unsigned char b4 = c & 63;
|
169
171
|
unsigned char b3 = (c >> 6) & 63;
|
170
172
|
unsigned char b2 = (c >> 12) & 63;
|
171
173
|
unsigned char b1 = c >> 18;
|
172
|
-
|
174
|
+
UniStr_addChar4(ustr, b1 | 240, b2 | 128, b3 | 128, b4 | 128);
|
173
175
|
}
|
174
176
|
else if (c < 0x4000000) { /* 0x00200000-0x03FFFFFF */
|
175
177
|
unsigned char b5 = c & 63;
|
@@ -177,7 +179,7 @@ UStr_addWChar(UString* ustr, int c)
|
|
177
179
|
unsigned char b3 = (c >> 12) & 63;
|
178
180
|
unsigned char b2 = (c >> 18) & 63;
|
179
181
|
unsigned char b1 = c >> 24;
|
180
|
-
|
182
|
+
UniStr_addChar5(ustr, b1 | 248, b2 | 128, b3 | 128, b4 | 128, b5 | 128);
|
181
183
|
}
|
182
184
|
else if (c < 0x80000000) { /* 0x04000000-0x7FFFFFFF */
|
183
185
|
unsigned char b6 = c & 63;
|
@@ -186,7 +188,7 @@ UStr_addWChar(UString* ustr, int c)
|
|
186
188
|
unsigned char b3 = (c >> 18) & 63;
|
187
189
|
unsigned char b2 = (c >> 24) & 63;
|
188
190
|
unsigned char b1 = (c >> 30) & 63;
|
189
|
-
|
191
|
+
UniStr_addChar6(ustr, b1 | 252, b2 | 128, b3 | 128,
|
190
192
|
b4 | 128, b5 | 128, b6 | 128);
|
191
193
|
}
|
192
194
|
|
@@ -194,7 +196,7 @@ UStr_addWChar(UString* ustr, int c)
|
|
194
196
|
}
|
195
197
|
|
196
198
|
void
|
197
|
-
|
199
|
+
UniStr_dump(UString* s)
|
198
200
|
{
|
199
201
|
int i;
|
200
202
|
|
data/ustring.h
CHANGED
@@ -23,23 +23,23 @@ typedef struct _UString {
|
|
23
23
|
int size;
|
24
24
|
} UString;
|
25
25
|
|
26
|
-
UString*
|
27
|
-
UString*
|
28
|
-
void
|
29
|
-
int
|
30
|
-
int
|
31
|
-
int
|
32
|
-
int
|
26
|
+
UString* UniStr_alloc(UString* str);
|
27
|
+
UString* UniStr_enlarge(UString* str, int size);
|
28
|
+
void UniStr_free(UString* str);
|
29
|
+
int UniStr_addChars(UString* s, const unsigned char* a, int len);
|
30
|
+
int UniStr_addChar(UString* s, unsigned char a);
|
31
|
+
int UniStr_addChar2(UString* s, unsigned char a1, unsigned char a2);
|
32
|
+
int UniStr_addChar3(UString* s, unsigned char a1, unsigned char a2,
|
33
33
|
unsigned char a3);
|
34
|
-
int
|
34
|
+
int UniStr_addChar4(UString* s, unsigned char a1, unsigned char a2,
|
35
35
|
unsigned char a3, unsigned char a4);
|
36
|
-
int
|
36
|
+
int UniStr_addChar5(UString* s, unsigned char a1, unsigned char a2,
|
37
37
|
unsigned char a3, unsigned char a4, unsigned char a5);
|
38
|
-
int
|
38
|
+
int UniStr_addChar6(UString* s, unsigned char a1, unsigned char a2,
|
39
39
|
unsigned char a3, unsigned char a4,
|
40
40
|
unsigned char a5, unsigned char a6);
|
41
|
-
int
|
42
|
-
void
|
41
|
+
int UniStr_addWChar(UString* s, unsigned int c);
|
42
|
+
void UniStr_dump(UString* s);
|
43
43
|
|
44
44
|
#ifdef __cplusplus
|
45
45
|
}
|
data/wstring.c
CHANGED
@@ -5,6 +5,8 @@
|
|
5
5
|
*/
|
6
6
|
|
7
7
|
#include <stdio.h>
|
8
|
+
#include <stdlib.h>
|
9
|
+
#include <string.h>
|
8
10
|
#include "wstring.h"
|
9
11
|
|
10
12
|
WString*
|
@@ -115,13 +117,11 @@ WStr_allocWithUTF8(WString* s, const char* in)
|
|
115
117
|
int rest = 0;
|
116
118
|
|
117
119
|
WStr_alloc(s);
|
118
|
-
|
119
|
-
if (in == 0)
|
120
|
+
if (in == NULL)
|
120
121
|
return s;
|
121
|
-
|
122
122
|
for (i = 0; in[i] != '\0'; i++) {
|
123
123
|
unsigned char c = in[i];
|
124
|
-
if (c
|
124
|
+
if ((c & 0xc0) == 0x80) {
|
125
125
|
if (rest == 0)
|
126
126
|
return NULL;
|
127
127
|
u = (u << 6) | (c & 63);
|
@@ -130,29 +130,29 @@ WStr_allocWithUTF8(WString* s, const char* in)
|
|
130
130
|
WStr_addWChar(s, u);
|
131
131
|
}
|
132
132
|
}
|
133
|
-
else if (c
|
133
|
+
else if ((c & 0x80) == 0) { /* 0b0nnnnnnn (7bit) */
|
134
134
|
if (c == 0)
|
135
135
|
return NULL;
|
136
136
|
WStr_addWChar(s, c);
|
137
137
|
rest = 0;
|
138
138
|
}
|
139
|
-
else if (c
|
139
|
+
else if ((c & 0xe0) == 0xc0) { /* 0b110nnnnn (11bit) */
|
140
140
|
rest = 1;
|
141
141
|
u = c & 31;
|
142
142
|
}
|
143
|
-
else if (c
|
143
|
+
else if ((c & 0xf0) == 0xe0) { /* 0b1110nnnn (16bit) */
|
144
144
|
rest = 2;
|
145
145
|
u = c & 15;
|
146
146
|
}
|
147
|
-
else if (c
|
147
|
+
else if ((c & 0xf8) == 0xf0) { /* 0b11110nnn (21bit) */
|
148
148
|
rest = 3;
|
149
149
|
u = c & 7;
|
150
150
|
}
|
151
|
-
else if (c
|
151
|
+
else if ((c & 0xfc) == 0xf8) { /* 0b111110nn (26bit) */
|
152
152
|
rest = 4;
|
153
153
|
u = c & 3;
|
154
154
|
}
|
155
|
-
else if (c
|
155
|
+
else if ((c & 0xfe) == 0xfc) { /* 0b1111110n (31bit) */
|
156
156
|
rest = 5;
|
157
157
|
u = c & 1;
|
158
158
|
}
|
@@ -170,7 +170,7 @@ WStr_convertIntoUString(WString* wstr, UString* ustr)
|
|
170
170
|
int i;
|
171
171
|
|
172
172
|
for (i = 0; i < wstr->len; i++) {
|
173
|
-
|
173
|
+
UniStr_addWChar(ustr, wstr->str[i]);
|
174
174
|
}
|
175
175
|
|
176
176
|
return ustr;
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
- 1
|
8
|
-
- 1
|
9
|
-
version: 0.1.1
|
7
|
+
- 2
|
8
|
+
- 0
|
9
|
+
version: 0.2.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Yoshida Masato
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date:
|
17
|
+
date: 2009-12-29 00:00:00 +01:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|