unicode 0.3.1-x86-mingw32
Sign up to get free protection for your applications and to get access to all the features.
- data/README +113 -0
- data/Rakefile +16 -0
- data/ext/unicode/extconf.rb +3 -0
- data/ext/unicode/unicode.c +789 -0
- data/ext/unicode/unidata.map +21854 -0
- data/ext/unicode/ustring.c +208 -0
- data/ext/unicode/ustring.h +48 -0
- data/ext/unicode/wstring.c +189 -0
- data/ext/unicode/wstring.h +41 -0
- data/lib/unicode.rb +6 -0
- data/lib/unicode/unicode_native.so +0 -0
- data/test/test.rb +69 -0
- data/tools/README +6 -0
- data/tools/mkunidata.rb +169 -0
- data/unicode.gemspec +13 -0
- metadata +81 -0
@@ -0,0 +1,208 @@
|
|
1
|
+
/*
|
2
|
+
* Simple string library
|
3
|
+
* Version 0.2
|
4
|
+
* 1999 by yoshidam
|
5
|
+
*/
|
6
|
+
|
7
|
+
#include <stdio.h>
|
8
|
+
#include <stdlib.h>
|
9
|
+
#include <string.h>
|
10
|
+
#include "ustring.h"
|
11
|
+
|
12
|
+
/*
 * Initialise *str as an empty byte string backed by a freshly malloc'ed
 * buffer of USTR_INITIAL_STRING_LEN bytes.
 *
 * Returns str on success.  When malloc fails, str->str is NULL, str->len
 * and str->size are both 0, and NULL is returned.
 */
UString*
UniStr_alloc(UString* str)
{
    unsigned char* buf = (unsigned char*)malloc(USTR_INITIAL_STRING_LEN);

    str->str = buf;
    str->len = 0;
    if (buf == NULL) {
        str->size = 0;
        return NULL;
    }
    str->size = USTR_INITIAL_STRING_LEN;

    return str;
}
|
24
|
+
|
25
|
+
/*
 * Grow the buffer of str by `size` additional bytes with realloc.
 *
 * Returns str on success.  When realloc fails, NULL is returned and
 * str->str / str->size are left exactly as they were.
 */
UString*
UniStr_enlarge(UString* str, int size)
{
    int wanted = str->size + size;
    unsigned char* grown = (unsigned char*)realloc(str->str, wanted);

    if (grown == NULL) {
        return NULL;
    }
    str->size = wanted;
    str->str = grown;

    return str;
}
|
39
|
+
|
40
|
+
void
|
41
|
+
UniStr_free(UString* str)
|
42
|
+
{
|
43
|
+
str->size = 0;
|
44
|
+
str->len = 0;
|
45
|
+
free(str->str);
|
46
|
+
}
|
47
|
+
|
48
|
+
int
|
49
|
+
UniStr_addChars(UString* s, const unsigned char* a, int len)
|
50
|
+
{
|
51
|
+
if (s->len + len >= s->size) {
|
52
|
+
UniStr_enlarge(s, len + USTR_STRING_EXTEND_LEN);
|
53
|
+
}
|
54
|
+
memcpy(s->str + s->len, a, len);
|
55
|
+
s->len += len;
|
56
|
+
|
57
|
+
return s->len;
|
58
|
+
}
|
59
|
+
|
60
|
+
int
|
61
|
+
UniStr_addChar(UString* s, unsigned char a)
|
62
|
+
{
|
63
|
+
if (s->len + 1 >= s->size) {
|
64
|
+
UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
|
65
|
+
}
|
66
|
+
*(s->str + s->len) = a;
|
67
|
+
(s->len)++;
|
68
|
+
|
69
|
+
return s->len;
|
70
|
+
}
|
71
|
+
|
72
|
+
int
|
73
|
+
UniStr_addChar2(UString* s, unsigned char a1, unsigned char a2)
|
74
|
+
{
|
75
|
+
if (s->len + 2 >= s->size) {
|
76
|
+
UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
|
77
|
+
}
|
78
|
+
*(s->str + s->len) = a1;
|
79
|
+
*(s->str + s->len + 1) = a2;
|
80
|
+
s->len += 2;
|
81
|
+
|
82
|
+
return s->len;
|
83
|
+
}
|
84
|
+
|
85
|
+
int
|
86
|
+
UniStr_addChar3(UString* s, unsigned char a1, unsigned char a2, unsigned char a3)
|
87
|
+
{
|
88
|
+
if (s->len + 3 >= s->size) {
|
89
|
+
UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
|
90
|
+
}
|
91
|
+
*(s->str + s->len) = a1;
|
92
|
+
*(s->str + s->len + 1) = a2;
|
93
|
+
*(s->str + s->len + 2) = a3;
|
94
|
+
s->len += 3;
|
95
|
+
|
96
|
+
return s->len;
|
97
|
+
}
|
98
|
+
|
99
|
+
int
|
100
|
+
UniStr_addChar4(UString* s, unsigned char a1, unsigned char a2,
|
101
|
+
unsigned char a3, unsigned char a4)
|
102
|
+
{
|
103
|
+
if (s->len + 4 >= s->size) {
|
104
|
+
UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
|
105
|
+
}
|
106
|
+
*(s->str + s->len) = a1;
|
107
|
+
*(s->str + s->len + 1) = a2;
|
108
|
+
*(s->str + s->len + 2) = a3;
|
109
|
+
*(s->str + s->len + 3) = a4;
|
110
|
+
s->len += 4;
|
111
|
+
|
112
|
+
return s->len;
|
113
|
+
}
|
114
|
+
|
115
|
+
int
|
116
|
+
UniStr_addChar5(UString* s, unsigned char a1, unsigned char a2,
|
117
|
+
unsigned char a3, unsigned char a4, unsigned char a5)
|
118
|
+
{
|
119
|
+
if (s->len + 5 >= s->size) {
|
120
|
+
UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
|
121
|
+
}
|
122
|
+
*(s->str + s->len) = a1;
|
123
|
+
*(s->str + s->len + 1) = a2;
|
124
|
+
*(s->str + s->len + 2) = a3;
|
125
|
+
*(s->str + s->len + 3) = a4;
|
126
|
+
*(s->str + s->len + 4) = a5;
|
127
|
+
s->len += 5;
|
128
|
+
|
129
|
+
return s->len;
|
130
|
+
}
|
131
|
+
|
132
|
+
int
|
133
|
+
UniStr_addChar6(UString* s, unsigned char a1, unsigned char a2,
|
134
|
+
unsigned char a3, unsigned char a4,
|
135
|
+
unsigned char a5, unsigned char a6)
|
136
|
+
{
|
137
|
+
if (s->len + 6 >= s->size) {
|
138
|
+
UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
|
139
|
+
}
|
140
|
+
*(s->str + s->len) = a1;
|
141
|
+
*(s->str + s->len + 1) = a2;
|
142
|
+
*(s->str + s->len + 2) = a3;
|
143
|
+
*(s->str + s->len + 3) = a4;
|
144
|
+
*(s->str + s->len + 4) = a5;
|
145
|
+
*(s->str + s->len + 5) = a6;
|
146
|
+
s->len += 6;
|
147
|
+
|
148
|
+
return s->len;
|
149
|
+
}
|
150
|
+
|
151
|
+
int
|
152
|
+
UniStr_addWChar(UString* ustr, unsigned int c)
|
153
|
+
{
|
154
|
+
if (c < 128) { /* 0x0000-0x00FF */
|
155
|
+
UniStr_addChar(ustr, c);
|
156
|
+
}
|
157
|
+
else if (c < 2048) { /* 0x0100-0x07FF */
|
158
|
+
unsigned char b2 = c & 63;
|
159
|
+
unsigned char b1 = c >> 6;
|
160
|
+
UniStr_addChar2(ustr, b1 | 192, b2 | 128);
|
161
|
+
|
162
|
+
}
|
163
|
+
else if (c < 0x10000) { /* 0x0800-0xFFFF */
|
164
|
+
unsigned char b3 = c & 63;
|
165
|
+
unsigned char b2 = (c >> 6) & 63;
|
166
|
+
unsigned char b1 = c >> 12;
|
167
|
+
UniStr_addChar3(ustr, b1 | 224, b2 | 128, b3 | 128);
|
168
|
+
}
|
169
|
+
else if (c < 0x200000) { /* 0x00010000-0x001FFFFF */
|
170
|
+
unsigned char b4 = c & 63;
|
171
|
+
unsigned char b3 = (c >> 6) & 63;
|
172
|
+
unsigned char b2 = (c >> 12) & 63;
|
173
|
+
unsigned char b1 = c >> 18;
|
174
|
+
UniStr_addChar4(ustr, b1 | 240, b2 | 128, b3 | 128, b4 | 128);
|
175
|
+
}
|
176
|
+
else if (c < 0x4000000) { /* 0x00200000-0x03FFFFFF */
|
177
|
+
unsigned char b5 = c & 63;
|
178
|
+
unsigned char b4 = (c >> 6) & 63;
|
179
|
+
unsigned char b3 = (c >> 12) & 63;
|
180
|
+
unsigned char b2 = (c >> 18) & 63;
|
181
|
+
unsigned char b1 = c >> 24;
|
182
|
+
UniStr_addChar5(ustr, b1 | 248, b2 | 128, b3 | 128, b4 | 128, b5 | 128);
|
183
|
+
}
|
184
|
+
else if (c < 0x80000000) { /* 0x04000000-0x7FFFFFFF */
|
185
|
+
unsigned char b6 = c & 63;
|
186
|
+
unsigned char b5 = (c >> 6) & 63;
|
187
|
+
unsigned char b4 = (c >> 12) & 63;
|
188
|
+
unsigned char b3 = (c >> 18) & 63;
|
189
|
+
unsigned char b2 = (c >> 24) & 63;
|
190
|
+
unsigned char b1 = (c >> 30) & 63;
|
191
|
+
UniStr_addChar6(ustr, b1 | 252, b2 | 128, b3 | 128,
|
192
|
+
b4 | 128, b5 | 128, b6 | 128);
|
193
|
+
}
|
194
|
+
|
195
|
+
return ustr->len;
|
196
|
+
}
|
197
|
+
|
198
|
+
void
|
199
|
+
UniStr_dump(UString* s)
|
200
|
+
{
|
201
|
+
int i;
|
202
|
+
|
203
|
+
printf("[%d/%d] ", s->len, s->size);
|
204
|
+
for (i = 0; i < s->len ; i++) {
|
205
|
+
printf("%02x ", *(s->str + i));
|
206
|
+
}
|
207
|
+
printf("\n");
|
208
|
+
}
|
@@ -0,0 +1,48 @@
|
|
1
|
+
/*
|
2
|
+
* Simple string library
|
3
|
+
* Version 0.2
|
4
|
+
* 1999 by yoshidam
|
5
|
+
*/
|
6
|
+
|
7
|
+
#ifndef _USTRING_H
|
8
|
+
#define _USTRING_H
|
9
|
+
|
10
|
+
#ifdef __cplusplus
|
11
|
+
extern "C" {
|
12
|
+
#endif
|
13
|
+
|
14
|
+
#define USTR_INITIAL_STRING_LEN 1024
|
15
|
+
#define USTR_STRING_EXTEND_LEN 1024
|
16
|
+
|
17
|
+
/*#define malloc(s) xmalloc(s)*/
|
18
|
+
/*#define realloc(p, s) xrealloc(p, s)*/
|
19
|
+
|
20
|
+
/*
 * Growable byte string.  The buffer is not NUL-terminated by the library;
 * `len` is the only indication of where the content ends.
 */
typedef struct _UString {
    unsigned char* str;  /* heap buffer holding the bytes (malloc/realloc) */
    int len;             /* number of bytes currently stored in str */
    int size;            /* number of bytes allocated for str */
} UString;
|
25
|
+
|
26
|
+
UString* UniStr_alloc(UString* str);
|
27
|
+
UString* UniStr_enlarge(UString* str, int size);
|
28
|
+
void UniStr_free(UString* str);
|
29
|
+
int UniStr_addChars(UString* s, const unsigned char* a, int len);
|
30
|
+
int UniStr_addChar(UString* s, unsigned char a);
|
31
|
+
int UniStr_addChar2(UString* s, unsigned char a1, unsigned char a2);
|
32
|
+
int UniStr_addChar3(UString* s, unsigned char a1, unsigned char a2,
|
33
|
+
unsigned char a3);
|
34
|
+
int UniStr_addChar4(UString* s, unsigned char a1, unsigned char a2,
|
35
|
+
unsigned char a3, unsigned char a4);
|
36
|
+
int UniStr_addChar5(UString* s, unsigned char a1, unsigned char a2,
|
37
|
+
unsigned char a3, unsigned char a4, unsigned char a5);
|
38
|
+
int UniStr_addChar6(UString* s, unsigned char a1, unsigned char a2,
|
39
|
+
unsigned char a3, unsigned char a4,
|
40
|
+
unsigned char a5, unsigned char a6);
|
41
|
+
int UniStr_addWChar(UString* s, unsigned int c);
|
42
|
+
void UniStr_dump(UString* s);
|
43
|
+
|
44
|
+
#ifdef __cplusplus
|
45
|
+
}
|
46
|
+
#endif
|
47
|
+
|
48
|
+
#endif
|
@@ -0,0 +1,189 @@
|
|
1
|
+
/*
|
2
|
+
* Simple wide string Library
|
3
|
+
* Version 0.1
|
4
|
+
* 1999 by yoshidam
|
5
|
+
*/
|
6
|
+
|
7
|
+
#include <stdio.h>
|
8
|
+
#include <stdlib.h>
|
9
|
+
#include <string.h>
|
10
|
+
#include "wstring.h"
|
11
|
+
|
12
|
+
/*
 * Initialise *str as an empty wide string backed by a freshly malloc'ed
 * buffer with room for WSTR_INITIAL_STRING_LEN int elements.
 *
 * Returns str on success.  When malloc fails, str->str is NULL, str->len
 * and str->size are both 0, and NULL is returned.
 */
WString*
WStr_alloc(WString* str)
{
    int* buf = (int*)malloc(WSTR_INITIAL_STRING_LEN * sizeof(int));

    str->str = buf;
    str->len = 0;
    if (buf == NULL) {
        str->size = 0;
        return NULL;
    }
    str->size = WSTR_INITIAL_STRING_LEN;

    return str;
}
|
25
|
+
|
26
|
+
/*
 * Grow the buffer of str by `size` additional int elements with realloc.
 *
 * Returns str on success.  When realloc fails, NULL is returned and
 * str->str / str->size are left exactly as they were.
 */
WString*
WStr_enlarge(WString* str, int size)
{
    int* grown = (int*)realloc(str->str, (str->size + size) * sizeof(int));

    if (grown == NULL) {
        return NULL;
    }
    str->size += size;
    str->str = grown;

    return str;
}
|
40
|
+
|
41
|
+
void
|
42
|
+
WStr_free(WString* str)
|
43
|
+
{
|
44
|
+
str->size = 0;
|
45
|
+
str->len = 0;
|
46
|
+
free(str->str);
|
47
|
+
}
|
48
|
+
|
49
|
+
int
|
50
|
+
WStr_addWChars(WString* s, const int* a, int len)
|
51
|
+
{
|
52
|
+
if (s->len + len >= s->size) {
|
53
|
+
WStr_enlarge(s, len + WSTR_STRING_EXTEND_LEN);
|
54
|
+
}
|
55
|
+
memcpy(s->str + s->len, a, len * sizeof(int));
|
56
|
+
s->len += len;
|
57
|
+
|
58
|
+
return s->len;
|
59
|
+
}
|
60
|
+
|
61
|
+
int
|
62
|
+
WStr_addWChar(WString* s, int a)
|
63
|
+
{
|
64
|
+
if (s->len + 1 >= s->size) {
|
65
|
+
WStr_enlarge(s, WSTR_STRING_EXTEND_LEN);
|
66
|
+
}
|
67
|
+
*(s->str + s->len) = a;
|
68
|
+
(s->len)++;
|
69
|
+
|
70
|
+
return s->len;
|
71
|
+
}
|
72
|
+
|
73
|
+
int
|
74
|
+
WStr_pushWString(WString* s, const WString* add)
|
75
|
+
{
|
76
|
+
if (s->len + add->len >= s->size) {
|
77
|
+
WStr_enlarge(s, add->len + WSTR_STRING_EXTEND_LEN);
|
78
|
+
}
|
79
|
+
memcpy(s->str + s->len, add->str, add->len * sizeof(int));
|
80
|
+
s->len += add->len;
|
81
|
+
|
82
|
+
return s->len;
|
83
|
+
}
|
84
|
+
|
85
|
+
int
|
86
|
+
WStr_addWChar2(WString* s, int a1, int a2)
|
87
|
+
{
|
88
|
+
if (s->len + 2 >= s->size) {
|
89
|
+
WStr_enlarge(s, WSTR_STRING_EXTEND_LEN);
|
90
|
+
}
|
91
|
+
*(s->str + s->len) = a1;
|
92
|
+
*(s->str + s->len + 1) = a2;
|
93
|
+
s->len += 2;
|
94
|
+
|
95
|
+
return s->len;
|
96
|
+
}
|
97
|
+
|
98
|
+
int
|
99
|
+
WStr_addWChar3(WString* s, int a1, int a2, int a3)
|
100
|
+
{
|
101
|
+
if (s->len + 3 >= s->size) {
|
102
|
+
WStr_enlarge(s, WSTR_STRING_EXTEND_LEN);
|
103
|
+
}
|
104
|
+
*(s->str + s->len) = a1;
|
105
|
+
*(s->str + s->len + 1) = a2;
|
106
|
+
*(s->str + s->len + 2) = a3;
|
107
|
+
s->len += 3;
|
108
|
+
|
109
|
+
return s->len;
|
110
|
+
}
|
111
|
+
|
112
|
+
/*
 * Initialise `s` with WStr_alloc() and fill it with the code points decoded
 * from the NUL-terminated UTF-8 style byte string `in` (lead bytes for
 * sequences of up to 6 bytes are accepted).
 *
 * Returns s on success; a NULL `in` yields an empty string.
 * Returns NULL when
 *   - the initial allocation fails (previously unchecked, which led to a
 *     write through a NULL buffer), or
 *   - a continuation byte appears where none is expected, or
 *   - a byte is not a valid lead byte (0xFE / 0xFF).
 * In the two malformed-input cases the buffer stays allocated and holds the
 * code points decoded so far; the caller still has to call WStr_free().
 *
 * Existing leniencies kept as-is: a lead or ASCII byte that arrives in the
 * middle of a multi-byte sequence silently discards the partial code point,
 * and a sequence truncated by the end of input is dropped without error.
 */
WString*
WStr_allocWithUTF8(WString* s, const char* in)
{
    int i;
    int u = 0;      /* code point being assembled */
    int rest = 0;   /* continuation bytes still expected */

    if (WStr_alloc(s) == NULL) {
        return NULL;
    }
    if (in == NULL) {
        return s;
    }

    /* The loop condition already excludes the NUL byte, so no separate
     * check for c == 0 is needed inside the body. */
    for (i = 0; in[i] != '\0'; i++) {
        unsigned char c = (unsigned char)in[i];

        if ((c & 0xc0) == 0x80) {           /* 0b10nnnnnn continuation */
            if (rest == 0) {
                return NULL;
            }
            u = (u << 6) | (c & 0x3f);
            rest--;
            if (rest == 0) {
                WStr_addWChar(s, u);
            }
        }
        else if ((c & 0x80) == 0) {         /* 0b0nnnnnnn (7bit) */
            WStr_addWChar(s, c);
            rest = 0;
        }
        else if ((c & 0xe0) == 0xc0) {      /* 0b110nnnnn (11bit) */
            rest = 1;
            u = c & 0x1f;
        }
        else if ((c & 0xf0) == 0xe0) {      /* 0b1110nnnn (16bit) */
            rest = 2;
            u = c & 0x0f;
        }
        else if ((c & 0xf8) == 0xf0) {      /* 0b11110nnn (21bit) */
            rest = 3;
            u = c & 0x07;
        }
        else if ((c & 0xfc) == 0xf8) {      /* 0b111110nn (26bit) */
            rest = 4;
            u = c & 0x03;
        }
        else if ((c & 0xfe) == 0xfc) {      /* 0b1111110n (31bit) */
            rest = 5;
            u = c & 0x01;
        }
        else {                              /* 0xFE or 0xFF */
            return NULL;
        }
    }

    return s;
}
|
166
|
+
|
167
|
+
UString*
|
168
|
+
WStr_convertIntoUString(WString* wstr, UString* ustr)
|
169
|
+
{
|
170
|
+
int i;
|
171
|
+
|
172
|
+
for (i = 0; i < wstr->len; i++) {
|
173
|
+
UniStr_addWChar(ustr, wstr->str[i]);
|
174
|
+
}
|
175
|
+
|
176
|
+
return ustr;
|
177
|
+
}
|
178
|
+
|
179
|
+
void
|
180
|
+
WStr_dump(WString* s)
|
181
|
+
{
|
182
|
+
int i;
|
183
|
+
|
184
|
+
printf("[%d/%d] ", s->len, s->size);
|
185
|
+
for (i = 0; i < s->len ; i++) {
|
186
|
+
printf("%04x ", *(s->str + i));
|
187
|
+
}
|
188
|
+
printf("\n");
|
189
|
+
}
|
@@ -0,0 +1,41 @@
|
|
1
|
+
/*
|
2
|
+
* Simple wide string library
|
3
|
+
* Version 0.1
|
4
|
+
* 1999 by yoshidam
|
5
|
+
*/
|
6
|
+
|
7
|
+
#ifndef _WSTRING_H
|
8
|
+
#define _WSTRING_H
|
9
|
+
|
10
|
+
#include "ustring.h"
|
11
|
+
|
12
|
+
#ifdef __cplusplus
|
13
|
+
extern "C" {
|
14
|
+
#endif
|
15
|
+
|
16
|
+
#define WSTR_INITIAL_STRING_LEN 1024
|
17
|
+
#define WSTR_STRING_EXTEND_LEN 1024
|
18
|
+
|
19
|
+
/*
 * Growable wide string: one int per element.  The buffer is not terminated
 * by the library; `len` is the only indication of where the content ends.
 */
typedef struct _WString {
    int* str;   /* heap buffer holding the elements (malloc/realloc) */
    int len;    /* number of elements currently stored in str */
    int size;   /* number of elements allocated for str */
} WString;
|
24
|
+
|
25
|
+
WString* WStr_alloc(WString* str);
|
26
|
+
WString* WStr_allocWithUTF8(WString* s, const char* u);
|
27
|
+
WString* WStr_enlarge(WString* str, int size);
|
28
|
+
void WStr_free(WString* str);
|
29
|
+
int WStr_addWChars(WString* s, const int* a, int len);
|
30
|
+
int WStr_addWChar(WString* s, int a);
|
31
|
+
int WStr_pushWString(WString* s, const WString* add);
|
32
|
+
int WStr_addWChar2(WString* s, int a1, int a2);
|
33
|
+
int WStr_addWChar3(WString* s, int a1, int a2, int a3);
|
34
|
+
UString* WStr_convertIntoUString(WString* wstr, UString* ustr);
|
35
|
+
void WStr_dump(WString* s);
|
36
|
+
|
37
|
+
#ifdef __cplusplus
|
38
|
+
}
|
39
|
+
#endif
|
40
|
+
|
41
|
+
#endif
|