unicode 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (10)
  1. data/README +16 -11
  2. data/test.rb +9 -8
  3. data/tools/README +2 -2
  4. data/tools/mkunidata.rb +20 -10
  5. data/unicode.c +132 -54
  6. data/unidata.map +12976 -1764
  7. data/ustring.c +27 -25
  8. data/ustring.h +12 -12
  9. data/wstring.c +11 -11
  10. metadata +4 -4
data/ustring.c CHANGED
@@ -5,10 +5,12 @@
5
5
  */
6
6
 
7
7
  #include <stdio.h>
8
+ #include <stdlib.h>
9
+ #include <string.h>
8
10
  #include "ustring.h"
9
11
 
10
12
  UString*
11
- UStr_alloc(UString* str)
13
+ UniStr_alloc(UString* str)
12
14
  {
13
15
  str->size = USTR_INITIAL_STRING_LEN;
14
16
  str->len = 0;
@@ -21,7 +23,7 @@ UStr_alloc(UString* str)
21
23
  }
22
24
 
23
25
  UString*
24
- UStr_enlarge(UString* str, int size)
26
+ UniStr_enlarge(UString* str, int size)
25
27
  {
26
28
  unsigned char* newptr;
27
29
 
@@ -36,7 +38,7 @@ UStr_enlarge(UString* str, int size)
36
38
  }
37
39
 
38
40
  void
39
- UStr_free(UString* str)
41
+ UniStr_free(UString* str)
40
42
  {
41
43
  str->size = 0;
42
44
  str->len = 0;
@@ -44,10 +46,10 @@ UStr_free(UString* str)
44
46
  }
45
47
 
46
48
  int
47
- UStr_addChars(UString* s, const unsigned char* a, int len)
49
+ UniStr_addChars(UString* s, const unsigned char* a, int len)
48
50
  {
49
51
  if (s->len + len >= s->size) {
50
- UStr_enlarge(s, len + USTR_STRING_EXTEND_LEN);
52
+ UniStr_enlarge(s, len + USTR_STRING_EXTEND_LEN);
51
53
  }
52
54
  memcpy(s->str + s->len, a, len);
53
55
  s->len += len;
@@ -56,10 +58,10 @@ UStr_addChars(UString* s, const unsigned char* a, int len)
56
58
  }
57
59
 
58
60
  int
59
- UStr_addChar(UString* s, unsigned char a)
61
+ UniStr_addChar(UString* s, unsigned char a)
60
62
  {
61
63
  if (s->len + 1 >= s->size) {
62
- UStr_enlarge(s, USTR_STRING_EXTEND_LEN);
64
+ UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
63
65
  }
64
66
  *(s->str + s->len) = a;
65
67
  (s->len)++;
@@ -68,10 +70,10 @@ UStr_addChar(UString* s, unsigned char a)
68
70
  }
69
71
 
70
72
  int
71
- UStr_addChar2(UString* s, unsigned char a1, unsigned char a2)
73
+ UniStr_addChar2(UString* s, unsigned char a1, unsigned char a2)
72
74
  {
73
75
  if (s->len + 2 >= s->size) {
74
- UStr_enlarge(s, USTR_STRING_EXTEND_LEN);
76
+ UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
75
77
  }
76
78
  *(s->str + s->len) = a1;
77
79
  *(s->str + s->len + 1) = a2;
@@ -81,10 +83,10 @@ UStr_addChar2(UString* s, unsigned char a1, unsigned char a2)
81
83
  }
82
84
 
83
85
  int
84
- UStr_addChar3(UString* s, unsigned char a1, unsigned char a2, unsigned char a3)
86
+ UniStr_addChar3(UString* s, unsigned char a1, unsigned char a2, unsigned char a3)
85
87
  {
86
88
  if (s->len + 3 >= s->size) {
87
- UStr_enlarge(s, USTR_STRING_EXTEND_LEN);
89
+ UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
88
90
  }
89
91
  *(s->str + s->len) = a1;
90
92
  *(s->str + s->len + 1) = a2;
@@ -95,11 +97,11 @@ UStr_addChar3(UString* s, unsigned char a1, unsigned char a2, unsigned char a3)
95
97
  }
96
98
 
97
99
  int
98
- UStr_addChar4(UString* s, unsigned char a1, unsigned char a2,
100
+ UniStr_addChar4(UString* s, unsigned char a1, unsigned char a2,
99
101
  unsigned char a3, unsigned char a4)
100
102
  {
101
103
  if (s->len + 4 >= s->size) {
102
- UStr_enlarge(s, USTR_STRING_EXTEND_LEN);
104
+ UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
103
105
  }
104
106
  *(s->str + s->len) = a1;
105
107
  *(s->str + s->len + 1) = a2;
@@ -111,11 +113,11 @@ UStr_addChar4(UString* s, unsigned char a1, unsigned char a2,
111
113
  }
112
114
 
113
115
  int
114
- UStr_addChar5(UString* s, unsigned char a1, unsigned char a2,
116
+ UniStr_addChar5(UString* s, unsigned char a1, unsigned char a2,
115
117
  unsigned char a3, unsigned char a4, unsigned char a5)
116
118
  {
117
119
  if (s->len + 5 >= s->size) {
118
- UStr_enlarge(s, USTR_STRING_EXTEND_LEN);
120
+ UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
119
121
  }
120
122
  *(s->str + s->len) = a1;
121
123
  *(s->str + s->len + 1) = a2;
@@ -128,12 +130,12 @@ UStr_addChar5(UString* s, unsigned char a1, unsigned char a2,
128
130
  }
129
131
 
130
132
  int
131
- UStr_addChar6(UString* s, unsigned char a1, unsigned char a2,
133
+ UniStr_addChar6(UString* s, unsigned char a1, unsigned char a2,
132
134
  unsigned char a3, unsigned char a4,
133
135
  unsigned char a5, unsigned char a6)
134
136
  {
135
137
  if (s->len + 6 >= s->size) {
136
- UStr_enlarge(s, USTR_STRING_EXTEND_LEN);
138
+ UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
137
139
  }
138
140
  *(s->str + s->len) = a1;
139
141
  *(s->str + s->len + 1) = a2;
@@ -147,29 +149,29 @@ UStr_addChar6(UString* s, unsigned char a1, unsigned char a2,
147
149
  }
148
150
 
149
151
  int
150
- UStr_addWChar(UString* ustr, int c)
152
+ UniStr_addWChar(UString* ustr, unsigned int c)
151
153
  {
152
154
  if (c < 128) { /* 0x0000-0x00FF */
153
- UStr_addChar(ustr, c);
155
+ UniStr_addChar(ustr, c);
154
156
  }
155
157
  else if (c < 2048) { /* 0x0100-0x07FF */
156
158
  unsigned char b2 = c & 63;
157
159
  unsigned char b1 = c >> 6;
158
- UStr_addChar2(ustr, b1 | 192, b2 | 128);
160
+ UniStr_addChar2(ustr, b1 | 192, b2 | 128);
159
161
 
160
162
  }
161
163
  else if (c < 0x10000) { /* 0x0800-0xFFFF */
162
164
  unsigned char b3 = c & 63;
163
165
  unsigned char b2 = (c >> 6) & 63;
164
166
  unsigned char b1 = c >> 12;
165
- UStr_addChar3(ustr, b1 | 224, b2 | 128, b3 | 128);
167
+ UniStr_addChar3(ustr, b1 | 224, b2 | 128, b3 | 128);
166
168
  }
167
169
  else if (c < 0x200000) { /* 0x00010000-0x001FFFFF */
168
170
  unsigned char b4 = c & 63;
169
171
  unsigned char b3 = (c >> 6) & 63;
170
172
  unsigned char b2 = (c >> 12) & 63;
171
173
  unsigned char b1 = c >> 18;
172
- UStr_addChar4(ustr, b1 | 240, b2 | 128, b3 | 128, b4 | 128);
174
+ UniStr_addChar4(ustr, b1 | 240, b2 | 128, b3 | 128, b4 | 128);
173
175
  }
174
176
  else if (c < 0x4000000) { /* 0x00200000-0x03FFFFFF */
175
177
  unsigned char b5 = c & 63;
@@ -177,7 +179,7 @@ UStr_addWChar(UString* ustr, int c)
177
179
  unsigned char b3 = (c >> 12) & 63;
178
180
  unsigned char b2 = (c >> 18) & 63;
179
181
  unsigned char b1 = c >> 24;
180
- UStr_addChar5(ustr, b1 | 248, b2 | 128, b3 | 128, b4 | 128, b5 | 128);
182
+ UniStr_addChar5(ustr, b1 | 248, b2 | 128, b3 | 128, b4 | 128, b5 | 128);
181
183
  }
182
184
  else if (c < 0x80000000) { /* 0x04000000-0x7FFFFFFF */
183
185
  unsigned char b6 = c & 63;
@@ -186,7 +188,7 @@ UStr_addWChar(UString* ustr, int c)
186
188
  unsigned char b3 = (c >> 18) & 63;
187
189
  unsigned char b2 = (c >> 24) & 63;
188
190
  unsigned char b1 = (c >> 30) & 63;
189
- UStr_addChar6(ustr, b1 | 252, b2 | 128, b3 | 128,
191
+ UniStr_addChar6(ustr, b1 | 252, b2 | 128, b3 | 128,
190
192
  b4 | 128, b5 | 128, b6 | 128);
191
193
  }
192
194
 
@@ -194,7 +196,7 @@ UStr_addWChar(UString* ustr, int c)
194
196
  }
195
197
 
196
198
  void
197
- UStr_dump(UString* s)
199
+ UniStr_dump(UString* s)
198
200
  {
199
201
  int i;
200
202
 
data/ustring.h CHANGED
@@ -23,23 +23,23 @@ typedef struct _UString {
23
23
  int size;
24
24
  } UString;
25
25
 
26
- UString* UStr_alloc(UString* str);
27
- UString* UStr_enlarge(UString* str, int size);
28
- void UStr_free(UString* str);
29
- int UStr_addUhars(UString* s, const unsigned char* a, int len);
30
- int UStr_addChar(UString* s, unsigned char a);
31
- int UStr_addChar2(UString* s, unsigned char a1, unsigned char a2);
32
- int UStr_addChar3(UString* s, unsigned char a1, unsigned char a2,
26
+ UString* UniStr_alloc(UString* str);
27
+ UString* UniStr_enlarge(UString* str, int size);
28
+ void UniStr_free(UString* str);
29
+ int UniStr_addChars(UString* s, const unsigned char* a, int len);
30
+ int UniStr_addChar(UString* s, unsigned char a);
31
+ int UniStr_addChar2(UString* s, unsigned char a1, unsigned char a2);
32
+ int UniStr_addChar3(UString* s, unsigned char a1, unsigned char a2,
33
33
  unsigned char a3);
34
- int UStr_addChar4(UString* s, unsigned char a1, unsigned char a2,
34
+ int UniStr_addChar4(UString* s, unsigned char a1, unsigned char a2,
35
35
  unsigned char a3, unsigned char a4);
36
- int UStr_addChar5(UString* s, unsigned char a1, unsigned char a2,
36
+ int UniStr_addChar5(UString* s, unsigned char a1, unsigned char a2,
37
37
  unsigned char a3, unsigned char a4, unsigned char a5);
38
- int UStr_addChar6(UString* s, unsigned char a1, unsigned char a2,
38
+ int UniStr_addChar6(UString* s, unsigned char a1, unsigned char a2,
39
39
  unsigned char a3, unsigned char a4,
40
40
  unsigned char a5, unsigned char a6);
41
- int UStr_addWChar(UString* s, int c);
42
- void UStr_dump(UString* s);
41
+ int UniStr_addWChar(UString* s, unsigned int c);
42
+ void UniStr_dump(UString* s);
43
43
 
44
44
  #ifdef __cplusplus
45
45
  }
data/wstring.c CHANGED
@@ -5,6 +5,8 @@
5
5
  */
6
6
 
7
7
  #include <stdio.h>
8
+ #include <stdlib.h>
9
+ #include <string.h>
8
10
  #include "wstring.h"
9
11
 
10
12
  WString*
@@ -115,13 +117,11 @@ WStr_allocWithUTF8(WString* s, const char* in)
115
117
  int rest = 0;
116
118
 
117
119
  WStr_alloc(s);
118
-
119
- if (in == 0)
120
+ if (in == NULL)
120
121
  return s;
121
-
122
122
  for (i = 0; in[i] != '\0'; i++) {
123
123
  unsigned char c = in[i];
124
- if (c >= 128 && c < 192) {
124
+ if ((c & 0xc0) == 0x80) {
125
125
  if (rest == 0)
126
126
  return NULL;
127
127
  u = (u << 6) | (c & 63);
@@ -130,29 +130,29 @@ WStr_allocWithUTF8(WString* s, const char* in)
130
130
  WStr_addWChar(s, u);
131
131
  }
132
132
  }
133
- else if (c < 128) { /* 0b0nnnnnnn (7bit) */
133
+ else if ((c & 0x80) == 0) { /* 0b0nnnnnnn (7bit) */
134
134
  if (c == 0)
135
135
  return NULL;
136
136
  WStr_addWChar(s, c);
137
137
  rest = 0;
138
138
  }
139
- else if (c < 224) { /* 0b110nnnnn (11bit) */
139
+ else if ((c & 0xe0) == 0xc0) { /* 0b110nnnnn (11bit) */
140
140
  rest = 1;
141
141
  u = c & 31;
142
142
  }
143
- else if (c < 240) { /* 0b1110nnnn (16bit) */
143
+ else if ((c & 0xf0) == 0xe0) { /* 0b1110nnnn (16bit) */
144
144
  rest = 2;
145
145
  u = c & 15;
146
146
  }
147
- else if (c < 248) { /* 0b11110nnn (21bit) */
147
+ else if ((c & 0xf8) == 0xf0) { /* 0b11110nnn (21bit) */
148
148
  rest = 3;
149
149
  u = c & 7;
150
150
  }
151
- else if (c < 252) { /* 0b111110nn (26bit) */
151
+ else if ((c & 0xfc) == 0xf8) { /* 0b111110nn (26bit) */
152
152
  rest = 4;
153
153
  u = c & 3;
154
154
  }
155
- else if (c < 254) { /* 0b1111110n (31bit) */
155
+ else if ((c & 0xfe) == 0xfc) { /* 0b1111110n (31bit) */
156
156
  rest = 5;
157
157
  u = c & 1;
158
158
  }
@@ -170,7 +170,7 @@ WStr_convertIntoUString(WString* wstr, UString* ustr)
170
170
  int i;
171
171
 
172
172
  for (i = 0; i < wstr->len; i++) {
173
- UStr_addWChar(ustr, wstr->str[i]);
173
+ UniStr_addWChar(ustr, wstr->str[i]);
174
174
  }
175
175
 
176
176
  return ustr;
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 1
8
- - 1
9
- version: 0.1.1
7
+ - 2
8
+ - 0
9
+ version: 0.2.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Yoshida Masato
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2005-08-13 00:00:00 +02:00
17
+ date: 2009-12-29 00:00:00 +01:00
18
18
  default_executable:
19
19
  dependencies: []
20
20