unicode 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (10)
  1. data/README +16 -11
  2. data/test.rb +9 -8
  3. data/tools/README +2 -2
  4. data/tools/mkunidata.rb +20 -10
  5. data/unicode.c +132 -54
  6. data/unidata.map +12976 -1764
  7. data/ustring.c +27 -25
  8. data/ustring.h +12 -12
  9. data/wstring.c +11 -11
  10. metadata +4 -4
data/ustring.c CHANGED
@@ -5,10 +5,12 @@
5
5
  */
6
6
 
7
7
  #include <stdio.h>
8
+ #include <stdlib.h>
9
+ #include <string.h>
8
10
  #include "ustring.h"
9
11
 
10
12
  UString*
11
- UStr_alloc(UString* str)
13
+ UniStr_alloc(UString* str)
12
14
  {
13
15
  str->size = USTR_INITIAL_STRING_LEN;
14
16
  str->len = 0;
@@ -21,7 +23,7 @@ UStr_alloc(UString* str)
21
23
  }
22
24
 
23
25
  UString*
24
- UStr_enlarge(UString* str, int size)
26
+ UniStr_enlarge(UString* str, int size)
25
27
  {
26
28
  unsigned char* newptr;
27
29
 
@@ -36,7 +38,7 @@ UStr_enlarge(UString* str, int size)
36
38
  }
37
39
 
38
40
  void
39
- UStr_free(UString* str)
41
+ UniStr_free(UString* str)
40
42
  {
41
43
  str->size = 0;
42
44
  str->len = 0;
@@ -44,10 +46,10 @@ UStr_free(UString* str)
44
46
  }
45
47
 
46
48
  int
47
- UStr_addChars(UString* s, const unsigned char* a, int len)
49
+ UniStr_addChars(UString* s, const unsigned char* a, int len)
48
50
  {
49
51
  if (s->len + len >= s->size) {
50
- UStr_enlarge(s, len + USTR_STRING_EXTEND_LEN);
52
+ UniStr_enlarge(s, len + USTR_STRING_EXTEND_LEN);
51
53
  }
52
54
  memcpy(s->str + s->len, a, len);
53
55
  s->len += len;
@@ -56,10 +58,10 @@ UStr_addChars(UString* s, const unsigned char* a, int len)
56
58
  }
57
59
 
58
60
  int
59
- UStr_addChar(UString* s, unsigned char a)
61
+ UniStr_addChar(UString* s, unsigned char a)
60
62
  {
61
63
  if (s->len + 1 >= s->size) {
62
- UStr_enlarge(s, USTR_STRING_EXTEND_LEN);
64
+ UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
63
65
  }
64
66
  *(s->str + s->len) = a;
65
67
  (s->len)++;
@@ -68,10 +70,10 @@ UStr_addChar(UString* s, unsigned char a)
68
70
  }
69
71
 
70
72
  int
71
- UStr_addChar2(UString* s, unsigned char a1, unsigned char a2)
73
+ UniStr_addChar2(UString* s, unsigned char a1, unsigned char a2)
72
74
  {
73
75
  if (s->len + 2 >= s->size) {
74
- UStr_enlarge(s, USTR_STRING_EXTEND_LEN);
76
+ UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
75
77
  }
76
78
  *(s->str + s->len) = a1;
77
79
  *(s->str + s->len + 1) = a2;
@@ -81,10 +83,10 @@ UStr_addChar2(UString* s, unsigned char a1, unsigned char a2)
81
83
  }
82
84
 
83
85
  int
84
- UStr_addChar3(UString* s, unsigned char a1, unsigned char a2, unsigned char a3)
86
+ UniStr_addChar3(UString* s, unsigned char a1, unsigned char a2, unsigned char a3)
85
87
  {
86
88
  if (s->len + 3 >= s->size) {
87
- UStr_enlarge(s, USTR_STRING_EXTEND_LEN);
89
+ UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
88
90
  }
89
91
  *(s->str + s->len) = a1;
90
92
  *(s->str + s->len + 1) = a2;
@@ -95,11 +97,11 @@ UStr_addChar3(UString* s, unsigned char a1, unsigned char a2, unsigned char a3)
95
97
  }
96
98
 
97
99
  int
98
- UStr_addChar4(UString* s, unsigned char a1, unsigned char a2,
100
+ UniStr_addChar4(UString* s, unsigned char a1, unsigned char a2,
99
101
  unsigned char a3, unsigned char a4)
100
102
  {
101
103
  if (s->len + 4 >= s->size) {
102
- UStr_enlarge(s, USTR_STRING_EXTEND_LEN);
104
+ UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
103
105
  }
104
106
  *(s->str + s->len) = a1;
105
107
  *(s->str + s->len + 1) = a2;
@@ -111,11 +113,11 @@ UStr_addChar4(UString* s, unsigned char a1, unsigned char a2,
111
113
  }
112
114
 
113
115
  int
114
- UStr_addChar5(UString* s, unsigned char a1, unsigned char a2,
116
+ UniStr_addChar5(UString* s, unsigned char a1, unsigned char a2,
115
117
  unsigned char a3, unsigned char a4, unsigned char a5)
116
118
  {
117
119
  if (s->len + 5 >= s->size) {
118
- UStr_enlarge(s, USTR_STRING_EXTEND_LEN);
120
+ UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
119
121
  }
120
122
  *(s->str + s->len) = a1;
121
123
  *(s->str + s->len + 1) = a2;
@@ -128,12 +130,12 @@ UStr_addChar5(UString* s, unsigned char a1, unsigned char a2,
128
130
  }
129
131
 
130
132
  int
131
- UStr_addChar6(UString* s, unsigned char a1, unsigned char a2,
133
+ UniStr_addChar6(UString* s, unsigned char a1, unsigned char a2,
132
134
  unsigned char a3, unsigned char a4,
133
135
  unsigned char a5, unsigned char a6)
134
136
  {
135
137
  if (s->len + 6 >= s->size) {
136
- UStr_enlarge(s, USTR_STRING_EXTEND_LEN);
138
+ UniStr_enlarge(s, USTR_STRING_EXTEND_LEN);
137
139
  }
138
140
  *(s->str + s->len) = a1;
139
141
  *(s->str + s->len + 1) = a2;
@@ -147,29 +149,29 @@ UStr_addChar6(UString* s, unsigned char a1, unsigned char a2,
147
149
  }
148
150
 
149
151
  int
150
- UStr_addWChar(UString* ustr, int c)
152
+ UniStr_addWChar(UString* ustr, unsigned int c)
151
153
  {
152
154
  if (c < 128) { /* 0x0000-0x00FF */
153
- UStr_addChar(ustr, c);
155
+ UniStr_addChar(ustr, c);
154
156
  }
155
157
  else if (c < 2048) { /* 0x0100-0x07FF */
156
158
  unsigned char b2 = c & 63;
157
159
  unsigned char b1 = c >> 6;
158
- UStr_addChar2(ustr, b1 | 192, b2 | 128);
160
+ UniStr_addChar2(ustr, b1 | 192, b2 | 128);
159
161
 
160
162
  }
161
163
  else if (c < 0x10000) { /* 0x0800-0xFFFF */
162
164
  unsigned char b3 = c & 63;
163
165
  unsigned char b2 = (c >> 6) & 63;
164
166
  unsigned char b1 = c >> 12;
165
- UStr_addChar3(ustr, b1 | 224, b2 | 128, b3 | 128);
167
+ UniStr_addChar3(ustr, b1 | 224, b2 | 128, b3 | 128);
166
168
  }
167
169
  else if (c < 0x200000) { /* 0x00010000-0x001FFFFF */
168
170
  unsigned char b4 = c & 63;
169
171
  unsigned char b3 = (c >> 6) & 63;
170
172
  unsigned char b2 = (c >> 12) & 63;
171
173
  unsigned char b1 = c >> 18;
172
- UStr_addChar4(ustr, b1 | 240, b2 | 128, b3 | 128, b4 | 128);
174
+ UniStr_addChar4(ustr, b1 | 240, b2 | 128, b3 | 128, b4 | 128);
173
175
  }
174
176
  else if (c < 0x4000000) { /* 0x00200000-0x03FFFFFF */
175
177
  unsigned char b5 = c & 63;
@@ -177,7 +179,7 @@ UStr_addWChar(UString* ustr, int c)
177
179
  unsigned char b3 = (c >> 12) & 63;
178
180
  unsigned char b2 = (c >> 18) & 63;
179
181
  unsigned char b1 = c >> 24;
180
- UStr_addChar5(ustr, b1 | 248, b2 | 128, b3 | 128, b4 | 128, b5 | 128);
182
+ UniStr_addChar5(ustr, b1 | 248, b2 | 128, b3 | 128, b4 | 128, b5 | 128);
181
183
  }
182
184
  else if (c < 0x80000000) { /* 0x04000000-0x7FFFFFFF */
183
185
  unsigned char b6 = c & 63;
@@ -186,7 +188,7 @@ UStr_addWChar(UString* ustr, int c)
186
188
  unsigned char b3 = (c >> 18) & 63;
187
189
  unsigned char b2 = (c >> 24) & 63;
188
190
  unsigned char b1 = (c >> 30) & 63;
189
- UStr_addChar6(ustr, b1 | 252, b2 | 128, b3 | 128,
191
+ UniStr_addChar6(ustr, b1 | 252, b2 | 128, b3 | 128,
190
192
  b4 | 128, b5 | 128, b6 | 128);
191
193
  }
192
194
 
@@ -194,7 +196,7 @@ UStr_addWChar(UString* ustr, int c)
194
196
  }
195
197
 
196
198
  void
197
- UStr_dump(UString* s)
199
+ UniStr_dump(UString* s)
198
200
  {
199
201
  int i;
200
202
 
data/ustring.h CHANGED
@@ -23,23 +23,23 @@ typedef struct _UString {
23
23
  int size;
24
24
  } UString;
25
25
 
26
- UString* UStr_alloc(UString* str);
27
- UString* UStr_enlarge(UString* str, int size);
28
- void UStr_free(UString* str);
29
- int UStr_addUhars(UString* s, const unsigned char* a, int len);
30
- int UStr_addChar(UString* s, unsigned char a);
31
- int UStr_addChar2(UString* s, unsigned char a1, unsigned char a2);
32
- int UStr_addChar3(UString* s, unsigned char a1, unsigned char a2,
26
+ UString* UniStr_alloc(UString* str);
27
+ UString* UniStr_enlarge(UString* str, int size);
28
+ void UniStr_free(UString* str);
29
+ int UniStr_addChars(UString* s, const unsigned char* a, int len);
30
+ int UniStr_addChar(UString* s, unsigned char a);
31
+ int UniStr_addChar2(UString* s, unsigned char a1, unsigned char a2);
32
+ int UniStr_addChar3(UString* s, unsigned char a1, unsigned char a2,
33
33
  unsigned char a3);
34
- int UStr_addChar4(UString* s, unsigned char a1, unsigned char a2,
34
+ int UniStr_addChar4(UString* s, unsigned char a1, unsigned char a2,
35
35
  unsigned char a3, unsigned char a4);
36
- int UStr_addChar5(UString* s, unsigned char a1, unsigned char a2,
36
+ int UniStr_addChar5(UString* s, unsigned char a1, unsigned char a2,
37
37
  unsigned char a3, unsigned char a4, unsigned char a5);
38
- int UStr_addChar6(UString* s, unsigned char a1, unsigned char a2,
38
+ int UniStr_addChar6(UString* s, unsigned char a1, unsigned char a2,
39
39
  unsigned char a3, unsigned char a4,
40
40
  unsigned char a5, unsigned char a6);
41
- int UStr_addWChar(UString* s, int c);
42
- void UStr_dump(UString* s);
41
+ int UniStr_addWChar(UString* s, unsigned int c);
42
+ void UniStr_dump(UString* s);
43
43
 
44
44
  #ifdef __cplusplus
45
45
  }
data/wstring.c CHANGED
@@ -5,6 +5,8 @@
5
5
  */
6
6
 
7
7
  #include <stdio.h>
8
+ #include <stdlib.h>
9
+ #include <string.h>
8
10
  #include "wstring.h"
9
11
 
10
12
  WString*
@@ -115,13 +117,11 @@ WStr_allocWithUTF8(WString* s, const char* in)
115
117
  int rest = 0;
116
118
 
117
119
  WStr_alloc(s);
118
-
119
- if (in == 0)
120
+ if (in == NULL)
120
121
  return s;
121
-
122
122
  for (i = 0; in[i] != '\0'; i++) {
123
123
  unsigned char c = in[i];
124
- if (c >= 128 && c < 192) {
124
+ if ((c & 0xc0) == 0x80) {
125
125
  if (rest == 0)
126
126
  return NULL;
127
127
  u = (u << 6) | (c & 63);
@@ -130,29 +130,29 @@ WStr_allocWithUTF8(WString* s, const char* in)
130
130
  WStr_addWChar(s, u);
131
131
  }
132
132
  }
133
- else if (c < 128) { /* 0b0nnnnnnn (7bit) */
133
+ else if ((c & 0x80) == 0) { /* 0b0nnnnnnn (7bit) */
134
134
  if (c == 0)
135
135
  return NULL;
136
136
  WStr_addWChar(s, c);
137
137
  rest = 0;
138
138
  }
139
- else if (c < 224) { /* 0b110nnnnn (11bit) */
139
+ else if ((c & 0xe0) == 0xc0) { /* 0b110nnnnn (11bit) */
140
140
  rest = 1;
141
141
  u = c & 31;
142
142
  }
143
- else if (c < 240) { /* 0b1110nnnn (16bit) */
143
+ else if ((c & 0xf0) == 0xe0) { /* 0b1110nnnn (16bit) */
144
144
  rest = 2;
145
145
  u = c & 15;
146
146
  }
147
- else if (c < 248) { /* 0b11110nnn (21bit) */
147
+ else if ((c & 0xf8) == 0xf0) { /* 0b11110nnn (21bit) */
148
148
  rest = 3;
149
149
  u = c & 7;
150
150
  }
151
- else if (c < 252) { /* 0b111110nn (26bit) */
151
+ else if ((c & 0xfc) == 0xf8) { /* 0b111110nn (26bit) */
152
152
  rest = 4;
153
153
  u = c & 3;
154
154
  }
155
- else if (c < 254) { /* 0b1111110n (31bit) */
155
+ else if ((c & 0xfe) == 0xfc) { /* 0b1111110n (31bit) */
156
156
  rest = 5;
157
157
  u = c & 1;
158
158
  }
@@ -170,7 +170,7 @@ WStr_convertIntoUString(WString* wstr, UString* ustr)
170
170
  int i;
171
171
 
172
172
  for (i = 0; i < wstr->len; i++) {
173
- UStr_addWChar(ustr, wstr->str[i]);
173
+ UniStr_addWChar(ustr, wstr->str[i]);
174
174
  }
175
175
 
176
176
  return ustr;
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 1
8
- - 1
9
- version: 0.1.1
7
+ - 2
8
+ - 0
9
+ version: 0.2.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Yoshida Masato
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2005-08-13 00:00:00 +02:00
17
+ date: 2009-12-29 00:00:00 +01:00
18
18
  default_executable:
19
19
  dependencies: []
20
20