unicode 0.4.2 → 0.4.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (8) hide show
  1. data/README +29 -7
  2. data/tools/README +3 -2
  3. data/tools/mkunidata.rb +136 -12
  4. data/unicode.c +379 -16
  5. data/unidata.map +24536 -24435
  6. data/wstring.c +69 -1
  7. data/wstring.h +2 -0
  8. metadata +20 -38
data/wstring.c CHANGED
@@ -43,7 +43,10 @@ WStr_free(WString* str)
43
43
  {
44
44
  str->size = 0;
45
45
  str->len = 0;
46
- free(str->str);
46
+ if (str->str) {
47
+ free(str->str);
48
+ str->str = NULL;
49
+ }
47
50
  }
48
51
 
49
52
  int
@@ -164,6 +167,59 @@ WStr_allocWithUTF8(WString* s, const char* in)
164
167
  return s;
165
168
  }
166
169
 
170
+ WString*
171
+ WStr_allocWithUTF8L(WString* s, const char* in, int len)
172
+ {
173
+ int i;
174
+ int u = 0;
175
+ int rest = 0;
176
+
177
+ WStr_alloc(s);
178
+ if (in == NULL)
179
+ return s;
180
+ for (i = 0; i < len; i++) {
181
+ unsigned char c = in[i];
182
+ if ((c & 0xc0) == 0x80) {
183
+ if (rest == 0)
184
+ return NULL;
185
+ u = (u << 6) | (c & 63);
186
+ rest--;
187
+ if (rest == 0) {
188
+ WStr_addWChar(s, u);
189
+ }
190
+ }
191
+ else if ((c & 0x80) == 0) { /* 0b0nnnnnnn (7bit) */
192
+ WStr_addWChar(s, c);
193
+ rest = 0;
194
+ }
195
+ else if ((c & 0xe0) == 0xc0) { /* 0b110nnnnn (11bit) */
196
+ rest = 1;
197
+ u = c & 31;
198
+ }
199
+ else if ((c & 0xf0) == 0xe0) { /* 0b1110nnnn (16bit) */
200
+ rest = 2;
201
+ u = c & 15;
202
+ }
203
+ else if ((c & 0xf8) == 0xf0) { /* 0b11110nnn (21bit) */
204
+ rest = 3;
205
+ u = c & 7;
206
+ }
207
+ else if ((c & 0xfc) == 0xf8) { /* 0b111110nn (26bit) */
208
+ rest = 4;
209
+ u = c & 3;
210
+ }
211
+ else if ((c & 0xfe) == 0xfc) { /* 0b1111110n (31bit) */
212
+ rest = 5;
213
+ u = c & 1;
214
+ }
215
+ else {
216
+ return NULL;
217
+ }
218
+ }
219
+
220
+ return s;
221
+ }
222
+
167
223
  UString*
168
224
  WStr_convertIntoUString(WString* wstr, UString* ustr)
169
225
  {
@@ -176,6 +232,18 @@ WStr_convertIntoUString(WString* wstr, UString* ustr)
176
232
  return ustr;
177
233
  }
178
234
 
235
+ UString*
236
+ WStr_convertIntoUString2(WString* wstr, int start, int len, UString* ustr)
237
+ {
238
+ int i;
239
+
240
+ for (i = start; i < wstr->len && i < start + len; i++) {
241
+ UniStr_addWChar(ustr, wstr->str[i]);
242
+ }
243
+
244
+ return ustr;
245
+ }
246
+
179
247
  void
180
248
  WStr_dump(WString* s)
181
249
  {
data/wstring.h CHANGED
@@ -24,6 +24,7 @@ typedef struct _WString {
24
24
 
25
25
  WString* WStr_alloc(WString* str);
26
26
  WString* WStr_allocWithUTF8(WString* s, const char* u);
27
+ WString* WStr_allocWithUTF8L(WString* s, const char* u, int len);
27
28
  WString* WStr_enlarge(WString* str, int size);
28
29
  void WStr_free(WString* str);
29
30
  int WStr_addWChars(WString* s, const int* a, int len);
@@ -32,6 +33,7 @@ int WStr_pushWString(WString* s, const WString* add);
32
33
  int WStr_addWChar2(WString* s, int a1, int a2);
33
34
  int WStr_addWChar3(WString* s, int a1, int a2, int a3);
34
35
  UString* WStr_convertIntoUString(WString* wstr, UString* ustr);
36
+ UString* WStr_convertIntoUString2(WString* wstr, int start, int len, UString* ustr);
35
37
  void WStr_dump(WString* s);
36
38
 
37
39
  #ifdef __cplusplus
metadata CHANGED
@@ -1,32 +1,24 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: unicode
3
- version: !ruby/object:Gem::Version
4
- hash: 11
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.4.3
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 4
9
- - 2
10
- version: 0.4.2
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Yoshida Masato
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2011-02-03 00:00:00 Z
12
+ date: 2012-08-07 00:00:00.000000000 Z
19
13
  dependencies: []
20
-
21
14
  description: Unicode normalization library.
22
15
  email: yoshidam@yoshidam.net
23
16
  executables: []
24
-
25
- extensions:
17
+ extensions:
26
18
  - extconf.rb
27
- extra_rdoc_files:
19
+ extra_rdoc_files:
28
20
  - README
29
- files:
21
+ files:
30
22
  - extconf.rb
31
23
  - unicode.c
32
24
  - ustring.c
@@ -40,36 +32,26 @@ files:
40
32
  - unidata.map
41
33
  homepage: http://www.yoshidam.net/Ruby.html#unicode
42
34
  licenses: []
43
-
44
35
  post_install_message:
45
36
  rdoc_options: []
46
-
47
- require_paths:
37
+ require_paths:
48
38
  - .
49
- required_ruby_version: !ruby/object:Gem::Requirement
39
+ required_ruby_version: !ruby/object:Gem::Requirement
50
40
  none: false
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- hash: 3
55
- segments:
56
- - 0
57
- version: "0"
58
- required_rubygems_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ required_rubygems_version: !ruby/object:Gem::Requirement
59
46
  none: false
60
- requirements:
61
- - - ">="
62
- - !ruby/object:Gem::Version
63
- hash: 3
64
- segments:
65
- - 0
66
- version: "0"
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
67
51
  requirements: []
68
-
69
52
  rubyforge_project:
70
- rubygems_version: 1.8.17
53
+ rubygems_version: 1.8.24
71
54
  signing_key:
72
55
  specification_version: 3
73
56
  summary: Unicode normalization library.
74
57
  test_files: []
75
-