unicode 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (8)
  1. data/README +29 -7
  2. data/tools/README +3 -2
  3. data/tools/mkunidata.rb +136 -12
  4. data/unicode.c +379 -16
  5. data/unidata.map +24536 -24435
  6. data/wstring.c +69 -1
  7. data/wstring.h +2 -0
  8. metadata +20 -38
data/wstring.c CHANGED
@@ -43,7 +43,10 @@ WStr_free(WString* str)
43
43
  {
44
44
  str->size = 0;
45
45
  str->len = 0;
46
- free(str->str);
46
+ if (str->str) {
47
+ free(str->str);
48
+ str->str = NULL;
49
+ }
47
50
  }
48
51
 
49
52
  int
@@ -164,6 +167,59 @@ WStr_allocWithUTF8(WString* s, const char* in)
164
167
  return s;
165
168
  }
166
169
 
170
+ WString*
171
+ WStr_allocWithUTF8L(WString* s, const char* in, int len)
172
+ {
173
+ int i;
174
+ int u = 0;
175
+ int rest = 0;
176
+
177
+ WStr_alloc(s);
178
+ if (in == NULL)
179
+ return s;
180
+ for (i = 0; i < len; i++) {
181
+ unsigned char c = in[i];
182
+ if ((c & 0xc0) == 0x80) {
183
+ if (rest == 0)
184
+ return NULL;
185
+ u = (u << 6) | (c & 63);
186
+ rest--;
187
+ if (rest == 0) {
188
+ WStr_addWChar(s, u);
189
+ }
190
+ }
191
+ else if ((c & 0x80) == 0) { /* 0b0nnnnnnn (7bit) */
192
+ WStr_addWChar(s, c);
193
+ rest = 0;
194
+ }
195
+ else if ((c & 0xe0) == 0xc0) { /* 0b110nnnnn (11bit) */
196
+ rest = 1;
197
+ u = c & 31;
198
+ }
199
+ else if ((c & 0xf0) == 0xe0) { /* 0b1110nnnn (16bit) */
200
+ rest = 2;
201
+ u = c & 15;
202
+ }
203
+ else if ((c & 0xf8) == 0xf0) { /* 0b11110nnn (21bit) */
204
+ rest = 3;
205
+ u = c & 7;
206
+ }
207
+ else if ((c & 0xfc) == 0xf8) { /* 0b111110nn (26bit) */
208
+ rest = 4;
209
+ u = c & 3;
210
+ }
211
+ else if ((c & 0xfe) == 0xfc) { /* 0b1111110n (31bit) */
212
+ rest = 5;
213
+ u = c & 1;
214
+ }
215
+ else {
216
+ return NULL;
217
+ }
218
+ }
219
+
220
+ return s;
221
+ }
222
+
167
223
  UString*
168
224
  WStr_convertIntoUString(WString* wstr, UString* ustr)
169
225
  {
@@ -176,6 +232,18 @@ WStr_convertIntoUString(WString* wstr, UString* ustr)
176
232
  return ustr;
177
233
  }
178
234
 
235
+ UString*
236
+ WStr_convertIntoUString2(WString* wstr, int start, int len, UString* ustr)
237
+ {
238
+ int i;
239
+
240
+ for (i = start; i < wstr->len && i < start + len; i++) {
241
+ UniStr_addWChar(ustr, wstr->str[i]);
242
+ }
243
+
244
+ return ustr;
245
+ }
246
+
179
247
  void
180
248
  WStr_dump(WString* s)
181
249
  {
data/wstring.h CHANGED
@@ -24,6 +24,7 @@ typedef struct _WString {
24
24
 
25
25
  WString* WStr_alloc(WString* str);
26
26
  WString* WStr_allocWithUTF8(WString* s, const char* u);
27
+ WString* WStr_allocWithUTF8L(WString* s, const char* u, int len);
27
28
  WString* WStr_enlarge(WString* str, int size);
28
29
  void WStr_free(WString* str);
29
30
  int WStr_addWChars(WString* s, const int* a, int len);
@@ -32,6 +33,7 @@ int WStr_pushWString(WString* s, const WString* add);
32
33
  int WStr_addWChar2(WString* s, int a1, int a2);
33
34
  int WStr_addWChar3(WString* s, int a1, int a2, int a3);
34
35
  UString* WStr_convertIntoUString(WString* wstr, UString* ustr);
36
+ UString* WStr_convertIntoUString2(WString* wstr, int start, int len, UString* ustr);
35
37
  void WStr_dump(WString* s);
36
38
 
37
39
  #ifdef __cplusplus
metadata CHANGED
@@ -1,32 +1,24 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: unicode
3
- version: !ruby/object:Gem::Version
4
- hash: 11
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.4.3
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 4
9
- - 2
10
- version: 0.4.2
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Yoshida Masato
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2011-02-03 00:00:00 Z
12
+ date: 2012-08-07 00:00:00.000000000 Z
19
13
  dependencies: []
20
-
21
14
  description: Unicode normalization library.
22
15
  email: yoshidam@yoshidam.net
23
16
  executables: []
24
-
25
- extensions:
17
+ extensions:
26
18
  - extconf.rb
27
- extra_rdoc_files:
19
+ extra_rdoc_files:
28
20
  - README
29
- files:
21
+ files:
30
22
  - extconf.rb
31
23
  - unicode.c
32
24
  - ustring.c
@@ -40,36 +32,26 @@ files:
40
32
  - unidata.map
41
33
  homepage: http://www.yoshidam.net/Ruby.html#unicode
42
34
  licenses: []
43
-
44
35
  post_install_message:
45
36
  rdoc_options: []
46
-
47
- require_paths:
37
+ require_paths:
48
38
  - .
49
- required_ruby_version: !ruby/object:Gem::Requirement
39
+ required_ruby_version: !ruby/object:Gem::Requirement
50
40
  none: false
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- hash: 3
55
- segments:
56
- - 0
57
- version: "0"
58
- required_rubygems_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ required_rubygems_version: !ruby/object:Gem::Requirement
59
46
  none: false
60
- requirements:
61
- - - ">="
62
- - !ruby/object:Gem::Version
63
- hash: 3
64
- segments:
65
- - 0
66
- version: "0"
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
67
51
  requirements: []
68
-
69
52
  rubyforge_project:
70
- rubygems_version: 1.8.17
53
+ rubygems_version: 1.8.24
71
54
  signing_key:
72
55
  specification_version: 3
73
56
  summary: Unicode normalization library.
74
57
  test_files: []
75
-