unicode 0.4.4.3-x86-mswin32-60 → 0.4.4.4-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1325 +0,0 @@
1
- /*
2
- * Unicode Library version 0.4.3
3
- * Aug 8, 2012: version 0.4
4
- * Oct 14, 2010: version 0.4
5
- * Feb 26, 2010: version 0.3
6
- * Dec 29, 2009: version 0.2
7
- * Nov 23, 1999 yoshidam
8
- *
9
- */
10
-
11
- #define UNICODE_VERSION "0.4.3"
12
-
13
- #include "ruby.h"
14
- #ifdef HAVE_RUBY_IO_H
15
- # include "ruby/io.h"
16
- #else
17
- # include "rubyio.h"
18
- #endif
19
- #include <stdio.h>
20
- #include "wstring.h"
21
- #include "unidata.map"
22
-
/*
 * Fallback string accessors, defined only when the included Ruby headers
 * do not already provide RSTRING_PTR: read the data pointer and the length
 * directly from the RSTRING structure.
 */
#if !defined(RSTRING_PTR)
#  define RSTRING_PTR(s) (RSTRING(s)->ptr)
#  define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
27
-
/*
 * ENC_(o): when Ruby's encoding API is available (HAVE_RUBY_ENCODING_H),
 * associate object `o` with the module's output encoding `enc_out`;
 * otherwise expand to `o` unchanged.
 */
#if defined(HAVE_RUBY_ENCODING_H)
static rb_encoding* enc_out;
#  define ENC_(o) (rb_enc_associate(o, enc_out))
#else
#  define ENC_(o) (o)
#endif
34
-
35
- inline static VALUE
36
- taintObject(VALUE src, VALUE obj) {
37
- if (OBJ_TAINTED(src))
38
- OBJ_TAINT(obj);
39
- return obj;
40
- }
41
- #define TO_(src, obj) (taintObject(src, obj))
42
-
#ifdef HAVE_RUBY_ENCODING_H
/*
 * CONVERT_TO_UTF8(str): if `str` is tagged with an encoding other than
 * UTF-8 or US-ASCII, replace `str` (which must be an assignable variable)
 * with a copy transcoded to `enc_out`.  Strings already in one of those two
 * encodings are left untouched.
 */
# define CONVERT_TO_UTF8(str) do { \
    int encindex = ENCODING_GET(str); \
    volatile VALUE encobj; \
    if (!(encindex == rb_utf8_encindex() || \
          encindex == rb_usascii_encindex())) { \
      encobj = rb_enc_from_encoding(enc_out); \
      str = rb_str_encode(str, encobj, 0, Qnil); \
    } \
  } while (0)
#endif
54
-
55
- static VALUE mUnicode;
56
- static VALUE unicode_data;
57
- static VALUE composition_table;
58
- static VALUE catname_long[c_Cn+1];
59
- static VALUE catname_abbr[c_Cn+1];
60
-
/*
 * Constants for arithmetic Hangul syllable (de)composition:
 * base code points and counts for syllables (S), leading consonants (L),
 * vowels (V) and trailing consonants (T).
 */
#define SBASE  (0xac00)
#define LBASE  (0x1100)
#define LCOUNT (19)
#define VBASE  (0x1161)
#define VCOUNT (21)
#define TBASE  (0x11a7)
#define TCOUNT (28)
#define NCOUNT (VCOUNT * TCOUNT) /* 588 */
#define SCOUNT (LCOUNT * NCOUNT) /* 11172 */
71
-
72
- VALUE
73
- get_unidata(int ucs) {
74
- VALUE ch = rb_hash_aref(unicode_data, INT2FIX(ucs));
75
- if (!NIL_P(ch))
76
- return ch;
77
- #ifdef CJK_IDEOGRAPH_EXTENSION_A_FIRST
78
- else if (ucs >= CJK_IDEOGRAPH_EXTENSION_A_FIRST &&
79
- ucs <= CJK_IDEOGRAPH_EXTENSION_A_LAST)
80
- return rb_hash_aref(unicode_data,
81
- INT2FIX(CJK_IDEOGRAPH_EXTENSION_A_FIRST));
82
- #endif
83
- #ifdef CJK_IDEOGRAPH_FIRST
84
- else if (ucs >= CJK_IDEOGRAPH_FIRST &&
85
- ucs <= CJK_IDEOGRAPH_LAST)
86
- return rb_hash_aref(unicode_data,
87
- INT2FIX(CJK_IDEOGRAPH_FIRST));
88
- #endif
89
- #ifdef HANGUL_SYLLABLE_FIRST
90
- else if (ucs >= HANGUL_SYLLABLE_FIRST &&
91
- ucs <= HANGUL_SYLLABLE_LAST)
92
- return rb_hash_aref(unicode_data,
93
- INT2FIX(HANGUL_SYLLABLE_FIRST));
94
- #endif
95
- #ifdef NON_PRIVATE_USE_HIGH_SURROGATE_FIRST
96
- else if (ucs >= NON_PRIVATE_USE_HIGH_SURROGATE_FIRST &&
97
- ucs <= NON_PRIVATE_USE_HIGH_SURROGATE_LAST)
98
- return rb_hash_aref(unicode_data,
99
- INT2FIX(NON_PRIVATE_USE_HIGH_SURROGATE_FIRST));
100
- #endif
101
- #ifdef PRIVATE_USE_HIGH_SURROGATE_FIRST
102
- else if (ucs >= PRIVATE_USE_HIGH_SURROGATE_FIRST &&
103
- ucs <= PRIVATE_USE_HIGH_SURROGATE_LAST)
104
- return rb_hash_aref(unicode_data,
105
- INT2FIX(PRIVATE_USE_HIGH_SURROGATE_FIRST));
106
- #endif
107
- #ifdef LOW_SURROGATE_FIRST
108
- else if (ucs >= LOW_SURROGATE_FIRST &&
109
- ucs <= LOW_SURROGATE_LAST)
110
- return rb_hash_aref(unicode_data,
111
- INT2FIX(LOW_SURROGATE_FIRST));
112
- #endif
113
- #ifdef PRIVATE_USE_FIRST
114
- else if (ucs >= PRIVATE_USE_FIRST &&
115
- ucs <= PRIVATE_USE_LAST)
116
- return rb_hash_aref(unicode_data,
117
- INT2FIX(PRIVATE_USE_FIRST));
118
- #endif
119
- #ifdef CJK_IDEOGRAPH_EXTENSION_B_FIRST
120
- else if (ucs >= CJK_IDEOGRAPH_EXTENSION_B_FIRST &&
121
- ucs <= CJK_IDEOGRAPH_EXTENSION_B_LAST)
122
- return rb_hash_aref(unicode_data,
123
- INT2FIX(CJK_IDEOGRAPH_EXTENSION_B_FIRST));
124
- #endif
125
- #ifdef CJK_IDEOGRAPH_EXTENSION_C_FIRST
126
- else if (ucs >= CJK_IDEOGRAPH_EXTENSION_C_FIRST &&
127
- ucs <= CJK_IDEOGRAPH_EXTENSION_C_LAST)
128
- return rb_hash_aref(unicode_data,
129
- INT2FIX(CJK_IDEOGRAPH_EXTENSION_C_FIRST));
130
- #endif
131
- #ifdef CJK_IDEOGRAPH_EXTENSION_D_FIRST
132
- else if (ucs >= CJK_IDEOGRAPH_EXTENSION_D_FIRST &&
133
- ucs <= CJK_IDEOGRAPH_EXTENSION_D_LAST)
134
- return rb_hash_aref(unicode_data,
135
- INT2FIX(CJK_IDEOGRAPH_EXTENSION_D_FIRST));
136
- #endif
137
- #ifdef PLANE_15_PRIVATE_USE_FIRST
138
- else if (ucs >= PLANE_15_PRIVATE_USE_FIRST &&
139
- ucs <= PLANE_15_PRIVATE_USE_LAST)
140
- return rb_hash_aref(unicode_data,
141
- INT2FIX(PLANE_15_PRIVATE_USE_FIRST));
142
- #endif
143
- #ifdef PLANE_16_PRIVATE_USE_FIRST
144
- else if (ucs >= PLANE_16_PRIVATE_USE_FIRST &&
145
- ucs <= PLANE_16_PRIVATE_USE_LAST)
146
- return rb_hash_aref(unicode_data,
147
- INT2FIX(PLANE_16_PRIVATE_USE_FIRST));
148
- #endif
149
- return Qnil;
150
- }
151
-
152
- static int
153
- get_cc(int ucs)
154
- {
155
- VALUE ch = rb_hash_aref(unicode_data, INT2FIX(ucs));
156
-
157
- if (!NIL_P(ch)) {
158
- return unidata[FIX2INT(ch)].combining_class;
159
- }
160
- return 0;
161
- }
162
-
163
- static int
164
- get_gencat(int ucs)
165
- {
166
- VALUE ch = get_unidata(ucs);
167
-
168
- if (!NIL_P(ch)) {
169
- return unidata[FIX2INT(ch)].general_category;
170
- }
171
- return c_Cn; /* Unassigned */
172
- }
173
-
174
- static int
175
- get_eawidth(int ucs)
176
- {
177
- VALUE ch = get_unidata(ucs);
178
-
179
- if (!NIL_P(ch)) {
180
- return unidata[FIX2INT(ch)].east_asian_width;
181
- }
182
- return w_N; /* Neutral */
183
- }
184
-
185
- static const char*
186
- get_canon(int ucs)
187
- {
188
- VALUE ch = rb_hash_aref(unicode_data, INT2FIX(ucs));
189
-
190
- if (!NIL_P(ch)) {
191
- return unidata[FIX2INT(ch)].canon;
192
- }
193
- return NULL;
194
- }
195
-
196
- static const char*
197
- get_canon_ex(int ucs)
198
- {
199
- VALUE ch = rb_hash_aref(unicode_data, INT2FIX(ucs));
200
-
201
- if (!NIL_P(ch)) {
202
- int i = FIX2INT(ch);
203
- if (!unidata[i].exclusion)
204
- return unidata[i].canon;
205
- }
206
- return NULL;
207
- }
208
-
209
- static const char*
210
- get_compat(int ucs)
211
- {
212
- VALUE ch = rb_hash_aref(unicode_data, INT2FIX(ucs));
213
-
214
- if (!NIL_P(ch)) {
215
- return unidata[FIX2INT(ch)].compat;
216
- }
217
- return NULL;
218
- }
219
-
220
- static const char*
221
- get_uppercase(int ucs)
222
- {
223
- VALUE ch = rb_hash_aref(unicode_data, INT2FIX(ucs));
224
-
225
- if (!NIL_P(ch)) {
226
- return unidata[FIX2INT(ch)].uppercase;
227
- }
228
- return NULL;
229
- }
230
-
231
- static const char*
232
- get_lowercase(int ucs)
233
- {
234
- VALUE ch = rb_hash_aref(unicode_data, INT2FIX(ucs));
235
-
236
- if (!NIL_P(ch)) {
237
- return unidata[FIX2INT(ch)].lowercase;
238
- }
239
- return NULL;
240
- }
241
-
242
- static const char*
243
- get_titlecase(int ucs)
244
- {
245
- VALUE ch = rb_hash_aref(unicode_data, INT2FIX(ucs));
246
-
247
- if (!NIL_P(ch)) {
248
- return unidata[FIX2INT(ch)].titlecase;
249
- }
250
- return NULL;
251
- }
252
-
253
- static int
254
- get_composition(const char* str)
255
- {
256
- VALUE ch = rb_hash_aref(composition_table, rb_str_new2(str));
257
-
258
- if (!NIL_P(ch)) {
259
- return FIX2INT(ch);
260
- }
261
- return -1;
262
- }
263
-
264
- static WString*
265
- sort_canonical(WString* ustr)
266
- {
267
- int i = 1;
268
- int len = ustr->len;
269
-
270
- if (len < 2) return ustr;
271
-
272
- while (i < len) {
273
- int last = ustr->str[i - 1];
274
- int ch = ustr->str[i];
275
- int last_cc = get_cc(last);
276
- int cc = get_cc(ch);
277
- if (cc != 0 && last_cc != 0 && last_cc > cc) {
278
- ustr->str[i] = last;
279
- ustr->str[i-1] = ch;
280
- if (i > 1) i--;
281
- }
282
- else {
283
- i++;
284
- }
285
- }
286
- return ustr;
287
- }
288
-
289
- static void
290
- decompose_hangul(int ucs, int* l, int* v, int* t)
291
- {
292
- int sindex = ucs - SBASE;
293
- if (sindex < 0 || sindex >= SCOUNT) {
294
- *l = ucs;
295
- *v = *t = 0;
296
- return;
297
- }
298
- *l = LBASE + sindex / NCOUNT;
299
- *v = VBASE + (sindex % NCOUNT) / TCOUNT;
300
- *t = TBASE + sindex % TCOUNT;
301
- if (*t == TBASE) *t = 0;
302
- }
303
-
304
- /*
305
- * push decomposed str into result
306
- */
307
- static WString*
308
- decompose_internal(WString* ustr, WString* result)
309
- {
310
- int i;
311
- int len = ustr->len;
312
-
313
- for (i = 0; i < len; i++) {
314
- int ucs = ustr->str[i];
315
- if (ucs >= SBASE && ucs < SBASE + SCOUNT) {
316
- int l, v, t;
317
- decompose_hangul(ucs, &l, &v, &t);
318
- WStr_addWChar(result, l);
319
- if (v) WStr_addWChar(result, v);
320
- if (t) WStr_addWChar(result, t);
321
- }
322
- else {
323
- const char* dc = get_canon(ucs);
324
- if (!dc) {
325
- WStr_addWChar(result, ucs);
326
- }
327
- else {
328
- WString wdc;
329
- WStr_allocWithUTF8(&wdc, dc);
330
- decompose_internal(&wdc, result);
331
- WStr_free(&wdc);
332
- }
333
- }
334
- }
335
- return result;
336
- }
337
-
338
- /*
339
- * push decomposed str into result
340
- */
341
- static WString*
342
- decompose_safe_internal(WString* ustr, WString* result)
343
- {
344
- int i;
345
- int len = ustr->len;
346
-
347
- for (i = 0; i < len; i++) {
348
- int ucs = ustr->str[i];
349
- if (ucs >= SBASE && ucs < SBASE + SCOUNT) {
350
- int l, v, t;
351
- decompose_hangul(ucs, &l, &v, &t);
352
- WStr_addWChar(result, l);
353
- if (v) WStr_addWChar(result, v);
354
- if (t) WStr_addWChar(result, t);
355
- }
356
- else {
357
- const char* dc = get_canon_ex(ucs);
358
- if (!dc) {
359
- WStr_addWChar(result, ucs);
360
- }
361
- else {
362
- WString wdc;
363
- WStr_allocWithUTF8(&wdc, dc);
364
- decompose_safe_internal(&wdc, result);
365
- WStr_free(&wdc);
366
- }
367
- }
368
- }
369
- return result;
370
- }
371
-
372
- /*
373
- * push compatibility decomposed str into result
374
- */
375
- static WString*
376
- decompose_compat_internal(WString* ustr, WString* result)
377
- {
378
- int i;
379
- int len = ustr->len;
380
-
381
- for (i = 0; i < len; i++) {
382
- int ucs = ustr->str[i];
383
- if (ucs >= SBASE && ucs < SBASE + SCOUNT) {
384
- int l, v, t;
385
- decompose_hangul(ucs, &l, &v, &t);
386
- WStr_addWChar(result, l);
387
- if (v) WStr_addWChar(result, v);
388
- if (t) WStr_addWChar(result, t);
389
- }
390
- else {
391
- const char* dc = get_compat(ucs);
392
- if (!dc) {
393
- WStr_addWChar(result, ucs);
394
- }
395
- else {
396
- WString wdc;
397
- WStr_allocWithUTF8(&wdc, dc);
398
- decompose_compat_internal(&wdc, result);
399
- WStr_free(&wdc);
400
- }
401
- }
402
- }
403
- return result;
404
- }
405
-
406
-
/*
 * UCS4toUTF8(p, c): encode code point `c` as UTF-8 at `p` and advance `p`
 * past the bytes written (1 to 6 bytes; the 5- and 6-byte forms cover
 * values up to 0x7fffffff).  Values >= 0x80000000 write nothing.
 *
 * `p` must be a modifiable pointer lvalue with room for the encoded bytes.
 * Both arguments are fully parenthesized in the expansion, so `c` may be
 * any expression (e.g. `hi | lo`); note that `c` is still evaluated more
 * than once, so it must not have side effects.
 */
#define UCS4toUTF8(p, c) \
  do { \
    if ((c) < 128) { \
      *(p)++ = (c); \
    } \
    else if ((c) < 2048) { \
      *(p)++ = ((c) >> 6) | 192; \
      *(p)++ = ((c) & 63) | 128; \
    } \
    else if ((c) < 0x10000) { \
      *(p)++ = ((c) >> 12) | 224; \
      *(p)++ = (((c) >> 6) & 63) | 128; \
      *(p)++ = ((c) & 63) | 128; \
    } \
    else if ((c) < 0x200000) { \
      *(p)++ = ((c) >> 18) | 240; \
      *(p)++ = (((c) >> 12) & 63) | 128; \
      *(p)++ = (((c) >> 6) & 63) | 128; \
      *(p)++ = ((c) & 63) | 128; \
    } \
    else if ((c) < 0x4000000) { \
      *(p)++ = ((c) >> 24) | 248; \
      *(p)++ = (((c) >> 18) & 63) | 128; \
      *(p)++ = (((c) >> 12) & 63) | 128; \
      *(p)++ = (((c) >> 6) & 63) | 128; \
      *(p)++ = ((c) & 63) | 128; \
    } \
    else if ((c) < 0x80000000) { \
      *(p)++ = ((c) >> 30) | 252; \
      *(p)++ = (((c) >> 24) & 63) | 128; \
      *(p)++ = (((c) >> 18) & 63) | 128; \
      *(p)++ = (((c) >> 12) & 63) | 128; \
      *(p)++ = (((c) >> 6) & 63) | 128; \
      *(p)++ = ((c) & 63) | 128; \
    } \
  } while (0)
443
-
444
- static int
445
- compose_pair(unsigned int c1, unsigned int c2)
446
- {
447
- int ret;
448
- char ustr[13]; /* stored two UTF-8 chars */
449
- char *p = ustr;
450
-
451
- /* Hangul L + V */
452
- if (c1 >= LBASE && c1 < LBASE + LCOUNT &&
453
- c2 >= VBASE && c2 < VBASE + VCOUNT) {
454
- return SBASE + ((c1 - LBASE) * VCOUNT + (c2 - VBASE)) * TCOUNT;
455
- }
456
- /* Hangul LV + T */
457
- else if (c1 >= SBASE && c1 < SBASE + SCOUNT &&
458
- (c1 - SBASE) % TCOUNT == 0 &&
459
- c2 >= TBASE && c2 < TBASE + TCOUNT) {
460
- return c1 + (c2 - TBASE);
461
- }
462
- UCS4toUTF8(p, c1);
463
- UCS4toUTF8(p, c2);
464
- *p = '\0';
465
- ret = get_composition(ustr);
466
-
467
- return ret;
468
- }
469
-
470
- /*
471
- * push canonical composed str into result
472
- */
473
- static WString*
474
- compose_internal(WString* ustr, WString* result)
475
- {
476
- int starterPos = 0;
477
- int starterCh = ustr->str[0];
478
- int compPos = 1;
479
- int lastClass = get_cc(starterCh);
480
- int oldLen = ustr->len;
481
- int decompPos;
482
-
483
- if (oldLen == 0) return result;
484
- if (lastClass != 0) lastClass = 256;
485
- /* copy string */
486
- result->len = 0;
487
- WStr_pushWString(result, ustr);
488
-
489
- for (decompPos = compPos; decompPos < result->len; decompPos++) {
490
- int ch = result->str[decompPos];
491
- int chClass = get_cc(ch);
492
- int composite = compose_pair(starterCh, ch);
493
- if (composite > 0 &&
494
- (lastClass < chClass ||lastClass == 0)) {
495
- result->str[starterPos] = composite;
496
- starterCh = composite;
497
- }
498
- else {
499
- if (chClass == 0) {
500
- starterPos = compPos;
501
- starterCh = ch;
502
- }
503
- lastClass = chClass;
504
- result->str[compPos] = ch;
505
- if (result->len != oldLen) {
506
- decompPos += result->len - oldLen;
507
- oldLen = result->len;
508
- }
509
- compPos++;
510
- }
511
- }
512
- result->len = compPos;
513
- return result;
514
- }
#if 0
/*
 * Disabled alternative implementation of compose_internal(); excluded from
 * compilation by the surrounding #if 0.
 *
 * It only composes a character onto the immediately preceding starter
 * (combining class 0) and appends finished characters to `result`.
 */
static WString*
compose_internal(WString* ustr, WString* result)
{
  int count = ustr->len;
  int pending;
  int pending_cc;
  int pos;

  if (count == 0) return result;

  pending = ustr->str[0];
  pending_cc = get_cc(pending);
  if (pending_cc != 0) pending_cc = 256;

  for (pos = 1; pos < count; pos++) {
    int ch = ustr->str[pos];
    int cc = get_cc(ch);
    int composite;

    if (pending_cc == 0 &&
        (composite = compose_pair(pending, ch)) >= 0) {
      pending = composite;
      pending_cc = get_cc(composite);
    }
    else {
      WStr_addWChar(result, pending);
      pending = ch;
      pending_cc = cc;
    }
  }
  WStr_addWChar(result, pending);

  return result;
}
#endif
550
-
551
- static WString*
552
- upcase_internal(WString* str, WString* result)
553
- {
554
- int i;
555
- int len = str->len;
556
-
557
- for (i = 0; i < len; i++) {
558
- int ucs = str->str[i];
559
- const char* c = get_uppercase(ucs);
560
- if (!c) {
561
- WStr_addWChar(result, ucs);
562
- }
563
- else {
564
- WString wc;
565
- WStr_allocWithUTF8(&wc, c);
566
- WStr_pushWString(result, &wc);
567
- WStr_free(&wc);
568
- }
569
- }
570
- return result;
571
- }
572
-
573
- static WString*
574
- downcase_internal(WString* str, WString* result)
575
- {
576
- int i;
577
- int len = str->len;
578
-
579
- for (i = 0; i < len; i++) {
580
- int ucs = str->str[i];
581
- const char* c = get_lowercase(ucs);
582
- if (!c) {
583
- WStr_addWChar(result, ucs);
584
- }
585
- else {
586
- WString wc;
587
- WStr_allocWithUTF8(&wc, c);
588
- WStr_pushWString(result, &wc);
589
- WStr_free(&wc);
590
- }
591
- }
592
- return result;
593
- }
594
-
595
- static WString*
596
- capitalize_internal(WString* str, WString* result)
597
- {
598
- int i;
599
- int len = str->len;
600
-
601
- if (len > 0) {
602
- const char* c = get_titlecase(str->str[0]);
603
- if (!c) {
604
- WStr_addWChar(result, str->str[0]);
605
- }
606
- else {
607
- WString wc;
608
- WStr_allocWithUTF8(&wc, c);
609
- WStr_pushWString(result, &wc);
610
- WStr_free(&wc);
611
- }
612
- }
613
- for (i = 1; i < len; i++) {
614
- int ucs = str->str[i];
615
- const char* c = get_lowercase(ucs);
616
- if (!c) {
617
- WStr_addWChar(result, ucs);
618
- }
619
- else {
620
- WString wc;
621
- WStr_allocWithUTF8(&wc, c);
622
- WStr_pushWString(result, &wc);
623
- WStr_free(&wc);
624
- }
625
- }
626
- return result;
627
- }
628
-
629
- static VALUE
630
- unicode_strcmp(VALUE obj, VALUE str1, VALUE str2)
631
- {
632
- WString wstr1;
633
- WString wstr2;
634
- WString result1;
635
- WString result2;
636
- UString ustr1;
637
- UString ustr2;
638
- int ret;
639
-
640
- Check_Type(str1, T_STRING);
641
- Check_Type(str2, T_STRING);
642
- #ifdef HAVE_RUBY_ENCODING_H
643
- CONVERT_TO_UTF8(str1);
644
- CONVERT_TO_UTF8(str2);
645
- #endif
646
- WStr_allocWithUTF8L(&wstr1, RSTRING_PTR(str1), RSTRING_LEN(str1));
647
- WStr_allocWithUTF8L(&wstr2, RSTRING_PTR(str2), RSTRING_LEN(str2));
648
- WStr_alloc(&result1);
649
- WStr_alloc(&result2);
650
- decompose_internal(&wstr1, &result1);
651
- decompose_internal(&wstr2, &result2);
652
- WStr_free(&wstr1);
653
- WStr_free(&wstr2);
654
- sort_canonical(&result1);
655
- sort_canonical(&result2);
656
- UniStr_alloc(&ustr1);
657
- UniStr_alloc(&ustr2);
658
- WStr_convertIntoUString(&result1, &ustr1);
659
- WStr_convertIntoUString(&result2, &ustr2);
660
- WStr_free(&result1);
661
- WStr_free(&result2);
662
- UniStr_addChar(&ustr1, '\0');
663
- UniStr_addChar(&ustr2, '\0');
664
- ret = strcmp((char*)ustr1.str, (char*)ustr2.str);
665
- UniStr_free(&ustr1);
666
- UniStr_free(&ustr2);
667
-
668
- return INT2FIX(ret);
669
- }
670
-
671
- static VALUE
672
- unicode_strcmp_compat(VALUE obj, VALUE str1, VALUE str2)
673
- {
674
- WString wstr1;
675
- WString wstr2;
676
- WString result1;
677
- WString result2;
678
- UString ustr1;
679
- UString ustr2;
680
- int ret;
681
-
682
- Check_Type(str1, T_STRING);
683
- Check_Type(str2, T_STRING);
684
- #ifdef HAVE_RUBY_ENCODING_H
685
- CONVERT_TO_UTF8(str1);
686
- CONVERT_TO_UTF8(str2);
687
- #endif
688
- WStr_allocWithUTF8L(&wstr1, RSTRING_PTR(str1), RSTRING_LEN(str1));
689
- WStr_allocWithUTF8L(&wstr2, RSTRING_PTR(str2), RSTRING_LEN(str2));
690
- WStr_alloc(&result1);
691
- WStr_alloc(&result2);
692
- decompose_compat_internal(&wstr1, &result1);
693
- decompose_compat_internal(&wstr2, &result2);
694
- WStr_free(&wstr1);
695
- WStr_free(&wstr2);
696
- sort_canonical(&result1);
697
- sort_canonical(&result2);
698
- UniStr_alloc(&ustr1);
699
- UniStr_alloc(&ustr2);
700
- WStr_convertIntoUString(&result1, &ustr1);
701
- WStr_convertIntoUString(&result2, &ustr2);
702
- WStr_free(&result1);
703
- WStr_free(&result2);
704
- UniStr_addChar(&ustr1, '\0');
705
- UniStr_addChar(&ustr2, '\0');
706
- ret = strcmp((char*)ustr1.str, (char*)ustr2.str);
707
- UniStr_free(&ustr1);
708
- UniStr_free(&ustr2);
709
-
710
- return INT2FIX(ret);
711
- }
712
-
713
- static VALUE
714
- unicode_decompose(VALUE obj, VALUE str)
715
- {
716
- WString ustr;
717
- WString result;
718
- UString ret;
719
- VALUE vret;
720
-
721
- Check_Type(str, T_STRING);
722
- #ifdef HAVE_RUBY_ENCODING_H
723
- CONVERT_TO_UTF8(str);
724
- #endif
725
- WStr_allocWithUTF8L(&ustr, RSTRING_PTR(str), RSTRING_LEN(str));
726
- WStr_alloc(&result);
727
- decompose_internal(&ustr, &result);
728
- WStr_free(&ustr);
729
- sort_canonical(&result);
730
- UniStr_alloc(&ret);
731
- WStr_convertIntoUString(&result, &ret);
732
- WStr_free(&result);
733
- vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
734
- UniStr_free(&ret);
735
-
736
- return vret;
737
- }
738
-
739
- static VALUE
740
- unicode_decompose_safe(VALUE obj, VALUE str)
741
- {
742
- WString ustr;
743
- WString result;
744
- UString ret;
745
- VALUE vret;
746
-
747
- Check_Type(str, T_STRING);
748
- #ifdef HAVE_RUBY_ENCODING_H
749
- CONVERT_TO_UTF8(str);
750
- #endif
751
- WStr_allocWithUTF8L(&ustr, RSTRING_PTR(str), RSTRING_LEN(str));
752
- WStr_alloc(&result);
753
- decompose_safe_internal(&ustr, &result);
754
- WStr_free(&ustr);
755
- sort_canonical(&result);
756
- UniStr_alloc(&ret);
757
- WStr_convertIntoUString(&result, &ret);
758
- WStr_free(&result);
759
- vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
760
- UniStr_free(&ret);
761
-
762
- return vret;
763
- }
764
-
765
- static VALUE
766
- unicode_decompose_compat(VALUE obj, VALUE str)
767
- {
768
- WString ustr;
769
- WString result;
770
- UString ret;
771
- VALUE vret;
772
-
773
- Check_Type(str, T_STRING);
774
- #ifdef HAVE_RUBY_ENCODING_H
775
- CONVERT_TO_UTF8(str);
776
- #endif
777
- WStr_allocWithUTF8L(&ustr, RSTRING_PTR(str), RSTRING_LEN(str));
778
- WStr_alloc(&result);
779
- decompose_compat_internal(&ustr, &result);
780
- WStr_free(&ustr);
781
- sort_canonical(&result);
782
- UniStr_alloc(&ret);
783
- WStr_convertIntoUString(&result, &ret);
784
- WStr_free(&result);
785
- vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
786
- UniStr_free(&ret);
787
-
788
- return vret;
789
- }
790
-
791
- static VALUE
792
- unicode_compose(VALUE obj, VALUE str)
793
- {
794
- WString ustr;
795
- WString result;
796
- UString ret;
797
- VALUE vret;
798
-
799
- Check_Type(str, T_STRING);
800
- #ifdef HAVE_RUBY_ENCODING_H
801
- CONVERT_TO_UTF8(str);
802
- #endif
803
- WStr_allocWithUTF8L(&ustr, RSTRING_PTR(str), RSTRING_LEN(str));
804
- sort_canonical(&ustr);
805
- WStr_alloc(&result);
806
- compose_internal(&ustr, &result);
807
- WStr_free(&ustr);
808
- UniStr_alloc(&ret);
809
- WStr_convertIntoUString(&result, &ret);
810
- WStr_free(&result);
811
- vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
812
- UniStr_free(&ret);
813
-
814
- return vret;
815
- }
816
-
817
- static VALUE
818
- unicode_normalize_C(VALUE obj, VALUE str)
819
- {
820
- WString ustr1;
821
- WString ustr2;
822
- WString result;
823
- UString ret;
824
- VALUE vret;
825
-
826
- Check_Type(str, T_STRING);
827
- #ifdef HAVE_RUBY_ENCODING_H
828
- CONVERT_TO_UTF8(str);
829
- #endif
830
- WStr_allocWithUTF8L(&ustr1, RSTRING_PTR(str), RSTRING_LEN(str));
831
- WStr_alloc(&ustr2);
832
- decompose_internal(&ustr1, &ustr2);
833
- WStr_free(&ustr1);
834
- sort_canonical(&ustr2);
835
- WStr_alloc(&result);
836
- compose_internal(&ustr2, &result);
837
- WStr_free(&ustr2);
838
- UniStr_alloc(&ret);
839
- WStr_convertIntoUString(&result, &ret);
840
- WStr_free(&result);
841
- vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
842
- UniStr_free(&ret);
843
-
844
- return vret;
845
- }
846
-
847
- static VALUE
848
- unicode_normalize_safe(VALUE obj, VALUE str)
849
- {
850
- WString ustr1;
851
- WString ustr2;
852
- WString result;
853
- UString ret;
854
- VALUE vret;
855
-
856
- Check_Type(str, T_STRING);
857
- #ifdef HAVE_RUBY_ENCODING_H
858
- CONVERT_TO_UTF8(str);
859
- #endif
860
- WStr_allocWithUTF8L(&ustr1, RSTRING_PTR(str), RSTRING_LEN(str));
861
- WStr_alloc(&ustr2);
862
- decompose_safe_internal(&ustr1, &ustr2);
863
- WStr_free(&ustr1);
864
- sort_canonical(&ustr2);
865
- WStr_alloc(&result);
866
- compose_internal(&ustr2, &result);
867
- WStr_free(&ustr2);
868
- UniStr_alloc(&ret);
869
- WStr_convertIntoUString(&result, &ret);
870
- WStr_free(&result);
871
- vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
872
- UniStr_free(&ret);
873
-
874
- return vret;
875
- }
876
-
877
- static VALUE
878
- unicode_normalize_KC(VALUE obj, VALUE str)
879
- {
880
- WString ustr1;
881
- WString ustr2;
882
- WString result;
883
- UString ret;
884
- VALUE vret;
885
-
886
- Check_Type(str, T_STRING);
887
- #ifdef HAVE_RUBY_ENCODING_H
888
- CONVERT_TO_UTF8(str);
889
- #endif
890
- WStr_allocWithUTF8L(&ustr1, RSTRING_PTR(str), RSTRING_LEN(str));
891
- WStr_alloc(&ustr2);
892
- decompose_compat_internal(&ustr1, &ustr2);
893
- WStr_free(&ustr1);
894
- sort_canonical(&ustr2);
895
- WStr_alloc(&result);
896
- compose_internal(&ustr2, &result);
897
- WStr_free(&ustr2);
898
- UniStr_alloc(&ret);
899
- WStr_convertIntoUString(&result, &ret);
900
- WStr_free(&result);
901
- vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
902
- UniStr_free(&ret);
903
-
904
- return vret;
905
- }
906
-
907
- static VALUE
908
- unicode_upcase(VALUE obj, VALUE str)
909
- {
910
- WString ustr;
911
- WString result;
912
- UString ret;
913
- VALUE vret;
914
-
915
- Check_Type(str, T_STRING);
916
- #ifdef HAVE_RUBY_ENCODING_H
917
- CONVERT_TO_UTF8(str);
918
- #endif
919
- WStr_allocWithUTF8L(&ustr, RSTRING_PTR(str), RSTRING_LEN(str));
920
- WStr_alloc(&result);
921
- upcase_internal(&ustr, &result);
922
- //sort_canonical(&result);
923
- WStr_free(&ustr);
924
- UniStr_alloc(&ret);
925
- WStr_convertIntoUString(&result, &ret);
926
- WStr_free(&result);
927
- vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
928
- UniStr_free(&ret);
929
-
930
- return vret;
931
- }
932
-
933
- static VALUE
934
- unicode_downcase(VALUE obj, VALUE str)
935
- {
936
- WString ustr;
937
- WString result;
938
- UString ret;
939
- VALUE vret;
940
-
941
- Check_Type(str, T_STRING);
942
- #ifdef HAVE_RUBY_ENCODING_H
943
- CONVERT_TO_UTF8(str);
944
- #endif
945
- WStr_allocWithUTF8L(&ustr, RSTRING_PTR(str), RSTRING_LEN(str));
946
- WStr_alloc(&result);
947
- downcase_internal(&ustr, &result);
948
- //sort_canonical(&result);
949
- WStr_free(&ustr);
950
- UniStr_alloc(&ret);
951
- WStr_convertIntoUString(&result, &ret);
952
- WStr_free(&result);
953
- vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
954
- UniStr_free(&ret);
955
-
956
- return vret;
957
- }
958
-
959
- #ifdef HAVE_RUBY_ENCODING_H
960
-
961
-
962
- #endif
963
-
964
- static VALUE
965
- unicode_capitalize(VALUE obj, VALUE str)
966
- {
967
- WString ustr;
968
- WString result;
969
- UString ret;
970
- VALUE vret;
971
-
972
- Check_Type(str, T_STRING);
973
- #ifdef HAVE_RUBY_ENCODING_H
974
- CONVERT_TO_UTF8(str);
975
- #endif
976
- WStr_allocWithUTF8L(&ustr, RSTRING_PTR(str), RSTRING_LEN(str));
977
- WStr_alloc(&result);
978
- capitalize_internal(&ustr, &result);
979
- //sort_canonical(&result);
980
- WStr_free(&ustr);
981
- UniStr_alloc(&ret);
982
- WStr_convertIntoUString(&result, &ret);
983
- WStr_free(&result);
984
- vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
985
- UniStr_free(&ret);
986
-
987
- return vret;
988
- }
989
-
990
- typedef struct _get_categories_param {
991
- WString* wstr;
992
- VALUE str;
993
- VALUE* catname;
994
- } get_categories_param;
995
-
996
- static VALUE
997
- get_categories_internal(get_categories_param* param)
998
- {
999
- WString* wstr = param->wstr;
1000
- VALUE str = param->str;
1001
- VALUE* catname = param->catname;
1002
- int pos;
1003
- int block_p = rb_block_given_p();
1004
- volatile VALUE ret = str;
1005
-
1006
- if (!block_p)
1007
- ret = rb_ary_new();
1008
- for (pos = 0; pos < wstr->len; pos++) {
1009
- int gencat = get_gencat(wstr->str[pos]);
1010
- if (!block_p)
1011
- rb_ary_push(ret, catname[gencat]);
1012
- else {
1013
- rb_yield(catname[gencat]);
1014
- }
1015
- }
1016
-
1017
- return ret;
1018
- }
1019
-
1020
- VALUE
1021
- get_categories_ensure(WString* wstr)
1022
- {
1023
- WStr_free(wstr);
1024
- return Qnil;
1025
- }
1026
-
1027
- VALUE
1028
- unicode_get_categories(VALUE obj, VALUE str)
1029
- {
1030
- WString wstr;
1031
- get_categories_param param = { &wstr, str, catname_long };
1032
-
1033
- Check_Type(str, T_STRING);
1034
- #ifdef HAVE_RUBY_ENCODING_H
1035
- CONVERT_TO_UTF8(str);
1036
- #endif
1037
- WStr_allocWithUTF8L(&wstr, RSTRING_PTR(str), RSTRING_LEN(str));
1038
-
1039
- return rb_ensure(get_categories_internal, (VALUE)&param,
1040
- get_categories_ensure, (VALUE)&wstr);
1041
- /* wstr will be freed in get_text_elements_ensure() */
1042
- }
1043
-
1044
-
1045
- VALUE
1046
- unicode_get_abbr_categories(VALUE obj, VALUE str)
1047
- {
1048
- WString wstr;
1049
- get_categories_param param = { &wstr, str, catname_abbr };
1050
-
1051
- Check_Type(str, T_STRING);
1052
- #ifdef HAVE_RUBY_ENCODING_H
1053
- CONVERT_TO_UTF8(str);
1054
- #endif
1055
- WStr_allocWithUTF8L(&wstr, RSTRING_PTR(str), RSTRING_LEN(str));
1056
-
1057
- return rb_ensure(get_categories_internal, (VALUE)&param,
1058
- get_categories_ensure, (VALUE)&wstr);
1059
- /* wstr will be freed in get_text_elements_ensure() */
1060
- }
1061
-
1062
- VALUE
1063
- unicode_wcswidth(int argc, VALUE* argv, VALUE obj)
1064
- {
1065
- WString wstr;
1066
- int i, count;
1067
- int width = 0;
1068
- int cjk_p = 0;
1069
- VALUE str;
1070
- VALUE cjk;
1071
-
1072
- count = rb_scan_args(argc, argv, "11", &str, &cjk);
1073
- if (count > 1)
1074
- cjk_p = RTEST(cjk);
1075
- Check_Type(str, T_STRING);
1076
- #ifdef HAVE_RUBY_ENCODING_H
1077
- CONVERT_TO_UTF8(str);
1078
- #endif
1079
- WStr_allocWithUTF8L(&wstr, RSTRING_PTR(str), RSTRING_LEN(str));
1080
- for (i = 0; i <wstr.len; i++) {
1081
- int c = wstr.str[i];
1082
- int cat = get_gencat(c);
1083
- int eaw = get_eawidth(c);
1084
- if ((c > 0 && c < 32) || (c >= 0x7f && c < 0xa0)) {
1085
- /* Control Characters */
1086
- width = -1;
1087
- break;
1088
- }
1089
- else if (c != 0x00ad && /* SOFT HYPHEN */
1090
- (cat == c_Mn || cat == c_Me || /* Non-spacing Marks */
1091
- cat == c_Cf || /* Format */
1092
- c == 0 || /* NUL */
1093
- (c >= 0x1160 && c <= 0x11ff))) /* HANGUL JUNGSEONG/JONGSEONG */
1094
- /* zero width */ ;
1095
- else if (eaw == w_F || eaw == w_W || /* Fullwidth or Wide */
1096
- (c >= 0x4db6 && c <= 0x4dbf) || /* CJK Reserved */
1097
- (c >= 0x9fcd && c <= 0x9fff) || /* CJK Reserved */
1098
- (c >= 0xfa6e && c <= 0xfa6f) || /* CJK Reserved */
1099
- (c >= 0xfada && c <= 0xfaff) || /* CJK Reserved */
1100
- (c >= 0x2a6d7 && c <= 0x2a6ff) || /* CJK Reserved */
1101
- (c >= 0x2b735 && c <= 0x2b73f) || /* CJK Reserved */
1102
- (c >= 0x2b81e && c <= 0x2f7ff) || /* CJK Reserved */
1103
- (c >= 0x2fa1e && c <= 0x2fffd) || /* CJK Reserved */
1104
- (c >= 0x30000 && c <= 0x3fffd) || /* CJK Reserved */
1105
- (cjk_p && eaw == w_A)) /* East Asian Ambiguous */
1106
- width += 2;
1107
- else
1108
- width++; /* Halfwidth or Neutral */
1109
- }
1110
- WStr_free(&wstr);
1111
-
1112
- return INT2FIX(width);
1113
- }
1114
-
1115
- VALUE
1116
- wstring_to_rstring(WString* wstr, int start, int len) {
1117
- UString ret;
1118
- volatile VALUE vret;
1119
-
1120
- UniStr_alloc(&ret);
1121
- WStr_convertIntoUString2(wstr, start, len, &ret);
1122
- vret = ENC_(rb_str_new((char*)ret.str, ret.len));
1123
- UniStr_free(&ret);
1124
-
1125
- return vret;
1126
- }
1127
-
1128
- typedef struct _get_text_elements_param {
1129
- WString* wstr;
1130
- VALUE str;
1131
- } get_text_elements_param;
1132
-
1133
- VALUE
1134
- get_text_elements_internal(get_text_elements_param* param)
1135
- {
1136
- WString* wstr = param->wstr;
1137
- VALUE str = param->str;
1138
- int start_pos;
1139
- int block_p = rb_block_given_p();
1140
- volatile VALUE ret = str;
1141
-
1142
- if (!block_p)
1143
- ret = rb_ary_new();
1144
- for (start_pos = 0; start_pos < wstr->len;) {
1145
- int c0 = wstr->str[start_pos];
1146
- int cat = get_gencat(c0);
1147
- int length = 1;
1148
- int j;
1149
-
1150
- if (cat == c_Mn || cat == c_Mc || cat == c_Me) {
1151
- volatile VALUE rstr = TO_(str, wstring_to_rstring(wstr, start_pos, length));
1152
- if (!block_p)
1153
- rb_ary_push(ret, rstr);
1154
- else
1155
- rb_yield(rstr);
1156
- start_pos++;
1157
- continue;
1158
- }
1159
-
1160
- for (j = start_pos + 1; j < wstr->len; j++) {
1161
- int c1 = wstr->str[j];
1162
- int cat = get_gencat(c1);
1163
- if (c0 >= LBASE && c0 < LBASE + LCOUNT &&
1164
- j + 1 < wstr->len &&
1165
- c1 >= VBASE && c1 < VBASE + VCOUNT &&
1166
- wstr->str[j+1] >= TBASE && wstr->str[j+1] < TBASE + TCOUNT) {
1167
- /* Hangul L+V+T */
1168
- length += 2;
1169
- j++;
1170
- }
1171
- else if (c0 >= LBASE && c0 < LBASE + LCOUNT &&
1172
- c1 >= VBASE && c1< VBASE + VCOUNT) {
1173
- /* Hangul L+V */
1174
- length++;
1175
- }
1176
- else if (c0 >= SBASE && c0 < SBASE + SCOUNT &&
1177
- (c0 - SBASE) % TCOUNT == 0 &&
1178
- c1 >= TBASE && c1 < TBASE + TCOUNT) {
1179
- /* Hangul LV+T */
1180
- length++;
1181
- }
1182
- else if (cat == c_Mn || cat == c_Mc || cat == c_Me) {
1183
- /* Mark */
1184
- length++;
1185
- }
1186
- else {
1187
- volatile VALUE rstr = TO_(str, wstring_to_rstring(wstr, start_pos, length));
1188
- if (!block_p)
1189
- rb_ary_push(ret, rstr);
1190
- else
1191
- rb_yield(rstr);
1192
- length = 0;
1193
- break;
1194
- }
1195
- }
1196
- if (length > 0) {
1197
- volatile VALUE rstr = TO_(str, wstring_to_rstring(wstr, start_pos, length));
1198
- if (!block_p)
1199
- rb_ary_push(ret, rstr);
1200
- else
1201
- rb_yield(rstr);
1202
- }
1203
- start_pos = j;
1204
- }
1205
- return ret;
1206
- }
1207
-
1208
- VALUE
1209
- get_text_elements_ensure(WString* wstr)
1210
- {
1211
- WStr_free(wstr);
1212
- return Qnil;
1213
- }
1214
-
1215
- VALUE
1216
- unicode_get_text_elements(VALUE obj, VALUE str)
1217
- {
1218
- WString wstr;
1219
- get_text_elements_param param = { &wstr, str };
1220
-
1221
- Check_Type(str, T_STRING);
1222
- #ifdef HAVE_RUBY_ENCODING_H
1223
- CONVERT_TO_UTF8(str);
1224
- #endif
1225
- WStr_allocWithUTF8L(&wstr, RSTRING_PTR(str), RSTRING_LEN(str));
1226
-
1227
- return rb_ensure(get_text_elements_internal, (VALUE)&param,
1228
- get_text_elements_ensure, (VALUE)&wstr);
1229
- /* wstr will be freed in get_text_elements_ensure() */
1230
- }
1231
-
1232
- void
1233
- Init_unicode_native()
1234
- {
1235
- int i;
1236
-
1237
- #ifdef HAVE_RUBY_ENCODING_H
1238
- enc_out = rb_utf8_encoding();
1239
- #endif
1240
-
1241
- mUnicode = rb_define_module("Unicode");
1242
- unicode_data = rb_hash_new();
1243
- composition_table = rb_hash_new();
1244
-
1245
- rb_global_variable(&unicode_data);
1246
- rb_global_variable(&composition_table);
1247
-
1248
- for (i = 0; unidata[i].code != -1; i++) {
1249
- int code = unidata[i].code;
1250
- const char* canon = unidata[i].canon;
1251
- int exclusion = unidata[i].exclusion;
1252
-
1253
- rb_hash_aset(unicode_data, INT2FIX(code), INT2FIX(i));
1254
- if (canon && exclusion == 0) {
1255
- rb_hash_aset(composition_table, rb_str_new2(canon), INT2FIX(code));
1256
- }
1257
- }
1258
-
1259
- for (i = 0; i < c_Cn + 1; i++) {
1260
- catname_abbr[i] = ID2SYM(rb_intern(gencat_abbr[i]));
1261
- catname_long[i] = ID2SYM(rb_intern(gencat_long[i]));
1262
- rb_global_variable(&catname_abbr[i]);
1263
- rb_global_variable(&catname_long[i]);
1264
- }
1265
-
1266
- rb_define_module_function(mUnicode, "strcmp",
1267
- unicode_strcmp, 2);
1268
- rb_define_module_function(mUnicode, "strcmp_compat",
1269
- unicode_strcmp_compat, 2);
1270
-
1271
- rb_define_module_function(mUnicode, "decompose",
1272
- unicode_decompose, 1);
1273
- rb_define_module_function(mUnicode, "decompose_safe",
1274
- unicode_decompose_safe, 1);
1275
- rb_define_module_function(mUnicode, "decompose_compat",
1276
- unicode_decompose_compat, 1);
1277
- rb_define_module_function(mUnicode, "compose",
1278
- unicode_compose, 1);
1279
-
1280
- rb_define_module_function(mUnicode, "normalize_D",
1281
- unicode_decompose, 1);
1282
- rb_define_module_function(mUnicode, "normalize_D_safe",
1283
- unicode_decompose_safe, 1);
1284
- rb_define_module_function(mUnicode, "normalize_KD",
1285
- unicode_decompose_compat, 1);
1286
- rb_define_module_function(mUnicode, "normalize_C",
1287
- unicode_normalize_C, 1);
1288
- rb_define_module_function(mUnicode, "normalize_C_safe",
1289
- unicode_normalize_safe, 1);
1290
- rb_define_module_function(mUnicode, "normalize_KC",
1291
- unicode_normalize_KC, 1);
1292
-
1293
- /* aliases */
1294
- rb_define_module_function(mUnicode, "nfd",
1295
- unicode_decompose, 1);
1296
- rb_define_module_function(mUnicode, "nfd_safe",
1297
- unicode_decompose_safe, 1);
1298
- rb_define_module_function(mUnicode, "nfkd",
1299
- unicode_decompose_compat, 1);
1300
- rb_define_module_function(mUnicode, "nfc",
1301
- unicode_normalize_C, 1);
1302
- rb_define_module_function(mUnicode, "nfc_safe",
1303
- unicode_normalize_safe, 1);
1304
- rb_define_module_function(mUnicode, "nfkc",
1305
- unicode_normalize_KC, 1);
1306
-
1307
- rb_define_module_function(mUnicode, "upcase",
1308
- unicode_upcase, 1);
1309
- rb_define_module_function(mUnicode, "downcase",
1310
- unicode_downcase, 1);
1311
- rb_define_module_function(mUnicode, "capitalize",
1312
- unicode_capitalize, 1);
1313
-
1314
- rb_define_module_function(mUnicode, "categories",
1315
- unicode_get_categories, 1);
1316
- rb_define_module_function(mUnicode, "abbr_categories",
1317
- unicode_get_abbr_categories, 1);
1318
- rb_define_module_function(mUnicode, "width",
1319
- unicode_wcswidth, -1);
1320
- rb_define_module_function(mUnicode, "text_elements",
1321
- unicode_get_text_elements, 1);
1322
-
1323
- rb_define_const(mUnicode, "VERSION",
1324
- rb_str_new2(UNICODE_VERSION));
1325
- }