unicode 0.4.4.3-x86-mswin32-60 → 0.4.4.4-java

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,1325 +0,0 @@
1
- /*
2
- * Unicode Library version 0.4.3
3
- * Aug 8, 2012: version 0.4
4
- * Oct 14, 2010: version 0.4
5
- * Feb 26, 2010: version 0.3
6
- * Dec 29, 2009: version 0.2
7
- * Nov 23, 1999 yoshidam
8
- *
9
- */
10
-
11
- #define UNICODE_VERSION "0.4.3"
12
-
13
- #include "ruby.h"
14
- #ifdef HAVE_RUBY_IO_H
15
- # include "ruby/io.h"
16
- #else
17
- # include "rubyio.h"
18
- #endif
19
- #include <stdio.h>
20
- #include "wstring.h"
21
- #include "unidata.map"
22
-
23
- #ifndef RSTRING_PTR
24
- # define RSTRING_PTR(s) (RSTRING(s)->ptr)
25
- # define RSTRING_LEN(s) (RSTRING(s)->len)
26
- #endif
27
-
28
- #ifdef HAVE_RUBY_ENCODING_H
29
- static rb_encoding* enc_out;
30
- # define ENC_(o) (rb_enc_associate(o, enc_out))
31
- #else
32
- # define ENC_(o) (o)
33
- #endif
34
-
35
- inline static VALUE
36
- taintObject(VALUE src, VALUE obj) {
37
- if (OBJ_TAINTED(src))
38
- OBJ_TAINT(obj);
39
- return obj;
40
- }
41
- #define TO_(src, obj) (taintObject(src, obj))
42
-
43
#ifdef HAVE_RUBY_ENCODING_H
/*
 * Re-encode `str` into the encoding held in enc_out unless its encoding
 * index is already UTF-8 or US-ASCII.  The macro assigns the re-encoded
 * string back to `str`, so the argument must be a modifiable VALUE lvalue.
 */
# define CONVERT_TO_UTF8(str) do { \
    int encindex = ENCODING_GET(str); \
    volatile VALUE encobj; \
    if (encindex != rb_utf8_encindex() && \
        encindex != rb_usascii_encindex()) { \
      encobj = rb_enc_from_encoding(enc_out); \
      str = rb_str_encode(str, encobj, 0, Qnil); \
    } \
  } while (0)
#endif
54
-
55
/* The Ruby module object this extension defines its functions on. */
static VALUE mUnicode;
/* Hash: code point (Fixnum) => index into unidata[] (Fixnum). */
static VALUE unicode_data;
/* Hash: canonical decomposition (UTF-8 string) => composed code point. */
static VALUE composition_table;
/* General category names as symbols, indexed by category code. */
static VALUE catname_long[c_Cn+1];
static VALUE catname_abbr[c_Cn+1];
60
-
61
/*
 * Hangul syllable arithmetic.  A precomposed syllable S maps to jamo as
 *   L = LBASE + (S - SBASE) / NCOUNT
 *   V = VBASE + ((S - SBASE) % NCOUNT) / TCOUNT
 *   T = TBASE + (S - SBASE) % TCOUNT      (T == TBASE means "no trailing")
 */
#define SBASE (0xac00)  /* first precomposed syllable */
#define LBASE (0x1100)  /* first leading consonant */
#define LCOUNT (19)
#define VBASE (0x1161)  /* first vowel */
#define VCOUNT (21)
#define TBASE (0x11a7)  /* one below the first trailing consonant */
#define TCOUNT (28)     /* trailing slots per L+V pair, including "none" */
#define NCOUNT (VCOUNT * TCOUNT) /* 588 */
#define SCOUNT (LCOUNT * NCOUNT) /* 11172 */
71
-
72
- VALUE
73
- get_unidata(int ucs) {
74
- VALUE ch = rb_hash_aref(unicode_data, INT2FIX(ucs));
75
- if (!NIL_P(ch))
76
- return ch;
77
- #ifdef CJK_IDEOGRAPH_EXTENSION_A_FIRST
78
- else if (ucs >= CJK_IDEOGRAPH_EXTENSION_A_FIRST &&
79
- ucs <= CJK_IDEOGRAPH_EXTENSION_A_LAST)
80
- return rb_hash_aref(unicode_data,
81
- INT2FIX(CJK_IDEOGRAPH_EXTENSION_A_FIRST));
82
- #endif
83
- #ifdef CJK_IDEOGRAPH_FIRST
84
- else if (ucs >= CJK_IDEOGRAPH_FIRST &&
85
- ucs <= CJK_IDEOGRAPH_LAST)
86
- return rb_hash_aref(unicode_data,
87
- INT2FIX(CJK_IDEOGRAPH_FIRST));
88
- #endif
89
- #ifdef HANGUL_SYLLABLE_FIRST
90
- else if (ucs >= HANGUL_SYLLABLE_FIRST &&
91
- ucs <= HANGUL_SYLLABLE_LAST)
92
- return rb_hash_aref(unicode_data,
93
- INT2FIX(HANGUL_SYLLABLE_FIRST));
94
- #endif
95
- #ifdef NON_PRIVATE_USE_HIGH_SURROGATE_FIRST
96
- else if (ucs >= NON_PRIVATE_USE_HIGH_SURROGATE_FIRST &&
97
- ucs <= NON_PRIVATE_USE_HIGH_SURROGATE_LAST)
98
- return rb_hash_aref(unicode_data,
99
- INT2FIX(NON_PRIVATE_USE_HIGH_SURROGATE_FIRST));
100
- #endif
101
- #ifdef PRIVATE_USE_HIGH_SURROGATE_FIRST
102
- else if (ucs >= PRIVATE_USE_HIGH_SURROGATE_FIRST &&
103
- ucs <= PRIVATE_USE_HIGH_SURROGATE_LAST)
104
- return rb_hash_aref(unicode_data,
105
- INT2FIX(PRIVATE_USE_HIGH_SURROGATE_FIRST));
106
- #endif
107
- #ifdef LOW_SURROGATE_FIRST
108
- else if (ucs >= LOW_SURROGATE_FIRST &&
109
- ucs <= LOW_SURROGATE_LAST)
110
- return rb_hash_aref(unicode_data,
111
- INT2FIX(LOW_SURROGATE_FIRST));
112
- #endif
113
- #ifdef PRIVATE_USE_FIRST
114
- else if (ucs >= PRIVATE_USE_FIRST &&
115
- ucs <= PRIVATE_USE_LAST)
116
- return rb_hash_aref(unicode_data,
117
- INT2FIX(PRIVATE_USE_FIRST));
118
- #endif
119
- #ifdef CJK_IDEOGRAPH_EXTENSION_B_FIRST
120
- else if (ucs >= CJK_IDEOGRAPH_EXTENSION_B_FIRST &&
121
- ucs <= CJK_IDEOGRAPH_EXTENSION_B_LAST)
122
- return rb_hash_aref(unicode_data,
123
- INT2FIX(CJK_IDEOGRAPH_EXTENSION_B_FIRST));
124
- #endif
125
- #ifdef CJK_IDEOGRAPH_EXTENSION_C_FIRST
126
- else if (ucs >= CJK_IDEOGRAPH_EXTENSION_C_FIRST &&
127
- ucs <= CJK_IDEOGRAPH_EXTENSION_C_LAST)
128
- return rb_hash_aref(unicode_data,
129
- INT2FIX(CJK_IDEOGRAPH_EXTENSION_C_FIRST));
130
- #endif
131
- #ifdef CJK_IDEOGRAPH_EXTENSION_D_FIRST
132
- else if (ucs >= CJK_IDEOGRAPH_EXTENSION_D_FIRST &&
133
- ucs <= CJK_IDEOGRAPH_EXTENSION_D_LAST)
134
- return rb_hash_aref(unicode_data,
135
- INT2FIX(CJK_IDEOGRAPH_EXTENSION_D_FIRST));
136
- #endif
137
- #ifdef PLANE_15_PRIVATE_USE_FIRST
138
- else if (ucs >= PLANE_15_PRIVATE_USE_FIRST &&
139
- ucs <= PLANE_15_PRIVATE_USE_LAST)
140
- return rb_hash_aref(unicode_data,
141
- INT2FIX(PLANE_15_PRIVATE_USE_FIRST));
142
- #endif
143
- #ifdef PLANE_16_PRIVATE_USE_FIRST
144
- else if (ucs >= PLANE_16_PRIVATE_USE_FIRST &&
145
- ucs <= PLANE_16_PRIVATE_USE_LAST)
146
- return rb_hash_aref(unicode_data,
147
- INT2FIX(PLANE_16_PRIVATE_USE_FIRST));
148
- #endif
149
- return Qnil;
150
- }
151
-
152
- static int
153
- get_cc(int ucs)
154
- {
155
- VALUE ch = rb_hash_aref(unicode_data, INT2FIX(ucs));
156
-
157
- if (!NIL_P(ch)) {
158
- return unidata[FIX2INT(ch)].combining_class;
159
- }
160
- return 0;
161
- }
162
-
163
- static int
164
- get_gencat(int ucs)
165
- {
166
- VALUE ch = get_unidata(ucs);
167
-
168
- if (!NIL_P(ch)) {
169
- return unidata[FIX2INT(ch)].general_category;
170
- }
171
- return c_Cn; /* Unassigned */
172
- }
173
-
174
- static int
175
- get_eawidth(int ucs)
176
- {
177
- VALUE ch = get_unidata(ucs);
178
-
179
- if (!NIL_P(ch)) {
180
- return unidata[FIX2INT(ch)].east_asian_width;
181
- }
182
- return w_N; /* Neutral */
183
- }
184
-
185
- static const char*
186
- get_canon(int ucs)
187
- {
188
- VALUE ch = rb_hash_aref(unicode_data, INT2FIX(ucs));
189
-
190
- if (!NIL_P(ch)) {
191
- return unidata[FIX2INT(ch)].canon;
192
- }
193
- return NULL;
194
- }
195
-
196
- static const char*
197
- get_canon_ex(int ucs)
198
- {
199
- VALUE ch = rb_hash_aref(unicode_data, INT2FIX(ucs));
200
-
201
- if (!NIL_P(ch)) {
202
- int i = FIX2INT(ch);
203
- if (!unidata[i].exclusion)
204
- return unidata[i].canon;
205
- }
206
- return NULL;
207
- }
208
-
209
- static const char*
210
- get_compat(int ucs)
211
- {
212
- VALUE ch = rb_hash_aref(unicode_data, INT2FIX(ucs));
213
-
214
- if (!NIL_P(ch)) {
215
- return unidata[FIX2INT(ch)].compat;
216
- }
217
- return NULL;
218
- }
219
-
220
- static const char*
221
- get_uppercase(int ucs)
222
- {
223
- VALUE ch = rb_hash_aref(unicode_data, INT2FIX(ucs));
224
-
225
- if (!NIL_P(ch)) {
226
- return unidata[FIX2INT(ch)].uppercase;
227
- }
228
- return NULL;
229
- }
230
-
231
- static const char*
232
- get_lowercase(int ucs)
233
- {
234
- VALUE ch = rb_hash_aref(unicode_data, INT2FIX(ucs));
235
-
236
- if (!NIL_P(ch)) {
237
- return unidata[FIX2INT(ch)].lowercase;
238
- }
239
- return NULL;
240
- }
241
-
242
- static const char*
243
- get_titlecase(int ucs)
244
- {
245
- VALUE ch = rb_hash_aref(unicode_data, INT2FIX(ucs));
246
-
247
- if (!NIL_P(ch)) {
248
- return unidata[FIX2INT(ch)].titlecase;
249
- }
250
- return NULL;
251
- }
252
-
253
- static int
254
- get_composition(const char* str)
255
- {
256
- VALUE ch = rb_hash_aref(composition_table, rb_str_new2(str));
257
-
258
- if (!NIL_P(ch)) {
259
- return FIX2INT(ch);
260
- }
261
- return -1;
262
- }
263
-
264
- static WString*
265
- sort_canonical(WString* ustr)
266
- {
267
- int i = 1;
268
- int len = ustr->len;
269
-
270
- if (len < 2) return ustr;
271
-
272
- while (i < len) {
273
- int last = ustr->str[i - 1];
274
- int ch = ustr->str[i];
275
- int last_cc = get_cc(last);
276
- int cc = get_cc(ch);
277
- if (cc != 0 && last_cc != 0 && last_cc > cc) {
278
- ustr->str[i] = last;
279
- ustr->str[i-1] = ch;
280
- if (i > 1) i--;
281
- }
282
- else {
283
- i++;
284
- }
285
- }
286
- return ustr;
287
- }
288
-
289
- static void
290
- decompose_hangul(int ucs, int* l, int* v, int* t)
291
- {
292
- int sindex = ucs - SBASE;
293
- if (sindex < 0 || sindex >= SCOUNT) {
294
- *l = ucs;
295
- *v = *t = 0;
296
- return;
297
- }
298
- *l = LBASE + sindex / NCOUNT;
299
- *v = VBASE + (sindex % NCOUNT) / TCOUNT;
300
- *t = TBASE + sindex % TCOUNT;
301
- if (*t == TBASE) *t = 0;
302
- }
303
-
304
- /*
305
- * push decomposed str into result
306
- */
307
- static WString*
308
- decompose_internal(WString* ustr, WString* result)
309
- {
310
- int i;
311
- int len = ustr->len;
312
-
313
- for (i = 0; i < len; i++) {
314
- int ucs = ustr->str[i];
315
- if (ucs >= SBASE && ucs < SBASE + SCOUNT) {
316
- int l, v, t;
317
- decompose_hangul(ucs, &l, &v, &t);
318
- WStr_addWChar(result, l);
319
- if (v) WStr_addWChar(result, v);
320
- if (t) WStr_addWChar(result, t);
321
- }
322
- else {
323
- const char* dc = get_canon(ucs);
324
- if (!dc) {
325
- WStr_addWChar(result, ucs);
326
- }
327
- else {
328
- WString wdc;
329
- WStr_allocWithUTF8(&wdc, dc);
330
- decompose_internal(&wdc, result);
331
- WStr_free(&wdc);
332
- }
333
- }
334
- }
335
- return result;
336
- }
337
-
338
- /*
339
- * push decomposed str into result
340
- */
341
- static WString*
342
- decompose_safe_internal(WString* ustr, WString* result)
343
- {
344
- int i;
345
- int len = ustr->len;
346
-
347
- for (i = 0; i < len; i++) {
348
- int ucs = ustr->str[i];
349
- if (ucs >= SBASE && ucs < SBASE + SCOUNT) {
350
- int l, v, t;
351
- decompose_hangul(ucs, &l, &v, &t);
352
- WStr_addWChar(result, l);
353
- if (v) WStr_addWChar(result, v);
354
- if (t) WStr_addWChar(result, t);
355
- }
356
- else {
357
- const char* dc = get_canon_ex(ucs);
358
- if (!dc) {
359
- WStr_addWChar(result, ucs);
360
- }
361
- else {
362
- WString wdc;
363
- WStr_allocWithUTF8(&wdc, dc);
364
- decompose_safe_internal(&wdc, result);
365
- WStr_free(&wdc);
366
- }
367
- }
368
- }
369
- return result;
370
- }
371
-
372
- /*
373
- * push compatibility decomposed str into result
374
- */
375
- static WString*
376
- decompose_compat_internal(WString* ustr, WString* result)
377
- {
378
- int i;
379
- int len = ustr->len;
380
-
381
- for (i = 0; i < len; i++) {
382
- int ucs = ustr->str[i];
383
- if (ucs >= SBASE && ucs < SBASE + SCOUNT) {
384
- int l, v, t;
385
- decompose_hangul(ucs, &l, &v, &t);
386
- WStr_addWChar(result, l);
387
- if (v) WStr_addWChar(result, v);
388
- if (t) WStr_addWChar(result, t);
389
- }
390
- else {
391
- const char* dc = get_compat(ucs);
392
- if (!dc) {
393
- WStr_addWChar(result, ucs);
394
- }
395
- else {
396
- WString wdc;
397
- WStr_allocWithUTF8(&wdc, dc);
398
- decompose_compat_internal(&wdc, result);
399
- WStr_free(&wdc);
400
- }
401
- }
402
- }
403
- return result;
404
- }
405
-
406
-
407
/*
 * Write the UTF-8 encoding of code point c at p and advance p past it.
 *
 * p must be a modifiable byte-pointer lvalue with room for up to 6 bytes
 * (the original 31-bit UTF-8 forms are supported).  c is evaluated exactly
 * once and converted to unsigned int, so any expression may be passed.
 * Values of 0x80000000 and above write nothing.  No terminator is appended.
 */
#define UCS4toUTF8(p, c)                                \
  do {                                                  \
    unsigned int ucs4_ = (unsigned int)(c);             \
    if (ucs4_ < 128) {                                  \
      *(p)++ = ucs4_;                                   \
    }                                                   \
    else if (ucs4_ < 2048) {                            \
      *(p)++ = (ucs4_ >> 6) | 192;                      \
      *(p)++ = (ucs4_ & 63) | 128;                      \
    }                                                   \
    else if (ucs4_ < 0x10000) {                         \
      *(p)++ = (ucs4_ >> 12) | 224;                     \
      *(p)++ = ((ucs4_ >> 6) & 63) | 128;               \
      *(p)++ = (ucs4_ & 63) | 128;                      \
    }                                                   \
    else if (ucs4_ < 0x200000) {                        \
      *(p)++ = (ucs4_ >> 18) | 240;                     \
      *(p)++ = ((ucs4_ >> 12) & 63) | 128;              \
      *(p)++ = ((ucs4_ >> 6) & 63) | 128;               \
      *(p)++ = (ucs4_ & 63) | 128;                      \
    }                                                   \
    else if (ucs4_ < 0x4000000) {                       \
      *(p)++ = (ucs4_ >> 24) | 248;                     \
      *(p)++ = ((ucs4_ >> 18) & 63) | 128;              \
      *(p)++ = ((ucs4_ >> 12) & 63) | 128;              \
      *(p)++ = ((ucs4_ >> 6) & 63) | 128;               \
      *(p)++ = (ucs4_ & 63) | 128;                      \
    }                                                   \
    else if (ucs4_ < 0x80000000) {                      \
      *(p)++ = (ucs4_ >> 30) | 252;                     \
      *(p)++ = ((ucs4_ >> 24) & 63) | 128;              \
      *(p)++ = ((ucs4_ >> 18) & 63) | 128;              \
      *(p)++ = ((ucs4_ >> 12) & 63) | 128;              \
      *(p)++ = ((ucs4_ >> 6) & 63) | 128;               \
      *(p)++ = (ucs4_ & 63) | 128;                      \
    }                                                   \
  } while (0)
443
-
444
- static int
445
- compose_pair(unsigned int c1, unsigned int c2)
446
- {
447
- int ret;
448
- char ustr[13]; /* stored two UTF-8 chars */
449
- char *p = ustr;
450
-
451
- /* Hangul L + V */
452
- if (c1 >= LBASE && c1 < LBASE + LCOUNT &&
453
- c2 >= VBASE && c2 < VBASE + VCOUNT) {
454
- return SBASE + ((c1 - LBASE) * VCOUNT + (c2 - VBASE)) * TCOUNT;
455
- }
456
- /* Hangul LV + T */
457
- else if (c1 >= SBASE && c1 < SBASE + SCOUNT &&
458
- (c1 - SBASE) % TCOUNT == 0 &&
459
- c2 >= TBASE && c2 < TBASE + TCOUNT) {
460
- return c1 + (c2 - TBASE);
461
- }
462
- UCS4toUTF8(p, c1);
463
- UCS4toUTF8(p, c2);
464
- *p = '\0';
465
- ret = get_composition(ustr);
466
-
467
- return ret;
468
- }
469
-
470
- /*
471
- * push canonical composed str into result
472
- */
473
- static WString*
474
- compose_internal(WString* ustr, WString* result)
475
- {
476
- int starterPos = 0;
477
- int starterCh = ustr->str[0];
478
- int compPos = 1;
479
- int lastClass = get_cc(starterCh);
480
- int oldLen = ustr->len;
481
- int decompPos;
482
-
483
- if (oldLen == 0) return result;
484
- if (lastClass != 0) lastClass = 256;
485
- /* copy string */
486
- result->len = 0;
487
- WStr_pushWString(result, ustr);
488
-
489
- for (decompPos = compPos; decompPos < result->len; decompPos++) {
490
- int ch = result->str[decompPos];
491
- int chClass = get_cc(ch);
492
- int composite = compose_pair(starterCh, ch);
493
- if (composite > 0 &&
494
- (lastClass < chClass ||lastClass == 0)) {
495
- result->str[starterPos] = composite;
496
- starterCh = composite;
497
- }
498
- else {
499
- if (chClass == 0) {
500
- starterPos = compPos;
501
- starterCh = ch;
502
- }
503
- lastClass = chClass;
504
- result->str[compPos] = ch;
505
- if (result->len != oldLen) {
506
- decompPos += result->len - oldLen;
507
- oldLen = result->len;
508
- }
509
- compPos++;
510
- }
511
- }
512
- result->len = compPos;
513
- return result;
514
- }
515
/*
 * Disabled alternative implementation of compose_internal().  It is
 * excluded from the build by "#if 0": it only composes a character with
 * the immediately preceding starter and appends the output to result.
 */
#if 0
static WString*
compose_internal(WString* ustr, WString* result)
{
  int len = ustr->len;
  int starter;
  int startercc;
  int i;

  if (len == 0) return result;

  starter = ustr->str[0];
  startercc = get_cc(starter);
  if (startercc != 0) startercc = 256;
  for (i = 1; i < len; i++) {
    int ch = ustr->str[i];
    int cc = get_cc(ch);
    int composite;

    if (startercc == 0 &&
        (composite = compose_pair(starter, ch)) >= 0) {
      starter = composite;
      startercc = get_cc(composite);
    }
    else {
      WStr_addWChar(result, starter);
      starter = ch;
      startercc = cc;
    }
  }
  WStr_addWChar(result, starter);

  return result;
}
#endif
550
-
551
- static WString*
552
- upcase_internal(WString* str, WString* result)
553
- {
554
- int i;
555
- int len = str->len;
556
-
557
- for (i = 0; i < len; i++) {
558
- int ucs = str->str[i];
559
- const char* c = get_uppercase(ucs);
560
- if (!c) {
561
- WStr_addWChar(result, ucs);
562
- }
563
- else {
564
- WString wc;
565
- WStr_allocWithUTF8(&wc, c);
566
- WStr_pushWString(result, &wc);
567
- WStr_free(&wc);
568
- }
569
- }
570
- return result;
571
- }
572
-
573
- static WString*
574
- downcase_internal(WString* str, WString* result)
575
- {
576
- int i;
577
- int len = str->len;
578
-
579
- for (i = 0; i < len; i++) {
580
- int ucs = str->str[i];
581
- const char* c = get_lowercase(ucs);
582
- if (!c) {
583
- WStr_addWChar(result, ucs);
584
- }
585
- else {
586
- WString wc;
587
- WStr_allocWithUTF8(&wc, c);
588
- WStr_pushWString(result, &wc);
589
- WStr_free(&wc);
590
- }
591
- }
592
- return result;
593
- }
594
-
595
- static WString*
596
- capitalize_internal(WString* str, WString* result)
597
- {
598
- int i;
599
- int len = str->len;
600
-
601
- if (len > 0) {
602
- const char* c = get_titlecase(str->str[0]);
603
- if (!c) {
604
- WStr_addWChar(result, str->str[0]);
605
- }
606
- else {
607
- WString wc;
608
- WStr_allocWithUTF8(&wc, c);
609
- WStr_pushWString(result, &wc);
610
- WStr_free(&wc);
611
- }
612
- }
613
- for (i = 1; i < len; i++) {
614
- int ucs = str->str[i];
615
- const char* c = get_lowercase(ucs);
616
- if (!c) {
617
- WStr_addWChar(result, ucs);
618
- }
619
- else {
620
- WString wc;
621
- WStr_allocWithUTF8(&wc, c);
622
- WStr_pushWString(result, &wc);
623
- WStr_free(&wc);
624
- }
625
- }
626
- return result;
627
- }
628
-
629
- static VALUE
630
- unicode_strcmp(VALUE obj, VALUE str1, VALUE str2)
631
- {
632
- WString wstr1;
633
- WString wstr2;
634
- WString result1;
635
- WString result2;
636
- UString ustr1;
637
- UString ustr2;
638
- int ret;
639
-
640
- Check_Type(str1, T_STRING);
641
- Check_Type(str2, T_STRING);
642
- #ifdef HAVE_RUBY_ENCODING_H
643
- CONVERT_TO_UTF8(str1);
644
- CONVERT_TO_UTF8(str2);
645
- #endif
646
- WStr_allocWithUTF8L(&wstr1, RSTRING_PTR(str1), RSTRING_LEN(str1));
647
- WStr_allocWithUTF8L(&wstr2, RSTRING_PTR(str2), RSTRING_LEN(str2));
648
- WStr_alloc(&result1);
649
- WStr_alloc(&result2);
650
- decompose_internal(&wstr1, &result1);
651
- decompose_internal(&wstr2, &result2);
652
- WStr_free(&wstr1);
653
- WStr_free(&wstr2);
654
- sort_canonical(&result1);
655
- sort_canonical(&result2);
656
- UniStr_alloc(&ustr1);
657
- UniStr_alloc(&ustr2);
658
- WStr_convertIntoUString(&result1, &ustr1);
659
- WStr_convertIntoUString(&result2, &ustr2);
660
- WStr_free(&result1);
661
- WStr_free(&result2);
662
- UniStr_addChar(&ustr1, '\0');
663
- UniStr_addChar(&ustr2, '\0');
664
- ret = strcmp((char*)ustr1.str, (char*)ustr2.str);
665
- UniStr_free(&ustr1);
666
- UniStr_free(&ustr2);
667
-
668
- return INT2FIX(ret);
669
- }
670
-
671
- static VALUE
672
- unicode_strcmp_compat(VALUE obj, VALUE str1, VALUE str2)
673
- {
674
- WString wstr1;
675
- WString wstr2;
676
- WString result1;
677
- WString result2;
678
- UString ustr1;
679
- UString ustr2;
680
- int ret;
681
-
682
- Check_Type(str1, T_STRING);
683
- Check_Type(str2, T_STRING);
684
- #ifdef HAVE_RUBY_ENCODING_H
685
- CONVERT_TO_UTF8(str1);
686
- CONVERT_TO_UTF8(str2);
687
- #endif
688
- WStr_allocWithUTF8L(&wstr1, RSTRING_PTR(str1), RSTRING_LEN(str1));
689
- WStr_allocWithUTF8L(&wstr2, RSTRING_PTR(str2), RSTRING_LEN(str2));
690
- WStr_alloc(&result1);
691
- WStr_alloc(&result2);
692
- decompose_compat_internal(&wstr1, &result1);
693
- decompose_compat_internal(&wstr2, &result2);
694
- WStr_free(&wstr1);
695
- WStr_free(&wstr2);
696
- sort_canonical(&result1);
697
- sort_canonical(&result2);
698
- UniStr_alloc(&ustr1);
699
- UniStr_alloc(&ustr2);
700
- WStr_convertIntoUString(&result1, &ustr1);
701
- WStr_convertIntoUString(&result2, &ustr2);
702
- WStr_free(&result1);
703
- WStr_free(&result2);
704
- UniStr_addChar(&ustr1, '\0');
705
- UniStr_addChar(&ustr2, '\0');
706
- ret = strcmp((char*)ustr1.str, (char*)ustr2.str);
707
- UniStr_free(&ustr1);
708
- UniStr_free(&ustr2);
709
-
710
- return INT2FIX(ret);
711
- }
712
-
713
- static VALUE
714
- unicode_decompose(VALUE obj, VALUE str)
715
- {
716
- WString ustr;
717
- WString result;
718
- UString ret;
719
- VALUE vret;
720
-
721
- Check_Type(str, T_STRING);
722
- #ifdef HAVE_RUBY_ENCODING_H
723
- CONVERT_TO_UTF8(str);
724
- #endif
725
- WStr_allocWithUTF8L(&ustr, RSTRING_PTR(str), RSTRING_LEN(str));
726
- WStr_alloc(&result);
727
- decompose_internal(&ustr, &result);
728
- WStr_free(&ustr);
729
- sort_canonical(&result);
730
- UniStr_alloc(&ret);
731
- WStr_convertIntoUString(&result, &ret);
732
- WStr_free(&result);
733
- vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
734
- UniStr_free(&ret);
735
-
736
- return vret;
737
- }
738
-
739
- static VALUE
740
- unicode_decompose_safe(VALUE obj, VALUE str)
741
- {
742
- WString ustr;
743
- WString result;
744
- UString ret;
745
- VALUE vret;
746
-
747
- Check_Type(str, T_STRING);
748
- #ifdef HAVE_RUBY_ENCODING_H
749
- CONVERT_TO_UTF8(str);
750
- #endif
751
- WStr_allocWithUTF8L(&ustr, RSTRING_PTR(str), RSTRING_LEN(str));
752
- WStr_alloc(&result);
753
- decompose_safe_internal(&ustr, &result);
754
- WStr_free(&ustr);
755
- sort_canonical(&result);
756
- UniStr_alloc(&ret);
757
- WStr_convertIntoUString(&result, &ret);
758
- WStr_free(&result);
759
- vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
760
- UniStr_free(&ret);
761
-
762
- return vret;
763
- }
764
-
765
- static VALUE
766
- unicode_decompose_compat(VALUE obj, VALUE str)
767
- {
768
- WString ustr;
769
- WString result;
770
- UString ret;
771
- VALUE vret;
772
-
773
- Check_Type(str, T_STRING);
774
- #ifdef HAVE_RUBY_ENCODING_H
775
- CONVERT_TO_UTF8(str);
776
- #endif
777
- WStr_allocWithUTF8L(&ustr, RSTRING_PTR(str), RSTRING_LEN(str));
778
- WStr_alloc(&result);
779
- decompose_compat_internal(&ustr, &result);
780
- WStr_free(&ustr);
781
- sort_canonical(&result);
782
- UniStr_alloc(&ret);
783
- WStr_convertIntoUString(&result, &ret);
784
- WStr_free(&result);
785
- vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
786
- UniStr_free(&ret);
787
-
788
- return vret;
789
- }
790
-
791
- static VALUE
792
- unicode_compose(VALUE obj, VALUE str)
793
- {
794
- WString ustr;
795
- WString result;
796
- UString ret;
797
- VALUE vret;
798
-
799
- Check_Type(str, T_STRING);
800
- #ifdef HAVE_RUBY_ENCODING_H
801
- CONVERT_TO_UTF8(str);
802
- #endif
803
- WStr_allocWithUTF8L(&ustr, RSTRING_PTR(str), RSTRING_LEN(str));
804
- sort_canonical(&ustr);
805
- WStr_alloc(&result);
806
- compose_internal(&ustr, &result);
807
- WStr_free(&ustr);
808
- UniStr_alloc(&ret);
809
- WStr_convertIntoUString(&result, &ret);
810
- WStr_free(&result);
811
- vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
812
- UniStr_free(&ret);
813
-
814
- return vret;
815
- }
816
-
817
- static VALUE
818
- unicode_normalize_C(VALUE obj, VALUE str)
819
- {
820
- WString ustr1;
821
- WString ustr2;
822
- WString result;
823
- UString ret;
824
- VALUE vret;
825
-
826
- Check_Type(str, T_STRING);
827
- #ifdef HAVE_RUBY_ENCODING_H
828
- CONVERT_TO_UTF8(str);
829
- #endif
830
- WStr_allocWithUTF8L(&ustr1, RSTRING_PTR(str), RSTRING_LEN(str));
831
- WStr_alloc(&ustr2);
832
- decompose_internal(&ustr1, &ustr2);
833
- WStr_free(&ustr1);
834
- sort_canonical(&ustr2);
835
- WStr_alloc(&result);
836
- compose_internal(&ustr2, &result);
837
- WStr_free(&ustr2);
838
- UniStr_alloc(&ret);
839
- WStr_convertIntoUString(&result, &ret);
840
- WStr_free(&result);
841
- vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
842
- UniStr_free(&ret);
843
-
844
- return vret;
845
- }
846
-
847
- static VALUE
848
- unicode_normalize_safe(VALUE obj, VALUE str)
849
- {
850
- WString ustr1;
851
- WString ustr2;
852
- WString result;
853
- UString ret;
854
- VALUE vret;
855
-
856
- Check_Type(str, T_STRING);
857
- #ifdef HAVE_RUBY_ENCODING_H
858
- CONVERT_TO_UTF8(str);
859
- #endif
860
- WStr_allocWithUTF8L(&ustr1, RSTRING_PTR(str), RSTRING_LEN(str));
861
- WStr_alloc(&ustr2);
862
- decompose_safe_internal(&ustr1, &ustr2);
863
- WStr_free(&ustr1);
864
- sort_canonical(&ustr2);
865
- WStr_alloc(&result);
866
- compose_internal(&ustr2, &result);
867
- WStr_free(&ustr2);
868
- UniStr_alloc(&ret);
869
- WStr_convertIntoUString(&result, &ret);
870
- WStr_free(&result);
871
- vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
872
- UniStr_free(&ret);
873
-
874
- return vret;
875
- }
876
-
877
- static VALUE
878
- unicode_normalize_KC(VALUE obj, VALUE str)
879
- {
880
- WString ustr1;
881
- WString ustr2;
882
- WString result;
883
- UString ret;
884
- VALUE vret;
885
-
886
- Check_Type(str, T_STRING);
887
- #ifdef HAVE_RUBY_ENCODING_H
888
- CONVERT_TO_UTF8(str);
889
- #endif
890
- WStr_allocWithUTF8L(&ustr1, RSTRING_PTR(str), RSTRING_LEN(str));
891
- WStr_alloc(&ustr2);
892
- decompose_compat_internal(&ustr1, &ustr2);
893
- WStr_free(&ustr1);
894
- sort_canonical(&ustr2);
895
- WStr_alloc(&result);
896
- compose_internal(&ustr2, &result);
897
- WStr_free(&ustr2);
898
- UniStr_alloc(&ret);
899
- WStr_convertIntoUString(&result, &ret);
900
- WStr_free(&result);
901
- vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
902
- UniStr_free(&ret);
903
-
904
- return vret;
905
- }
906
-
907
- static VALUE
908
- unicode_upcase(VALUE obj, VALUE str)
909
- {
910
- WString ustr;
911
- WString result;
912
- UString ret;
913
- VALUE vret;
914
-
915
- Check_Type(str, T_STRING);
916
- #ifdef HAVE_RUBY_ENCODING_H
917
- CONVERT_TO_UTF8(str);
918
- #endif
919
- WStr_allocWithUTF8L(&ustr, RSTRING_PTR(str), RSTRING_LEN(str));
920
- WStr_alloc(&result);
921
- upcase_internal(&ustr, &result);
922
- //sort_canonical(&result);
923
- WStr_free(&ustr);
924
- UniStr_alloc(&ret);
925
- WStr_convertIntoUString(&result, &ret);
926
- WStr_free(&result);
927
- vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
928
- UniStr_free(&ret);
929
-
930
- return vret;
931
- }
932
-
933
- static VALUE
934
- unicode_downcase(VALUE obj, VALUE str)
935
- {
936
- WString ustr;
937
- WString result;
938
- UString ret;
939
- VALUE vret;
940
-
941
- Check_Type(str, T_STRING);
942
- #ifdef HAVE_RUBY_ENCODING_H
943
- CONVERT_TO_UTF8(str);
944
- #endif
945
- WStr_allocWithUTF8L(&ustr, RSTRING_PTR(str), RSTRING_LEN(str));
946
- WStr_alloc(&result);
947
- downcase_internal(&ustr, &result);
948
- //sort_canonical(&result);
949
- WStr_free(&ustr);
950
- UniStr_alloc(&ret);
951
- WStr_convertIntoUString(&result, &ret);
952
- WStr_free(&result);
953
- vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
954
- UniStr_free(&ret);
955
-
956
- return vret;
957
- }
958
-
959
- #ifdef HAVE_RUBY_ENCODING_H
960
-
961
-
962
- #endif
963
-
964
- static VALUE
965
- unicode_capitalize(VALUE obj, VALUE str)
966
- {
967
- WString ustr;
968
- WString result;
969
- UString ret;
970
- VALUE vret;
971
-
972
- Check_Type(str, T_STRING);
973
- #ifdef HAVE_RUBY_ENCODING_H
974
- CONVERT_TO_UTF8(str);
975
- #endif
976
- WStr_allocWithUTF8L(&ustr, RSTRING_PTR(str), RSTRING_LEN(str));
977
- WStr_alloc(&result);
978
- capitalize_internal(&ustr, &result);
979
- //sort_canonical(&result);
980
- WStr_free(&ustr);
981
- UniStr_alloc(&ret);
982
- WStr_convertIntoUString(&result, &ret);
983
- WStr_free(&result);
984
- vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
985
- UniStr_free(&ret);
986
-
987
- return vret;
988
- }
989
-
990
/* Arguments bundled for get_categories_internal(), which runs under rb_ensure(). */
typedef struct _get_categories_param {
  WString* wstr;   /* decoded code points to classify */
  VALUE str;       /* returned as-is when a block is given */
  VALUE* catname;  /* symbol table to report from (long or abbreviated) */
} get_categories_param;
995
-
996
- static VALUE
997
- get_categories_internal(get_categories_param* param)
998
- {
999
- WString* wstr = param->wstr;
1000
- VALUE str = param->str;
1001
- VALUE* catname = param->catname;
1002
- int pos;
1003
- int block_p = rb_block_given_p();
1004
- volatile VALUE ret = str;
1005
-
1006
- if (!block_p)
1007
- ret = rb_ary_new();
1008
- for (pos = 0; pos < wstr->len; pos++) {
1009
- int gencat = get_gencat(wstr->str[pos]);
1010
- if (!block_p)
1011
- rb_ary_push(ret, catname[gencat]);
1012
- else {
1013
- rb_yield(catname[gencat]);
1014
- }
1015
- }
1016
-
1017
- return ret;
1018
- }
1019
-
1020
/*
 * rb_ensure() cleanup for the category functions: frees the WString that
 * was allocated by the caller.  Always returns Qnil.
 */
VALUE
get_categories_ensure(WString* wstr)
{
  WStr_free(wstr);
  return Qnil;
}
1026
-
1027
- VALUE
1028
- unicode_get_categories(VALUE obj, VALUE str)
1029
- {
1030
- WString wstr;
1031
- get_categories_param param = { &wstr, str, catname_long };
1032
-
1033
- Check_Type(str, T_STRING);
1034
- #ifdef HAVE_RUBY_ENCODING_H
1035
- CONVERT_TO_UTF8(str);
1036
- #endif
1037
- WStr_allocWithUTF8L(&wstr, RSTRING_PTR(str), RSTRING_LEN(str));
1038
-
1039
- return rb_ensure(get_categories_internal, (VALUE)&param,
1040
- get_categories_ensure, (VALUE)&wstr);
1041
- /* wstr will be freed in get_text_elements_ensure() */
1042
- }
1043
-
1044
-
1045
- VALUE
1046
- unicode_get_abbr_categories(VALUE obj, VALUE str)
1047
- {
1048
- WString wstr;
1049
- get_categories_param param = { &wstr, str, catname_abbr };
1050
-
1051
- Check_Type(str, T_STRING);
1052
- #ifdef HAVE_RUBY_ENCODING_H
1053
- CONVERT_TO_UTF8(str);
1054
- #endif
1055
- WStr_allocWithUTF8L(&wstr, RSTRING_PTR(str), RSTRING_LEN(str));
1056
-
1057
- return rb_ensure(get_categories_internal, (VALUE)&param,
1058
- get_categories_ensure, (VALUE)&wstr);
1059
- /* wstr will be freed in get_text_elements_ensure() */
1060
- }
1061
-
1062
- VALUE
1063
- unicode_wcswidth(int argc, VALUE* argv, VALUE obj)
1064
- {
1065
- WString wstr;
1066
- int i, count;
1067
- int width = 0;
1068
- int cjk_p = 0;
1069
- VALUE str;
1070
- VALUE cjk;
1071
-
1072
- count = rb_scan_args(argc, argv, "11", &str, &cjk);
1073
- if (count > 1)
1074
- cjk_p = RTEST(cjk);
1075
- Check_Type(str, T_STRING);
1076
- #ifdef HAVE_RUBY_ENCODING_H
1077
- CONVERT_TO_UTF8(str);
1078
- #endif
1079
- WStr_allocWithUTF8L(&wstr, RSTRING_PTR(str), RSTRING_LEN(str));
1080
- for (i = 0; i <wstr.len; i++) {
1081
- int c = wstr.str[i];
1082
- int cat = get_gencat(c);
1083
- int eaw = get_eawidth(c);
1084
- if ((c > 0 && c < 32) || (c >= 0x7f && c < 0xa0)) {
1085
- /* Control Characters */
1086
- width = -1;
1087
- break;
1088
- }
1089
- else if (c != 0x00ad && /* SOFT HYPHEN */
1090
- (cat == c_Mn || cat == c_Me || /* Non-spacing Marks */
1091
- cat == c_Cf || /* Format */
1092
- c == 0 || /* NUL */
1093
- (c >= 0x1160 && c <= 0x11ff))) /* HANGUL JUNGSEONG/JONGSEONG */
1094
- /* zero width */ ;
1095
- else if (eaw == w_F || eaw == w_W || /* Fullwidth or Wide */
1096
- (c >= 0x4db6 && c <= 0x4dbf) || /* CJK Reserved */
1097
- (c >= 0x9fcd && c <= 0x9fff) || /* CJK Reserved */
1098
- (c >= 0xfa6e && c <= 0xfa6f) || /* CJK Reserved */
1099
- (c >= 0xfada && c <= 0xfaff) || /* CJK Reserved */
1100
- (c >= 0x2a6d7 && c <= 0x2a6ff) || /* CJK Reserved */
1101
- (c >= 0x2b735 && c <= 0x2b73f) || /* CJK Reserved */
1102
- (c >= 0x2b81e && c <= 0x2f7ff) || /* CJK Reserved */
1103
- (c >= 0x2fa1e && c <= 0x2fffd) || /* CJK Reserved */
1104
- (c >= 0x30000 && c <= 0x3fffd) || /* CJK Reserved */
1105
- (cjk_p && eaw == w_A)) /* East Asian Ambiguous */
1106
- width += 2;
1107
- else
1108
- width++; /* Halfwidth or Neutral */
1109
- }
1110
- WStr_free(&wstr);
1111
-
1112
- return INT2FIX(width);
1113
- }
1114
-
1115
- VALUE
1116
- wstring_to_rstring(WString* wstr, int start, int len) {
1117
- UString ret;
1118
- volatile VALUE vret;
1119
-
1120
- UniStr_alloc(&ret);
1121
- WStr_convertIntoUString2(wstr, start, len, &ret);
1122
- vret = ENC_(rb_str_new((char*)ret.str, ret.len));
1123
- UniStr_free(&ret);
1124
-
1125
- return vret;
1126
- }
1127
-
1128
/* Arguments bundled for get_text_elements_internal(), which runs under rb_ensure(). */
typedef struct _get_text_elements_param {
  WString* wstr;  /* decoded code points to segment */
  VALUE str;      /* taint source for the pieces; returned when a block is given */
} get_text_elements_param;
1132
-
1133
/*
 * Split param->wstr into text elements and report each one as a String.
 *
 * A text element is a base character followed by any marks (categories
 * Mn, Mc, Me), or a run of Hangul jamo that forms a syllable.  A mark with
 * no preceding base is reported on its own.
 *
 * With a block, each element is yielded and param->str is returned.
 * Without one, the elements are collected into a new Array.  Every element
 * inherits the taint of param->str.
 */
VALUE
get_text_elements_internal(get_text_elements_param* param)
{
  WString* wstr = param->wstr;
  VALUE str = param->str;
  int start_pos;
  int block_p = rb_block_given_p();
  volatile VALUE ret = str;

  if (!block_p)
    ret = rb_ary_new();
  for (start_pos = 0; start_pos < wstr->len;) {
    int c0 = wstr->str[start_pos];
    int cat = get_gencat(c0);
    int length = 1;   /* code points in the current element */
    int j;

    /* a mark at the start of an element stands alone */
    if (cat == c_Mn || cat == c_Mc || cat == c_Me) {
      volatile VALUE rstr = TO_(str, wstring_to_rstring(wstr, start_pos, length));
      if (!block_p)
        rb_ary_push(ret, rstr);
      else
        rb_yield(rstr);
      start_pos++;
      continue;
    }

    /* extend the element while the following characters attach to c0 */
    for (j = start_pos + 1; j < wstr->len; j++) {
      int c1 = wstr->str[j];
      int cat = get_gencat(c1);
      if (c0 >= LBASE && c0 < LBASE + LCOUNT &&
          j + 1 < wstr->len &&
          c1 >= VBASE && c1 < VBASE + VCOUNT &&
          wstr->str[j+1] >= TBASE && wstr->str[j+1] < TBASE + TCOUNT) {
        /* Hangul L+V+T */
        length += 2;
        j++;  /* the loop increment then skips past the T as well */
      }
      else if (c0 >= LBASE && c0 < LBASE + LCOUNT &&
               c1 >= VBASE && c1< VBASE + VCOUNT) {
        /* Hangul L+V */
        length++;
      }
      else if (c0 >= SBASE && c0 < SBASE + SCOUNT &&
               (c0 - SBASE) % TCOUNT == 0 &&
               c1 >= TBASE && c1 < TBASE + TCOUNT) {
        /* Hangul LV+T */
        length++;
      }
      else if (cat == c_Mn || cat == c_Mc || cat == c_Me) {
        /* Mark */
        length++;
      }
      else {
        /* c1 starts the next element: emit the current one and stop */
        volatile VALUE rstr = TO_(str, wstring_to_rstring(wstr, start_pos, length));
        if (!block_p)
          rb_ary_push(ret, rstr);
        else
          rb_yield(rstr);
        length = 0;  /* marks the element as already emitted */
        break;
      }
    }
    /* the inner loop ran off the end of the string: emit what is pending */
    if (length > 0) {
      volatile VALUE rstr = TO_(str, wstring_to_rstring(wstr, start_pos, length));
      if (!block_p)
        rb_ary_push(ret, rstr);
      else
        rb_yield(rstr);
    }
    /* j is the first code point not consumed by this element */
    start_pos = j;
  }
  return ret;
}
1207
-
1208
- VALUE
1209
- get_text_elements_ensure(WString* wstr)
1210
- {
1211
- WStr_free(wstr);
1212
- return Qnil;
1213
- }
1214
-
1215
- VALUE
1216
- unicode_get_text_elements(VALUE obj, VALUE str)
1217
- {
1218
- WString wstr;
1219
- get_text_elements_param param = { &wstr, str };
1220
-
1221
- Check_Type(str, T_STRING);
1222
- #ifdef HAVE_RUBY_ENCODING_H
1223
- CONVERT_TO_UTF8(str);
1224
- #endif
1225
- WStr_allocWithUTF8L(&wstr, RSTRING_PTR(str), RSTRING_LEN(str));
1226
-
1227
- return rb_ensure(get_text_elements_internal, (VALUE)&param,
1228
- get_text_elements_ensure, (VALUE)&wstr);
1229
- /* wstr will be freed in get_text_elements_ensure() */
1230
- }
1231
-
1232
- void
1233
- Init_unicode_native()
1234
- {
1235
- int i;
1236
-
1237
- #ifdef HAVE_RUBY_ENCODING_H
1238
- enc_out = rb_utf8_encoding();
1239
- #endif
1240
-
1241
- mUnicode = rb_define_module("Unicode");
1242
- unicode_data = rb_hash_new();
1243
- composition_table = rb_hash_new();
1244
-
1245
- rb_global_variable(&unicode_data);
1246
- rb_global_variable(&composition_table);
1247
-
1248
- for (i = 0; unidata[i].code != -1; i++) {
1249
- int code = unidata[i].code;
1250
- const char* canon = unidata[i].canon;
1251
- int exclusion = unidata[i].exclusion;
1252
-
1253
- rb_hash_aset(unicode_data, INT2FIX(code), INT2FIX(i));
1254
- if (canon && exclusion == 0) {
1255
- rb_hash_aset(composition_table, rb_str_new2(canon), INT2FIX(code));
1256
- }
1257
- }
1258
-
1259
- for (i = 0; i < c_Cn + 1; i++) {
1260
- catname_abbr[i] = ID2SYM(rb_intern(gencat_abbr[i]));
1261
- catname_long[i] = ID2SYM(rb_intern(gencat_long[i]));
1262
- rb_global_variable(&catname_abbr[i]);
1263
- rb_global_variable(&catname_long[i]);
1264
- }
1265
-
1266
- rb_define_module_function(mUnicode, "strcmp",
1267
- unicode_strcmp, 2);
1268
- rb_define_module_function(mUnicode, "strcmp_compat",
1269
- unicode_strcmp_compat, 2);
1270
-
1271
- rb_define_module_function(mUnicode, "decompose",
1272
- unicode_decompose, 1);
1273
- rb_define_module_function(mUnicode, "decompose_safe",
1274
- unicode_decompose_safe, 1);
1275
- rb_define_module_function(mUnicode, "decompose_compat",
1276
- unicode_decompose_compat, 1);
1277
- rb_define_module_function(mUnicode, "compose",
1278
- unicode_compose, 1);
1279
-
1280
- rb_define_module_function(mUnicode, "normalize_D",
1281
- unicode_decompose, 1);
1282
- rb_define_module_function(mUnicode, "normalize_D_safe",
1283
- unicode_decompose_safe, 1);
1284
- rb_define_module_function(mUnicode, "normalize_KD",
1285
- unicode_decompose_compat, 1);
1286
- rb_define_module_function(mUnicode, "normalize_C",
1287
- unicode_normalize_C, 1);
1288
- rb_define_module_function(mUnicode, "normalize_C_safe",
1289
- unicode_normalize_safe, 1);
1290
- rb_define_module_function(mUnicode, "normalize_KC",
1291
- unicode_normalize_KC, 1);
1292
-
1293
- /* aliases */
1294
- rb_define_module_function(mUnicode, "nfd",
1295
- unicode_decompose, 1);
1296
- rb_define_module_function(mUnicode, "nfd_safe",
1297
- unicode_decompose_safe, 1);
1298
- rb_define_module_function(mUnicode, "nfkd",
1299
- unicode_decompose_compat, 1);
1300
- rb_define_module_function(mUnicode, "nfc",
1301
- unicode_normalize_C, 1);
1302
- rb_define_module_function(mUnicode, "nfc_safe",
1303
- unicode_normalize_safe, 1);
1304
- rb_define_module_function(mUnicode, "nfkc",
1305
- unicode_normalize_KC, 1);
1306
-
1307
- rb_define_module_function(mUnicode, "upcase",
1308
- unicode_upcase, 1);
1309
- rb_define_module_function(mUnicode, "downcase",
1310
- unicode_downcase, 1);
1311
- rb_define_module_function(mUnicode, "capitalize",
1312
- unicode_capitalize, 1);
1313
-
1314
- rb_define_module_function(mUnicode, "categories",
1315
- unicode_get_categories, 1);
1316
- rb_define_module_function(mUnicode, "abbr_categories",
1317
- unicode_get_abbr_categories, 1);
1318
- rb_define_module_function(mUnicode, "width",
1319
- unicode_wcswidth, -1);
1320
- rb_define_module_function(mUnicode, "text_elements",
1321
- unicode_get_text_elements, 1);
1322
-
1323
- rb_define_const(mUnicode, "VERSION",
1324
- rb_str_new2(UNICODE_VERSION));
1325
- }