icu4r 0.1.3.2006.01.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +20 -0
- data/README +153 -0
- data/calendar.c +576 -0
- data/docs/FORMATTING +131 -0
- data/docs/UNICODE_REGEXPS +204 -0
- data/extconf.rb +15 -0
- data/fmt.cpp +150 -0
- data/icu4r.c +14 -0
- data/icu_common.h +45 -0
- data/samples/demo_each.rb +23 -0
- data/samples/demo_locales.rb +16 -0
- data/samples/demo_regexp.rb +11 -0
- data/samples/resbundle/appmsg/root.res +0 -0
- data/samples/resbundle/appmsg/ru.res +0 -0
- data/samples/resbundle/demo_bundle.rb +4 -0
- data/samples/resbundle/mkres.sh +4 -0
- data/samples/resbundle/root.txt +10 -0
- data/samples/resbundle/ru.txt +4 -0
- data/test/test_calendar.rb +109 -0
- data/test/test_ustring.rb +381 -0
- data/tools/doc.sh +2 -0
- data/tools/km.rb +425 -0
- data/ubundle.c +209 -0
- data/ucore_ext.c +168 -0
- data/uregex.c +673 -0
- data/uregex.h +27 -0
- data/ustring.c +3042 -0
- metadata +81 -0
data/ucore_ext.c
ADDED
@@ -0,0 +1,168 @@
|
|
1
|
+
#include "icu_common.h"
|
2
|
+
extern VALUE rb_cUString;
|
3
|
+
extern VALUE icu_ustr_new_set(const UChar * str, long len, long capa);
|
4
|
+
|
5
|
+
/**
|
6
|
+
* call-seq:
|
7
|
+
* ary.to_u => anUString
|
8
|
+
*
|
9
|
+
* Creates UString from array of fixnums, representing Unicode codepoints.
|
10
|
+
* (inversion of UString#codepoints)
|
11
|
+
*
|
12
|
+
* a = "поддержка".to_u.codepoints # => [1087, 1086, 1076, 1076, 1077, 1088, 1078, 1082, 1072]
|
13
|
+
* a.to_u # => "поддержка"
|
14
|
+
*
|
15
|
+
*/
|
16
|
+
VALUE icu_ustr_from_array(obj)
|
17
|
+
VALUE obj;
|
18
|
+
{
|
19
|
+
int i, n;
|
20
|
+
VALUE *p;
|
21
|
+
VALUE ret, temp;
|
22
|
+
UChar32 * src , *pos, chr;
|
23
|
+
UChar * buf;
|
24
|
+
int32_t len, capa;
|
25
|
+
UErrorCode status = U_ZERO_ERROR;
|
26
|
+
|
27
|
+
n = RARRAY(obj)->len;
|
28
|
+
p = RARRAY(obj)->ptr;
|
29
|
+
|
30
|
+
src = ALLOC_N(UChar32, n);
|
31
|
+
pos = src;
|
32
|
+
for ( i = 0; i < n; i++){
|
33
|
+
temp = p[i];
|
34
|
+
if(TYPE(temp) != T_FIXNUM) {
|
35
|
+
free(src);
|
36
|
+
rb_raise(rb_eTypeError, "Can't convert from %s", rb_class2name(CLASS_OF(temp)));
|
37
|
+
}
|
38
|
+
chr = (UChar32) FIX2INT(temp);
|
39
|
+
// invalid codepoints are converted to U+FFFD
|
40
|
+
if( ! (U_IS_UNICODE_CHAR(chr)) ) {
|
41
|
+
chr = 0xFFFD;
|
42
|
+
}
|
43
|
+
*pos = chr;
|
44
|
+
pos ++;
|
45
|
+
}
|
46
|
+
capa = n+1;
|
47
|
+
buf = ALLOC_N(UChar, capa);
|
48
|
+
u_strFromUTF32(buf, capa, &len, src, n, &status);
|
49
|
+
if( U_BUFFER_OVERFLOW_ERROR == status ){
|
50
|
+
capa = len+1;
|
51
|
+
REALLOC_N(buf, UChar, capa);
|
52
|
+
status = U_ZERO_ERROR;
|
53
|
+
u_strFromUTF32(buf, capa, &len, src, n, &status);
|
54
|
+
}
|
55
|
+
if (U_FAILURE(status) ) {
|
56
|
+
free(src);
|
57
|
+
free(buf);
|
58
|
+
rb_raise(rb_eRuntimeError, u_errorName(status));
|
59
|
+
}
|
60
|
+
if( capa <= len ){
|
61
|
+
++capa;
|
62
|
+
REALLOC_N(buf, UChar, capa);
|
63
|
+
}
|
64
|
+
ret = icu_ustr_new_set(buf, len, capa);
|
65
|
+
free(src);
|
66
|
+
return ret;
|
67
|
+
}
|
68
|
+
|
69
|
+
/**
|
70
|
+
* call-seq:
|
71
|
+
* str.to_u(encoding = 'utf8') => String
|
72
|
+
*
|
73
|
+
* Converts String value in given encoding to UString.
|
74
|
+
* When no encoding is given, utf8 is assumed. If string is not valid UTF8,
|
75
|
+
* and no encoding is given, exception is raised.
|
76
|
+
*
|
77
|
+
* When explicit encoding is given, converter will replace incorrect codepoints
|
78
|
+
* with <U+FFFD> - replacement character.
|
79
|
+
*/
|
80
|
+
VALUE
|
81
|
+
icu_from_rstr(argc, argv, str)
|
82
|
+
int argc;
|
83
|
+
VALUE *argv,
|
84
|
+
str;
|
85
|
+
{
|
86
|
+
VALUE enc;
|
87
|
+
char *encoding = 0; /* default */
|
88
|
+
UErrorCode error = 0;
|
89
|
+
int32_t capa, len;
|
90
|
+
VALUE s;
|
91
|
+
UChar * buf;
|
92
|
+
UConverter * conv;
|
93
|
+
if (rb_scan_args(argc, argv, "01", &enc) == 1) {
|
94
|
+
Check_Type(enc, T_STRING);
|
95
|
+
encoding = RSTRING(enc)->ptr;
|
96
|
+
}
|
97
|
+
capa = RSTRING(str)->len + 1;
|
98
|
+
buf = ALLOC_N(UChar, capa);
|
99
|
+
|
100
|
+
if(! encoding || !strncmp(encoding, "utf8", 4) ) {
|
101
|
+
/* from UTF8 */
|
102
|
+
u_strFromUTF8(buf, capa-1, &len, RSTRING(str)->ptr, RSTRING(str)->len, &error);
|
103
|
+
if( U_FAILURE(error)) {
|
104
|
+
free(buf);
|
105
|
+
rb_raise(rb_eArgError, u_errorName(error));
|
106
|
+
}
|
107
|
+
s = icu_ustr_new_set(buf, len, capa);
|
108
|
+
} else {
|
109
|
+
conv = ucnv_open(encoding, &error);
|
110
|
+
if (U_FAILURE(error)) {
|
111
|
+
ucnv_close(conv);
|
112
|
+
rb_raise(rb_eArgError, u_errorName(error));
|
113
|
+
}
|
114
|
+
len = ucnv_toUChars(conv, buf, capa-1, RSTRING(str)->ptr,
|
115
|
+
RSTRING(str)->len, &error);
|
116
|
+
if (U_BUFFER_OVERFLOW_ERROR == error) {
|
117
|
+
capa = len+1;
|
118
|
+
REALLOC_N(buf, UChar, capa);
|
119
|
+
error = 0;
|
120
|
+
len = ucnv_toUChars(conv, buf, capa-1, RSTRING(str)->ptr,
|
121
|
+
RSTRING(str)->len, &error);
|
122
|
+
if (U_FAILURE(error)) {
|
123
|
+
free(buf);
|
124
|
+
rb_raise(rb_eArgError, u_errorName(error));
|
125
|
+
}
|
126
|
+
|
127
|
+
}
|
128
|
+
s = icu_ustr_new_set(buf, len, capa);
|
129
|
+
ucnv_close(conv);
|
130
|
+
}
|
131
|
+
return s;
|
132
|
+
}
|
133
|
+
|
134
|
+
/**
|
135
|
+
* call-seq:
|
136
|
+
* u(str, enc = 'utf8') => UString
|
137
|
+
*
|
138
|
+
* Global function to convert from String to UString
|
139
|
+
*/
|
140
|
+
VALUE
|
141
|
+
icu_f_rb_str(argc, argv, obj)
|
142
|
+
int argc;
|
143
|
+
VALUE *argv;
|
144
|
+
VALUE obj;
|
145
|
+
{
|
146
|
+
VALUE enc;
|
147
|
+
VALUE str;
|
148
|
+
if (rb_scan_args(argc, argv, "11", &str, &enc) == 2) {
|
149
|
+
Check_Type(enc, T_STRING);
|
150
|
+
Check_Type(str, T_STRING);
|
151
|
+
return icu_from_rstr(1, &enc, str);
|
152
|
+
} else {
|
153
|
+
Check_Type(str, T_STRING);
|
154
|
+
return icu_from_rstr(0, NULL, str);
|
155
|
+
}
|
156
|
+
|
157
|
+
}
|
158
|
+
|
159
|
+
void initialize_ucore_ext(void)
|
160
|
+
{
|
161
|
+
/* conversion from String to UString */
|
162
|
+
rb_define_method(rb_cString, "to_u", icu_from_rstr, -1);
|
163
|
+
rb_define_alias(rb_cString, "u", "to_u");
|
164
|
+
rb_define_global_function("u", icu_f_rb_str, -1);
|
165
|
+
|
166
|
+
/* conversion from Array to UString */
|
167
|
+
rb_define_method(rb_cArray, "to_u", icu_ustr_from_array, 0);
|
168
|
+
}
|