icu4r 0.1.3.2006.01.26

Sign up to get free protection for your applications and to get access to all the features.
data/ucore_ext.c ADDED
@@ -0,0 +1,168 @@
1
+ #include "icu_common.h"
2
+ extern VALUE rb_cUString;
3
+ extern VALUE icu_ustr_new_set(const UChar * str, long len, long capa);
4
+
5
+ /**
6
+ * call-seq:
7
+ * ary.to_u => anUString
8
+ *
9
+ * Creates UString from array of fixnums, representing Unicode codepoints.
10
+ * (inversion of UString#codepoints)
11
+ *
12
+ * a = "поддержка".to_u.codepoints # => [1087, 1086, 1076, 1076, 1077, 1088, 1078, 1082, 1072]
13
+ * a.to_u # => "поддержка"
14
+ *
15
+ */
16
+ VALUE icu_ustr_from_array(obj)
17
+ VALUE obj;
18
+ {
19
+ int i, n;
20
+ VALUE *p;
21
+ VALUE ret, temp;
22
+ UChar32 * src , *pos, chr;
23
+ UChar * buf;
24
+ int32_t len, capa;
25
+ UErrorCode status = U_ZERO_ERROR;
26
+
27
+ n = RARRAY(obj)->len;
28
+ p = RARRAY(obj)->ptr;
29
+
30
+ src = ALLOC_N(UChar32, n);
31
+ pos = src;
32
+ for ( i = 0; i < n; i++){
33
+ temp = p[i];
34
+ if(TYPE(temp) != T_FIXNUM) {
35
+ free(src);
36
+ rb_raise(rb_eTypeError, "Can't convert from %s", rb_class2name(CLASS_OF(temp)));
37
+ }
38
+ chr = (UChar32) FIX2INT(temp);
39
+ // invalid codepoints are converted to U+FFFD
40
+ if( ! (U_IS_UNICODE_CHAR(chr)) ) {
41
+ chr = 0xFFFD;
42
+ }
43
+ *pos = chr;
44
+ pos ++;
45
+ }
46
+ capa = n+1;
47
+ buf = ALLOC_N(UChar, capa);
48
+ u_strFromUTF32(buf, capa, &len, src, n, &status);
49
+ if( U_BUFFER_OVERFLOW_ERROR == status ){
50
+ capa = len+1;
51
+ REALLOC_N(buf, UChar, capa);
52
+ status = U_ZERO_ERROR;
53
+ u_strFromUTF32(buf, capa, &len, src, n, &status);
54
+ }
55
+ if (U_FAILURE(status) ) {
56
+ free(src);
57
+ free(buf);
58
+ rb_raise(rb_eRuntimeError, u_errorName(status));
59
+ }
60
+ if( capa <= len ){
61
+ ++capa;
62
+ REALLOC_N(buf, UChar, capa);
63
+ }
64
+ ret = icu_ustr_new_set(buf, len, capa);
65
+ free(src);
66
+ return ret;
67
+ }
68
+
69
+ /**
70
+ * call-seq:
71
+ * str.to_u(encoding = 'utf8') => String
72
+ *
73
+ * Converts String value in given encoding to UString.
74
+ * When no encoding is given, utf8 is assumed. If string is not valid UTF8,
75
+ * and no encoding is given, exception is raised.
76
+ *
77
+ * When explicit encoding is given, converter will replace incorrect codepoints
78
+ * with <U+FFFD> - replacement character.
79
+ */
80
+ VALUE
81
+ icu_from_rstr(argc, argv, str)
82
+ int argc;
83
+ VALUE *argv,
84
+ str;
85
+ {
86
+ VALUE enc;
87
+ char *encoding = 0; /* default */
88
+ UErrorCode error = 0;
89
+ int32_t capa, len;
90
+ VALUE s;
91
+ UChar * buf;
92
+ UConverter * conv;
93
+ if (rb_scan_args(argc, argv, "01", &enc) == 1) {
94
+ Check_Type(enc, T_STRING);
95
+ encoding = RSTRING(enc)->ptr;
96
+ }
97
+ capa = RSTRING(str)->len + 1;
98
+ buf = ALLOC_N(UChar, capa);
99
+
100
+ if(! encoding || !strncmp(encoding, "utf8", 4) ) {
101
+ /* from UTF8 */
102
+ u_strFromUTF8(buf, capa-1, &len, RSTRING(str)->ptr, RSTRING(str)->len, &error);
103
+ if( U_FAILURE(error)) {
104
+ free(buf);
105
+ rb_raise(rb_eArgError, u_errorName(error));
106
+ }
107
+ s = icu_ustr_new_set(buf, len, capa);
108
+ } else {
109
+ conv = ucnv_open(encoding, &error);
110
+ if (U_FAILURE(error)) {
111
+ ucnv_close(conv);
112
+ rb_raise(rb_eArgError, u_errorName(error));
113
+ }
114
+ len = ucnv_toUChars(conv, buf, capa-1, RSTRING(str)->ptr,
115
+ RSTRING(str)->len, &error);
116
+ if (U_BUFFER_OVERFLOW_ERROR == error) {
117
+ capa = len+1;
118
+ REALLOC_N(buf, UChar, capa);
119
+ error = 0;
120
+ len = ucnv_toUChars(conv, buf, capa-1, RSTRING(str)->ptr,
121
+ RSTRING(str)->len, &error);
122
+ if (U_FAILURE(error)) {
123
+ free(buf);
124
+ rb_raise(rb_eArgError, u_errorName(error));
125
+ }
126
+
127
+ }
128
+ s = icu_ustr_new_set(buf, len, capa);
129
+ ucnv_close(conv);
130
+ }
131
+ return s;
132
+ }
133
+
134
+ /**
135
+ * call-seq:
136
+ * u(str, enc = 'utf8') => UString
137
+ *
138
+ * Global function to convert from String to UString
139
+ */
140
+ VALUE
141
+ icu_f_rb_str(argc, argv, obj)
142
+ int argc;
143
+ VALUE *argv;
144
+ VALUE obj;
145
+ {
146
+ VALUE enc;
147
+ VALUE str;
148
+ if (rb_scan_args(argc, argv, "11", &str, &enc) == 2) {
149
+ Check_Type(enc, T_STRING);
150
+ Check_Type(str, T_STRING);
151
+ return icu_from_rstr(1, &enc, str);
152
+ } else {
153
+ Check_Type(str, T_STRING);
154
+ return icu_from_rstr(0, NULL, str);
155
+ }
156
+
157
+ }
158
+
159
+ void initialize_ucore_ext(void)
160
+ {
161
+ /* conversion from String to UString */
162
+ rb_define_method(rb_cString, "to_u", icu_from_rstr, -1);
163
+ rb_define_alias(rb_cString, "u", "to_u");
164
+ rb_define_global_function("u", icu_f_rb_str, -1);
165
+
166
+ /* conversion from Array to UString */
167
+ rb_define_method(rb_cArray, "to_u", icu_ustr_from_array, 0);
168
+ }