icu4r 0.1.3.2006.01.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ucore_ext.c ADDED
@@ -0,0 +1,168 @@
1
+ #include "icu_common.h"
2
+ extern VALUE rb_cUString;
3
+ extern VALUE icu_ustr_new_set(const UChar * str, long len, long capa);
4
+
5
+ /**
6
+ * call-seq:
7
+ * ary.to_u => anUString
8
+ *
9
+ * Creates UString from array of fixnums, representing Unicode codepoints.
10
+ * (inversion of UString#codepoints)
11
+ *
12
+ * a = "поддержка".to_u.codepoints # => [1087, 1086, 1076, 1076, 1077, 1088, 1078, 1082, 1072]
13
+ * a.to_u # => "поддержка"
14
+ *
15
+ */
16
+ VALUE icu_ustr_from_array(obj)
17
+ VALUE obj;
18
+ {
19
+ int i, n;
20
+ VALUE *p;
21
+ VALUE ret, temp;
22
+ UChar32 * src , *pos, chr;
23
+ UChar * buf;
24
+ int32_t len, capa;
25
+ UErrorCode status = U_ZERO_ERROR;
26
+
27
+ n = RARRAY(obj)->len;
28
+ p = RARRAY(obj)->ptr;
29
+
30
+ src = ALLOC_N(UChar32, n);
31
+ pos = src;
32
+ for ( i = 0; i < n; i++){
33
+ temp = p[i];
34
+ if(TYPE(temp) != T_FIXNUM) {
35
+ free(src);
36
+ rb_raise(rb_eTypeError, "Can't convert from %s", rb_class2name(CLASS_OF(temp)));
37
+ }
38
+ chr = (UChar32) FIX2INT(temp);
39
+ // invalid codepoints are converted to U+FFFD
40
+ if( ! (U_IS_UNICODE_CHAR(chr)) ) {
41
+ chr = 0xFFFD;
42
+ }
43
+ *pos = chr;
44
+ pos ++;
45
+ }
46
+ capa = n+1;
47
+ buf = ALLOC_N(UChar, capa);
48
+ u_strFromUTF32(buf, capa, &len, src, n, &status);
49
+ if( U_BUFFER_OVERFLOW_ERROR == status ){
50
+ capa = len+1;
51
+ REALLOC_N(buf, UChar, capa);
52
+ status = U_ZERO_ERROR;
53
+ u_strFromUTF32(buf, capa, &len, src, n, &status);
54
+ }
55
+ if (U_FAILURE(status) ) {
56
+ free(src);
57
+ free(buf);
58
+ rb_raise(rb_eRuntimeError, u_errorName(status));
59
+ }
60
+ if( capa <= len ){
61
+ ++capa;
62
+ REALLOC_N(buf, UChar, capa);
63
+ }
64
+ ret = icu_ustr_new_set(buf, len, capa);
65
+ free(src);
66
+ return ret;
67
+ }
68
+
69
+ /**
70
+ * call-seq:
71
+ * str.to_u(encoding = 'utf8') => String
72
+ *
73
+ * Converts String value in given encoding to UString.
74
+ * When no encoding is given, utf8 is assumed. If string is not valid UTF8,
75
+ * and no encoding is given, exception is raised.
76
+ *
77
+ * When explicit encoding is given, converter will replace incorrect codepoints
78
+ * with <U+FFFD> - replacement character.
79
+ */
80
+ VALUE
81
+ icu_from_rstr(argc, argv, str)
82
+ int argc;
83
+ VALUE *argv,
84
+ str;
85
+ {
86
+ VALUE enc;
87
+ char *encoding = 0; /* default */
88
+ UErrorCode error = 0;
89
+ int32_t capa, len;
90
+ VALUE s;
91
+ UChar * buf;
92
+ UConverter * conv;
93
+ if (rb_scan_args(argc, argv, "01", &enc) == 1) {
94
+ Check_Type(enc, T_STRING);
95
+ encoding = RSTRING(enc)->ptr;
96
+ }
97
+ capa = RSTRING(str)->len + 1;
98
+ buf = ALLOC_N(UChar, capa);
99
+
100
+ if(! encoding || !strncmp(encoding, "utf8", 4) ) {
101
+ /* from UTF8 */
102
+ u_strFromUTF8(buf, capa-1, &len, RSTRING(str)->ptr, RSTRING(str)->len, &error);
103
+ if( U_FAILURE(error)) {
104
+ free(buf);
105
+ rb_raise(rb_eArgError, u_errorName(error));
106
+ }
107
+ s = icu_ustr_new_set(buf, len, capa);
108
+ } else {
109
+ conv = ucnv_open(encoding, &error);
110
+ if (U_FAILURE(error)) {
111
+ ucnv_close(conv);
112
+ rb_raise(rb_eArgError, u_errorName(error));
113
+ }
114
+ len = ucnv_toUChars(conv, buf, capa-1, RSTRING(str)->ptr,
115
+ RSTRING(str)->len, &error);
116
+ if (U_BUFFER_OVERFLOW_ERROR == error) {
117
+ capa = len+1;
118
+ REALLOC_N(buf, UChar, capa);
119
+ error = 0;
120
+ len = ucnv_toUChars(conv, buf, capa-1, RSTRING(str)->ptr,
121
+ RSTRING(str)->len, &error);
122
+ if (U_FAILURE(error)) {
123
+ free(buf);
124
+ rb_raise(rb_eArgError, u_errorName(error));
125
+ }
126
+
127
+ }
128
+ s = icu_ustr_new_set(buf, len, capa);
129
+ ucnv_close(conv);
130
+ }
131
+ return s;
132
+ }
133
+
134
+ /**
135
+ * call-seq:
136
+ * u(str, enc = 'utf8') => UString
137
+ *
138
+ * Global function to convert from String to UString
139
+ */
140
+ VALUE
141
+ icu_f_rb_str(argc, argv, obj)
142
+ int argc;
143
+ VALUE *argv;
144
+ VALUE obj;
145
+ {
146
+ VALUE enc;
147
+ VALUE str;
148
+ if (rb_scan_args(argc, argv, "11", &str, &enc) == 2) {
149
+ Check_Type(enc, T_STRING);
150
+ Check_Type(str, T_STRING);
151
+ return icu_from_rstr(1, &enc, str);
152
+ } else {
153
+ Check_Type(str, T_STRING);
154
+ return icu_from_rstr(0, NULL, str);
155
+ }
156
+
157
+ }
158
+
159
+ void initialize_ucore_ext(void)
160
+ {
161
+ /* conversion from String to UString */
162
+ rb_define_method(rb_cString, "to_u", icu_from_rstr, -1);
163
+ rb_define_alias(rb_cString, "u", "to_u");
164
+ rb_define_global_function("u", icu_f_rb_str, -1);
165
+
166
+ /* conversion from Array to UString */
167
+ rb_define_method(rb_cArray, "to_u", icu_ustr_from_array, 0);
168
+ }