icu4r 0.1.3.2006.01.26 → 0.1.4.2006.05.29

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,322 @@
1
+ #include "icu_common.h"
2
+ extern VALUE rb_cUString;
3
+ extern VALUE icu_ustr_new_set(UChar * ptr, long len, long capa);
4
+ extern VALUE rb_cUConverter;
5
+
6
+ #define UCONVERTER(obj) ((UConverter *)DATA_PTR(obj))
7
+
8
+ static void icu4r_cnv_free(UConverter * conv)
9
+ {
10
+ ucnv_close(conv);
11
+ }
12
+ static VALUE icu4r_cnv_alloc(VALUE klass)
13
+ {
14
+ return Data_Wrap_Struct(klass, 0, icu4r_cnv_free, 0);
15
+ }
16
+
17
+
18
+ /**
19
+ * call-seq:
20
+ * conv = UConverter.new(name)
21
+ *
22
+ * Creates new converter, by given name. Name must be a Ruby String and may contain
23
+ * additional options, e.g.:
24
+ *
25
+ * "SCSU,locale=ja" # Converter option for specifying a locale
26
+ * "UTF-7,version=1" # Converter option for specifying a version selector (0..9) for some converters.
27
+ * "ibm-1047,swaplfnl" # Converter option for EBCDIC SBCS or mixed-SBCS/DBCS (stateful) codepages.
28
+ *
29
+ * To get list of available converters call UConverter.list_available
30
+ */
31
+ VALUE icu4r_cnv_init(VALUE self, VALUE name)
32
+ {
33
+ UConverter * converter;
34
+ UErrorCode status = U_ZERO_ERROR;
35
+
36
+ Check_Type(name, T_STRING);
37
+ converter = ucnv_open(RSTRING(name)->ptr, &status);
38
+ ICU_RAISE(status);
39
+ DATA_PTR(self) = converter;
40
+ return self;
41
+ }
42
+ /**
43
+ * call-seq:
44
+ * UConverter.list_available # => Array
45
+ *
46
+ * Returns the names of available converters.
47
+ */
48
+ VALUE icu4r_cnv_list(VALUE self)
49
+ {
50
+ VALUE ret ;
51
+ int32_t count, i;
52
+ count = ucnv_countAvailable();
53
+ ret = rb_ary_new2(count);
54
+ for( i = 0; i < count ; i++)
55
+ {
56
+ rb_ary_store(ret, i, rb_str_new2(ucnv_getAvailableName(i)));
57
+ }
58
+ return ret;
59
+ }
60
+
61
+ /**
62
+ * call-seq:
63
+ * converter.subst_chars
64
+ *
65
+ * Returns substitution characters as multiple bytes
66
+ */
67
+ VALUE icu4r_cnv_get_subst_chars(VALUE self)
68
+ {
69
+ char buf[16];
70
+ int8_t len = 16;
71
+ UErrorCode status = U_ZERO_ERROR;
72
+ ucnv_getSubstChars(UCONVERTER(self), buf, &len, &status);
73
+ ICU_RAISE(status);
74
+ return rb_str_new(buf, len);
75
+ }
76
+
77
+ /**
78
+ * call-seq:
79
+ * converter.subst_chars=chars
80
+ *
81
+ * Sets the substitution chars when converting from unicode to a codepage.
82
+ * The substitution is specified as a string of 1-4 bytes
83
+ */
84
+ VALUE icu4r_cnv_set_subst_chars(VALUE self, VALUE str)
85
+ {
86
+ UErrorCode status = U_ZERO_ERROR;
87
+ Check_Type(str, T_STRING);
88
+ ucnv_setSubstChars(UCONVERTER(self), RSTRING(str)->ptr, RSTRING(str)->len, &status);
89
+ ICU_RAISE(status);
90
+ return Qnil;
91
+ }
92
+
93
+ /**
94
+ * call-seq:
95
+ * conv.name
96
+ *
97
+ * Gets the internal, canonical name of the converter.
98
+ */
99
+ VALUE icu4r_cnv_name(VALUE self)
100
+ {
101
+ UConverter * cnv = UCONVERTER(self);
102
+ UErrorCode status = U_ZERO_ERROR;
103
+ return rb_str_new2(ucnv_getName(cnv, &status));
104
+ }
105
+
106
+ /**
107
+ * call-seq:
108
+ * converter.reset
109
+ *
110
+ * Resets the state of a converter to the default state.
111
+ * This is used in the case of an error, to restart a conversion from a known default state.
112
+ * It will also empty the internal output buffers.
113
+ */
114
+ VALUE icu4r_cnv_reset(VALUE self)
115
+ {
116
+ UConverter * cnv = UCONVERTER(self);
117
+ ucnv_reset(cnv);
118
+ return Qnil;
119
+ }
120
+
121
+ /**
122
+ * call-seq:
123
+ * conv.from_u(ustring) -> String
124
+ *
125
+ * Convert the Unicode string into a codepage string using an existing UConverter.
126
+ */
127
+ VALUE icu4r_cnv_from_unicode(VALUE self, VALUE str)
128
+ {
129
+ UConverter * conv = UCONVERTER(self);
130
+ UErrorCode status = U_ZERO_ERROR;
131
+ int32_t enclen, capa;
132
+ char * buf;
133
+ VALUE s = Qnil;
134
+ Check_Class(str, rb_cUString);
135
+ capa = ICU_LEN(str) + 1;
136
+ buf = ALLOC_N(char, capa);
137
+ enclen = ucnv_fromUChars(conv, buf, capa-1, ICU_PTR(str), ICU_LEN(str), &status);
138
+ if (U_BUFFER_OVERFLOW_ERROR == status) {
139
+ REALLOC_N(buf, char, enclen + 1);
140
+ status = 0;
141
+ ucnv_fromUChars(conv, buf, enclen, ICU_PTR(str), ICU_LEN(str), &status);
142
+ }
143
+ if( U_FAILURE(status) ){
144
+ free(buf);
145
+ rb_raise(rb_eArgError, u_errorName(status));
146
+ }
147
+ s = rb_str_new(buf, enclen);
148
+ return s;
149
+ }
150
+
151
+ /**
152
+ * call-seq:
153
+ * conv.to_u(string) -> UString
154
+ *
155
+ * Convert the codepage string into a Unicode string using an existing UConverter.
156
+ */
157
+ VALUE icu4r_cnv_to_unicode(VALUE self, VALUE str)
158
+ {
159
+ UConverter * conv = UCONVERTER(self);
160
+ UErrorCode status = U_ZERO_ERROR;
161
+ long len, capa;
162
+ VALUE s;
163
+ UChar * buf;
164
+ Check_Type(str, T_STRING);
165
+ capa = RSTRING(str)->len + 1;
166
+ buf = ALLOC_N(UChar, capa);
167
+ len = ucnv_toUChars(conv, buf, capa-1, RSTRING(str)->ptr, RSTRING(str)->len, &status);
168
+ if (U_BUFFER_OVERFLOW_ERROR == status) {
169
+ capa = len+1;
170
+ REALLOC_N(buf, UChar, capa);
171
+ status = 0;
172
+ len = ucnv_toUChars(conv, buf, capa-1, RSTRING(str)->ptr, RSTRING(str)->len, &status);
173
+ if (U_FAILURE(status)) {
174
+ free(buf);
175
+ rb_raise(rb_eArgError, u_errorName(status));
176
+ }
177
+ }
178
+ s = icu_ustr_new_set(buf, len, capa);
179
+ return s;
180
+ }
181
+
182
+ #define BUF_SIZE 1024
183
+ /**
184
+ * call-seq:
185
+ * conv.convert(other_conv, string)
186
+ *
187
+ * Convert from one external charset to another using two existing UConverters,
188
+ * ignoring the location of errors.
189
+ */
190
+ VALUE icu4r_cnv_convert_to(VALUE self, VALUE other, VALUE src)
191
+ {
192
+ UConverter * cnv, * other_cnv;
193
+ UErrorCode status = U_ZERO_ERROR;
194
+ UChar pivotBuffer[BUF_SIZE];
195
+ UChar *pivot, *pivot2;
196
+ char * target,buffer[BUF_SIZE], *target_limit;
197
+ const char * src_ptr, * src_end;
198
+ VALUE ret;
199
+ Check_Class(other, rb_cUConverter);
200
+ Check_Type(src, T_STRING);
201
+ pivot=pivot2=pivotBuffer;
202
+ cnv = UCONVERTER(self);
203
+ other_cnv = UCONVERTER(other);
204
+ src_ptr = RSTRING(src)->ptr;
205
+ src_end = src_ptr + RSTRING(src)->len;
206
+ ret = rb_str_new2("");
207
+ ucnv_reset(other_cnv);
208
+ ucnv_reset(cnv);
209
+ target_limit = buffer+BUF_SIZE;
210
+ do {
211
+ status = U_ZERO_ERROR;
212
+ target = buffer;
213
+ ucnv_convertEx( other_cnv, cnv, &target, target_limit,
214
+ &src_ptr, src_end, pivotBuffer, &pivot, &pivot2, pivotBuffer+BUF_SIZE, FALSE, TRUE, &status);
215
+
216
+ if(U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {
217
+ ICU_RAISE(status);
218
+ }
219
+ rb_str_buf_cat(ret, buffer, (int32_t)(target-buffer));
220
+ } while (status == U_BUFFER_OVERFLOW_ERROR);
221
+ return ret;
222
+ }
223
+
224
+ /**
225
+ * call-seq:
226
+ * UConverter.convert(to_converter_name, from_converter_name, source) # => String
227
+ *
228
+ * Convert from one external charset to another.
229
+ * Internally, two converters are opened according to the name arguments, then the text is converted to and from using them.
230
+ */
231
+ VALUE icu4r_cnv_convert(VALUE self, VALUE to_conv_name, VALUE from_conv_name, VALUE src)
232
+ {
233
+ UErrorCode status = U_ZERO_ERROR;
234
+ char * target = NULL;
235
+ int32_t target_capa, len;
236
+ VALUE ret;
237
+ target_capa = ucnv_convert( RSTRING(to_conv_name)->ptr, RSTRING(from_conv_name)->ptr,
238
+ target, 0,
239
+ RSTRING(src)->ptr, RSTRING(src)->len, &status);
240
+ if(status == U_BUFFER_OVERFLOW_ERROR){
241
+ status = U_ZERO_ERROR;
242
+ target_capa += 1;
243
+ target = ALLOC_N(char, target_capa);
244
+ len = ucnv_convert( RSTRING(to_conv_name)->ptr, RSTRING(from_conv_name)->ptr,
245
+ target, target_capa,
246
+ RSTRING(src)->ptr, RSTRING(src)->len, &status);
247
+ if(U_FAILURE(status)){
248
+ free(target);
249
+ ICU_RAISE(status);
250
+ }
251
+ ret = rb_str_new(target, len);
252
+ free(target);
253
+ return ret;
254
+ } else ICU_RAISE(status);
255
+ return rb_str_new2("");
256
+ }
257
+ /**
258
+ * call-seq:
259
+ * UConverter.std_names(conv_name, std_name)
260
+ *
261
+ * Returns list of alias names for a given converter that are recognized by a standard; MIME and IANA are such standards
262
+ */
263
+ VALUE icu4r_cnv_standard_names(VALUE self, VALUE cnv_name, VALUE std_name)
264
+ {
265
+ UEnumeration * name_list;
266
+ UErrorCode status = U_ZERO_ERROR;
267
+ VALUE ret ;
268
+ char * name;
269
+ int32_t len;
270
+ Check_Type(cnv_name, T_STRING);
271
+ Check_Type(std_name, T_STRING);
272
+ name_list = ucnv_openStandardNames(RSTRING(cnv_name)->ptr, RSTRING(std_name)->ptr, &status);
273
+ ICU_RAISE(status);
274
+ ret = rb_ary_new();
275
+ while( (name = (char*)uenum_next(name_list, &len, &status))) {
276
+ rb_ary_push(ret, rb_str_new2(name));
277
+ }
278
+ uenum_close(name_list);
279
+ return ret;
280
+ }
281
+
282
+ /**
283
+ * call-seq:
284
+ * UConverter.all_names
285
+ *
286
+ * Returns all of the canonical converter names, regardless of the ability to open each converter.
287
+ */
288
+ VALUE icu4r_cnv_all_names(VALUE self)
289
+ {
290
+ UEnumeration * name_list;
291
+ UErrorCode status = U_ZERO_ERROR;
292
+ VALUE ret ;
293
+ char * name;
294
+ int32_t len;
295
+ name_list = ucnv_openAllNames(&status);
296
+ ICU_RAISE(status);
297
+ ret = rb_ary_new();
298
+ while( (name = (char*)uenum_next(name_list, &len, &status))) {
299
+ rb_ary_push(ret, rb_str_new2(name));
300
+ }
301
+ uenum_close(name_list);
302
+ return ret;
303
+ }
304
+ void initialize_converter(void)
305
+ {
306
+ rb_cUConverter = rb_define_class("UConverter", rb_cObject);
307
+ rb_define_alloc_func(rb_cUConverter, icu4r_cnv_alloc);
308
+ rb_define_method(rb_cUConverter, "initialize", icu4r_cnv_init, 1);
309
+
310
+ rb_define_method(rb_cUConverter, "to_u", icu4r_cnv_to_unicode, 1);
311
+ rb_define_method(rb_cUConverter, "from_u", icu4r_cnv_from_unicode, 1);
312
+ rb_define_method(rb_cUConverter, "reset", icu4r_cnv_reset, 0);
313
+ rb_define_method(rb_cUConverter, "name", icu4r_cnv_name, 0);
314
+ rb_define_method(rb_cUConverter, "convert", icu4r_cnv_convert_to, 2);
315
+ rb_define_method(rb_cUConverter, "subst_chars=", icu4r_cnv_set_subst_chars, 1);
316
+ rb_define_method(rb_cUConverter, "subst_chars", icu4r_cnv_get_subst_chars, 0);
317
+ rb_define_singleton_method(rb_cUConverter, "convert", icu4r_cnv_convert, 3);
318
+ rb_define_singleton_method(rb_cUConverter, "list_available", icu4r_cnv_list, 0);
319
+ rb_define_singleton_method(rb_cUConverter, "std_names", icu4r_cnv_standard_names, 2);
320
+ rb_define_singleton_method(rb_cUConverter, "all_names", icu4r_cnv_all_names, 0);
321
+ }
322
+
data/extconf.rb CHANGED
@@ -11,5 +11,7 @@ File.open("Makefile", "a") << <<-EOT
11
11
  check: $(DLLIB)
12
12
  @$(RUBY) $(srcdir)/test/test_ustring.rb
13
13
  @$(RUBY) $(srcdir)/test/test_calendar.rb
14
+ @$(RUBY) $(srcdir)/test/test_converter.rb
15
+ @$(RUBY) $(srcdir)/test/test_collator.rb
14
16
 
15
17
  EOT
data/fmt.cpp CHANGED
@@ -42,40 +42,74 @@ extern VALUE icu_ustr_new_set(const UChar * str, long len, long capa);
42
42
  }
43
43
  }
44
44
  }
45
- UnicodeString patString(pattern,len);
45
+ UnicodeString * patString = new UnicodeString(pattern,len);
46
46
  UErrorCode status = U_ZERO_ERROR;
47
- UnicodeString resultStr;
48
- FieldPosition fieldPosition(0);
49
- MessageFormat * fmt= new MessageFormat(patString,Locale(locale), status);
47
+ UnicodeString * resultStr = new UnicodeString();
48
+ FieldPosition * fieldPosition = new FieldPosition(0);
49
+ Locale * loc = new Locale(locale);
50
+ int32_t blen ;
51
+ UChar * buf ;
52
+ VALUE ret ;
53
+
54
+ MessageFormat * fmt= new MessageFormat(*patString,*loc, status);
50
55
  if( U_FAILURE(status) ){
51
- rb_raise(rb_eArgError, "Can't format: %s", u_errorName(status));
56
+ goto cleanup;
52
57
  }
53
- fmt->format(arguments,arg_len,resultStr,fieldPosition,status);
58
+ fmt->format(arguments,arg_len,*resultStr,*fieldPosition,status);
54
59
  if( U_FAILURE(status) ){
55
- rb_raise(rb_eArgError, "Can't format: %s", u_errorName(status));
60
+ goto cleanup;
56
61
  }
57
- int32_t blen = resultStr.length();
58
- UChar * buf = ALLOC_N(UChar, blen + 1);
59
- resultStr.extract(buf, blen, status);
60
- VALUE ret = icu_ustr_new( buf, blen);
62
+ blen = resultStr->length();
63
+ buf = ALLOC_N(UChar, blen + 1);
64
+ resultStr->extract(buf, blen, status);
65
+ ret = icu_ustr_new( buf, blen);
61
66
  free(buf);
62
- delete[] arguments;
63
- delete fmt;
64
- return ret;
67
+
68
+ cleanup:
69
+ delete fmt;
70
+ delete [] arguments;
71
+ delete patString;
72
+ delete resultStr;
73
+ delete fieldPosition;
74
+ delete loc;
75
+
76
+ if( U_FAILURE(status) ){
77
+ rb_raise(rb_eArgError, "Can't format: %s", u_errorName(status));
78
+ }else {
79
+ return ret;
80
+ }
65
81
  }
66
- VALUE icu_date_parse(UChar * str, int32_t str_len, char * locale, UChar * val, int32_t len)
82
+ UCalendar * icu_date_parse(UChar * str, int32_t str_len, char * locale, UChar * val, int32_t len)
67
83
  {
68
84
  UErrorCode status = U_ZERO_ERROR;
69
- SimpleDateFormat formatter(UnicodeString(str, str_len), Locale(locale), status);
85
+ UCalendar * c;
86
+ c = ucal_open(NULL, -1, NULL, UCAL_GREGORIAN, &status);
70
87
  if( U_FAILURE(status) ) {
71
88
  rb_raise(rb_eArgError, u_errorName(status));
72
89
  }
73
- status = U_ZERO_ERROR;
74
- UDate p_time = formatter.parse(UnicodeString(val, len), status);
90
+ UnicodeString * temp = new UnicodeString(str, str_len);
91
+ Locale * loc = new Locale(locale);
92
+ SimpleDateFormat * formatter = new SimpleDateFormat(*temp, *loc, status);
75
93
  if( U_FAILURE(status) ) {
76
- rb_raise(rb_eArgError, u_errorName(status));
94
+ delete formatter;
95
+ delete temp;
96
+ delete loc;
97
+ rb_raise(rb_eArgError, "Can't create formatter:%s", u_errorName(status));
98
+ }
99
+ formatter->setLenient( 0 );
100
+ UnicodeString * val_str = new UnicodeString(val, len);
101
+ UDate p_time = formatter->parse(*val_str, status);
102
+ ucal_setMillis(c, p_time, &status);
103
+ delete formatter;
104
+ delete temp;
105
+ delete loc;
106
+ delete val_str;
107
+
108
+ if( U_FAILURE(status) ) {
109
+ ucal_close(c);
110
+ rb_raise(rb_eArgError, "Can't parse date:%s", u_errorName(status));
77
111
  }
78
- return rb_time_new( (time_t) (p_time/1000.0), 0);
112
+ return c;
79
113
  }
80
114
  VALUE icu_transliterate(UChar * str, int32_t str_len, UChar * id, int32_t id_len, UChar * rules, int32_t rule_len)
81
115
  {
@@ -111,33 +145,6 @@ VALUE icu4r_cal_clone(VALUE cal)
111
145
  return Data_Wrap_Struct(rb_cUCalendar, 0, icu4r_cal_free, clon);
112
146
  }
113
147
  #define CPP_CALENDAR(obj) ((Calendar*)DATA_PTR(obj))
114
- VALUE icu4r_cal_before(VALUE cal, VALUE obj)
115
- {
116
- UErrorCode status = U_ZERO_ERROR;
117
- UBool answer;
118
- Check_Class( obj, rb_cUCalendar);
119
- Calendar *other = CPP_CALENDAR(obj);
120
- answer = CPP_CALENDAR(cal)->before(*other, status);
121
- if( U_FAILURE(status) ) rb_raise(rb_eArgError, u_errorName(status));
122
- return answer ? Qtrue : Qfalse;
123
- }
124
-
125
- VALUE icu4r_cal_time_equals(VALUE cal, VALUE obj)
126
- {
127
- UErrorCode status = U_ZERO_ERROR;
128
- UBool answer;
129
- Check_Class( obj, rb_cUCalendar);
130
- Calendar *other = CPP_CALENDAR(obj);
131
- answer = CPP_CALENDAR(cal)->equals(*other, status);
132
- if( U_FAILURE(status) ) rb_raise(rb_eArgError, u_errorName(status));
133
- return answer ? Qtrue : Qfalse;
134
- }
135
-
136
- VALUE icu4r_cal_after(VALUE cal, VALUE obj)
137
- {
138
- Check_Class( obj, rb_cUCalendar);
139
- return icu4r_cal_before(obj, cal);
140
- }
141
148
 
142
149
  VALUE icu4r_cal_equal(VALUE cal, VALUE obj)
143
150
  {
@@ -147,4 +154,3 @@ VALUE icu4r_cal_equal(VALUE cal, VALUE obj)
147
154
  return answer ? Qtrue : Qfalse;
148
155
  }
149
156
  }
150
-