icu 0.9.1 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/.travis.yml +11 -0
  4. data/Gemfile +3 -0
  5. data/LICENSE +20 -0
  6. data/README.md +69 -0
  7. data/Rakefile +38 -0
  8. data/benchmark/normalization.rb +106 -0
  9. data/benchmark/normalization_phrases.txt +1031 -0
  10. data/benchmark/normalization_result.txt +45 -0
  11. data/benchmark/normalization_wikip.txt +2838 -0
  12. data/ext/icu/extconf.rb +242 -0
  13. data/ext/icu/icu.c +18 -0
  14. data/ext/icu/icu.h +78 -0
  15. data/ext/icu/icu_charset_detector.c +192 -0
  16. data/ext/icu/icu_collator.c +138 -0
  17. data/ext/icu/icu_locale.c +852 -0
  18. data/ext/icu/icu_normalizer.c +122 -0
  19. data/ext/icu/icu_number_format.c +0 -0
  20. data/ext/icu/icu_spoof_checker.c +194 -0
  21. data/ext/icu/icu_transliterator.c +159 -0
  22. data/ext/icu/internal_encoding.c +38 -0
  23. data/ext/icu/internal_ustring.c +304 -0
  24. data/ext/icu/internal_utils.c +50 -0
  25. data/ext/icu/rb_errors.c +14 -0
  26. data/icu.gemspec +22 -0
  27. data/lib/icu.rb +6 -18
  28. data/lib/icu/charset_detector.rb +5 -0
  29. data/lib/icu/collator.rb +24 -0
  30. data/lib/icu/locale.rb +19 -0
  31. data/lib/icu/transliterator.rb +8 -0
  32. data/lib/icu/version.rb +3 -0
  33. data/spec/charset_detector_spec.rb +47 -0
  34. data/spec/collator_spec.rb +73 -0
  35. data/spec/locale_spec.rb +312 -0
  36. data/spec/normalizer_spec.rb +35 -0
  37. data/spec/spec_helper.rb +8 -0
  38. data/spec/spoof_checker_spec.rb +56 -0
  39. data/spec/transliterator_spec.rb +41 -0
  40. metadata +132 -55
  41. data/COPYING +0 -674
  42. data/COPYING.LESSER +0 -165
  43. data/README +0 -81
  44. data/ext/extconf.rb +0 -31
  45. data/ext/icu.c +0 -128
  46. data/ext/icu.h +0 -34
  47. data/ext/icu_locale.c +0 -330
  48. data/ext/icu_locale_country.c +0 -99
  49. data/ext/icu_locale_language.c +0 -99
  50. data/ext/icu_numeric.c +0 -161
  51. data/ext/icu_time.c +0 -391
  52. data/test/test_locale.rb +0 -73
  53. data/test/test_numeric.rb +0 -78
  54. data/test/test_time.rb +0 -75
@@ -0,0 +1,138 @@
1
+ #include "icu.h"
2
+ #include "unicode/ucol.h"
3
+
4
+ #define GET_COLLATOR(_data) icu_collator_data* _data; \
5
+ TypedData_Get_Struct(self, icu_collator_data, &icu_collator_type, _data)
6
+
7
+ VALUE rb_cICU_Collator;
8
+ static ID ID_valid;
9
+
10
+ typedef struct {
11
+ VALUE rb_instance;
12
+ int enc_idx; // TODO: reexamine the necessary for this?
13
+ UCollator* service;
14
+ } icu_collator_data;
15
+
16
+ static void collator_free(void* _this)
17
+ {
18
+ icu_collator_data* this = _this;
19
+ ucol_close(this->service);
20
+ }
21
+
22
+ static size_t collator_memsize(const void* _)
23
+ {
24
+ return sizeof(icu_collator_data);
25
+ }
26
+
27
+ static const rb_data_type_t icu_collator_type = {
28
+ "icu/collator",
29
+ {NULL, collator_free, collator_memsize,},
30
+ 0, 0,
31
+ RUBY_TYPED_FREE_IMMEDIATELY,
32
+ };
33
+
34
+ VALUE collator_alloc(VALUE self)
35
+ {
36
+ icu_collator_data* this;
37
+ return TypedData_Make_Struct(self, icu_collator_data, &icu_collator_type, this);
38
+ }
39
+
40
+ VALUE collator_initialize(VALUE self, VALUE locale)
41
+ {
42
+ StringValue(locale);
43
+ GET_COLLATOR(this);
44
+
45
+ this->enc_idx = 0;
46
+ this->rb_instance = self;
47
+ UErrorCode status = U_ZERO_ERROR;
48
+ this->service = ucol_open(StringValueCStr(locale), &status);
49
+ if (U_FAILURE(status)) {
50
+ icu_rb_raise_icu_error(status);
51
+ }
52
+
53
+ return self;
54
+ }
55
+
56
+ /*ULOC_ACTUAL_LOCALE
57
+ This is locale the data actually comes from.
58
+
59
+ ULOC_VALID_LOCALE
60
+ This is the most specific locale supported by ICU. */
61
+ VALUE collator_locale(int argc, VALUE* argv, VALUE self)
62
+ {
63
+ GET_COLLATOR(this);
64
+ VALUE valid;
65
+
66
+ rb_scan_args(argc, argv, "01", &valid);
67
+ if (NIL_P(valid)) {
68
+ valid = ID2SYM(ID_valid);
69
+ }
70
+
71
+ ULocDataLocaleType type = ULOC_VALID_LOCALE;
72
+ if (SYM2ID(valid) != ID_valid) {
73
+ type = ULOC_ACTUAL_LOCALE;
74
+ }
75
+ UErrorCode status = U_ZERO_ERROR;
76
+ const char* locale_str = ucol_getLocaleByType(this->service, type, &status);
77
+ if (U_FAILURE(status)) {
78
+ icu_rb_raise_icu_error(status);
79
+ }
80
+ return locale_str != NULL ? rb_str_new_cstr(locale_str) : Qnil;
81
+ }
82
+
83
+ VALUE collator_compare(VALUE self, VALUE str_a, VALUE str_b)
84
+ {
85
+ StringValue(str_a);
86
+ StringValue(str_b);
87
+ GET_COLLATOR(this);
88
+ UCollationResult result = UCOL_EQUAL;
89
+
90
+ if (icu_is_rb_str_as_utf_8(str_a) &&
91
+ icu_is_rb_str_as_utf_8(str_b)) {
92
+ UErrorCode status = U_ZERO_ERROR;
93
+ result = ucol_strcollUTF8(this->service,
94
+ RSTRING_PTR(str_a),
95
+ RSTRING_LENINT(str_a),
96
+ RSTRING_PTR(str_b),
97
+ RSTRING_LENINT(str_b),
98
+ &status);
99
+ if (U_FAILURE(status)) {
100
+ icu_rb_raise_icu_error(status);
101
+ }
102
+ } else {
103
+ VALUE tmp_a = icu_ustring_from_rb_str(str_a);
104
+ VALUE tmp_b = icu_ustring_from_rb_str(str_b);
105
+ result = ucol_strcoll(this->service,
106
+ icu_ustring_ptr(tmp_a), icu_ustring_len(tmp_a),
107
+ icu_ustring_ptr(tmp_b), icu_ustring_len(tmp_b));
108
+ }
109
+
110
+ return INT2NUM(result);
111
+ }
112
+
113
+ VALUE collator_rules(VALUE self)
114
+ {
115
+ GET_COLLATOR(this);
116
+ int32_t len;
117
+ const UChar* res = ucol_getRules(this->service, &len);
118
+ VALUE str = icu_ustring_from_uchar_str(res, len);
119
+ VALUE ret = icu_ustring_to_rb_enc_str(str);
120
+ icu_ustring_clear_ptr(str);
121
+ return ret;
122
+ }
123
+
124
+ void init_icu_collator(void)
125
+ {
126
+ ID_valid = rb_intern("valid");
127
+
128
+ rb_cICU_Collator = rb_define_class_under(rb_mICU, "Collator", rb_cObject);
129
+ rb_define_alloc_func(rb_cICU_Collator, collator_alloc);
130
+ rb_define_method(rb_cICU_Collator, "initialize", collator_initialize, 1);
131
+ rb_define_method(rb_cICU_Collator, "locale", collator_locale, -1);
132
+ rb_define_method(rb_cICU_Collator, "compare", collator_compare, 2);
133
+ rb_define_method(rb_cICU_Collator, "rules", collator_rules, 0);
134
+ }
135
+
136
+ #undef GET_COLLATOR
137
+
138
+ /* vim: set expandtab sws=4 sw=4: */
@@ -0,0 +1,852 @@
1
+ #include "icu.h"
2
+ #include "unicode/uloc.h"
3
+ #include <string.h>
4
+ #include <stdlib.h>
5
+
6
+ VALUE rb_cICU_Locale;
7
+ static ID ID_ltr;
8
+ static ID ID_rtl;
9
+ static ID ID_ttb;
10
+ static ID ID_btt;
11
+ static ID ID_unknown;
12
+
13
+ VALUE locale_initialize(VALUE self, VALUE id)
14
+ {
15
+ id = rb_str_enc_to_ascii_as_utf8(id);
16
+ rb_iv_set(self, "@id", id);
17
+ return self;
18
+ }
19
+
20
+ inline static VALUE locale_new_from_cstr(const char* str)
21
+ {
22
+ VALUE rb_str = rb_str_new_cstr(str);
23
+ return locale_initialize(rb_obj_alloc(rb_cICU_Locale), rb_str_enc_to_ascii_as_utf8(rb_str));
24
+ }
25
+
26
+ VALUE locale_singleton_available(VALUE klass)
27
+ {
28
+ int32_t len = uloc_countAvailable();
29
+ VALUE result = rb_ary_new2(len);
30
+ for (int32_t i = 0; i < len; ++i) {
31
+ rb_ary_push(result, locale_new_from_cstr(uloc_getAvailable(i)));
32
+ }
33
+ return result;
34
+ }
35
+
36
+ static inline VALUE locale_singleton_get_default_internal(void)
37
+ {
38
+ const char* locale = uloc_getDefault();
39
+ return locale_new_from_cstr(locale);
40
+ }
41
+
42
+ VALUE locale_singleton_get_default(VALUE klass)
43
+ {
44
+ return locale_singleton_get_default_internal();
45
+ }
46
+
47
+ VALUE locale_singleton_set_default(VALUE klass, VALUE val)
48
+ {
49
+ val = rb_str_enc_to_ascii_as_utf8(val);
50
+ UErrorCode status = U_ZERO_ERROR;
51
+ uloc_setDefault(RSTRING_PTR(val), &status);
52
+ if (U_FAILURE(status)) {
53
+ icu_rb_raise_icu_error(status);
54
+ }
55
+ return locale_singleton_get_default_internal();
56
+ }
57
+
58
+ VALUE locale_singleton_for_language_tag(VALUE klass, VALUE tag)
59
+ {
60
+ tag = rb_str_enc_to_ascii_as_utf8(tag);
61
+ int32_t buffer_capa = 64;
62
+ char* buffer = char_buffer_new(buffer_capa);
63
+ UErrorCode status = U_ZERO_ERROR;
64
+ int retried = FALSE;
65
+ int32_t len;
66
+ do {
67
+ len = uloc_forLanguageTag(RSTRING_PTR(tag),
68
+ buffer,
69
+ buffer_capa,
70
+ NULL /* parsedLength of tag */,
71
+ &status);
72
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
73
+ retried = TRUE;
74
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
75
+ char_buffer_resize(buffer, buffer_capa);
76
+ status = U_ZERO_ERROR;
77
+ } else if (U_FAILURE(status)) {
78
+ char_buffer_free(buffer);
79
+ icu_rb_raise_icu_error(status);
80
+ } else { // retried == true && U_SUCCESS(status)
81
+ break;
82
+ }
83
+ } while (retried);
84
+ buffer[len] = '\0';
85
+
86
+ VALUE loc = locale_new_from_cstr(buffer);
87
+ char_buffer_free(buffer);
88
+
89
+ return loc;
90
+ }
91
+
92
+ VALUE locale_singleton_for_lcid(VALUE klass, VALUE lcid)
93
+ {
94
+ uint32_t host_id = NUM2UINT(lcid);
95
+ int32_t buffer_capa = 64;
96
+ char* buffer = char_buffer_new(buffer_capa);
97
+ UErrorCode status = U_ZERO_ERROR;
98
+ int retried = FALSE;
99
+ int32_t len;
100
+ do {
101
+ len = uloc_getLocaleForLCID(host_id,
102
+ buffer,
103
+ buffer_capa,
104
+ &status);
105
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
106
+ retried = TRUE;
107
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
108
+ char_buffer_resize(buffer, buffer_capa);
109
+ status = U_ZERO_ERROR;
110
+ } else if (U_FAILURE(status)) {
111
+ char_buffer_free(buffer);
112
+ icu_rb_raise_icu_error(status);
113
+ } else { // retried == true && U_SUCCESS(status)
114
+ break;
115
+ }
116
+ } while (retried);
117
+ buffer[len] = '\0';
118
+
119
+ VALUE res = locale_new_from_cstr(buffer);
120
+ char_buffer_free(buffer);
121
+ return res;
122
+ }
123
+
124
+ VALUE locale_singleton_iso_countries(VALUE klass)
125
+ {
126
+ const char* const* ary = uloc_getISOCountries();
127
+ VALUE result = rb_ary_new2(250); // the number of countries now is 249.
128
+ while (*ary != NULL) {
129
+ // NUL-terminated C string allocated by ICU, so use the buffer function
130
+ rb_ary_push(result, char_buffer_to_rb_str(*ary));
131
+ ary++;
132
+ }
133
+ return result;
134
+ }
135
+
136
+ VALUE locale_singleton_iso_languages(VALUE klass)
137
+ {
138
+ const char* const* ary = uloc_getISOLanguages();
139
+ VALUE result = rb_ary_new2(250); // the number of countries now is 249.
140
+ while (*ary != NULL) {
141
+ // NUL-terminated C string allocated by ICU, so use the buffer function
142
+ rb_ary_push(result, char_buffer_to_rb_str(*ary));
143
+ ary++;
144
+ }
145
+ return result;
146
+ }
147
+
148
+ VALUE locale_language_tag(int argc, VALUE* argv, VALUE self)
149
+ {
150
+ VALUE strict;
151
+ rb_scan_args(argc, argv, "01", &strict);
152
+ if (strict != Qtrue) {
153
+ strict = Qfalse;
154
+ }
155
+
156
+ VALUE id = rb_iv_get(self, "@id");
157
+ int32_t buffer_capa = 64;
158
+ char* buffer = char_buffer_new(buffer_capa);
159
+ UErrorCode status = U_ZERO_ERROR;
160
+ int retried = FALSE;
161
+ int32_t len;
162
+ do {
163
+ len = uloc_toLanguageTag(RSTRING_PTR(id),
164
+ buffer,
165
+ buffer_capa,
166
+ strict == Qtrue ? TRUE : FALSE,
167
+ &status);
168
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
169
+ retried = TRUE;
170
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
171
+ char_buffer_resize(buffer, buffer_capa);
172
+ status = U_ZERO_ERROR;
173
+ } else if (U_FAILURE(status)) {
174
+ char_buffer_free(buffer);
175
+ icu_rb_raise_icu_error(status);
176
+ } else { // retried == true && U_SUCCESS(status)
177
+ break;
178
+ }
179
+ } while (retried);
180
+ buffer[len] = '\0';
181
+
182
+ VALUE loc = char_buffer_to_rb_str(buffer);
183
+ char_buffer_free(buffer);
184
+ return loc;
185
+ }
186
+
187
+ VALUE locale_lcid(VALUE self)
188
+ {
189
+ VALUE id = rb_iv_get(self, "@id");
190
+ return ULONG2NUM(uloc_getLCID(RSTRING_PTR(id)));
191
+ }
192
+
193
+ VALUE locale_display_country(int argc, VALUE* argv, VALUE self)
194
+ {
195
+ VALUE display_locale;
196
+ rb_scan_args(argc, argv, "01", &display_locale);
197
+ if (!NIL_P(display_locale)) {
198
+ display_locale = rb_str_enc_to_ascii_as_utf8(display_locale);
199
+ }
200
+
201
+ VALUE id = rb_iv_get(self, "@id");
202
+ VALUE buffer = icu_ustring_init_with_capa_enc(64, ICU_RUBY_ENCODING_INDEX);
203
+ UErrorCode status = U_ZERO_ERROR;
204
+ int retried = FALSE;
205
+ int32_t len;
206
+
207
+ do {
208
+ len = uloc_getDisplayCountry(RSTRING_PTR(id),
209
+ NIL_P(display_locale) ? NULL : RSTRING_PTR(display_locale),
210
+ icu_ustring_ptr(buffer),
211
+ icu_ustring_capa(buffer),
212
+ &status);
213
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
214
+ retried = TRUE;
215
+ icu_ustring_resize(buffer, len + RUBY_C_STRING_TERMINATOR_SIZE);
216
+ status = U_ZERO_ERROR;
217
+ } else if (U_FAILURE(status)) {
218
+ icu_rb_raise_icu_error(status);
219
+ } else { // retried == true && U_SUCCESS(status)
220
+ break;
221
+ }
222
+ } while (retried);
223
+ return icu_ustring_to_rb_enc_str_with_len(buffer, len);
224
+ }
225
+
226
+ VALUE locale_display_language(int argc, VALUE* argv, VALUE self)
227
+ {
228
+ VALUE display_locale;
229
+ rb_scan_args(argc, argv, "01", &display_locale);
230
+ // if use NULL instead of the default, ICU fails which doesn't align with the doc at 59.1
231
+ display_locale = rb_str_enc_to_ascii_as_utf8((NIL_P(display_locale) ?
232
+ locale_singleton_get_default_internal() :
233
+ display_locale));
234
+
235
+ VALUE id = rb_iv_get(self, "@id");
236
+ VALUE buffer = icu_ustring_init_with_capa_enc(64, ICU_RUBY_ENCODING_INDEX);
237
+ UErrorCode status = U_ZERO_ERROR;
238
+ int retried = FALSE;
239
+ int32_t len;
240
+ do {
241
+ len = uloc_getDisplayLanguage(RSTRING_PTR(id),
242
+ RSTRING_PTR(display_locale),
243
+ icu_ustring_ptr(buffer),
244
+ icu_ustring_capa(buffer),
245
+ &status);
246
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
247
+ retried = TRUE;
248
+ icu_ustring_resize(buffer, len + RUBY_C_STRING_TERMINATOR_SIZE);
249
+ status = U_ZERO_ERROR;
250
+ } else if (U_FAILURE(status)) {
251
+ icu_rb_raise_icu_error(status);
252
+ } else { // retried == true && U_SUCCESS(status)
253
+ break;
254
+ }
255
+ } while (retried);
256
+ return icu_ustring_to_rb_enc_str_with_len(buffer, len);
257
+ }
258
+
259
+ VALUE locale_display_name(int argc, VALUE* argv, VALUE self)
260
+ {
261
+ VALUE display_locale;
262
+ rb_scan_args(argc, argv, "01", &display_locale);
263
+ if (!NIL_P(display_locale)) {
264
+ display_locale = rb_str_enc_to_ascii_as_utf8(display_locale);
265
+ }
266
+
267
+ VALUE id = rb_iv_get(self, "@id");
268
+ VALUE buffer = icu_ustring_init_with_capa_enc(64, ICU_RUBY_ENCODING_INDEX);
269
+ UErrorCode status = U_ZERO_ERROR;
270
+ int retried = FALSE;
271
+ int32_t len;
272
+ do {
273
+ len = uloc_getDisplayName(RSTRING_PTR(id),
274
+ NIL_P(display_locale) ? NULL : RSTRING_PTR(display_locale),
275
+ icu_ustring_ptr(buffer),
276
+ icu_ustring_capa(buffer),
277
+ &status);
278
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
279
+ retried = TRUE;
280
+ icu_ustring_resize(buffer, len + RUBY_C_STRING_TERMINATOR_SIZE);
281
+ status = U_ZERO_ERROR;
282
+ } else if (U_FAILURE(status)) {
283
+ icu_rb_raise_icu_error(status);
284
+ } else { // retried == true && U_SUCCESS(status)
285
+ break;
286
+ }
287
+ } while (retried);
288
+ return icu_ustring_to_rb_enc_str_with_len(buffer, len);
289
+ }
290
+
291
+ VALUE locale_display_script(int argc, VALUE* argv, VALUE self)
292
+ {
293
+ VALUE display_locale;
294
+ rb_scan_args(argc, argv, "01", &display_locale);
295
+ if (!NIL_P(display_locale)) {
296
+ display_locale = rb_str_enc_to_ascii_as_utf8(display_locale);
297
+ }
298
+
299
+ VALUE id = rb_iv_get(self, "@id");
300
+ VALUE buffer = icu_ustring_init_with_capa_enc(64, ICU_RUBY_ENCODING_INDEX);
301
+ UErrorCode status = U_ZERO_ERROR;
302
+ int retried = FALSE;
303
+ int32_t len;
304
+ do {
305
+ len = uloc_getDisplayScript(RSTRING_PTR(id),
306
+ NIL_P(display_locale) ? NULL : RSTRING_PTR(display_locale),
307
+ icu_ustring_ptr(buffer),
308
+ icu_ustring_capa(buffer),
309
+ &status);
310
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
311
+ retried = TRUE;
312
+ icu_ustring_resize(buffer, len + RUBY_C_STRING_TERMINATOR_SIZE);
313
+ status = U_ZERO_ERROR;
314
+ } else if (U_FAILURE(status)) {
315
+ icu_rb_raise_icu_error(status);
316
+ } else { // retried == true && U_SUCCESS(status)
317
+ break;
318
+ }
319
+ } while (retried);
320
+ return icu_ustring_to_rb_enc_str_with_len(buffer, len);
321
+ }
322
+
323
+ VALUE locale_display_variant(int argc, VALUE* argv, VALUE self)
324
+ {
325
+ VALUE display_locale;
326
+ rb_scan_args(argc, argv, "01", &display_locale);
327
+ if (!NIL_P(display_locale)) {
328
+ display_locale = rb_str_enc_to_ascii_as_utf8(display_locale);
329
+ }
330
+
331
+ VALUE id = rb_iv_get(self, "@id");
332
+ VALUE buffer = icu_ustring_init_with_capa_enc(64, ICU_RUBY_ENCODING_INDEX);
333
+ UErrorCode status = U_ZERO_ERROR;
334
+ int retried = FALSE;
335
+ int32_t len;
336
+ do {
337
+ len = uloc_getDisplayVariant(RSTRING_PTR(id),
338
+ NIL_P(display_locale) ? NULL : RSTRING_PTR(display_locale),
339
+ icu_ustring_ptr(buffer),
340
+ icu_ustring_capa(buffer),
341
+ &status);
342
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
343
+ retried = TRUE;
344
+ icu_ustring_resize(buffer, len + RUBY_C_STRING_TERMINATOR_SIZE);
345
+ status = U_ZERO_ERROR;
346
+ } else if (U_FAILURE(status)) {
347
+ icu_rb_raise_icu_error(status);
348
+ } else { // retried == true && U_SUCCESS(status)
349
+ break;
350
+ }
351
+ } while (retried);
352
+ return icu_ustring_to_rb_enc_str_with_len(buffer, len);
353
+ }
354
+
355
+ VALUE locale_name(VALUE self)
356
+ {
357
+ int32_t buffer_capa = 64;
358
+ VALUE id = rb_iv_get(self, "@id");
359
+ char* buffer = char_buffer_new(buffer_capa);
360
+ UErrorCode status = U_ZERO_ERROR;
361
+ int retried = FALSE;
362
+ int32_t len;
363
+ do {
364
+ len = uloc_getName(RSTRING_PTR(id),
365
+ buffer,
366
+ buffer_capa,
367
+ &status);
368
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
369
+ retried = TRUE;
370
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
371
+ char_buffer_resize(buffer, buffer_capa);
372
+ status = U_ZERO_ERROR;
373
+ } else if (U_FAILURE(status)) {
374
+ char_buffer_free(buffer);
375
+ icu_rb_raise_icu_error(status);
376
+ } else { // retried == true && U_SUCCESS(status)
377
+ break;
378
+ }
379
+ } while (retried);
380
+ buffer[len] = '\0';
381
+
382
+ VALUE res = char_buffer_to_rb_str(buffer);
383
+ char_buffer_free(buffer);
384
+ return res;
385
+ }
386
+
387
+ VALUE locale_base_name(VALUE self)
388
+ {
389
+ int32_t buffer_capa = 64;
390
+ VALUE id = rb_iv_get(self, "@id");
391
+ char* buffer = char_buffer_new(buffer_capa);
392
+ UErrorCode status = U_ZERO_ERROR;
393
+ int retried = FALSE;
394
+ int32_t len;
395
+ do {
396
+ len = uloc_getBaseName(RSTRING_PTR(id),
397
+ buffer,
398
+ buffer_capa,
399
+ &status);
400
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
401
+ retried = TRUE;
402
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
403
+ char_buffer_resize(buffer, buffer_capa);
404
+ status = U_ZERO_ERROR;
405
+ } else if (U_FAILURE(status)) {
406
+ char_buffer_free(buffer);
407
+ icu_rb_raise_icu_error(status);
408
+ } else { // retried == true && U_SUCCESS(status)
409
+ break;
410
+ }
411
+ } while (retried);
412
+ buffer[len] = '\0';
413
+
414
+ VALUE res = char_buffer_to_rb_str(buffer);
415
+ char_buffer_free(buffer);
416
+ return res;
417
+ }
418
+
419
+ VALUE locale_canonical_name(VALUE self)
420
+ {
421
+ int32_t buffer_capa = 64;
422
+ VALUE id = rb_iv_get(self, "@id");
423
+ char* buffer = char_buffer_new(buffer_capa);
424
+ UErrorCode status = U_ZERO_ERROR;
425
+ int retried = FALSE;
426
+ int32_t len;
427
+ do {
428
+ len = uloc_canonicalize(RSTRING_PTR(id),
429
+ buffer,
430
+ buffer_capa,
431
+ &status);
432
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
433
+ retried = TRUE;
434
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
435
+ char_buffer_resize(buffer, buffer_capa);
436
+ status = U_ZERO_ERROR;
437
+ } else if (U_FAILURE(status)) {
438
+ char_buffer_free(buffer);
439
+ icu_rb_raise_icu_error(status);
440
+ } else { // retried == true && U_SUCCESS(status)
441
+ break;
442
+ }
443
+ } while (retried);
444
+ buffer[len] = '\0';
445
+
446
+ VALUE res = char_buffer_to_rb_str(buffer);
447
+ char_buffer_free(buffer);
448
+ return res;
449
+ }
450
+
451
+ VALUE locale_parent(VALUE self)
452
+ {
453
+ int32_t buffer_capa = 64;
454
+ VALUE id = rb_iv_get(self, "@id");
455
+ char* buffer = char_buffer_new(buffer_capa);
456
+ UErrorCode status = U_ZERO_ERROR;
457
+ int retried = FALSE;
458
+ int32_t len;
459
+ do {
460
+ len = uloc_getParent(RSTRING_PTR(id),
461
+ buffer,
462
+ buffer_capa,
463
+ &status);
464
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
465
+ retried = TRUE;
466
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
467
+ char_buffer_resize(buffer, buffer_capa);
468
+ status = U_ZERO_ERROR;
469
+ } else if (U_FAILURE(status)) {
470
+ char_buffer_free(buffer);
471
+ icu_rb_raise_icu_error(status);
472
+ } else { // retried == true && U_SUCCESS(status)
473
+ break;
474
+ }
475
+ } while (retried);
476
+ buffer[len] = '\0';
477
+
478
+ VALUE res = char_buffer_to_rb_str(buffer);
479
+ char_buffer_free(buffer);
480
+ return res;
481
+ }
482
+
483
+ VALUE locale_iso_country(VALUE self)
484
+ {
485
+ VALUE id = rb_iv_get(self, "@id");
486
+ return rb_str_new_cstr(uloc_getISO3Country(RSTRING_PTR(id)));
487
+ }
488
+
489
+ VALUE locale_iso_language(VALUE self)
490
+ {
491
+ VALUE id = rb_iv_get(self, "@id");
492
+ return rb_str_new_cstr(uloc_getISO3Language(RSTRING_PTR(id)));
493
+ }
494
+
495
+ VALUE locale_keyword(VALUE self, VALUE keyword)
496
+ {
497
+ keyword = rb_str_enc_to_ascii_as_utf8(keyword);
498
+ int32_t buffer_capa = 64;
499
+ VALUE id = rb_iv_get(self, "@id");
500
+ char* buffer = char_buffer_new(buffer_capa);
501
+ UErrorCode status = U_ZERO_ERROR;
502
+ int retried = FALSE;
503
+ int32_t len;
504
+ do {
505
+ len = uloc_getKeywordValue(RSTRING_PTR(id),
506
+ RSTRING_PTR(keyword),
507
+ buffer,
508
+ buffer_capa,
509
+ &status);
510
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
511
+ retried = TRUE;
512
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
513
+ char_buffer_resize(buffer, buffer_capa);
514
+ status = U_ZERO_ERROR;
515
+ } else if (U_FAILURE(status)) {
516
+ char_buffer_free(buffer);
517
+ icu_rb_raise_icu_error(status);
518
+ } else { // retried == true && U_SUCCESS(status)
519
+ break;
520
+ }
521
+ } while (retried);
522
+ buffer[len] = '\0';
523
+
524
+ VALUE res = char_buffer_to_rb_str(buffer);
525
+ char_buffer_free(buffer);
526
+ return res;
527
+ }
528
+
529
+ VALUE locale_keywords(VALUE self)
530
+ {
531
+ VALUE id = rb_iv_get(self, "@id");
532
+ UErrorCode status = U_ZERO_ERROR;
533
+ UEnumeration* result = uloc_openKeywords(RSTRING_PTR(id), &status);
534
+ return icu_enum_to_rb_ary(result, status, 3);
535
+ }
536
+
537
+ // TODO: check the keyword and value
538
+ VALUE locale_with_keyword(VALUE self, VALUE keyword, VALUE value)
539
+ {
540
+ keyword = rb_str_enc_to_ascii_as_utf8(keyword);
541
+ int32_t len_keyword = RSTRING_LENINT(keyword);
542
+ if (len_keyword == 0) {
543
+ rb_raise(rb_eArgError, "invalid value for keyword: %+"PRIsVALUE, self);
544
+ }
545
+ if (!NIL_P(value)) {
546
+ value = rb_str_enc_to_ascii_as_utf8(value);
547
+ }
548
+
549
+ VALUE id = rb_iv_get(self, "@id");
550
+ int32_t len_id = RSTRING_LENINT(id);
551
+ int32_t buffer_capa = 64 + len_id + len_keyword + (NIL_P(value) ? 0 : RSTRING_LENINT(value));
552
+ char* buffer = char_buffer_new(buffer_capa);
553
+ memmove(buffer, RSTRING_PTR(id), len_id);
554
+ buffer[len_id] = '\0';
555
+
556
+ UErrorCode status = U_ZERO_ERROR;
557
+ int retried = FALSE;
558
+ int32_t buffer_required;
559
+ do {
560
+ buffer_required = uloc_setKeywordValue(RSTRING_PTR(keyword),
561
+ NIL_P(value) ? NULL : RSTRING_PTR(value),
562
+ buffer,
563
+ buffer_capa,
564
+ &status);
565
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
566
+ retried = TRUE;
567
+ buffer_capa = buffer_required;
568
+ char_buffer_resize(buffer, buffer_capa);
569
+ status = U_ZERO_ERROR;
570
+ } else if (U_FAILURE(status)) {
571
+ char_buffer_free(buffer);
572
+ icu_rb_raise_icu_error(status);
573
+ } else { // retried == true && U_SUCCESS(status)
574
+ break;
575
+ }
576
+ } while (retried);
577
+ // NUL is inserted by ICU when buffer is enough
578
+
579
+ VALUE res = locale_new_from_cstr(buffer);
580
+ char_buffer_free(buffer);
581
+ return res;
582
+ }
583
+
584
+ static inline VALUE locale_layout_symbol(ULayoutType result)
585
+ {
586
+ switch (result) {
587
+ case ULOC_LAYOUT_LTR:
588
+ return ID2SYM(ID_ltr);
589
+ case ULOC_LAYOUT_RTL:
590
+ return ID2SYM(ID_rtl);
591
+ case ULOC_LAYOUT_TTB:
592
+ return ID2SYM(ID_ttb);
593
+ case ULOC_LAYOUT_BTT:
594
+ return ID2SYM(ID_btt);
595
+ case ULOC_LAYOUT_UNKNOWN: default:
596
+ return ID2SYM(ID_unknown);
597
+ }
598
+ }
599
+
600
+ VALUE locale_character_orientation(VALUE self)
601
+ {
602
+ VALUE id = rb_iv_get(self, "@id");
603
+ UErrorCode status = U_ZERO_ERROR;
604
+ ULayoutType result = uloc_getCharacterOrientation(RSTRING_PTR(id), &status);
605
+ return locale_layout_symbol(result);
606
+ }
607
+
608
+ VALUE locale_line_orientation(VALUE self)
609
+ {
610
+ VALUE id = rb_iv_get(self, "@id");
611
+ UErrorCode status = U_ZERO_ERROR;
612
+ ULayoutType result = uloc_getLineOrientation(RSTRING_PTR(id), &status);
613
+ return locale_layout_symbol(result);
614
+ }
615
+
616
+ VALUE locale_country(VALUE self)
617
+ {
618
+ VALUE id = rb_iv_get(self, "@id");
619
+ int32_t buffer_capa = 64;
620
+ char* buffer = char_buffer_new(buffer_capa);
621
+ UErrorCode status = U_ZERO_ERROR;
622
+ int retried = FALSE;
623
+ int32_t len;
624
+ do {
625
+ len = uloc_getCountry(RSTRING_PTR(id),
626
+ buffer,
627
+ buffer_capa,
628
+ &status);
629
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
630
+ retried = TRUE;
631
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
632
+ char_buffer_resize(buffer, buffer_capa);
633
+ status = U_ZERO_ERROR;
634
+ } else if (U_FAILURE(status)) {
635
+ char_buffer_free(buffer);
636
+ icu_rb_raise_icu_error(status);
637
+ } else { // retried == true && U_SUCCESS(status)
638
+ break;
639
+ }
640
+ } while (retried);
641
+ buffer[len] = '\0';
642
+
643
+ VALUE loc = char_buffer_to_rb_str(buffer);
644
+ char_buffer_free(buffer);
645
+ return loc;
646
+ }
647
+
648
+ VALUE locale_language(VALUE self)
649
+ {
650
+ VALUE id = rb_iv_get(self, "@id");
651
+ int32_t buffer_capa = 64;
652
+ char* buffer = char_buffer_new(buffer_capa);
653
+ UErrorCode status = U_ZERO_ERROR;
654
+ int retried = FALSE;
655
+ int32_t len;
656
+ do {
657
+ len = uloc_getLanguage(RSTRING_PTR(id),
658
+ buffer,
659
+ buffer_capa,
660
+ &status);
661
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
662
+ retried = TRUE;
663
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
664
+ char_buffer_resize(buffer, buffer_capa);
665
+ status = U_ZERO_ERROR;
666
+ } else if (U_FAILURE(status)) {
667
+ char_buffer_free(buffer);
668
+ icu_rb_raise_icu_error(status);
669
+ } else { // retried == true && U_SUCCESS(status)
670
+ break;
671
+ }
672
+ } while (retried);
673
+ buffer[len] = '\0';
674
+
675
+ VALUE loc = char_buffer_to_rb_str(buffer);
676
+ char_buffer_free(buffer);
677
+ return loc;
678
+ }
679
+
680
+ VALUE locale_script(VALUE self)
681
+ {
682
+ VALUE id = rb_iv_get(self, "@id");
683
+ int32_t buffer_capa = 64;
684
+ char* buffer = char_buffer_new(buffer_capa);
685
+ UErrorCode status = U_ZERO_ERROR;
686
+ int retried = FALSE;
687
+ int32_t len;
688
+ do {
689
+ len = uloc_getScript(RSTRING_PTR(id),
690
+ buffer,
691
+ buffer_capa,
692
+ &status);
693
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
694
+ retried = TRUE;
695
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
696
+ char_buffer_resize(buffer, buffer_capa);
697
+ status = U_ZERO_ERROR;
698
+ } else if (U_FAILURE(status)) {
699
+ char_buffer_free(buffer);
700
+ icu_rb_raise_icu_error(status);
701
+ } else { // retried == true && U_SUCCESS(status)
702
+ break;
703
+ }
704
+ } while (retried);
705
+ buffer[len] = '\0';
706
+
707
+ VALUE res = char_buffer_to_rb_str(buffer);
708
+ char_buffer_free(buffer);
709
+ return res;
710
+ }
711
+
712
+ VALUE locale_variant(VALUE self)
713
+ {
714
+ VALUE id = rb_iv_get(self, "@id");
715
+ int32_t buffer_capa = 64;
716
+ char* buffer = char_buffer_new(buffer_capa);
717
+ UErrorCode status = U_ZERO_ERROR;
718
+ int retried = FALSE;
719
+ int32_t len;
720
+ do {
721
+ len = uloc_getVariant(RSTRING_PTR(id),
722
+ buffer,
723
+ buffer_capa,
724
+ &status);
725
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
726
+ retried = TRUE;
727
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
728
+ char_buffer_resize(buffer, buffer_capa);
729
+ status = U_ZERO_ERROR;
730
+ } else if (U_FAILURE(status)) {
731
+ char_buffer_free(buffer);
732
+ icu_rb_raise_icu_error(status);
733
+ } else { // retried == true && U_SUCCESS(status)
734
+ break;
735
+ }
736
+ } while (retried);
737
+ buffer[len] = '\0';
738
+
739
+ VALUE res = char_buffer_to_rb_str(buffer);
740
+ char_buffer_free(buffer);
741
+ return res;
742
+ }
743
+
744
+ VALUE locale_with_likely_subtags(VALUE self)
745
+ {
746
+ VALUE id = rb_iv_get(self, "@id");
747
+ int32_t buffer_capa = 64;
748
+ char* buffer = char_buffer_new(buffer_capa);
749
+ UErrorCode status = U_ZERO_ERROR;
750
+ int retried = FALSE;
751
+ int32_t len;
752
+ do {
753
+ len = uloc_addLikelySubtags(RSTRING_PTR(id),
754
+ buffer,
755
+ buffer_capa,
756
+ &status);
757
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
758
+ retried = TRUE;
759
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
760
+ char_buffer_resize(buffer, buffer_capa);
761
+ status = U_ZERO_ERROR;
762
+ } else if (U_FAILURE(status)) {
763
+ char_buffer_free(buffer);
764
+ icu_rb_raise_icu_error(status);
765
+ } else { // retried == true && U_SUCCESS(status)
766
+ break;
767
+ }
768
+ } while (retried);
769
+ buffer[len] = '\0';
770
+
771
+ VALUE res = locale_new_from_cstr(buffer);
772
+ char_buffer_free(buffer);
773
+ return res;
774
+ }
775
+
776
+ VALUE locale_with_minimized_subtags(VALUE self)
777
+ {
778
+ VALUE id = rb_iv_get(self, "@id");
779
+ int32_t buffer_capa = 64;
780
+ char* buffer = char_buffer_new(buffer_capa);
781
+ UErrorCode status = U_ZERO_ERROR;
782
+ int retried = FALSE;
783
+ int32_t len;
784
+ do {
785
+ len = uloc_minimizeSubtags(RSTRING_PTR(id),
786
+ buffer,
787
+ buffer_capa,
788
+ &status);
789
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
790
+ retried = TRUE;
791
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
792
+ char_buffer_resize(buffer, buffer_capa);
793
+ status = U_ZERO_ERROR;
794
+ } else if (U_FAILURE(status)) {
795
+ char_buffer_free(buffer);
796
+ icu_rb_raise_icu_error(status);
797
+ } else { // retried == true && U_SUCCESS(status)
798
+ break;
799
+ }
800
+ } while (retried);
801
+ buffer[len] = '\0';
802
+
803
+ VALUE res = locale_new_from_cstr(buffer);
804
+ char_buffer_free(buffer);
805
+ return res;
806
+ }
807
+
808
+ void init_icu_locale(void)
809
+ {
810
+ ID_ltr = rb_intern("ltr");
811
+ ID_rtl = rb_intern("rtl");
812
+ ID_ttb = rb_intern("ttb");
813
+ ID_btt = rb_intern("btt");
814
+ ID_unknown = rb_intern("unknown");
815
+
816
+ rb_cICU_Locale = rb_define_class_under(rb_mICU, "Locale", rb_cObject);
817
+ rb_define_singleton_method(rb_cICU_Locale, "available", locale_singleton_available, 0);
818
+ rb_define_singleton_method(rb_cICU_Locale, "default", locale_singleton_get_default, 0);
819
+ rb_define_singleton_method(rb_cICU_Locale, "default=", locale_singleton_set_default, 1);
820
+ rb_define_singleton_method(rb_cICU_Locale, "for_language_tag", locale_singleton_for_language_tag, 1);
821
+ rb_define_singleton_method(rb_cICU_Locale, "for_lcid", locale_singleton_for_lcid, 1);
822
+ rb_define_singleton_method(rb_cICU_Locale, "iso_countries", locale_singleton_iso_countries, 0);
823
+ rb_define_singleton_method(rb_cICU_Locale, "iso_languages", locale_singleton_iso_languages, 0);
824
+ rb_define_method(rb_cICU_Locale, "initialize", locale_initialize, 1);
825
+ rb_define_method(rb_cICU_Locale, "language_tag", locale_language_tag, -1);
826
+ rb_define_method(rb_cICU_Locale, "lcid", locale_lcid, 0);
827
+ rb_define_method(rb_cICU_Locale, "display_country", locale_display_country, -1);
828
+ rb_define_method(rb_cICU_Locale, "display_language", locale_display_language, -1);
829
+ rb_define_method(rb_cICU_Locale, "display_name", locale_display_name, -1);
830
+ rb_define_method(rb_cICU_Locale, "display_script", locale_display_script, -1);
831
+ rb_define_method(rb_cICU_Locale, "display_variant", locale_display_variant, -1);
832
+ rb_define_method(rb_cICU_Locale, "name", locale_name, 0);
833
+ rb_define_method(rb_cICU_Locale, "base_name", locale_base_name, 0);
834
+ rb_define_method(rb_cICU_Locale, "canonical_name", locale_canonical_name, 0);
835
+ rb_define_method(rb_cICU_Locale, "parent", locale_parent, 0);
836
+ rb_define_method(rb_cICU_Locale, "iso_country", locale_iso_country, 0);
837
+ rb_define_method(rb_cICU_Locale, "iso_language", locale_iso_language, 0);
838
+ rb_define_method(rb_cICU_Locale, "keyword", locale_keyword, 1);
839
+ rb_define_method(rb_cICU_Locale, "keywords", locale_keywords, 0);
840
+ rb_define_method(rb_cICU_Locale, "with_keyword", locale_with_keyword, 2);
841
+ rb_define_method(rb_cICU_Locale, "character_orientation", locale_character_orientation, 0);
842
+ rb_define_method(rb_cICU_Locale, "line_orientation", locale_line_orientation, 0);
843
+ rb_define_method(rb_cICU_Locale, "country", locale_country, 0);
844
+ rb_define_method(rb_cICU_Locale, "language", locale_language, 0);
845
+ rb_define_method(rb_cICU_Locale, "script", locale_script, 0);
846
+ rb_define_method(rb_cICU_Locale, "variant", locale_variant, 0);
847
+ rb_define_method(rb_cICU_Locale, "with_likely_subtags", locale_with_likely_subtags, 0);
848
+ rb_define_method(rb_cICU_Locale, "with_minimized_subtags", locale_with_minimized_subtags, 0);
849
+
850
+ }
851
+
852
+ /* vim: set expandtab sws=4 sw=4: */