icu 0.9.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/.travis.yml +11 -0
  4. data/Gemfile +3 -0
  5. data/LICENSE +20 -0
  6. data/README.md +69 -0
  7. data/Rakefile +38 -0
  8. data/benchmark/normalization.rb +106 -0
  9. data/benchmark/normalization_phrases.txt +1031 -0
  10. data/benchmark/normalization_result.txt +45 -0
  11. data/benchmark/normalization_wikip.txt +2838 -0
  12. data/ext/icu/extconf.rb +242 -0
  13. data/ext/icu/icu.c +18 -0
  14. data/ext/icu/icu.h +78 -0
  15. data/ext/icu/icu_charset_detector.c +192 -0
  16. data/ext/icu/icu_collator.c +138 -0
  17. data/ext/icu/icu_locale.c +852 -0
  18. data/ext/icu/icu_normalizer.c +122 -0
  19. data/ext/icu/icu_number_format.c +0 -0
  20. data/ext/icu/icu_spoof_checker.c +194 -0
  21. data/ext/icu/icu_transliterator.c +159 -0
  22. data/ext/icu/internal_encoding.c +38 -0
  23. data/ext/icu/internal_ustring.c +304 -0
  24. data/ext/icu/internal_utils.c +50 -0
  25. data/ext/icu/rb_errors.c +14 -0
  26. data/icu.gemspec +22 -0
  27. data/lib/icu.rb +6 -18
  28. data/lib/icu/charset_detector.rb +5 -0
  29. data/lib/icu/collator.rb +24 -0
  30. data/lib/icu/locale.rb +19 -0
  31. data/lib/icu/transliterator.rb +8 -0
  32. data/lib/icu/version.rb +3 -0
  33. data/spec/charset_detector_spec.rb +47 -0
  34. data/spec/collator_spec.rb +73 -0
  35. data/spec/locale_spec.rb +312 -0
  36. data/spec/normalizer_spec.rb +35 -0
  37. data/spec/spec_helper.rb +8 -0
  38. data/spec/spoof_checker_spec.rb +56 -0
  39. data/spec/transliterator_spec.rb +41 -0
  40. metadata +132 -55
  41. data/COPYING +0 -674
  42. data/COPYING.LESSER +0 -165
  43. data/README +0 -81
  44. data/ext/extconf.rb +0 -31
  45. data/ext/icu.c +0 -128
  46. data/ext/icu.h +0 -34
  47. data/ext/icu_locale.c +0 -330
  48. data/ext/icu_locale_country.c +0 -99
  49. data/ext/icu_locale_language.c +0 -99
  50. data/ext/icu_numeric.c +0 -161
  51. data/ext/icu_time.c +0 -391
  52. data/test/test_locale.rb +0 -73
  53. data/test/test_numeric.rb +0 -78
  54. data/test/test_time.rb +0 -75
@@ -0,0 +1,138 @@
1
+ #include "icu.h"
2
+ #include "unicode/ucol.h"
3
+
4
+ #define GET_COLLATOR(_data) icu_collator_data* _data; \
5
+ TypedData_Get_Struct(self, icu_collator_data, &icu_collator_type, _data)
6
+
7
+ VALUE rb_cICU_Collator;
8
+ static ID ID_valid;
9
+
10
+ typedef struct {
11
+ VALUE rb_instance;
12
+ int enc_idx; // TODO: reexamine the necessary for this?
13
+ UCollator* service;
14
+ } icu_collator_data;
15
+
16
+ static void collator_free(void* _this)
17
+ {
18
+ icu_collator_data* this = _this;
19
+ ucol_close(this->service);
20
+ }
21
+
22
+ static size_t collator_memsize(const void* _)
23
+ {
24
+ return sizeof(icu_collator_data);
25
+ }
26
+
27
+ static const rb_data_type_t icu_collator_type = {
28
+ "icu/collator",
29
+ {NULL, collator_free, collator_memsize,},
30
+ 0, 0,
31
+ RUBY_TYPED_FREE_IMMEDIATELY,
32
+ };
33
+
34
+ VALUE collator_alloc(VALUE self)
35
+ {
36
+ icu_collator_data* this;
37
+ return TypedData_Make_Struct(self, icu_collator_data, &icu_collator_type, this);
38
+ }
39
+
40
+ VALUE collator_initialize(VALUE self, VALUE locale)
41
+ {
42
+ StringValue(locale);
43
+ GET_COLLATOR(this);
44
+
45
+ this->enc_idx = 0;
46
+ this->rb_instance = self;
47
+ UErrorCode status = U_ZERO_ERROR;
48
+ this->service = ucol_open(StringValueCStr(locale), &status);
49
+ if (U_FAILURE(status)) {
50
+ icu_rb_raise_icu_error(status);
51
+ }
52
+
53
+ return self;
54
+ }
55
+
56
+ /*ULOC_ACTUAL_LOCALE
57
+ This is locale the data actually comes from.
58
+
59
+ ULOC_VALID_LOCALE
60
+ This is the most specific locale supported by ICU. */
61
+ VALUE collator_locale(int argc, VALUE* argv, VALUE self)
62
+ {
63
+ GET_COLLATOR(this);
64
+ VALUE valid;
65
+
66
+ rb_scan_args(argc, argv, "01", &valid);
67
+ if (NIL_P(valid)) {
68
+ valid = ID2SYM(ID_valid);
69
+ }
70
+
71
+ ULocDataLocaleType type = ULOC_VALID_LOCALE;
72
+ if (SYM2ID(valid) != ID_valid) {
73
+ type = ULOC_ACTUAL_LOCALE;
74
+ }
75
+ UErrorCode status = U_ZERO_ERROR;
76
+ const char* locale_str = ucol_getLocaleByType(this->service, type, &status);
77
+ if (U_FAILURE(status)) {
78
+ icu_rb_raise_icu_error(status);
79
+ }
80
+ return locale_str != NULL ? rb_str_new_cstr(locale_str) : Qnil;
81
+ }
82
+
83
+ VALUE collator_compare(VALUE self, VALUE str_a, VALUE str_b)
84
+ {
85
+ StringValue(str_a);
86
+ StringValue(str_b);
87
+ GET_COLLATOR(this);
88
+ UCollationResult result = UCOL_EQUAL;
89
+
90
+ if (icu_is_rb_str_as_utf_8(str_a) &&
91
+ icu_is_rb_str_as_utf_8(str_b)) {
92
+ UErrorCode status = U_ZERO_ERROR;
93
+ result = ucol_strcollUTF8(this->service,
94
+ RSTRING_PTR(str_a),
95
+ RSTRING_LENINT(str_a),
96
+ RSTRING_PTR(str_b),
97
+ RSTRING_LENINT(str_b),
98
+ &status);
99
+ if (U_FAILURE(status)) {
100
+ icu_rb_raise_icu_error(status);
101
+ }
102
+ } else {
103
+ VALUE tmp_a = icu_ustring_from_rb_str(str_a);
104
+ VALUE tmp_b = icu_ustring_from_rb_str(str_b);
105
+ result = ucol_strcoll(this->service,
106
+ icu_ustring_ptr(tmp_a), icu_ustring_len(tmp_a),
107
+ icu_ustring_ptr(tmp_b), icu_ustring_len(tmp_b));
108
+ }
109
+
110
+ return INT2NUM(result);
111
+ }
112
+
113
+ VALUE collator_rules(VALUE self)
114
+ {
115
+ GET_COLLATOR(this);
116
+ int32_t len;
117
+ const UChar* res = ucol_getRules(this->service, &len);
118
+ VALUE str = icu_ustring_from_uchar_str(res, len);
119
+ VALUE ret = icu_ustring_to_rb_enc_str(str);
120
+ icu_ustring_clear_ptr(str);
121
+ return ret;
122
+ }
123
+
124
+ void init_icu_collator(void)
125
+ {
126
+ ID_valid = rb_intern("valid");
127
+
128
+ rb_cICU_Collator = rb_define_class_under(rb_mICU, "Collator", rb_cObject);
129
+ rb_define_alloc_func(rb_cICU_Collator, collator_alloc);
130
+ rb_define_method(rb_cICU_Collator, "initialize", collator_initialize, 1);
131
+ rb_define_method(rb_cICU_Collator, "locale", collator_locale, -1);
132
+ rb_define_method(rb_cICU_Collator, "compare", collator_compare, 2);
133
+ rb_define_method(rb_cICU_Collator, "rules", collator_rules, 0);
134
+ }
135
+
136
+ #undef GET_COLLATOR
137
+
138
+ /* vim: set expandtab sws=4 sw=4: */
@@ -0,0 +1,852 @@
1
+ #include "icu.h"
2
+ #include "unicode/uloc.h"
3
+ #include <string.h>
4
+ #include <stdlib.h>
5
+
6
+ VALUE rb_cICU_Locale;
7
+ static ID ID_ltr;
8
+ static ID ID_rtl;
9
+ static ID ID_ttb;
10
+ static ID ID_btt;
11
+ static ID ID_unknown;
12
+
13
+ VALUE locale_initialize(VALUE self, VALUE id)
14
+ {
15
+ id = rb_str_enc_to_ascii_as_utf8(id);
16
+ rb_iv_set(self, "@id", id);
17
+ return self;
18
+ }
19
+
20
+ inline static VALUE locale_new_from_cstr(const char* str)
21
+ {
22
+ VALUE rb_str = rb_str_new_cstr(str);
23
+ return locale_initialize(rb_obj_alloc(rb_cICU_Locale), rb_str_enc_to_ascii_as_utf8(rb_str));
24
+ }
25
+
26
+ VALUE locale_singleton_available(VALUE klass)
27
+ {
28
+ int32_t len = uloc_countAvailable();
29
+ VALUE result = rb_ary_new2(len);
30
+ for (int32_t i = 0; i < len; ++i) {
31
+ rb_ary_push(result, locale_new_from_cstr(uloc_getAvailable(i)));
32
+ }
33
+ return result;
34
+ }
35
+
36
+ static inline VALUE locale_singleton_get_default_internal(void)
37
+ {
38
+ const char* locale = uloc_getDefault();
39
+ return locale_new_from_cstr(locale);
40
+ }
41
+
42
+ VALUE locale_singleton_get_default(VALUE klass)
43
+ {
44
+ return locale_singleton_get_default_internal();
45
+ }
46
+
47
+ VALUE locale_singleton_set_default(VALUE klass, VALUE val)
48
+ {
49
+ val = rb_str_enc_to_ascii_as_utf8(val);
50
+ UErrorCode status = U_ZERO_ERROR;
51
+ uloc_setDefault(RSTRING_PTR(val), &status);
52
+ if (U_FAILURE(status)) {
53
+ icu_rb_raise_icu_error(status);
54
+ }
55
+ return locale_singleton_get_default_internal();
56
+ }
57
+
58
+ VALUE locale_singleton_for_language_tag(VALUE klass, VALUE tag)
59
+ {
60
+ tag = rb_str_enc_to_ascii_as_utf8(tag);
61
+ int32_t buffer_capa = 64;
62
+ char* buffer = char_buffer_new(buffer_capa);
63
+ UErrorCode status = U_ZERO_ERROR;
64
+ int retried = FALSE;
65
+ int32_t len;
66
+ do {
67
+ len = uloc_forLanguageTag(RSTRING_PTR(tag),
68
+ buffer,
69
+ buffer_capa,
70
+ NULL /* parsedLength of tag */,
71
+ &status);
72
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
73
+ retried = TRUE;
74
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
75
+ char_buffer_resize(buffer, buffer_capa);
76
+ status = U_ZERO_ERROR;
77
+ } else if (U_FAILURE(status)) {
78
+ char_buffer_free(buffer);
79
+ icu_rb_raise_icu_error(status);
80
+ } else { // retried == true && U_SUCCESS(status)
81
+ break;
82
+ }
83
+ } while (retried);
84
+ buffer[len] = '\0';
85
+
86
+ VALUE loc = locale_new_from_cstr(buffer);
87
+ char_buffer_free(buffer);
88
+
89
+ return loc;
90
+ }
91
+
92
+ VALUE locale_singleton_for_lcid(VALUE klass, VALUE lcid)
93
+ {
94
+ uint32_t host_id = NUM2UINT(lcid);
95
+ int32_t buffer_capa = 64;
96
+ char* buffer = char_buffer_new(buffer_capa);
97
+ UErrorCode status = U_ZERO_ERROR;
98
+ int retried = FALSE;
99
+ int32_t len;
100
+ do {
101
+ len = uloc_getLocaleForLCID(host_id,
102
+ buffer,
103
+ buffer_capa,
104
+ &status);
105
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
106
+ retried = TRUE;
107
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
108
+ char_buffer_resize(buffer, buffer_capa);
109
+ status = U_ZERO_ERROR;
110
+ } else if (U_FAILURE(status)) {
111
+ char_buffer_free(buffer);
112
+ icu_rb_raise_icu_error(status);
113
+ } else { // retried == true && U_SUCCESS(status)
114
+ break;
115
+ }
116
+ } while (retried);
117
+ buffer[len] = '\0';
118
+
119
+ VALUE res = locale_new_from_cstr(buffer);
120
+ char_buffer_free(buffer);
121
+ return res;
122
+ }
123
+
124
+ VALUE locale_singleton_iso_countries(VALUE klass)
125
+ {
126
+ const char* const* ary = uloc_getISOCountries();
127
+ VALUE result = rb_ary_new2(250); // the number of countries now is 249.
128
+ while (*ary != NULL) {
129
+ // NUL-terminated C string allocated by ICU, so use the buffer function
130
+ rb_ary_push(result, char_buffer_to_rb_str(*ary));
131
+ ary++;
132
+ }
133
+ return result;
134
+ }
135
+
136
+ VALUE locale_singleton_iso_languages(VALUE klass)
137
+ {
138
+ const char* const* ary = uloc_getISOLanguages();
139
+ VALUE result = rb_ary_new2(250); // the number of countries now is 249.
140
+ while (*ary != NULL) {
141
+ // NUL-terminated C string allocated by ICU, so use the buffer function
142
+ rb_ary_push(result, char_buffer_to_rb_str(*ary));
143
+ ary++;
144
+ }
145
+ return result;
146
+ }
147
+
148
+ VALUE locale_language_tag(int argc, VALUE* argv, VALUE self)
149
+ {
150
+ VALUE strict;
151
+ rb_scan_args(argc, argv, "01", &strict);
152
+ if (strict != Qtrue) {
153
+ strict = Qfalse;
154
+ }
155
+
156
+ VALUE id = rb_iv_get(self, "@id");
157
+ int32_t buffer_capa = 64;
158
+ char* buffer = char_buffer_new(buffer_capa);
159
+ UErrorCode status = U_ZERO_ERROR;
160
+ int retried = FALSE;
161
+ int32_t len;
162
+ do {
163
+ len = uloc_toLanguageTag(RSTRING_PTR(id),
164
+ buffer,
165
+ buffer_capa,
166
+ strict == Qtrue ? TRUE : FALSE,
167
+ &status);
168
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
169
+ retried = TRUE;
170
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
171
+ char_buffer_resize(buffer, buffer_capa);
172
+ status = U_ZERO_ERROR;
173
+ } else if (U_FAILURE(status)) {
174
+ char_buffer_free(buffer);
175
+ icu_rb_raise_icu_error(status);
176
+ } else { // retried == true && U_SUCCESS(status)
177
+ break;
178
+ }
179
+ } while (retried);
180
+ buffer[len] = '\0';
181
+
182
+ VALUE loc = char_buffer_to_rb_str(buffer);
183
+ char_buffer_free(buffer);
184
+ return loc;
185
+ }
186
+
187
+ VALUE locale_lcid(VALUE self)
188
+ {
189
+ VALUE id = rb_iv_get(self, "@id");
190
+ return ULONG2NUM(uloc_getLCID(RSTRING_PTR(id)));
191
+ }
192
+
193
+ VALUE locale_display_country(int argc, VALUE* argv, VALUE self)
194
+ {
195
+ VALUE display_locale;
196
+ rb_scan_args(argc, argv, "01", &display_locale);
197
+ if (!NIL_P(display_locale)) {
198
+ display_locale = rb_str_enc_to_ascii_as_utf8(display_locale);
199
+ }
200
+
201
+ VALUE id = rb_iv_get(self, "@id");
202
+ VALUE buffer = icu_ustring_init_with_capa_enc(64, ICU_RUBY_ENCODING_INDEX);
203
+ UErrorCode status = U_ZERO_ERROR;
204
+ int retried = FALSE;
205
+ int32_t len;
206
+
207
+ do {
208
+ len = uloc_getDisplayCountry(RSTRING_PTR(id),
209
+ NIL_P(display_locale) ? NULL : RSTRING_PTR(display_locale),
210
+ icu_ustring_ptr(buffer),
211
+ icu_ustring_capa(buffer),
212
+ &status);
213
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
214
+ retried = TRUE;
215
+ icu_ustring_resize(buffer, len + RUBY_C_STRING_TERMINATOR_SIZE);
216
+ status = U_ZERO_ERROR;
217
+ } else if (U_FAILURE(status)) {
218
+ icu_rb_raise_icu_error(status);
219
+ } else { // retried == true && U_SUCCESS(status)
220
+ break;
221
+ }
222
+ } while (retried);
223
+ return icu_ustring_to_rb_enc_str_with_len(buffer, len);
224
+ }
225
+
226
+ VALUE locale_display_language(int argc, VALUE* argv, VALUE self)
227
+ {
228
+ VALUE display_locale;
229
+ rb_scan_args(argc, argv, "01", &display_locale);
230
+ // if use NULL instead of the default, ICU fails which doesn't align with the doc at 59.1
231
+ display_locale = rb_str_enc_to_ascii_as_utf8((NIL_P(display_locale) ?
232
+ locale_singleton_get_default_internal() :
233
+ display_locale));
234
+
235
+ VALUE id = rb_iv_get(self, "@id");
236
+ VALUE buffer = icu_ustring_init_with_capa_enc(64, ICU_RUBY_ENCODING_INDEX);
237
+ UErrorCode status = U_ZERO_ERROR;
238
+ int retried = FALSE;
239
+ int32_t len;
240
+ do {
241
+ len = uloc_getDisplayLanguage(RSTRING_PTR(id),
242
+ RSTRING_PTR(display_locale),
243
+ icu_ustring_ptr(buffer),
244
+ icu_ustring_capa(buffer),
245
+ &status);
246
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
247
+ retried = TRUE;
248
+ icu_ustring_resize(buffer, len + RUBY_C_STRING_TERMINATOR_SIZE);
249
+ status = U_ZERO_ERROR;
250
+ } else if (U_FAILURE(status)) {
251
+ icu_rb_raise_icu_error(status);
252
+ } else { // retried == true && U_SUCCESS(status)
253
+ break;
254
+ }
255
+ } while (retried);
256
+ return icu_ustring_to_rb_enc_str_with_len(buffer, len);
257
+ }
258
+
259
+ VALUE locale_display_name(int argc, VALUE* argv, VALUE self)
260
+ {
261
+ VALUE display_locale;
262
+ rb_scan_args(argc, argv, "01", &display_locale);
263
+ if (!NIL_P(display_locale)) {
264
+ display_locale = rb_str_enc_to_ascii_as_utf8(display_locale);
265
+ }
266
+
267
+ VALUE id = rb_iv_get(self, "@id");
268
+ VALUE buffer = icu_ustring_init_with_capa_enc(64, ICU_RUBY_ENCODING_INDEX);
269
+ UErrorCode status = U_ZERO_ERROR;
270
+ int retried = FALSE;
271
+ int32_t len;
272
+ do {
273
+ len = uloc_getDisplayName(RSTRING_PTR(id),
274
+ NIL_P(display_locale) ? NULL : RSTRING_PTR(display_locale),
275
+ icu_ustring_ptr(buffer),
276
+ icu_ustring_capa(buffer),
277
+ &status);
278
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
279
+ retried = TRUE;
280
+ icu_ustring_resize(buffer, len + RUBY_C_STRING_TERMINATOR_SIZE);
281
+ status = U_ZERO_ERROR;
282
+ } else if (U_FAILURE(status)) {
283
+ icu_rb_raise_icu_error(status);
284
+ } else { // retried == true && U_SUCCESS(status)
285
+ break;
286
+ }
287
+ } while (retried);
288
+ return icu_ustring_to_rb_enc_str_with_len(buffer, len);
289
+ }
290
+
291
+ VALUE locale_display_script(int argc, VALUE* argv, VALUE self)
292
+ {
293
+ VALUE display_locale;
294
+ rb_scan_args(argc, argv, "01", &display_locale);
295
+ if (!NIL_P(display_locale)) {
296
+ display_locale = rb_str_enc_to_ascii_as_utf8(display_locale);
297
+ }
298
+
299
+ VALUE id = rb_iv_get(self, "@id");
300
+ VALUE buffer = icu_ustring_init_with_capa_enc(64, ICU_RUBY_ENCODING_INDEX);
301
+ UErrorCode status = U_ZERO_ERROR;
302
+ int retried = FALSE;
303
+ int32_t len;
304
+ do {
305
+ len = uloc_getDisplayScript(RSTRING_PTR(id),
306
+ NIL_P(display_locale) ? NULL : RSTRING_PTR(display_locale),
307
+ icu_ustring_ptr(buffer),
308
+ icu_ustring_capa(buffer),
309
+ &status);
310
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
311
+ retried = TRUE;
312
+ icu_ustring_resize(buffer, len + RUBY_C_STRING_TERMINATOR_SIZE);
313
+ status = U_ZERO_ERROR;
314
+ } else if (U_FAILURE(status)) {
315
+ icu_rb_raise_icu_error(status);
316
+ } else { // retried == true && U_SUCCESS(status)
317
+ break;
318
+ }
319
+ } while (retried);
320
+ return icu_ustring_to_rb_enc_str_with_len(buffer, len);
321
+ }
322
+
323
+ VALUE locale_display_variant(int argc, VALUE* argv, VALUE self)
324
+ {
325
+ VALUE display_locale;
326
+ rb_scan_args(argc, argv, "01", &display_locale);
327
+ if (!NIL_P(display_locale)) {
328
+ display_locale = rb_str_enc_to_ascii_as_utf8(display_locale);
329
+ }
330
+
331
+ VALUE id = rb_iv_get(self, "@id");
332
+ VALUE buffer = icu_ustring_init_with_capa_enc(64, ICU_RUBY_ENCODING_INDEX);
333
+ UErrorCode status = U_ZERO_ERROR;
334
+ int retried = FALSE;
335
+ int32_t len;
336
+ do {
337
+ len = uloc_getDisplayVariant(RSTRING_PTR(id),
338
+ NIL_P(display_locale) ? NULL : RSTRING_PTR(display_locale),
339
+ icu_ustring_ptr(buffer),
340
+ icu_ustring_capa(buffer),
341
+ &status);
342
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
343
+ retried = TRUE;
344
+ icu_ustring_resize(buffer, len + RUBY_C_STRING_TERMINATOR_SIZE);
345
+ status = U_ZERO_ERROR;
346
+ } else if (U_FAILURE(status)) {
347
+ icu_rb_raise_icu_error(status);
348
+ } else { // retried == true && U_SUCCESS(status)
349
+ break;
350
+ }
351
+ } while (retried);
352
+ return icu_ustring_to_rb_enc_str_with_len(buffer, len);
353
+ }
354
+
355
+ VALUE locale_name(VALUE self)
356
+ {
357
+ int32_t buffer_capa = 64;
358
+ VALUE id = rb_iv_get(self, "@id");
359
+ char* buffer = char_buffer_new(buffer_capa);
360
+ UErrorCode status = U_ZERO_ERROR;
361
+ int retried = FALSE;
362
+ int32_t len;
363
+ do {
364
+ len = uloc_getName(RSTRING_PTR(id),
365
+ buffer,
366
+ buffer_capa,
367
+ &status);
368
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
369
+ retried = TRUE;
370
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
371
+ char_buffer_resize(buffer, buffer_capa);
372
+ status = U_ZERO_ERROR;
373
+ } else if (U_FAILURE(status)) {
374
+ char_buffer_free(buffer);
375
+ icu_rb_raise_icu_error(status);
376
+ } else { // retried == true && U_SUCCESS(status)
377
+ break;
378
+ }
379
+ } while (retried);
380
+ buffer[len] = '\0';
381
+
382
+ VALUE res = char_buffer_to_rb_str(buffer);
383
+ char_buffer_free(buffer);
384
+ return res;
385
+ }
386
+
387
+ VALUE locale_base_name(VALUE self)
388
+ {
389
+ int32_t buffer_capa = 64;
390
+ VALUE id = rb_iv_get(self, "@id");
391
+ char* buffer = char_buffer_new(buffer_capa);
392
+ UErrorCode status = U_ZERO_ERROR;
393
+ int retried = FALSE;
394
+ int32_t len;
395
+ do {
396
+ len = uloc_getBaseName(RSTRING_PTR(id),
397
+ buffer,
398
+ buffer_capa,
399
+ &status);
400
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
401
+ retried = TRUE;
402
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
403
+ char_buffer_resize(buffer, buffer_capa);
404
+ status = U_ZERO_ERROR;
405
+ } else if (U_FAILURE(status)) {
406
+ char_buffer_free(buffer);
407
+ icu_rb_raise_icu_error(status);
408
+ } else { // retried == true && U_SUCCESS(status)
409
+ break;
410
+ }
411
+ } while (retried);
412
+ buffer[len] = '\0';
413
+
414
+ VALUE res = char_buffer_to_rb_str(buffer);
415
+ char_buffer_free(buffer);
416
+ return res;
417
+ }
418
+
419
+ VALUE locale_canonical_name(VALUE self)
420
+ {
421
+ int32_t buffer_capa = 64;
422
+ VALUE id = rb_iv_get(self, "@id");
423
+ char* buffer = char_buffer_new(buffer_capa);
424
+ UErrorCode status = U_ZERO_ERROR;
425
+ int retried = FALSE;
426
+ int32_t len;
427
+ do {
428
+ len = uloc_canonicalize(RSTRING_PTR(id),
429
+ buffer,
430
+ buffer_capa,
431
+ &status);
432
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
433
+ retried = TRUE;
434
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
435
+ char_buffer_resize(buffer, buffer_capa);
436
+ status = U_ZERO_ERROR;
437
+ } else if (U_FAILURE(status)) {
438
+ char_buffer_free(buffer);
439
+ icu_rb_raise_icu_error(status);
440
+ } else { // retried == true && U_SUCCESS(status)
441
+ break;
442
+ }
443
+ } while (retried);
444
+ buffer[len] = '\0';
445
+
446
+ VALUE res = char_buffer_to_rb_str(buffer);
447
+ char_buffer_free(buffer);
448
+ return res;
449
+ }
450
+
451
+ VALUE locale_parent(VALUE self)
452
+ {
453
+ int32_t buffer_capa = 64;
454
+ VALUE id = rb_iv_get(self, "@id");
455
+ char* buffer = char_buffer_new(buffer_capa);
456
+ UErrorCode status = U_ZERO_ERROR;
457
+ int retried = FALSE;
458
+ int32_t len;
459
+ do {
460
+ len = uloc_getParent(RSTRING_PTR(id),
461
+ buffer,
462
+ buffer_capa,
463
+ &status);
464
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
465
+ retried = TRUE;
466
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
467
+ char_buffer_resize(buffer, buffer_capa);
468
+ status = U_ZERO_ERROR;
469
+ } else if (U_FAILURE(status)) {
470
+ char_buffer_free(buffer);
471
+ icu_rb_raise_icu_error(status);
472
+ } else { // retried == true && U_SUCCESS(status)
473
+ break;
474
+ }
475
+ } while (retried);
476
+ buffer[len] = '\0';
477
+
478
+ VALUE res = char_buffer_to_rb_str(buffer);
479
+ char_buffer_free(buffer);
480
+ return res;
481
+ }
482
+
483
+ VALUE locale_iso_country(VALUE self)
484
+ {
485
+ VALUE id = rb_iv_get(self, "@id");
486
+ return rb_str_new_cstr(uloc_getISO3Country(RSTRING_PTR(id)));
487
+ }
488
+
489
+ VALUE locale_iso_language(VALUE self)
490
+ {
491
+ VALUE id = rb_iv_get(self, "@id");
492
+ return rb_str_new_cstr(uloc_getISO3Language(RSTRING_PTR(id)));
493
+ }
494
+
495
+ VALUE locale_keyword(VALUE self, VALUE keyword)
496
+ {
497
+ keyword = rb_str_enc_to_ascii_as_utf8(keyword);
498
+ int32_t buffer_capa = 64;
499
+ VALUE id = rb_iv_get(self, "@id");
500
+ char* buffer = char_buffer_new(buffer_capa);
501
+ UErrorCode status = U_ZERO_ERROR;
502
+ int retried = FALSE;
503
+ int32_t len;
504
+ do {
505
+ len = uloc_getKeywordValue(RSTRING_PTR(id),
506
+ RSTRING_PTR(keyword),
507
+ buffer,
508
+ buffer_capa,
509
+ &status);
510
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
511
+ retried = TRUE;
512
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
513
+ char_buffer_resize(buffer, buffer_capa);
514
+ status = U_ZERO_ERROR;
515
+ } else if (U_FAILURE(status)) {
516
+ char_buffer_free(buffer);
517
+ icu_rb_raise_icu_error(status);
518
+ } else { // retried == true && U_SUCCESS(status)
519
+ break;
520
+ }
521
+ } while (retried);
522
+ buffer[len] = '\0';
523
+
524
+ VALUE res = char_buffer_to_rb_str(buffer);
525
+ char_buffer_free(buffer);
526
+ return res;
527
+ }
528
+
529
+ VALUE locale_keywords(VALUE self)
530
+ {
531
+ VALUE id = rb_iv_get(self, "@id");
532
+ UErrorCode status = U_ZERO_ERROR;
533
+ UEnumeration* result = uloc_openKeywords(RSTRING_PTR(id), &status);
534
+ return icu_enum_to_rb_ary(result, status, 3);
535
+ }
536
+
537
+ // TODO: check the keyword and value
538
+ VALUE locale_with_keyword(VALUE self, VALUE keyword, VALUE value)
539
+ {
540
+ keyword = rb_str_enc_to_ascii_as_utf8(keyword);
541
+ int32_t len_keyword = RSTRING_LENINT(keyword);
542
+ if (len_keyword == 0) {
543
+ rb_raise(rb_eArgError, "invalid value for keyword: %+"PRIsVALUE, self);
544
+ }
545
+ if (!NIL_P(value)) {
546
+ value = rb_str_enc_to_ascii_as_utf8(value);
547
+ }
548
+
549
+ VALUE id = rb_iv_get(self, "@id");
550
+ int32_t len_id = RSTRING_LENINT(id);
551
+ int32_t buffer_capa = 64 + len_id + len_keyword + (NIL_P(value) ? 0 : RSTRING_LENINT(value));
552
+ char* buffer = char_buffer_new(buffer_capa);
553
+ memmove(buffer, RSTRING_PTR(id), len_id);
554
+ buffer[len_id] = '\0';
555
+
556
+ UErrorCode status = U_ZERO_ERROR;
557
+ int retried = FALSE;
558
+ int32_t buffer_required;
559
+ do {
560
+ buffer_required = uloc_setKeywordValue(RSTRING_PTR(keyword),
561
+ NIL_P(value) ? NULL : RSTRING_PTR(value),
562
+ buffer,
563
+ buffer_capa,
564
+ &status);
565
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
566
+ retried = TRUE;
567
+ buffer_capa = buffer_required;
568
+ char_buffer_resize(buffer, buffer_capa);
569
+ status = U_ZERO_ERROR;
570
+ } else if (U_FAILURE(status)) {
571
+ char_buffer_free(buffer);
572
+ icu_rb_raise_icu_error(status);
573
+ } else { // retried == true && U_SUCCESS(status)
574
+ break;
575
+ }
576
+ } while (retried);
577
+ // NUL is inserted by ICU when buffer is enough
578
+
579
+ VALUE res = locale_new_from_cstr(buffer);
580
+ char_buffer_free(buffer);
581
+ return res;
582
+ }
583
+
584
+ static inline VALUE locale_layout_symbol(ULayoutType result)
585
+ {
586
+ switch (result) {
587
+ case ULOC_LAYOUT_LTR:
588
+ return ID2SYM(ID_ltr);
589
+ case ULOC_LAYOUT_RTL:
590
+ return ID2SYM(ID_rtl);
591
+ case ULOC_LAYOUT_TTB:
592
+ return ID2SYM(ID_ttb);
593
+ case ULOC_LAYOUT_BTT:
594
+ return ID2SYM(ID_btt);
595
+ case ULOC_LAYOUT_UNKNOWN: default:
596
+ return ID2SYM(ID_unknown);
597
+ }
598
+ }
599
+
600
+ VALUE locale_character_orientation(VALUE self)
601
+ {
602
+ VALUE id = rb_iv_get(self, "@id");
603
+ UErrorCode status = U_ZERO_ERROR;
604
+ ULayoutType result = uloc_getCharacterOrientation(RSTRING_PTR(id), &status);
605
+ return locale_layout_symbol(result);
606
+ }
607
+
608
+ VALUE locale_line_orientation(VALUE self)
609
+ {
610
+ VALUE id = rb_iv_get(self, "@id");
611
+ UErrorCode status = U_ZERO_ERROR;
612
+ ULayoutType result = uloc_getLineOrientation(RSTRING_PTR(id), &status);
613
+ return locale_layout_symbol(result);
614
+ }
615
+
616
+ VALUE locale_country(VALUE self)
617
+ {
618
+ VALUE id = rb_iv_get(self, "@id");
619
+ int32_t buffer_capa = 64;
620
+ char* buffer = char_buffer_new(buffer_capa);
621
+ UErrorCode status = U_ZERO_ERROR;
622
+ int retried = FALSE;
623
+ int32_t len;
624
+ do {
625
+ len = uloc_getCountry(RSTRING_PTR(id),
626
+ buffer,
627
+ buffer_capa,
628
+ &status);
629
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
630
+ retried = TRUE;
631
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
632
+ char_buffer_resize(buffer, buffer_capa);
633
+ status = U_ZERO_ERROR;
634
+ } else if (U_FAILURE(status)) {
635
+ char_buffer_free(buffer);
636
+ icu_rb_raise_icu_error(status);
637
+ } else { // retried == true && U_SUCCESS(status)
638
+ break;
639
+ }
640
+ } while (retried);
641
+ buffer[len] = '\0';
642
+
643
+ VALUE loc = char_buffer_to_rb_str(buffer);
644
+ char_buffer_free(buffer);
645
+ return loc;
646
+ }
647
+
648
+ VALUE locale_language(VALUE self)
649
+ {
650
+ VALUE id = rb_iv_get(self, "@id");
651
+ int32_t buffer_capa = 64;
652
+ char* buffer = char_buffer_new(buffer_capa);
653
+ UErrorCode status = U_ZERO_ERROR;
654
+ int retried = FALSE;
655
+ int32_t len;
656
+ do {
657
+ len = uloc_getLanguage(RSTRING_PTR(id),
658
+ buffer,
659
+ buffer_capa,
660
+ &status);
661
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
662
+ retried = TRUE;
663
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
664
+ char_buffer_resize(buffer, buffer_capa);
665
+ status = U_ZERO_ERROR;
666
+ } else if (U_FAILURE(status)) {
667
+ char_buffer_free(buffer);
668
+ icu_rb_raise_icu_error(status);
669
+ } else { // retried == true && U_SUCCESS(status)
670
+ break;
671
+ }
672
+ } while (retried);
673
+ buffer[len] = '\0';
674
+
675
+ VALUE loc = char_buffer_to_rb_str(buffer);
676
+ char_buffer_free(buffer);
677
+ return loc;
678
+ }
679
+
680
+ VALUE locale_script(VALUE self)
681
+ {
682
+ VALUE id = rb_iv_get(self, "@id");
683
+ int32_t buffer_capa = 64;
684
+ char* buffer = char_buffer_new(buffer_capa);
685
+ UErrorCode status = U_ZERO_ERROR;
686
+ int retried = FALSE;
687
+ int32_t len;
688
+ do {
689
+ len = uloc_getScript(RSTRING_PTR(id),
690
+ buffer,
691
+ buffer_capa,
692
+ &status);
693
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
694
+ retried = TRUE;
695
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
696
+ char_buffer_resize(buffer, buffer_capa);
697
+ status = U_ZERO_ERROR;
698
+ } else if (U_FAILURE(status)) {
699
+ char_buffer_free(buffer);
700
+ icu_rb_raise_icu_error(status);
701
+ } else { // retried == true && U_SUCCESS(status)
702
+ break;
703
+ }
704
+ } while (retried);
705
+ buffer[len] = '\0';
706
+
707
+ VALUE res = char_buffer_to_rb_str(buffer);
708
+ char_buffer_free(buffer);
709
+ return res;
710
+ }
711
+
712
+ VALUE locale_variant(VALUE self)
713
+ {
714
+ VALUE id = rb_iv_get(self, "@id");
715
+ int32_t buffer_capa = 64;
716
+ char* buffer = char_buffer_new(buffer_capa);
717
+ UErrorCode status = U_ZERO_ERROR;
718
+ int retried = FALSE;
719
+ int32_t len;
720
+ do {
721
+ len = uloc_getVariant(RSTRING_PTR(id),
722
+ buffer,
723
+ buffer_capa,
724
+ &status);
725
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
726
+ retried = TRUE;
727
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
728
+ char_buffer_resize(buffer, buffer_capa);
729
+ status = U_ZERO_ERROR;
730
+ } else if (U_FAILURE(status)) {
731
+ char_buffer_free(buffer);
732
+ icu_rb_raise_icu_error(status);
733
+ } else { // retried == true && U_SUCCESS(status)
734
+ break;
735
+ }
736
+ } while (retried);
737
+ buffer[len] = '\0';
738
+
739
+ VALUE res = char_buffer_to_rb_str(buffer);
740
+ char_buffer_free(buffer);
741
+ return res;
742
+ }
743
+
744
+ VALUE locale_with_likely_subtags(VALUE self)
745
+ {
746
+ VALUE id = rb_iv_get(self, "@id");
747
+ int32_t buffer_capa = 64;
748
+ char* buffer = char_buffer_new(buffer_capa);
749
+ UErrorCode status = U_ZERO_ERROR;
750
+ int retried = FALSE;
751
+ int32_t len;
752
+ do {
753
+ len = uloc_addLikelySubtags(RSTRING_PTR(id),
754
+ buffer,
755
+ buffer_capa,
756
+ &status);
757
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
758
+ retried = TRUE;
759
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
760
+ char_buffer_resize(buffer, buffer_capa);
761
+ status = U_ZERO_ERROR;
762
+ } else if (U_FAILURE(status)) {
763
+ char_buffer_free(buffer);
764
+ icu_rb_raise_icu_error(status);
765
+ } else { // retried == true && U_SUCCESS(status)
766
+ break;
767
+ }
768
+ } while (retried);
769
+ buffer[len] = '\0';
770
+
771
+ VALUE res = locale_new_from_cstr(buffer);
772
+ char_buffer_free(buffer);
773
+ return res;
774
+ }
775
+
776
+ VALUE locale_with_minimized_subtags(VALUE self)
777
+ {
778
+ VALUE id = rb_iv_get(self, "@id");
779
+ int32_t buffer_capa = 64;
780
+ char* buffer = char_buffer_new(buffer_capa);
781
+ UErrorCode status = U_ZERO_ERROR;
782
+ int retried = FALSE;
783
+ int32_t len;
784
+ do {
785
+ len = uloc_minimizeSubtags(RSTRING_PTR(id),
786
+ buffer,
787
+ buffer_capa,
788
+ &status);
789
+ if (!retried && status == U_BUFFER_OVERFLOW_ERROR) {
790
+ retried = TRUE;
791
+ buffer_capa = len + RUBY_C_STRING_TERMINATOR_SIZE;
792
+ char_buffer_resize(buffer, buffer_capa);
793
+ status = U_ZERO_ERROR;
794
+ } else if (U_FAILURE(status)) {
795
+ char_buffer_free(buffer);
796
+ icu_rb_raise_icu_error(status);
797
+ } else { // retried == true && U_SUCCESS(status)
798
+ break;
799
+ }
800
+ } while (retried);
801
+ buffer[len] = '\0';
802
+
803
+ VALUE res = locale_new_from_cstr(buffer);
804
+ char_buffer_free(buffer);
805
+ return res;
806
+ }
807
+
808
+ void init_icu_locale(void)
809
+ {
810
+ ID_ltr = rb_intern("ltr");
811
+ ID_rtl = rb_intern("rtl");
812
+ ID_ttb = rb_intern("ttb");
813
+ ID_btt = rb_intern("btt");
814
+ ID_unknown = rb_intern("unknown");
815
+
816
+ rb_cICU_Locale = rb_define_class_under(rb_mICU, "Locale", rb_cObject);
817
+ rb_define_singleton_method(rb_cICU_Locale, "available", locale_singleton_available, 0);
818
+ rb_define_singleton_method(rb_cICU_Locale, "default", locale_singleton_get_default, 0);
819
+ rb_define_singleton_method(rb_cICU_Locale, "default=", locale_singleton_set_default, 1);
820
+ rb_define_singleton_method(rb_cICU_Locale, "for_language_tag", locale_singleton_for_language_tag, 1);
821
+ rb_define_singleton_method(rb_cICU_Locale, "for_lcid", locale_singleton_for_lcid, 1);
822
+ rb_define_singleton_method(rb_cICU_Locale, "iso_countries", locale_singleton_iso_countries, 0);
823
+ rb_define_singleton_method(rb_cICU_Locale, "iso_languages", locale_singleton_iso_languages, 0);
824
+ rb_define_method(rb_cICU_Locale, "initialize", locale_initialize, 1);
825
+ rb_define_method(rb_cICU_Locale, "language_tag", locale_language_tag, -1);
826
+ rb_define_method(rb_cICU_Locale, "lcid", locale_lcid, 0);
827
+ rb_define_method(rb_cICU_Locale, "display_country", locale_display_country, -1);
828
+ rb_define_method(rb_cICU_Locale, "display_language", locale_display_language, -1);
829
+ rb_define_method(rb_cICU_Locale, "display_name", locale_display_name, -1);
830
+ rb_define_method(rb_cICU_Locale, "display_script", locale_display_script, -1);
831
+ rb_define_method(rb_cICU_Locale, "display_variant", locale_display_variant, -1);
832
+ rb_define_method(rb_cICU_Locale, "name", locale_name, 0);
833
+ rb_define_method(rb_cICU_Locale, "base_name", locale_base_name, 0);
834
+ rb_define_method(rb_cICU_Locale, "canonical_name", locale_canonical_name, 0);
835
+ rb_define_method(rb_cICU_Locale, "parent", locale_parent, 0);
836
+ rb_define_method(rb_cICU_Locale, "iso_country", locale_iso_country, 0);
837
+ rb_define_method(rb_cICU_Locale, "iso_language", locale_iso_language, 0);
838
+ rb_define_method(rb_cICU_Locale, "keyword", locale_keyword, 1);
839
+ rb_define_method(rb_cICU_Locale, "keywords", locale_keywords, 0);
840
+ rb_define_method(rb_cICU_Locale, "with_keyword", locale_with_keyword, 2);
841
+ rb_define_method(rb_cICU_Locale, "character_orientation", locale_character_orientation, 0);
842
+ rb_define_method(rb_cICU_Locale, "line_orientation", locale_line_orientation, 0);
843
+ rb_define_method(rb_cICU_Locale, "country", locale_country, 0);
844
+ rb_define_method(rb_cICU_Locale, "language", locale_language, 0);
845
+ rb_define_method(rb_cICU_Locale, "script", locale_script, 0);
846
+ rb_define_method(rb_cICU_Locale, "variant", locale_variant, 0);
847
+ rb_define_method(rb_cICU_Locale, "with_likely_subtags", locale_with_likely_subtags, 0);
848
+ rb_define_method(rb_cICU_Locale, "with_minimized_subtags", locale_with_minimized_subtags, 0);
849
+
850
+ }
851
+
852
+ /* vim: set expandtab sws=4 sw=4: */