i18nema19 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,132 @@
1
+ # I18nema
2
+
3
+ Fast I18n backend to keep things running smoothly.
4
+
5
+ I18nema is a drop-in replacement for I18n::Backend::Simple, for faster
6
+ lookups (15-20%) and quicker GC runs (ymmv). Translations are stored
7
+ outside of the ruby heap, and lookups happen in C (rather than the usual
8
+ inject on nested ruby hashes).
9
+
10
+ ## How do I use it?
11
+
12
+ ```ruby
13
+ gem 'i18nema' # or 'i18nema-19' if you're on Ruby 1.9
14
+ ```
15
+
16
+ Then do something like this in an initializer:
17
+
18
+ ```ruby
19
+ I18n.backend = I18nema::Backend.new
20
+ ```
21
+
22
+ You can pull in additional features, e.g.
23
+
24
+ ```ruby
25
+ I18nema::Backend.send(:include, I18n::Backend::Fallbacks)
26
+ ```
27
+
28
+ As with regular I18n, you should probably load translations before you
29
+ fork, so that all processes can use the same translations in memory. In
30
+ an initializer, just do `I18n.backend.init_translations`.
31
+
32
+ ## What sort of improvements will I see?
33
+
34
+ ### Faster Startup
35
+
36
+ Loading all the translations into memory is dramatically faster with
37
+ I18nema (about 4x). While this is just a one time hit, it's pretty
38
+ noticeable when you're waiting on it (e.g. console, specs). In
39
+ [canvas-lms](https://github.com/instructure/canvas-lms), I18nema brings
40
+ it down to just over half a second (from almost 2.5).
41
+
42
+ ### Faster GC Runs
43
+
44
+ Because there are fewer ruby objects, the periodic GC runs will be
45
+ proportionally faster. How much faster is a question of how many
46
+ translations you have versus how many other ruby objects. Applications
47
+ that are localized in more languages should see a bigger boost (since
48
+ the translations make up a bigger share of the original ObjectSpace).
49
+
50
+ For example, [canvas-lms](https://github.com/instructure/canvas-lms) is
51
+ translated into seven other languages, and I18nema reduces (startup)
52
+ ObjectSpace by about 18% and GC runtime by about 11%.
53
+
54
+ I18nema also moves I18n's normalized_key_cache into C structs. This key
55
+ cache grows over time (it eventually holds a key/value for every
56
+ translation key used in the app), so that's another area where I18nema
57
+ is nicer on ObjectSpace than vanilla I18n.
58
+
59
+ ### Faster Translation Lookups
60
+
61
+ Simple lookups (i.e. no options or interpolation) take a bit over 15%
62
+ less time.
63
+
64
+ Lookups with options see slightly bigger gains (over 20% less time), in
65
+ part due to some speedups on the ruby side of things (I18n uses
66
+ `Hash#except`, which is quite slow when you have a long list of
67
+ arguments).
68
+
69
+ ## Show me the benchmarks
70
+
71
+ Here are some basic ones done with `Benchmark.bmbm` (edited for brevity).
72
+ We run `I18n.translate` 100000 times on 4 different translation keys.
73
+ The `n` in `translate(n)` denotes how many parts there are in the key,
74
+ e.g. `I18n.t('foo') -> 1`, `I18n.t('foo.bar') -> 2`
75
+
76
+ ### simple `translate` (no options)
77
+
78
+ #### I18nema
79
+
80
+ user system total real
81
+ translate(1): 0.900000 0.010000 0.910000 ( 0.910228)
82
+ translate(2): 1.010000 0.010000 1.020000 ( 1.009545)
83
+ translate(3): 1.020000 0.010000 1.030000 ( 1.028098)
84
+ translate(4): 1.210000 0.000000 1.210000 ( 1.214737)
85
+
86
+ #### I18n
87
+
88
+ user system total real
89
+ translate(1): 1.000000 0.000000 1.000000 ( 1.007367)
90
+ translate(2): 1.260000 0.000000 1.260000 ( 1.268323)
91
+ translate(3): 1.320000 0.000000 1.320000 ( 1.315132)
92
+ translate(4): 1.390000 0.010000 1.400000 ( 1.393478)
93
+
94
+ ### `translate` with options (locale, interpolation)
95
+
96
+ #### I18nema
97
+
98
+ user system total real
99
+ translate(1): 0.950000 0.000000 0.950000 ( 0.943904)
100
+ translate(2): 1.040000 0.000000 1.040000 ( 1.036595)
101
+ translate(3): 1.060000 0.010000 1.070000 ( 1.059588)
102
+ translate(4): 1.240000 0.000000 1.240000 ( 1.237322)
103
+
104
+ #### I18n
105
+
106
+ user system total real
107
+ translate(1): 1.090000 0.000000 1.090000 ( 1.099866)
108
+ translate(2): 1.360000 0.000000 1.360000 ( 1.364869)
109
+ translate(3): 1.430000 0.000000 1.430000 ( 1.425103)
110
+ translate(4): 1.500000 0.010000 1.510000 ( 1.500952)
111
+
112
+ ## OK, so what's the catch?
113
+
114
+ I18nema is still a work in progress, so there are some compatibility
115
+ notes you should be aware of:
116
+
117
+ I18nema requires ruby 1.9.3 or later.
118
+
119
+ I18nema only supports `.yml` translation files (no `.rb`).
120
+
121
+ I18nema requires UTF-8 `.yml` files. That means that your translations
122
+ should actually be in their UTF-8 form (e.g. "Contraseña"), not some
123
+ escaped representation. I18nema uses a simplified syck implementation
124
+ and does not support many optional yml types (e.g. `binary`).
125
+
126
+ I18nema doesn't yet support symbols as translation *values* (note that
127
+ symbol [keys](http://guides.rubyonrails.org/i18n.html#basic-lookup-scopes-and-nested-keys)
128
+ and [defaults](http://guides.rubyonrails.org/i18n.html#defaults) work
129
+ just fine). Symbol values in your `.yml` file can be used in the same
130
+ way that symbol defaults can, i.e. they tell I18n to find the
131
+ translation under some other key.
132
+
@@ -0,0 +1,19 @@
1
require 'rake'
require 'rake/extensiontask'

desc 'Default: run unit tests.'
task :default => :test

require 'rake/testtask'
# The description previously named a different gem ("immigrant").
desc 'Test the i18nema plugin.'
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.libs << 'test'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end
# The tests exercise the C extension, so make sure it is compiled first.
Rake::Task[:test].prerequisites << :compile

# Builds the `i18nema` extension and places the result under lib/i18nema.
Rake::ExtensionTask.new('i18nema') do |ext|
  ext.lib_dir = File.join('lib', 'i18nema')
end
@@ -0,0 +1,5 @@
1
# mkmf configuration for the i18nema C extension.
require 'mkmf'

extension_name = 'i18nema/i18nema'

dir_config(extension_name)
have_header('st.h')
# The C source uses C99 constructs (declarations inside `for`, `//` comments).
$CFLAGS << ' -std=c99'
create_makefile(extension_name)
@@ -0,0 +1,601 @@
1
+ #include <ruby.h>
2
+ #include <ruby/encoding.h>
3
+ #include "vendor/syck.h"
4
+ #include "vendor/uthash.h"
5
+
6
/*
 * Evaluates to true when `item` is non-NULL and its type is not one of
 * true/false/null (this file keeps those three as static objects, so they
 * must never be passed to xfree).
 *
 * The argument and the whole expansion are parenthesized so the macro can
 * be negated or combined with other operators safely. `item` is still
 * evaluated more than once, so do not pass an expression with side effects.
 */
#define CAN_FREE(item) ((item) != NULL && (item)->type != i_type_true && (item)->type != i_type_false && (item)->type != i_type_null)
7
+
8
+ VALUE I18nema = Qnil,
9
+ I18nemaBackend = Qnil,
10
+ I18nemaBackendLoadError = Qnil;
11
+
12
+ struct i_object;
13
+ struct i_key_value;
14
+ static VALUE array_to_rarray(struct i_object *array);
15
+ static VALUE hash_to_rhash(struct i_object *hash);
16
+ static void merge_hash(struct i_object *hash, struct i_object *other_hash);
17
+ static void delete_hash(struct i_key_value **hash, int recurse);
18
+ static void delete_object(struct i_object *object, int recurse);
19
+ static void delete_object_r(struct i_object *object);
20
+ static VALUE normalize_key(VALUE self, VALUE key, VALUE separator);
21
+
22
/*
 * Tag describing what an i_object_t currently holds. The order is kept
 * as-is so that i_type_unused remains the first (zero) enumerator.
 */
enum i_object_type {
  i_type_unused,  /* payload was handed off elsewhere (see delete_syck_st_entry) */
  i_type_string,  /* data.string */
  i_type_array,   /* data.array */
  i_type_hash,    /* data.hash */
  i_type_int,     /* numeric text kept in data.string, converted on lookup */
  i_type_float,   /* numeric text kept in data.string, converted on lookup */
  i_type_symbol,  /* symbol name kept in data.string */
  i_type_true,    /* shared static object, never freed */
  i_type_false,   /* shared static object, never freed */
  i_type_null     /* shared static object, never freed */
};
34
+
35
/* Payload of an i_object_t; the active member is selected by its type tag. */
union i_object_data {
  char *string;             /* string, int, float and symbol objects */
  struct i_object *array;   /* contiguous block of elements for array objects */
  struct i_key_value *hash; /* uthash head pointer for hash objects (NULL when empty) */
};
40
+
41
+ typedef struct i_object
42
+ {
43
+ unsigned long size;
44
+ enum i_object_type type;
45
+ union i_object_data data;
46
+ } i_object_t;
47
+
48
+ typedef struct i_key_value
49
+ {
50
+ char *key;
51
+ struct i_object *value;
52
+ UT_hash_handle hh;
53
+ } i_key_value_t;
54
+
55
+ static int current_translation_count = 0;
56
+ static ID s_init_translations,
57
+ s_to_f,
58
+ s_to_s,
59
+ s_to_sym;
60
+ static i_object_t i_object_null,
61
+ i_object_true,
62
+ i_object_false;
63
+
64
+ static VALUE
65
+ i_object_to_robject(i_object_t *object) {
66
+ VALUE s;
67
+ if (object == NULL)
68
+ return Qnil;
69
+ switch (object->type) {
70
+ case i_type_string:
71
+ return rb_enc_str_new(object->data.string, object->size, rb_utf8_encoding());
72
+ case i_type_array:
73
+ return array_to_rarray(object);
74
+ case i_type_hash:
75
+ return hash_to_rhash(object);
76
+ case i_type_int:
77
+ return rb_cstr2inum(object->data.string, 10);
78
+ case i_type_float:
79
+ s = rb_str_new(object->data.string, object->size);
80
+ return rb_funcall(s, s_to_f, 0);
81
+ case i_type_symbol:
82
+ return ID2SYM(rb_intern(object->data.string));
83
+ case i_type_true:
84
+ return Qtrue;
85
+ case i_type_false:
86
+ return Qfalse;
87
+ default:
88
+ return Qnil;
89
+ }
90
+ }
91
+
92
+ static VALUE
93
+ array_to_rarray(i_object_t *array)
94
+ {
95
+ VALUE result = rb_ary_new2(array->size);
96
+ for (unsigned long i = 0; i < array->size; i++)
97
+ rb_ary_store(result, i, i_object_to_robject(&array->data.array[i]));
98
+ return result;
99
+ }
100
+
101
+ static VALUE
102
+ hash_to_rhash(i_object_t *hash)
103
+ {
104
+ i_key_value_t *handle = hash->data.hash;
105
+ VALUE result = rb_hash_new();
106
+ for (; handle != NULL; handle = handle->hh.next)
107
+ rb_hash_aset(result, ID2SYM(rb_intern(handle->key)), i_object_to_robject(handle->value));
108
+ return result;
109
+ }
110
+
111
+ static i_object_t*
112
+ i_object_get(VALUE self, const char *iv)
113
+ {
114
+ i_object_t *object;
115
+ VALUE wrapped;
116
+ wrapped = rb_iv_get(self, iv);
117
+ Data_Get_Struct(wrapped, i_object_t, object);
118
+ return object;
119
+ }
120
+
121
+ static i_object_t*
122
+ translations_get(VALUE self)
123
+ {
124
+ return i_object_get(self, "@translations");
125
+ }
126
+
127
+ static i_object_t*
128
+ normalized_key_cache_get(VALUE self)
129
+ {
130
+ return i_object_get(self, "@normalized_key_cache");
131
+ }
132
+
133
+ static i_object_t*
134
+ hash_get(i_object_t *current, VALUE *keys, int num_keys)
135
+ {
136
+ i_key_value_t *kv = NULL;
137
+ for (int i = 0; i < num_keys && current != NULL && current->type == i_type_hash; i++) {
138
+ Check_Type(keys[i], T_STRING);
139
+ HASH_FIND_STR(current->data.hash, StringValueCStr(keys[i]), kv);
140
+ current = kv == NULL ? NULL : kv->value;
141
+ }
142
+ return current;
143
+ }
144
+
145
+ /*
146
+ * call-seq:
147
+ * backend.direct_lookup([part]+) -> localized_str
148
+ *
149
+ * Returns the translation(s) found under the specified key.
150
+ *
151
+ * backend.direct_lookup("en", "foo", "bar") #=> "lol"
152
+ * backend.direct_lookup("en", "foo") #=> {"bar"=>"lol", "baz"=>["asdf", "qwerty"]}
153
+ */
154
+
155
+ static VALUE
156
+ direct_lookup(int argc, VALUE *argv, VALUE self)
157
+ {
158
+ i_object_t *translations = translations_get(self);
159
+ return i_object_to_robject(hash_get(translations, argv, argc));
160
+ }
161
+
162
+ static void
163
+ empty_object(i_object_t *object, int recurse)
164
+ {
165
+ if (object == NULL)
166
+ return;
167
+
168
+ switch (object->type) {
169
+ case i_type_array:
170
+ if (recurse)
171
+ for (unsigned long i = 0; i < object->size; i++)
172
+ empty_object(&object->data.array[i], 1);
173
+ xfree(object->data.array);
174
+ break;
175
+ case i_type_hash:
176
+ delete_hash(&object->data.hash, recurse);
177
+ break;
178
+ case i_type_unused:
179
+ break;
180
+ default:
181
+ xfree(object->data.string);
182
+ break;
183
+ }
184
+ }
185
+
186
+ static void
187
+ delete_object(i_object_t *object, int recurse)
188
+ {
189
+ empty_object(object, recurse);
190
+ if (CAN_FREE(object))
191
+ xfree(object);
192
+ }
193
+
194
+ static void
195
+ delete_object_r(i_object_t *object)
196
+ {
197
+ delete_object(object, 1);
198
+ }
199
+
200
+ static void
201
+ delete_key_value(i_key_value_t *kv, int delete_value)
202
+ {
203
+ if (delete_value)
204
+ delete_object_r(kv->value);
205
+ xfree(kv->key);
206
+ xfree(kv);
207
+ }
208
+
209
+ static void
210
+ delete_hash(i_key_value_t **hash, int recurse)
211
+ {
212
+ i_key_value_t *kv, *tmp;
213
+ HASH_ITER(hh, *hash, kv, tmp) {
214
+ HASH_DEL(*hash, kv);
215
+ delete_key_value(kv, recurse);
216
+ }
217
+ }
218
+
219
+ static void
220
+ add_key_value(i_key_value_t **hash, i_key_value_t *kv)
221
+ {
222
+ i_key_value_t *existing = NULL;
223
+ HASH_FIND_STR(*hash, kv->key, existing);
224
+
225
+ if (existing != NULL) {
226
+ if (existing->value->type == i_type_hash && kv->value->type == i_type_hash) {
227
+ merge_hash(existing->value, kv->value);
228
+ delete_key_value(kv, 0);
229
+ return;
230
+ }
231
+ HASH_DEL(*hash, existing);
232
+ delete_key_value(existing, 1);
233
+ }
234
+ HASH_ADD_KEYPTR(hh, *hash, kv->key, strlen(kv->key), kv);
235
+ }
236
+
237
+ static void
238
+ merge_hash(i_object_t *hash, i_object_t *other_hash)
239
+ {
240
+ i_key_value_t *kv, *tmp;
241
+
242
+ HASH_ITER(hh, other_hash->data.hash, kv, tmp) {
243
+ HASH_DEL(other_hash->data.hash, kv);
244
+ add_key_value(&hash->data.hash, kv);
245
+ }
246
+ delete_object_r(other_hash);
247
+ }
248
+
249
+ static int
250
+ delete_syck_st_entry(char *key, char *value, char *arg)
251
+ {
252
+ i_object_t *object = (i_object_t *)value;
253
+ // key object whose string we have yoinked into a kv, or item that
254
+ // has been copied into an array
255
+ if (object->type == i_type_unused)
256
+ delete_object_r(object);
257
+ return ST_DELETE;
258
+ }
259
+
260
+ static int
261
+ delete_syck_object(char *key, char *value, char *arg)
262
+ {
263
+ i_object_t *object = (i_object_t *)value;
264
+ delete_object(object, 0); // objects are in the syck symbol table, thus we don't want to double-free
265
+ return ST_DELETE;
266
+ }
267
+
268
+ static void
269
+ handle_syck_error(SyckParser *parser, const char *str)
270
+ {
271
+ char *endl = parser->cursor;
272
+ while (*endl != '\0' && *endl != '\n')
273
+ endl++;
274
+ endl[0] = '\0';
275
+
276
+ if (parser->syms)
277
+ st_foreach(parser->syms, delete_syck_object, 0);
278
+ rb_raise(I18nemaBackendLoadError, "%s on line %d, col %ld: `%s'", str, parser->linect + 1, parser->cursor - parser->lineptr, parser->lineptr);
279
+ }
280
+
281
+ static SyckNode*
282
+ handle_syck_badanchor(SyckParser *parser, char *anchor)
283
+ {
284
+ char error[strlen(anchor) + 14];
285
+ sprintf(error, "bad anchor `%s'", anchor);
286
+ handle_syck_error(parser, error);
287
+ return NULL;
288
+ }
289
+
290
/*
 * Returns a newly allocated, NUL-terminated copy of the first `len`
 * bytes of `orig`. The caller owns the result and releases it with xfree.
 *
 * memcpy is used rather than strncpy: strncpy stops at the first NUL and
 * zero-fills the remainder, which would silently drop data that callers
 * still account for in the stored length.
 */
static char*
new_string(const char *orig, long len)
{
  char *str = xmalloc(len + 1);
  memcpy(str, orig, len);
  str[len] = '\0';
  return str;
}
298
+
299
+ static void
300
+ set_string_object(i_object_t *object, char *str, long len)
301
+ {
302
+ object->type = i_type_string;
303
+ object->size = len;
304
+ object->data.string = new_string(str, len);
305
+ }
306
+
307
+ static i_object_t*
308
+ new_string_object(char *str, long len)
309
+ {
310
+ i_object_t *object = ALLOC(i_object_t);
311
+ set_string_object(object, str, len);
312
+ return object;
313
+ }
314
+
315
+ static i_object_t*
316
+ new_array_object(long size)
317
+ {
318
+ i_object_t *object = ALLOC(i_object_t);
319
+ object->type = i_type_array;
320
+ object->size = size;
321
+ object->data.array = ALLOC_N(i_object_t, size);
322
+ return object;
323
+ }
324
+
325
+ static i_object_t*
326
+ new_hash_object()
327
+ {
328
+ i_object_t *object = ALLOC(i_object_t);
329
+ object->type = i_type_hash;
330
+ object->data.hash = NULL;
331
+ return object;
332
+ }
333
+
334
+ static i_key_value_t*
335
+ new_key_value(char *key, i_object_t *value)
336
+ {
337
+ i_key_value_t *kv = ALLOC(i_key_value_t);
338
+ kv->key = key;
339
+ kv->value = value;
340
+ return kv;
341
+ }
342
+
343
+ static SYMID
344
+ handle_syck_node(SyckParser *parser, SyckNode *node)
345
+ {
346
+ i_object_t *result;
347
+ SYMID oid;
348
+
349
+ switch (node->kind) {
350
+ case syck_str_kind:
351
+ if (node->type_id == NULL) {
352
+ result = new_string_object(node->data.str->ptr, node->data.str->len);
353
+ } else if (strcmp(node->type_id, "null") == 0) {
354
+ result = &i_object_null;
355
+ } else if (strcmp(node->type_id, "bool#yes") == 0) {
356
+ result = &i_object_true;
357
+ } else if (strcmp(node->type_id, "bool#no") == 0) {
358
+ result = &i_object_false;
359
+ } else if (strcmp(node->type_id, "int") == 0) {
360
+ syck_str_blow_away_commas(node);
361
+ result = new_string_object(node->data.str->ptr, node->data.str->len);
362
+ result->type = i_type_int;
363
+ } else if (strcmp(node->type_id, "float#fix") == 0 || strcmp(node->type_id, "float#exp") == 0) {
364
+ syck_str_blow_away_commas(node);
365
+ result = new_string_object(node->data.str->ptr, node->data.str->len);
366
+ result->type = i_type_float;
367
+ } else if (node->data.str->style == scalar_plain && node->data.str->len > 1 && strncmp(node->data.str->ptr, ":", 1) == 0) {
368
+ result = new_string_object(node->data.str->ptr + 1, node->data.str->len - 1);
369
+ result->type = i_type_symbol;
370
+ } else {
371
+ // legit strings, and everything else get the string treatment (binary, int#hex, timestamp, etc.)
372
+ result = new_string_object(node->data.str->ptr, node->data.str->len);
373
+ }
374
+ break;
375
+ case syck_seq_kind:
376
+ result = new_array_object(node->data.list->idx);
377
+ for (long i = 0; i < node->data.list->idx; i++) {
378
+ i_object_t *item = NULL;
379
+
380
+ oid = syck_seq_read(node, i);
381
+ syck_lookup_sym(parser, oid, (void **)&item);
382
+ if (item->type == i_type_string)
383
+ current_translation_count++;
384
+ memcpy(&result->data.array[i], item, sizeof(i_object_t));
385
+ if (CAN_FREE(item))
386
+ item->type = i_type_unused;
387
+ }
388
+ break;
389
+ case syck_map_kind:
390
+ result = new_hash_object();
391
+ for (long i = 0; i < node->data.pairs->idx; i++) {
392
+ i_object_t *key = NULL, *value = NULL;
393
+
394
+ oid = syck_map_read(node, map_key, i);
395
+ syck_lookup_sym(parser, oid, (void **)&key);
396
+ oid = syck_map_read(node, map_value, i);
397
+ syck_lookup_sym(parser, oid, (void **)&value);
398
+
399
+ i_key_value_t *kv = new_key_value(key->data.string, value);
400
+ key->type = i_type_unused; // so we know to free this node in delete_syck_st_entry
401
+ if (value->type == i_type_string)
402
+ current_translation_count++;
403
+ add_key_value(&result->data.hash, kv);
404
+ }
405
+ break;
406
+ }
407
+
408
+
409
+ return syck_add_sym(parser, (char *)result);
410
+ }
411
+
412
+ /*
413
+ * call-seq:
414
+ * backend.load_yaml_string(yaml_str) -> num_translations
415
+ *
416
+ * Loads translations from the specified yaml string, and returns the
417
+ * number of (new) translations stored.
418
+ *
419
+ * backend.load_yaml_string("en:\n foo: bar") #=> 1
420
+ */
421
+
422
+ static VALUE
423
+ load_yml_string(VALUE self, VALUE yml)
424
+ {
425
+ SYMID oid;
426
+ i_object_t *root_object = translations_get(self);
427
+ i_object_t *new_root_object = NULL;
428
+ current_translation_count = 0;
429
+ SyckParser* parser = syck_new_parser();
430
+ syck_parser_handler(parser, handle_syck_node);
431
+ StringValue(yml);
432
+ syck_parser_str(parser, RSTRING_PTR(yml), RSTRING_LEN(yml), NULL);
433
+ syck_parser_bad_anchor_handler(parser, handle_syck_badanchor);
434
+ syck_parser_error_handler(parser, handle_syck_error);
435
+
436
+ oid = syck_parse(parser);
437
+ syck_lookup_sym(parser, oid, (void **)&new_root_object);
438
+ if (parser->syms)
439
+ st_foreach(parser->syms, delete_syck_st_entry, 0);
440
+ syck_free_parser(parser);
441
+ if (new_root_object == NULL || new_root_object->type != i_type_hash) {
442
+ delete_object_r(new_root_object);
443
+ rb_raise(I18nemaBackendLoadError, "root yml node is not a hash");
444
+ }
445
+ merge_hash(root_object, new_root_object);
446
+
447
+ return INT2NUM(current_translation_count);
448
+ }
449
+
450
+ /*
451
+ * call-seq:
452
+ * backend.available_locales -> locales
453
+ *
454
+ * Returns the currently loaded locales. Order is not guaranteed.
455
+ *
456
+ * backend.available_locales #=> [:en, :es]
457
+ */
458
+
459
+ static VALUE
460
+ available_locales(VALUE self)
461
+ {
462
+ if (!RTEST(rb_iv_get(self, "@initialized")))
463
+ rb_funcall(self, s_init_translations, 0);
464
+ i_object_t *root_object = translations_get(self);
465
+ i_key_value_t *current = root_object->data.hash;
466
+ VALUE ary = rb_ary_new2(0);
467
+
468
+ for (; current != NULL; current = current->hh.next)
469
+ rb_ary_push(ary, rb_str_intern(rb_str_new2(current->key)));
470
+
471
+ return ary;
472
+ }
473
+
474
+ /*
475
+ * call-seq:
476
+ * backend.reload! -> true
477
+ *
478
+ * Clears out all currently stored translations.
479
+ *
480
+ * backend.reload! #=> true
481
+ */
482
+
483
+ static VALUE
484
+ reload(VALUE self)
485
+ {
486
+ i_object_t *root_object = translations_get(self);
487
+ empty_object(root_object, 1);
488
+ rb_iv_set(self, "@initialized", Qfalse);
489
+ return Qtrue;
490
+ }
491
+
492
+ static VALUE
493
+ join_array_key(VALUE self, VALUE key, VALUE separator)
494
+ {
495
+ long len = RARRAY_LEN(key);
496
+ if (len == 0)
497
+ return rb_str_new("", 0);
498
+
499
+ VALUE ret = rb_ary_join(normalize_key(self, RARRAY_PTR(key)[0], separator), separator);
500
+ for (long i = 1; i < len; i++) {
501
+ rb_str_concat(ret, separator);
502
+ rb_str_concat(ret, rb_ary_join(normalize_key(self, RARRAY_PTR(key)[i], separator), separator));
503
+ }
504
+ return ret;
505
+ }
506
+
507
+ /*
508
+ * call-seq:
509
+ * backend.normalize_key(key, separator) -> key
510
+ *
511
+ * Normalizes and splits a key based on the separator.
512
+ *
513
+ * backend.normalize_key "asdf", "." #=> ["asdf"]
514
+ * backend.normalize_key "a.b.c", "." #=> ["a", "b", "c"]
515
+ * backend.normalize_key "a.b.c", ":" #=> ["a.b.c"]
516
+ * backend.normalize_key %{a b.c}, "." #=> ["a", "b", "c"]
517
+ */
518
+
519
+ static VALUE
520
+ normalize_key(VALUE self, VALUE key, VALUE separator)
521
+ {
522
+ Check_Type(separator, T_STRING);
523
+
524
+ i_object_t *key_map = normalized_key_cache_get(self),
525
+ *sub_map = hash_get(key_map, &separator, 1);
526
+ if (sub_map == NULL) {
527
+ sub_map = new_hash_object();
528
+ char *key = new_string(RSTRING_PTR(separator), RSTRING_LEN(separator));
529
+ i_key_value_t *kv = new_key_value(key, sub_map);
530
+ add_key_value(&key_map->data.hash, kv);
531
+ }
532
+
533
+ if (TYPE(key) == T_ARRAY)
534
+ key = join_array_key(self, key, separator);
535
+ else if (TYPE(key) != T_STRING)
536
+ key = rb_funcall(key, s_to_s, 0);
537
+
538
+ i_object_t *key_frd = hash_get(sub_map, &key, 1);
539
+
540
+ if (key_frd == NULL) {
541
+ char *sep = StringValueCStr(separator);
542
+ VALUE parts = rb_str_split(key, sep);
543
+ long parts_len = RARRAY_LEN(parts),
544
+ skipped = 0;
545
+ key_frd = new_array_object(parts_len);
546
+ for (long i = 0; i < parts_len; i++) {
547
+ VALUE part = RARRAY_PTR(parts)[i];
548
+ // TODO: don't alloc for empty strings, since we discard them
549
+ if (RSTRING_LEN(part) == 0)
550
+ skipped++;
551
+ else
552
+ set_string_object(&key_frd->data.array[i - skipped], RSTRING_PTR(part), RSTRING_LEN(part));
553
+ }
554
+ key_frd->size -= skipped;
555
+
556
+ char *key_orig = new_string(RSTRING_PTR(key), RSTRING_LEN(key));
557
+ i_key_value_t *kv = new_key_value(key_orig, key_frd);
558
+ add_key_value(&sub_map->data.hash, kv);
559
+ }
560
+ return i_object_to_robject(key_frd);
561
+ }
562
+
563
+ static VALUE
564
+ initialize(VALUE self)
565
+ {
566
+ VALUE translations, key_cache;
567
+
568
+ i_object_t *root_object = new_hash_object();
569
+ translations = Data_Wrap_Struct(I18nemaBackend, 0, delete_object_r, root_object);
570
+ rb_iv_set(self, "@translations", translations);
571
+
572
+ i_object_t *key_map = new_hash_object();
573
+ key_cache = Data_Wrap_Struct(I18nemaBackend, 0, delete_object_r, key_map);
574
+ rb_iv_set(self, "@normalized_key_cache", key_cache);
575
+
576
+ return self;
577
+ }
578
+
579
+ void
580
+ Init_i18nema()
581
+ {
582
+ I18nema = rb_define_module("I18nema");
583
+ I18nemaBackend = rb_define_class_under(I18nema, "Backend", rb_cObject);
584
+ I18nemaBackendLoadError = rb_define_class_under(I18nemaBackend, "LoadError", rb_eStandardError);
585
+
586
+ s_init_translations = rb_intern("init_translations");
587
+ s_to_f = rb_intern("to_f");
588
+ s_to_s = rb_intern("to_s");
589
+ s_to_sym = rb_intern("to_sym");
590
+
591
+ i_object_null.type = i_type_null;
592
+ i_object_true.type = i_type_true;
593
+ i_object_false.type = i_type_false;
594
+
595
+ rb_define_method(I18nemaBackend, "initialize", initialize, 0);
596
+ rb_define_method(I18nemaBackend, "load_yml_string", load_yml_string, 1);
597
+ rb_define_method(I18nemaBackend, "available_locales", available_locales, 0);
598
+ rb_define_method(I18nemaBackend, "reload!", reload, 0);
599
+ rb_define_method(I18nemaBackend, "direct_lookup", direct_lookup, -1);
600
+ rb_define_method(I18nemaBackend, "normalize_key", normalize_key, 2);
601
+ }