fairy 0.6.0 → 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. data/Makefile +1 -0
  2. data/bin/fairy +35 -5
  3. data/ext/extconf.rb +3 -0
  4. data/ext/fairy.c +180 -0
  5. data/ext/fairy.h +94 -0
  6. data/ext/fiber_mon.h +32 -0
  7. data/ext/fixnum-buffer.c +483 -0
  8. data/ext/p-group-by.c +529 -0
  9. data/ext/p-xgroup-by.c +467 -0
  10. data/ext/simple-hash.c +44 -0
  11. data/ext/string-buffer.c +286 -0
  12. data/ext/xmarshaled-queue.c +699 -0
  13. data/ext/xsized-queue.c +528 -0
  14. data/ext/xthread.h +65 -0
  15. data/fairy.gemspec +5 -2
  16. data/lib/fairy.rb +10 -1
  17. data/lib/fairy/client/group-by.rb +57 -2
  18. data/lib/fairy/client/here.rb +2 -1
  19. data/lib/fairy/controller.rb +25 -4
  20. data/lib/fairy/master.rb +17 -3
  21. data/lib/fairy/master/c-basic-group-by.rb +4 -2
  22. data/lib/fairy/master/c-cat.rb +3 -2
  23. data/lib/fairy/master/c-direct-product.rb +5 -3
  24. data/lib/fairy/master/c-filter.rb +5 -3
  25. data/lib/fairy/master/c-group-by.rb +13 -0
  26. data/lib/fairy/master/c-junction.rb +3 -2
  27. data/lib/fairy/master/c-seg-join.rb +3 -1
  28. data/lib/fairy/master/c-seg-shuffle.rb +3 -2
  29. data/lib/fairy/master/c-seg-split.rb +1 -1
  30. data/lib/fairy/master/c-seg-zip.rb +3 -1
  31. data/lib/fairy/master/c-sort.rb +7 -2
  32. data/lib/fairy/master/c-wc.rb +5 -3
  33. data/lib/fairy/node.rb +13 -2
  34. data/lib/fairy/node/p-barrier.rb +1 -1
  35. data/lib/fairy/node/p-basic-group-by.rb +22 -12
  36. data/lib/fairy/node/p-direct-product.rb +4 -2
  37. data/lib/fairy/node/p-filter.rb +8 -7
  38. data/lib/fairy/node/p-find.rb +2 -1
  39. data/lib/fairy/node/p-group-by.rb +17 -6
  40. data/lib/fairy/node/p-inject.rb +3 -2
  41. data/lib/fairy/node/p-output-file.rb +1 -1
  42. data/lib/fairy/node/p-seg-join.rb +2 -1
  43. data/lib/fairy/node/p-seg-zip.rb +2 -1
  44. data/lib/fairy/node/p-single-exportable.rb +3 -1
  45. data/lib/fairy/node/p-sort.rb +4 -2
  46. data/lib/fairy/node/p-task.rb +1 -1
  47. data/lib/fairy/node/p-wc.rb +5 -2
  48. data/lib/fairy/processor.rb +25 -18
  49. data/lib/fairy/share/block-source.rb +12 -2
  50. data/lib/fairy/share/conf.rb +35 -5
  51. data/lib/fairy/share/hash-simple-hash.rb +1 -1
  52. data/lib/fairy/share/log.rb +11 -4
  53. data/lib/fairy/share/pool-dictionary.rb +2 -1
  54. data/lib/fairy/share/port-marshaled-queue.rb +8 -1
  55. data/lib/fairy/share/port.rb +55 -45
  56. data/lib/fairy/share/reference.rb +2 -1
  57. data/lib/fairy/share/varray.rb +3 -1
  58. data/lib/fairy/share/vfile.rb +4 -2
  59. data/lib/fairy/version.rb +1 -1
  60. data/sample/sort.rb +69 -3
  61. data/spec/fairy8_spec.rb +1 -1
  62. data/test/testc.rb +380 -2
  63. data/tools/cap_recipe/Capfile +3 -3
  64. data/tools/fairy_conf_wizard.rb +375 -0
  65. data/tools/fairy_perf_graph.rb +15 -3
  66. data/tools/git-tag +1 -0
  67. data/tools/log-analysis.rb +59 -11
  68. metadata +33 -34
  69. data/ext/simple_hash/extconf.rb +0 -4
  70. data/ext/simple_hash/simple_hash.c +0 -42
data/ext/p-xgroup-by.c ADDED
@@ -0,0 +1,467 @@
1
+ /**********************************************************************
2
+
3
+ p-xgroup-by.c -
4
+ Copyright (C) 2007-2011 Rakuten, Inc.
5
+
6
+ **********************************************************************/
7
+
8
+ #include "ruby.h"
9
+ #include "xthread.h"
10
+
11
+ #include "fairy.h"
12
+
13
+ static ID id_init_key_proc;
14
+ static ID id_aref;
15
+ static ID id_yield;
16
+ static ID id_each;
17
+ static ID id_push;
18
+ static ID id_add_exports;
19
+ static ID id_start;
20
+ static ID id_set_njob_id;
21
+ static ID id_add_key;
22
+
23
+ static ID id_close;
24
+
25
+ static VALUE rb_cFairyPGroupBy;
26
+ static VALUE PPostFilter;
27
+
28
+ VALUE rb_cFairyPXGroupBy;
29
+ VALUE rb_cFairyPXGPostfilter;
30
+
31
+ typedef struct rb_fairy_p_xgroup_by_struct
32
+ {
33
+ VALUE bjob;
34
+ VALUE input;
35
+ VALUE opts;
36
+ VALUE id;
37
+ long mod;
38
+
39
+ VALUE postqueuing_policy;
40
+ VALUE exports_queue;
41
+ VALUE key_proc;
42
+
43
+ VALUE *exports;
44
+ long *counter;
45
+ } fairy_p_xgroup_by_t;
46
+
47
+ #define GetFairyPXGroupByPtr(obj, tobj) \
48
+ TypedData_Get_Struct((obj), fairy_p_xgroup_by_t, &fairy_p_xgroup_by_data_type, (tobj))
49
+
50
+ static void
51
+ fairy_p_xgroup_by_mark(void *ptr)
52
+ {
53
+ fairy_p_xgroup_by_t *gb = (fairy_p_xgroup_by_t*)ptr;
54
+
55
+ rb_gc_mark(gb->bjob);
56
+ rb_gc_mark(gb->input);
57
+ rb_gc_mark(gb->opts);
58
+ rb_gc_mark(gb->id);
59
+
60
+ rb_gc_mark(gb->postqueuing_policy);
61
+ rb_gc_mark(gb->exports_queue);
62
+ rb_gc_mark(gb->key_proc);
63
+
64
+ if (gb->exports) {
65
+ int i;
66
+ for (i = 0; i < gb->mod; i++) {
67
+ if (!NIL_P(gb->exports[i])) {
68
+ rb_gc_mark(gb->exports[i]);
69
+ }
70
+ }
71
+ }
72
+ }
73
+
74
+ static void
75
+ fairy_p_xgroup_by_free(void *ptr)
76
+ {
77
+ fairy_p_xgroup_by_t *gb = (fairy_p_xgroup_by_t*)ptr;
78
+
79
+ if(gb->exports) {
80
+ ruby_xfree(gb->exports);
81
+ }
82
+ if (gb->counter) {
83
+ ruby_xfree(gb->counter);
84
+ }
85
+ ruby_xfree(ptr);
86
+ }
87
+
88
+ static size_t
89
+ fairy_p_xgroup_by_memsize(const void *ptr)
90
+ {
91
+ fairy_p_xgroup_by_t *gb = (fairy_p_xgroup_by_t*)ptr;
92
+
93
+ return ptr ? sizeof(fairy_p_xgroup_by_t) +(sizeof(VALUE) + sizeof(long)) * gb->mod : 0;
94
+ }
95
+
96
+
97
+ #ifdef HAVE_RB_DATA_TYPE_T_FUNCTION
98
+ static const rb_data_type_t fairy_p_xgroup_by_data_type = {
99
+ "fairy_p_xgroup_by",
100
+ {fairy_p_xgroup_by_mark, fairy_p_xgroup_by_free, fairy_p_xgroup_by_memsize,},
101
+ };
102
+ #else
103
+ static const rb_data_type_t fairy_p_xgroup_by_data_type = {
104
+ "fairy_p_xgroup_by",
105
+ fairy_p_xgroup_by_mark,
106
+ fairy_p_xgroup_by_free,
107
+ fairy_p_xgroup_by_memsize,
108
+ };
109
+ #endif
110
+
111
+
112
+ static VALUE
113
+ fairy_p_xgroup_by_alloc(VALUE klass)
114
+ {
115
+ VALUE volatile obj;
116
+ fairy_p_xgroup_by_t *gb;
117
+
118
+ obj = TypedData_Make_Struct(klass, fairy_p_xgroup_by_t, &fairy_p_xgroup_by_data_type, gb);
119
+
120
+ gb->bjob = Qnil;
121
+ gb->input = Qnil;
122
+ gb->opts = Qnil;
123
+ gb->id = Qnil;
124
+
125
+ gb->mod = 0;
126
+
127
+ gb->postqueuing_policy = Qnil;
128
+ gb->exports_queue = Qnil;
129
+ gb->key_proc = Qnil;
130
+
131
+ gb->exports = NULL;
132
+ gb->counter = NULL;
133
+
134
+ return obj;
135
+ }
136
+
137
+ static VALUE
138
+ rb_fairy_p_xgroup_by_initialize(VALUE self, VALUE id, VALUE ntask, VALUE bjob, VALUE opts, VALUE block_source)
139
+ {
140
+ fairy_p_xgroup_by_t *gb;
141
+ VALUE argv[] = {
142
+ id,
143
+ ntask,
144
+ bjob,
145
+ opts,
146
+ block_source,
147
+ };
148
+
149
+ GetFairyPXGroupByPtr(self, gb);
150
+ rb_call_super(5, argv);
151
+
152
+ gb->bjob = rb_iv_get(self, "@bjob");
153
+ gb->opts = rb_iv_get(self, "@opts");
154
+ gb->id = rb_iv_get(self, "@id");
155
+
156
+ gb->postqueuing_policy = rb_fairy_conf(NULL, gb->opts, "postqueuing_policy");
157
+ gb->mod = NUM2LONG(rb_fairy_conf("GROUP_BY_NO_SEGMENT", gb->opts, "no_segment"));
158
+ gb->exports_queue = rb_iv_get(self, "@exports_queue");
159
+
160
+ gb->exports = ALLOC_N(VALUE, gb->mod);
161
+ {
162
+ long i;
163
+ for (i = 0; i < gb->mod; i++) {
164
+ gb->exports[i] = Qnil;
165
+ }
166
+ }
167
+
168
+ gb->counter = ALLOC_N(long, gb->mod);
169
+ {
170
+ long i;
171
+ for (i = 0; i < gb->mod; i++) {
172
+ gb->counter[i] = 0;
173
+ }
174
+ }
175
+
176
+ return self;
177
+ }
178
+
179
+ static VALUE
180
+ rb_fairy_p_xgroup_by_add_export(VALUE self, long key, VALUE export)
181
+ {
182
+ fairy_p_xgroup_by_t *gb;
183
+ GetFairyPXGroupByPtr(self, gb);
184
+
185
+ gb->exports[key] = export;
186
+ rb_funcall(gb->bjob, id_add_exports, 3, LONG2NUM(key), export, self);
187
+ return self;
188
+ }
189
+
190
+ static VALUE start_block(VALUE, VALUE, int, VALUE*);
191
+ static VALUE start_main(VALUE);
192
+ static VALUE start_main_i(VALUE, VALUE, int, VALUE*);
193
+
194
+ static VALUE
195
+ rb_fairy_p_xgroup_by_start_export(VALUE self)
196
+ {
197
+ fairy_p_xgroup_by_t *gb;
198
+ GetFairyPXGroupByPtr(self, gb);
199
+
200
+ rb_fairy_debug(self, "START_EXPORT");
201
+
202
+ gb->key_proc = rb_funcall(self, id_init_key_proc, 0);
203
+
204
+ return rb_block_call(self, id_start, 0, 0, start_block, self);
205
+ }
206
+
207
+ static VALUE
208
+ start_block(VALUE e, VALUE self, int argc, VALUE *argv)
209
+ {
210
+ VALUE result;
211
+ fairy_p_xgroup_by_t *gb;
212
+ int state;
213
+
214
+ GetFairyPXGroupByPtr(self, gb);
215
+
216
+ gb->input = rb_iv_get(self, "@input");
217
+
218
+ result = rb_protect(start_main, self, &state);
219
+ {
220
+ long i;
221
+ rb_xthread_queue_push(gb->exports_queue, Qnil);
222
+ for (i = 0; i < gb->mod; i++) {
223
+ static char buf[256];
224
+
225
+ snprintf(buf, sizeof(buf), "G0 %d => %d", i, gb->counter[i]);
226
+ rb_fairy_debug(self, buf);
227
+
228
+ if (!NIL_P(gb->exports[i])) {
229
+ rb_funcall(gb->exports[i], id_push, 1, rb_FairyEOS);
230
+ }
231
+ }
232
+
233
+ if (state) {
234
+ rb_fairy_debug_exception(self);
235
+ rb_jump_tag(state);
236
+ }
237
+ }
238
+ return result;
239
+ }
240
+
241
+ static VALUE
242
+ start_main(VALUE self)
243
+ {
244
+ fairy_p_xgroup_by_t *gb;
245
+
246
+ GetFairyPXGroupByPtr(self, gb);
247
+ return rb_block_call(gb->input, id_each, 0, 0, start_main_i, self);
248
+
249
+ }
250
+
251
+ static VALUE
252
+ start_main_i(VALUE e, VALUE self, int argc, VALUE *argv)
253
+ {
254
+ fairy_p_xgroup_by_t *gb;
255
+ VALUE key;
256
+ unsigned int hashkey;
257
+ volatile VALUE export;
258
+
259
+ GetFairyPXGroupByPtr(self, gb);
260
+ if (CLASS_OF(gb->key_proc) == rb_cProc) {
261
+ key = rb_proc_call(gb->key_proc, rb_ary_new3(1, e));
262
+ }
263
+ else {
264
+ key = rb_funcall(gb->key_proc, id_yield, 1, e);
265
+ }
266
+
267
+ if (CLASS_OF(key) == rb_cFairyImportCTLTOKEN_NULLVALUE) {
268
+ return self;
269
+ }
270
+
271
+ hashkey = rb_fairy_simple_hash_uint(rb_mFairySimpleHash, key) % gb->mod;
272
+ export = gb->exports[hashkey];
273
+ if (NIL_P(export)) {
274
+ export = rb_class_new_instance(1, &gb->postqueuing_policy, rb_cFairyExport);
275
+ rb_funcall(export, id_set_njob_id, 1, gb->id);
276
+ rb_funcall(export, id_add_key, 1, INT2FIX(hashkey));
277
+ rb_fairy_p_xgroup_by_add_export(self, hashkey, export);
278
+ }
279
+ rb_funcall(export, id_push, 1, e);
280
+ gb->counter[hashkey]++;
281
+ return self;
282
+ }
283
+
284
+ #define xpf(name) fairy_pxg_postfilter##name
285
+ #define rb_xpf(name) rb_fairy_pxg_postfilter##name
286
+
287
+ typedef struct rb_xpf(_struct)
288
+ {
289
+ VALUE opts;
290
+ VALUE buffering_policy;
291
+ VALUE buffering_class;
292
+ VALUE key_proc;
293
+ VALUE key_value_buffer;
294
+ } xpf(_t);
295
+
296
+ #define GetFairyPXGPostFilterPtr(obj, tobj) \
297
+ TypedData_Get_Struct((obj), xpf(_t), &xpf(_data_type), (tobj))
298
+
299
+ #define GetFXPFPtr(obj, tobj) \
300
+ GetFairyPXGPostFilterPtr(obj, tobj)
301
+
302
+
303
+ static void
304
+ xpf(_mark)(void *ptr)
305
+ {
306
+ xpf(_t) *pf = (xpf(_t)*)ptr;
307
+ rb_gc_mark(pf->opts);
308
+ rb_gc_mark(pf->buffering_policy);
309
+ rb_gc_mark(pf->buffering_class);
310
+ rb_gc_mark(pf->key_proc);
311
+ rb_gc_mark(pf->key_value_buffer);
312
+ }
313
+
314
+ static void
315
+ xpf(_free)(void *ptr)
316
+ {
317
+ ruby_xfree(ptr);
318
+ }
319
+
320
+ static size_t
321
+ xpf(_memsize)(const void *ptr)
322
+ {
323
+ xpf(_t) *pf = (xpf(_t)*)ptr;
324
+
325
+ return ptr ? sizeof(xpf(_t)) : 0;
326
+ }
327
+
328
+ #ifdef HAVE_RB_DATA_TYPE_T_FUNCTION
329
+ static const rb_data_type_t xpf(_data_type) = {
330
+ "fairy_p_xgroup_by_postfiter",
331
+ {xpf(_mark), xpf(_free), xpf(_memsize),},
332
+ };
333
+ #else
334
+ static const rb_data_type_t xpf(_data_type) = {
335
+ "fairy_p_xgroup_by_postfiter",
336
+ xpf(_mark),
337
+ xpf(_free),
338
+ xpf(_memsize),
339
+ };
340
+ #endif
341
+
342
+ static VALUE
343
+ xpf(_alloc)(VALUE klass)
344
+ {
345
+ VALUE volatile obj;
346
+ xpf(_t) *pf;
347
+
348
+ obj = TypedData_Make_Struct(klass, xpf(_t), &xpf(_data_type), pf);
349
+
350
+ pf->opts = Qnil;
351
+ pf->buffering_policy = Qnil;
352
+ pf->buffering_class = Qnil;
353
+
354
+ pf->key_proc = Qnil;
355
+ pf->key_value_buffer = Qnil;
356
+
357
+ return obj;
358
+ }
359
+
360
+ static VALUE
361
+ rb_xpf(_initialize)(VALUE self, VALUE id, VALUE ntask, VALUE bjob, VALUE opts, VALUE block_source)
362
+ {
363
+ xpf(_t) *pf;
364
+ VALUE buf_class_name;
365
+
366
+ VALUE argv[] = {
367
+ id, ntask, bjob, opts, block_source,
368
+ };
369
+ GetFXPFPtr(self, pf);
370
+ rb_call_super(5, argv);
371
+
372
+ pf->opts = rb_iv_get(self, "@opts");
373
+ pf->buffering_policy = rb_fairy_conf("XGROUP_BY_BUFFERING_POLICY",
374
+ pf->opts, "buffering_policy");
375
+ buf_class_name = rb_funcall(pf->buffering_policy, id_aref, 1,
376
+ ID2SYM(rb_intern("buffering_class")));
377
+ if (NIL_P(buf_class_name)) {
378
+ VALUE policy = rb_fairy_conf("XGROUP_BY_BUFFERING_POLICY", Qnil, NULL);
379
+ buf_class_name = rb_hash_aref(policy, ID2SYM(rb_intern("buffering_class")));
380
+ }
381
+
382
+ rb_fairy_debug_p2(self, "Buffering Class", buf_class_name);
383
+ pf->buffering_class = rb_const_get(rb_cFairyPXGroupBy, SYM2ID(buf_class_name));
384
+ return self;
385
+ }
386
+
387
+ static VALUE
388
+ rb_xpf(_basic_each_input)(VALUE e, VALUE self, int argc, VALUE *argv)
389
+ {
390
+ xpf(_t) *pf;
391
+ GetFXPFPtr(self, pf);
392
+ rb_funcall(pf->key_value_buffer, id_push, 1, e);
393
+ return self;
394
+ }
395
+
396
+ static VALUE
397
+ rb_xpf(_basic_each_kvb)(VALUE kvs, VALUE self, int argc, VALUE *argv)
398
+ {
399
+ rb_yield(kvs);
400
+ }
401
+
402
+ static VALUE
403
+ rb_xpf(_basic_each)(VALUE self)
404
+ {
405
+ xpf(_t) *pf;
406
+ VALUE input;
407
+ VALUE arg[2];
408
+
409
+ GetFXPFPtr(self, pf);
410
+
411
+ arg[0] = self;
412
+ arg[1] = pf->buffering_policy;
413
+
414
+ pf->key_value_buffer = rb_class_new_instance(2, arg, pf->buffering_class);
415
+ pf->key_proc = rb_funcall(self, id_init_key_proc, 0);
416
+
417
+ input = rb_iv_get(self, "@input");
418
+
419
+ rb_block_call(input, id_each, 0, 0, rb_xpf(_basic_each_input), self);
420
+ rb_block_call(pf->key_value_buffer, id_each, 0, 0, rb_xpf(_basic_each_kvb), self);
421
+ pf->key_value_buffer = Qnil;
422
+ return self;
423
+ }
424
+
425
+ void
426
+ Init_p_xgroup_by()
427
+ {
428
+ VALUE pxg;
429
+ VALUE xpf;
430
+
431
+ if (!rb_const_defined(rb_mFairy, rb_intern("PGroupBy"))) {
432
+ return;
433
+ }
434
+ rb_cFairyPGroupBy = rb_const_get(rb_mFairy, rb_intern("PGroupBy"));
435
+
436
+ id_init_key_proc = rb_intern("init_key_proc");
437
+ id_aref = rb_intern("[]");
438
+ id_yield = rb_intern("yield");
439
+ id_each = rb_intern("each");
440
+ id_push = rb_intern("push");
441
+ id_add_exports = rb_intern("add_exports");
442
+ id_start = rb_intern("start");
443
+ id_set_njob_id = rb_intern("njob_id=");
444
+ id_add_key = rb_intern("add_key");
445
+
446
+ rb_cFairyPXGroupBy = rb_define_class_under(rb_mFairy, "PXGroupBy", rb_cFairyPGroupBy);
447
+ pxg = rb_cFairyPXGroupBy;
448
+ rb_define_alloc_func(pxg, fairy_p_xgroup_by_alloc);
449
+ rb_define_method(pxg, "initialize", rb_fairy_p_xgroup_by_initialize, 5);
450
+ rb_define_method(pxg, "add_export", rb_fairy_p_xgroup_by_add_export, 2);
451
+ rb_define_method(pxg, "start_export", rb_fairy_p_xgroup_by_start_export, 0);
452
+
453
+ rb_fairy_processor_def_export(rb_cFairyPXGroupBy);
454
+
455
+ PPostFilter = rb_const_get(rb_cFairyPGroupBy, rb_intern("PPostFilter"));
456
+
457
+ xpf = rb_define_class_under(rb_cFairyPXGroupBy,
458
+ "PPostFilter", PPostFilter);
459
+ rb_cFairyPXGPostfilter = xpf;
460
+ rb_define_alloc_func(xpf, xpf(_alloc));
461
+ rb_define_method(xpf, "initialize", rb_xpf(_initialize), 5);
462
+ rb_define_method(xpf, "basic_each", rb_xpf(_basic_each), 0);
463
+
464
+ rb_fairy_processor_def_export(xpf);
465
+
466
+
467
+ }