fairy 0.6.0 → 0.6.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. data/Makefile +1 -0
  2. data/bin/fairy +35 -5
  3. data/ext/extconf.rb +3 -0
  4. data/ext/fairy.c +180 -0
  5. data/ext/fairy.h +94 -0
  6. data/ext/fiber_mon.h +32 -0
  7. data/ext/fixnum-buffer.c +483 -0
  8. data/ext/p-group-by.c +529 -0
  9. data/ext/p-xgroup-by.c +467 -0
  10. data/ext/simple-hash.c +44 -0
  11. data/ext/string-buffer.c +286 -0
  12. data/ext/xmarshaled-queue.c +699 -0
  13. data/ext/xsized-queue.c +528 -0
  14. data/ext/xthread.h +65 -0
  15. data/fairy.gemspec +5 -2
  16. data/lib/fairy.rb +10 -1
  17. data/lib/fairy/client/group-by.rb +57 -2
  18. data/lib/fairy/client/here.rb +2 -1
  19. data/lib/fairy/controller.rb +25 -4
  20. data/lib/fairy/master.rb +17 -3
  21. data/lib/fairy/master/c-basic-group-by.rb +4 -2
  22. data/lib/fairy/master/c-cat.rb +3 -2
  23. data/lib/fairy/master/c-direct-product.rb +5 -3
  24. data/lib/fairy/master/c-filter.rb +5 -3
  25. data/lib/fairy/master/c-group-by.rb +13 -0
  26. data/lib/fairy/master/c-junction.rb +3 -2
  27. data/lib/fairy/master/c-seg-join.rb +3 -1
  28. data/lib/fairy/master/c-seg-shuffle.rb +3 -2
  29. data/lib/fairy/master/c-seg-split.rb +1 -1
  30. data/lib/fairy/master/c-seg-zip.rb +3 -1
  31. data/lib/fairy/master/c-sort.rb +7 -2
  32. data/lib/fairy/master/c-wc.rb +5 -3
  33. data/lib/fairy/node.rb +13 -2
  34. data/lib/fairy/node/p-barrier.rb +1 -1
  35. data/lib/fairy/node/p-basic-group-by.rb +22 -12
  36. data/lib/fairy/node/p-direct-product.rb +4 -2
  37. data/lib/fairy/node/p-filter.rb +8 -7
  38. data/lib/fairy/node/p-find.rb +2 -1
  39. data/lib/fairy/node/p-group-by.rb +17 -6
  40. data/lib/fairy/node/p-inject.rb +3 -2
  41. data/lib/fairy/node/p-output-file.rb +1 -1
  42. data/lib/fairy/node/p-seg-join.rb +2 -1
  43. data/lib/fairy/node/p-seg-zip.rb +2 -1
  44. data/lib/fairy/node/p-single-exportable.rb +3 -1
  45. data/lib/fairy/node/p-sort.rb +4 -2
  46. data/lib/fairy/node/p-task.rb +1 -1
  47. data/lib/fairy/node/p-wc.rb +5 -2
  48. data/lib/fairy/processor.rb +25 -18
  49. data/lib/fairy/share/block-source.rb +12 -2
  50. data/lib/fairy/share/conf.rb +35 -5
  51. data/lib/fairy/share/hash-simple-hash.rb +1 -1
  52. data/lib/fairy/share/log.rb +11 -4
  53. data/lib/fairy/share/pool-dictionary.rb +2 -1
  54. data/lib/fairy/share/port-marshaled-queue.rb +8 -1
  55. data/lib/fairy/share/port.rb +55 -45
  56. data/lib/fairy/share/reference.rb +2 -1
  57. data/lib/fairy/share/varray.rb +3 -1
  58. data/lib/fairy/share/vfile.rb +4 -2
  59. data/lib/fairy/version.rb +1 -1
  60. data/sample/sort.rb +69 -3
  61. data/spec/fairy8_spec.rb +1 -1
  62. data/test/testc.rb +380 -2
  63. data/tools/cap_recipe/Capfile +3 -3
  64. data/tools/fairy_conf_wizard.rb +375 -0
  65. data/tools/fairy_perf_graph.rb +15 -3
  66. data/tools/git-tag +1 -0
  67. data/tools/log-analysis.rb +59 -11
  68. metadata +33 -34
  69. data/ext/simple_hash/extconf.rb +0 -4
  70. data/ext/simple_hash/simple_hash.c +0 -42
data/ext/p-xgroup-by.c ADDED
@@ -0,0 +1,467 @@
1
+ /**********************************************************************
2
+
3
+ p-xgroup-by.c -
4
+ Copyright (C) 2007-2011 Rakuten, Inc.
5
+
6
+ **********************************************************************/
7
+
8
+ #include "ruby.h"
9
+ #include "xthread.h"
10
+
11
+ #include "fairy.h"
12
+
/* Method IDs and class handles shared by this file; the IDs are interned in Init_p_xgroup_by(). */
13
+ static ID id_init_key_proc; /* "init_key_proc" */
14
+ static ID id_aref; /* "[]" */
15
+ static ID id_yield; /* "yield" */
16
+ static ID id_each; /* "each" */
17
+ static ID id_push; /* "push" */
18
+ static ID id_add_exports; /* "add_exports" */
19
+ static ID id_start; /* "start" */
20
+ static ID id_set_njob_id; /* "njob_id=" */
21
+ static ID id_add_key; /* "add_key" */
22
+
23
+ static ID id_close; /* NOTE(review): never interned by Init_p_xgroup_by() and not referenced in the visible code */
24
+
25
+ static VALUE rb_cFairyPGroupBy; /* Fairy::PGroupBy, looked up in Init_p_xgroup_by() */
26
+ static VALUE PPostFilter; /* Fairy::PGroupBy::PPostFilter, looked up in Init_p_xgroup_by() */
27
+
28
+ VALUE rb_cFairyPXGroupBy; /* Fairy::PXGroupBy, defined in Init_p_xgroup_by() (non-static) */
29
+ VALUE rb_cFairyPXGPostfilter; /* Fairy::PXGroupBy::PPostFilter, defined in Init_p_xgroup_by() (non-static) */
30
+
/* Native state wrapped by a Fairy::PXGroupBy object. */
31
+ typedef struct rb_fairy_p_xgroup_by_struct
32
+ {
33
+ VALUE bjob; /* copy of @bjob; receives add_exports calls */
34
+ VALUE input; /* copy of @input, read in start_block() */
35
+ VALUE opts; /* copy of @opts */
36
+ VALUE id; /* copy of @id; passed to each new export via njob_id= */
37
+ long mod; /* number of segments: length of exports[] and counter[] */
38
+
39
+ VALUE postqueuing_policy; /* constructor argument for each new export */
40
+ VALUE exports_queue; /* copy of @exports_queue; nil is pushed when the input ends */
41
+ VALUE key_proc; /* result of init_key_proc, set in start_export */
42
+
43
+ VALUE *exports; /* per-segment export object, Qnil until first use; NULL before initialize */
44
+ long *counter; /* per-segment count of pushed elements; NULL before initialize */
45
+ } fairy_p_xgroup_by_t;
46
+
/* Unwrap `obj` into `tobj` (a fairy_p_xgroup_by_t *), type-checked against fairy_p_xgroup_by_data_type. */
47
+ #define GetFairyPXGroupByPtr(obj, tobj) \
48
+ TypedData_Get_Struct((obj), fairy_p_xgroup_by_t, &fairy_p_xgroup_by_data_type, (tobj))
49
+
50
+ static void
51
+ fairy_p_xgroup_by_mark(void *ptr)
52
+ {
53
+ fairy_p_xgroup_by_t *gb = (fairy_p_xgroup_by_t*)ptr;
54
+
55
+ rb_gc_mark(gb->bjob);
56
+ rb_gc_mark(gb->input);
57
+ rb_gc_mark(gb->opts);
58
+ rb_gc_mark(gb->id);
59
+
60
+ rb_gc_mark(gb->postqueuing_policy);
61
+ rb_gc_mark(gb->exports_queue);
62
+ rb_gc_mark(gb->key_proc);
63
+
64
+ if (gb->exports) {
65
+ int i;
66
+ for (i = 0; i < gb->mod; i++) {
67
+ if (!NIL_P(gb->exports[i])) {
68
+ rb_gc_mark(gb->exports[i]);
69
+ }
70
+ }
71
+ }
72
+ }
73
+
74
+ static void
75
+ fairy_p_xgroup_by_free(void *ptr)
76
+ {
77
+ fairy_p_xgroup_by_t *gb = (fairy_p_xgroup_by_t*)ptr;
78
+
79
+ if(gb->exports) {
80
+ ruby_xfree(gb->exports);
81
+ }
82
+ if (gb->counter) {
83
+ ruby_xfree(gb->counter);
84
+ }
85
+ ruby_xfree(ptr);
86
+ }
87
+
88
+ static size_t
89
+ fairy_p_xgroup_by_memsize(const void *ptr)
90
+ {
91
+ fairy_p_xgroup_by_t *gb = (fairy_p_xgroup_by_t*)ptr;
92
+
93
+ return ptr ? sizeof(fairy_p_xgroup_by_t) +(sizeof(VALUE) + sizeof(long)) * gb->mod : 0;
94
+ }
95
+
96
+
/* TypedData descriptor for fairy_p_xgroup_by_t: type name plus the mark/free/memsize hooks. */
/* HAVE_RB_DATA_TYPE_T_FUNCTION selects between a nested-brace and a flat initializer for the hooks. */
97
+ #ifdef HAVE_RB_DATA_TYPE_T_FUNCTION
98
+ static const rb_data_type_t fairy_p_xgroup_by_data_type = {
99
+ "fairy_p_xgroup_by",
100
+ {fairy_p_xgroup_by_mark, fairy_p_xgroup_by_free, fairy_p_xgroup_by_memsize,},
101
+ };
102
+ #else
103
+ static const rb_data_type_t fairy_p_xgroup_by_data_type = {
104
+ "fairy_p_xgroup_by",
105
+ fairy_p_xgroup_by_mark,
106
+ fairy_p_xgroup_by_free,
107
+ fairy_p_xgroup_by_memsize,
108
+ };
109
+ #endif
110
+
111
+
112
+ static VALUE
113
+ fairy_p_xgroup_by_alloc(VALUE klass)
114
+ {
115
+ VALUE volatile obj;
116
+ fairy_p_xgroup_by_t *gb;
117
+
118
+ obj = TypedData_Make_Struct(klass, fairy_p_xgroup_by_t, &fairy_p_xgroup_by_data_type, gb);
119
+
120
+ gb->bjob = Qnil;
121
+ gb->input = Qnil;
122
+ gb->opts = Qnil;
123
+ gb->id = Qnil;
124
+
125
+ gb->mod = 0;
126
+
127
+ gb->postqueuing_policy = Qnil;
128
+ gb->exports_queue = Qnil;
129
+ gb->key_proc = Qnil;
130
+
131
+ gb->exports = NULL;
132
+ gb->counter = NULL;
133
+
134
+ return obj;
135
+ }
136
+
137
+ static VALUE
138
+ rb_fairy_p_xgroup_by_initialize(VALUE self, VALUE id, VALUE ntask, VALUE bjob, VALUE opts, VALUE block_source)
139
+ {
140
+ fairy_p_xgroup_by_t *gb;
141
+ VALUE argv[] = {
142
+ id,
143
+ ntask,
144
+ bjob,
145
+ opts,
146
+ block_source,
147
+ };
148
+
149
+ GetFairyPXGroupByPtr(self, gb);
150
+ rb_call_super(5, argv);
151
+
152
+ gb->bjob = rb_iv_get(self, "@bjob");
153
+ gb->opts = rb_iv_get(self, "@opts");
154
+ gb->id = rb_iv_get(self, "@id");
155
+
156
+ gb->postqueuing_policy = rb_fairy_conf(NULL, gb->opts, "postqueuing_policy");
157
+ gb->mod = NUM2LONG(rb_fairy_conf("GROUP_BY_NO_SEGMENT", gb->opts, "no_segment"));
158
+ gb->exports_queue = rb_iv_get(self, "@exports_queue");
159
+
160
+ gb->exports = ALLOC_N(VALUE, gb->mod);
161
+ {
162
+ long i;
163
+ for (i = 0; i < gb->mod; i++) {
164
+ gb->exports[i] = Qnil;
165
+ }
166
+ }
167
+
168
+ gb->counter = ALLOC_N(long, gb->mod);
169
+ {
170
+ long i;
171
+ for (i = 0; i < gb->mod; i++) {
172
+ gb->counter[i] = 0;
173
+ }
174
+ }
175
+
176
+ return self;
177
+ }
178
+
179
+ static VALUE
180
+ rb_fairy_p_xgroup_by_add_export(VALUE self, long key, VALUE export)
181
+ {
182
+ fairy_p_xgroup_by_t *gb;
183
+ GetFairyPXGroupByPtr(self, gb);
184
+
185
+ gb->exports[key] = export;
186
+ rb_funcall(gb->bjob, id_add_exports, 3, LONG2NUM(key), export, self);
187
+ return self;
188
+ }
189
+
/* Forward declarations: start_export -> start_block -> (rb_protect) start_main -> start_main_i per input element. */
190
+ static VALUE start_block(VALUE, VALUE, int, VALUE*);
191
+ static VALUE start_main(VALUE);
192
+ static VALUE start_main_i(VALUE, VALUE, int, VALUE*);
193
+
194
+ static VALUE
195
+ rb_fairy_p_xgroup_by_start_export(VALUE self)
196
+ {
197
+ fairy_p_xgroup_by_t *gb;
198
+ GetFairyPXGroupByPtr(self, gb);
199
+
200
+ rb_fairy_debug(self, "START_EXPORT");
201
+
202
+ gb->key_proc = rb_funcall(self, id_init_key_proc, 0);
203
+
204
+ return rb_block_call(self, id_start, 0, 0, start_block, self);
205
+ }
206
+
207
+ static VALUE
208
+ start_block(VALUE e, VALUE self, int argc, VALUE *argv)
209
+ {
210
+ VALUE result;
211
+ fairy_p_xgroup_by_t *gb;
212
+ int state;
213
+
214
+ GetFairyPXGroupByPtr(self, gb);
215
+
216
+ gb->input = rb_iv_get(self, "@input");
217
+
218
+ result = rb_protect(start_main, self, &state);
219
+ {
220
+ long i;
221
+ rb_xthread_queue_push(gb->exports_queue, Qnil);
222
+ for (i = 0; i < gb->mod; i++) {
223
+ static char buf[256];
224
+
225
+ snprintf(buf, sizeof(buf), "G0 %d => %d", i, gb->counter[i]);
226
+ rb_fairy_debug(self, buf);
227
+
228
+ if (!NIL_P(gb->exports[i])) {
229
+ rb_funcall(gb->exports[i], id_push, 1, rb_FairyEOS);
230
+ }
231
+ }
232
+
233
+ if (state) {
234
+ rb_fairy_debug_exception(self);
235
+ rb_jump_tag(state);
236
+ }
237
+ }
238
+ return result;
239
+ }
240
+
241
+ static VALUE
242
+ start_main(VALUE self)
243
+ {
244
+ fairy_p_xgroup_by_t *gb;
245
+
246
+ GetFairyPXGroupByPtr(self, gb);
247
+ return rb_block_call(gb->input, id_each, 0, 0, start_main_i, self);
248
+
249
+ }
250
+
251
+ static VALUE
252
+ start_main_i(VALUE e, VALUE self, int argc, VALUE *argv)
253
+ {
254
+ fairy_p_xgroup_by_t *gb;
255
+ VALUE key;
256
+ unsigned int hashkey;
257
+ volatile VALUE export;
258
+
259
+ GetFairyPXGroupByPtr(self, gb);
260
+ if (CLASS_OF(gb->key_proc) == rb_cProc) {
261
+ key = rb_proc_call(gb->key_proc, rb_ary_new3(1, e));
262
+ }
263
+ else {
264
+ key = rb_funcall(gb->key_proc, id_yield, 1, e);
265
+ }
266
+
267
+ if (CLASS_OF(key) == rb_cFairyImportCTLTOKEN_NULLVALUE) {
268
+ return self;
269
+ }
270
+
271
+ hashkey = rb_fairy_simple_hash_uint(rb_mFairySimpleHash, key) % gb->mod;
272
+ export = gb->exports[hashkey];
273
+ if (NIL_P(export)) {
274
+ export = rb_class_new_instance(1, &gb->postqueuing_policy, rb_cFairyExport);
275
+ rb_funcall(export, id_set_njob_id, 1, gb->id);
276
+ rb_funcall(export, id_add_key, 1, INT2FIX(hashkey));
277
+ rb_fairy_p_xgroup_by_add_export(self, hashkey, export);
278
+ }
279
+ rb_funcall(export, id_push, 1, e);
280
+ gb->counter[hashkey]++;
281
+ return self;
282
+ }
283
+
/* Token-pasting shorthands for the post-filter: xpf(_t) -> fairy_pxg_postfilter_t, rb_xpf(_initialize) -> rb_fairy_pxg_postfilter_initialize. */
284
+ #define xpf(name) fairy_pxg_postfilter##name
285
+ #define rb_xpf(name) rb_fairy_pxg_postfilter##name
286
+
/* Native state wrapped by a Fairy::PXGroupBy::PPostFilter object. */
287
+ typedef struct rb_xpf(_struct)
288
+ {
289
+ VALUE opts; /* copy of @opts */
290
+ VALUE buffering_policy; /* config value read via "XGROUP_BY_BUFFERING_POLICY" / "buffering_policy" */
291
+ VALUE buffering_class; /* constant under PXGroupBy named by the policy's :buffering_class entry */
292
+ VALUE key_proc; /* result of init_key_proc, set in basic_each */
293
+ VALUE key_value_buffer; /* buffering_class instance alive during basic_each, Qnil otherwise */
294
+ } xpf(_t);
295
+
/* Unwrap `obj` into `tobj` (an xpf(_t) *), type-checked against xpf(_data_type); GetFXPFPtr is a short alias. */
296
+ #define GetFairyPXGPostFilterPtr(obj, tobj) \
297
+ TypedData_Get_Struct((obj), xpf(_t), &xpf(_data_type), (tobj))
298
+
299
+ #define GetFXPFPtr(obj, tobj) \
300
+ GetFairyPXGPostFilterPtr(obj, tobj)
301
+
302
+
303
+ static void
304
+ xpf(_mark)(void *ptr)
305
+ {
306
+ xpf(_t) *pf = (xpf(_t)*)ptr;
307
+ rb_gc_mark(pf->opts);
308
+ rb_gc_mark(pf->buffering_policy);
309
+ rb_gc_mark(pf->buffering_class);
310
+ rb_gc_mark(pf->key_proc);
311
+ rb_gc_mark(pf->key_value_buffer);
312
+ }
313
+
314
+ static void
315
+ xpf(_free)(void *ptr)
316
+ {
317
+ ruby_xfree(ptr);
318
+ }
319
+
320
+ static size_t
321
+ xpf(_memsize)(const void *ptr)
322
+ {
323
+ xpf(_t) *pf = (xpf(_t)*)ptr;
324
+
325
+ return ptr ? sizeof(xpf(_t)) : 0;
326
+ }
327
+
/* TypedData descriptor for xpf(_t): type name plus the mark/free/memsize hooks. */
/* HAVE_RB_DATA_TYPE_T_FUNCTION selects between a nested-brace and a flat initializer for the hooks. */
/* NOTE(review): the type name is spelled "postfiter" (missing 'l'); left as-is because it is a runtime string. */
328
+ #ifdef HAVE_RB_DATA_TYPE_T_FUNCTION
329
+ static const rb_data_type_t xpf(_data_type) = {
330
+ "fairy_p_xgroup_by_postfiter",
331
+ {xpf(_mark), xpf(_free), xpf(_memsize),},
332
+ };
333
+ #else
334
+ static const rb_data_type_t xpf(_data_type) = {
335
+ "fairy_p_xgroup_by_postfiter",
336
+ xpf(_mark),
337
+ xpf(_free),
338
+ xpf(_memsize),
339
+ };
340
+ #endif
341
+
342
+ static VALUE
343
+ xpf(_alloc)(VALUE klass)
344
+ {
345
+ VALUE volatile obj;
346
+ xpf(_t) *pf;
347
+
348
+ obj = TypedData_Make_Struct(klass, xpf(_t), &xpf(_data_type), pf);
349
+
350
+ pf->opts = Qnil;
351
+ pf->buffering_policy = Qnil;
352
+ pf->buffering_class = Qnil;
353
+
354
+ pf->key_proc = Qnil;
355
+ pf->key_value_buffer = Qnil;
356
+
357
+ return obj;
358
+ }
359
+
360
+ static VALUE
361
+ rb_xpf(_initialize)(VALUE self, VALUE id, VALUE ntask, VALUE bjob, VALUE opts, VALUE block_source)
362
+ {
363
+ xpf(_t) *pf;
364
+ VALUE buf_class_name;
365
+
366
+ VALUE argv[] = {
367
+ id, ntask, bjob, opts, block_source,
368
+ };
369
+ GetFXPFPtr(self, pf);
370
+ rb_call_super(5, argv);
371
+
372
+ pf->opts = rb_iv_get(self, "@opts");
373
+ pf->buffering_policy = rb_fairy_conf("XGROUP_BY_BUFFERING_POLICY",
374
+ pf->opts, "buffering_policy");
375
+ buf_class_name = rb_funcall(pf->buffering_policy, id_aref, 1,
376
+ ID2SYM(rb_intern("buffering_class")));
377
+ if (NIL_P(buf_class_name)) {
378
+ VALUE policy = rb_fairy_conf("XGROUP_BY_BUFFERING_POLICY", Qnil, NULL);
379
+ buf_class_name = rb_hash_aref(policy, ID2SYM(rb_intern("buffering_class")));
380
+ }
381
+
382
+ rb_fairy_debug_p2(self, "Buffering Class", buf_class_name);
383
+ pf->buffering_class = rb_const_get(rb_cFairyPXGroupBy, SYM2ID(buf_class_name));
384
+ return self;
385
+ }
386
+
387
+ static VALUE
388
+ rb_xpf(_basic_each_input)(VALUE e, VALUE self, int argc, VALUE *argv)
389
+ {
390
+ xpf(_t) *pf;
391
+ GetFXPFPtr(self, pf);
392
+ rb_funcall(pf->key_value_buffer, id_push, 1, e);
393
+ return self;
394
+ }
395
+
396
+ static VALUE
397
+ rb_xpf(_basic_each_kvb)(VALUE kvs, VALUE self, int argc, VALUE *argv)
398
+ {
399
+ rb_yield(kvs);
400
+ }
401
+
402
+ static VALUE
403
+ rb_xpf(_basic_each)(VALUE self)
404
+ {
405
+ xpf(_t) *pf;
406
+ VALUE input;
407
+ VALUE arg[2];
408
+
409
+ GetFXPFPtr(self, pf);
410
+
411
+ arg[0] = self;
412
+ arg[1] = pf->buffering_policy;
413
+
414
+ pf->key_value_buffer = rb_class_new_instance(2, arg, pf->buffering_class);
415
+ pf->key_proc = rb_funcall(self, id_init_key_proc, 0);
416
+
417
+ input = rb_iv_get(self, "@input");
418
+
419
+ rb_block_call(input, id_each, 0, 0, rb_xpf(_basic_each_input), self);
420
+ rb_block_call(pf->key_value_buffer, id_each, 0, 0, rb_xpf(_basic_each_kvb), self);
421
+ pf->key_value_buffer = Qnil;
422
+ return self;
423
+ }
424
+
425
+ void
426
+ Init_p_xgroup_by()
427
+ {
428
+ VALUE pxg;
429
+ VALUE xpf;
430
+
431
+ if (!rb_const_defined(rb_mFairy, rb_intern("PGroupBy"))) {
432
+ return;
433
+ }
434
+ rb_cFairyPGroupBy = rb_const_get(rb_mFairy, rb_intern("PGroupBy"));
435
+
436
+ id_init_key_proc = rb_intern("init_key_proc");
437
+ id_aref = rb_intern("[]");
438
+ id_yield = rb_intern("yield");
439
+ id_each = rb_intern("each");
440
+ id_push = rb_intern("push");
441
+ id_add_exports = rb_intern("add_exports");
442
+ id_start = rb_intern("start");
443
+ id_set_njob_id = rb_intern("njob_id=");
444
+ id_add_key = rb_intern("add_key");
445
+
446
+ rb_cFairyPXGroupBy = rb_define_class_under(rb_mFairy, "PXGroupBy", rb_cFairyPGroupBy);
447
+ pxg = rb_cFairyPXGroupBy;
448
+ rb_define_alloc_func(pxg, fairy_p_xgroup_by_alloc);
449
+ rb_define_method(pxg, "initialize", rb_fairy_p_xgroup_by_initialize, 5);
450
+ rb_define_method(pxg, "add_export", rb_fairy_p_xgroup_by_add_export, 2);
451
+ rb_define_method(pxg, "start_export", rb_fairy_p_xgroup_by_start_export, 0);
452
+
453
+ rb_fairy_processor_def_export(rb_cFairyPXGroupBy);
454
+
455
+ PPostFilter = rb_const_get(rb_cFairyPGroupBy, rb_intern("PPostFilter"));
456
+
457
+ xpf = rb_define_class_under(rb_cFairyPXGroupBy,
458
+ "PPostFilter", PPostFilter);
459
+ rb_cFairyPXGPostfilter = xpf;
460
+ rb_define_alloc_func(xpf, xpf(_alloc));
461
+ rb_define_method(xpf, "initialize", rb_xpf(_initialize), 5);
462
+ rb_define_method(xpf, "basic_each", rb_xpf(_basic_each), 0);
463
+
464
+ rb_fairy_processor_def_export(xpf);
465
+
466
+
467
+ }