fairy 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (186) hide show
  1. data/LICENSE +674 -0
  2. data/Makefile +116 -0
  3. data/README +15 -0
  4. data/bin/fairy +582 -0
  5. data/bin/fairy-cat +74 -0
  6. data/bin/fairy-cp +128 -0
  7. data/bin/fairy-rm +122 -0
  8. data/bin/subcmd/controller +41 -0
  9. data/bin/subcmd/inspector +81 -0
  10. data/bin/subcmd/master +43 -0
  11. data/bin/subcmd/node +47 -0
  12. data/bin/subcmd/processor +54 -0
  13. data/doc/programming-interface.html +240 -0
  14. data/doc/programming-interface.rd +300 -0
  15. data/etc/fairy.conf.tmpl +118 -0
  16. data/ext/simple_hash/extconf.rb +4 -0
  17. data/ext/simple_hash/simple_hash.c +42 -0
  18. data/fairy.gemspec +60 -0
  19. data/lib/fairy/client/addins.rb +20 -0
  20. data/lib/fairy/client/barrier.rb +29 -0
  21. data/lib/fairy/client/basic-group-by.rb +52 -0
  22. data/lib/fairy/client/cat.rb +41 -0
  23. data/lib/fairy/client/direct-product.rb +51 -0
  24. data/lib/fairy/client/equijoin.rb +79 -0
  25. data/lib/fairy/client/exec.rb +54 -0
  26. data/lib/fairy/client/filter.rb +62 -0
  27. data/lib/fairy/client/find.rb +35 -0
  28. data/lib/fairy/client/group-by.rb +194 -0
  29. data/lib/fairy/client/here.rb +84 -0
  30. data/lib/fairy/client/inject.rb +70 -0
  31. data/lib/fairy/client/input-file.rb +53 -0
  32. data/lib/fairy/client/input-iota.rb +49 -0
  33. data/lib/fairy/client/input-local-file.rb +188 -0
  34. data/lib/fairy/client/input-varray.rb +30 -0
  35. data/lib/fairy/client/input.rb +42 -0
  36. data/lib/fairy/client/io-filter.rb +26 -0
  37. data/lib/fairy/client/junction.rb +31 -0
  38. data/lib/fairy/client/map.rb +34 -0
  39. data/lib/fairy/client/merge-group-by.rb +71 -0
  40. data/lib/fairy/client/output-file.rb +64 -0
  41. data/lib/fairy/client/output-local-file.rb +60 -0
  42. data/lib/fairy/client/output-null.rb +47 -0
  43. data/lib/fairy/client/output-varray.rb +50 -0
  44. data/lib/fairy/client/output.rb +29 -0
  45. data/lib/fairy/client/roma-put.rb +62 -0
  46. data/lib/fairy/client/roma.rb +156 -0
  47. data/lib/fairy/client/seg-join.rb +61 -0
  48. data/lib/fairy/client/seg-map.rb +78 -0
  49. data/lib/fairy/client/seg-shuffle.rb +35 -0
  50. data/lib/fairy/client/seg-split.rb +27 -0
  51. data/lib/fairy/client/seg-zip.rb +60 -0
  52. data/lib/fairy/client/select.rb +38 -0
  53. data/lib/fairy/client/sort.rb +48 -0
  54. data/lib/fairy/client/sort18.rb +56 -0
  55. data/lib/fairy/client/sort19.rb +61 -0
  56. data/lib/fairy/client/there.rb +47 -0
  57. data/lib/fairy/client/top_n_into_roma.rb +34 -0
  58. data/lib/fairy/client/wc.rb +92 -0
  59. data/lib/fairy/controller.rb +1103 -0
  60. data/lib/fairy/logger.rb +107 -0
  61. data/lib/fairy/master/addins.rb +20 -0
  62. data/lib/fairy/master/atom.rb +17 -0
  63. data/lib/fairy/master/c-barrier.rb +283 -0
  64. data/lib/fairy/master/c-basic-group-by.rb +250 -0
  65. data/lib/fairy/master/c-cat.rb +159 -0
  66. data/lib/fairy/master/c-direct-product.rb +203 -0
  67. data/lib/fairy/master/c-exec.rb +68 -0
  68. data/lib/fairy/master/c-filter.rb +422 -0
  69. data/lib/fairy/master/c-find.rb +138 -0
  70. data/lib/fairy/master/c-group-by.rb +64 -0
  71. data/lib/fairy/master/c-here.rb +80 -0
  72. data/lib/fairy/master/c-inject.rb +119 -0
  73. data/lib/fairy/master/c-input-file.rb +46 -0
  74. data/lib/fairy/master/c-input-iota.rb +66 -0
  75. data/lib/fairy/master/c-input-local-file.rb +117 -0
  76. data/lib/fairy/master/c-input-varray.rb +53 -0
  77. data/lib/fairy/master/c-input.rb +24 -0
  78. data/lib/fairy/master/c-inputtable.rb +31 -0
  79. data/lib/fairy/master/c-inputtable18.rb +36 -0
  80. data/lib/fairy/master/c-inputtable19.rb +35 -0
  81. data/lib/fairy/master/c-io-filter.rb +28 -0
  82. data/lib/fairy/master/c-junction.rb +54 -0
  83. data/lib/fairy/master/c-map.rb +27 -0
  84. data/lib/fairy/master/c-merge-group-by.rb +241 -0
  85. data/lib/fairy/master/c-output-file.rb +84 -0
  86. data/lib/fairy/master/c-output-local-file.rb +19 -0
  87. data/lib/fairy/master/c-output-null.rb +45 -0
  88. data/lib/fairy/master/c-output-varray.rb +57 -0
  89. data/lib/fairy/master/c-output.rb +20 -0
  90. data/lib/fairy/master/c-seg-join.rb +141 -0
  91. data/lib/fairy/master/c-seg-map.rb +26 -0
  92. data/lib/fairy/master/c-seg-shuffle.rb +87 -0
  93. data/lib/fairy/master/c-seg-split.rb +110 -0
  94. data/lib/fairy/master/c-seg-zip.rb +132 -0
  95. data/lib/fairy/master/c-select.rb +27 -0
  96. data/lib/fairy/master/c-sort.rb +108 -0
  97. data/lib/fairy/master/c-there.rb +57 -0
  98. data/lib/fairy/master/c-wc.rb +232 -0
  99. data/lib/fairy/master/job-interpriter.rb +19 -0
  100. data/lib/fairy/master/scheduler.rb +24 -0
  101. data/lib/fairy/master.rb +329 -0
  102. data/lib/fairy/node/addins.rb +19 -0
  103. data/lib/fairy/node/p-barrier.rb +95 -0
  104. data/lib/fairy/node/p-basic-group-by.rb +252 -0
  105. data/lib/fairy/node/p-direct-product.rb +153 -0
  106. data/lib/fairy/node/p-exec.rb +30 -0
  107. data/lib/fairy/node/p-filter.rb +363 -0
  108. data/lib/fairy/node/p-find.rb +111 -0
  109. data/lib/fairy/node/p-group-by.rb +1534 -0
  110. data/lib/fairy/node/p-here.rb +21 -0
  111. data/lib/fairy/node/p-identity.rb +24 -0
  112. data/lib/fairy/node/p-inject.rb +127 -0
  113. data/lib/fairy/node/p-input-file.rb +108 -0
  114. data/lib/fairy/node/p-input-iota.rb +39 -0
  115. data/lib/fairy/node/p-input-local-file.rb +61 -0
  116. data/lib/fairy/node/p-input-varray.rb +26 -0
  117. data/lib/fairy/node/p-io-filter.rb +28 -0
  118. data/lib/fairy/node/p-map.rb +40 -0
  119. data/lib/fairy/node/p-merger-group-by.rb +48 -0
  120. data/lib/fairy/node/p-output-file.rb +104 -0
  121. data/lib/fairy/node/p-output-local-file.rb +14 -0
  122. data/lib/fairy/node/p-output-null.rb +32 -0
  123. data/lib/fairy/node/p-output-varray.rb +41 -0
  124. data/lib/fairy/node/p-seg-join.rb +82 -0
  125. data/lib/fairy/node/p-seg-map.rb +34 -0
  126. data/lib/fairy/node/p-seg-split.rb +61 -0
  127. data/lib/fairy/node/p-seg-zip.rb +79 -0
  128. data/lib/fairy/node/p-select.rb +40 -0
  129. data/lib/fairy/node/p-single-exportable.rb +90 -0
  130. data/lib/fairy/node/p-sort.rb +195 -0
  131. data/lib/fairy/node/p-task.rb +113 -0
  132. data/lib/fairy/node/p-there.rb +44 -0
  133. data/lib/fairy/node/p-wc.rb +266 -0
  134. data/lib/fairy/node.rb +187 -0
  135. data/lib/fairy/processor.rb +510 -0
  136. data/lib/fairy/share/base-app.rb +114 -0
  137. data/lib/fairy/share/block-source.rb +234 -0
  138. data/lib/fairy/share/conf.rb +396 -0
  139. data/lib/fairy/share/debug.rb +21 -0
  140. data/lib/fairy/share/encoding.rb +17 -0
  141. data/lib/fairy/share/fast-tempfile.rb +93 -0
  142. data/lib/fairy/share/file-place.rb +176 -0
  143. data/lib/fairy/share/hash-1.rb +20 -0
  144. data/lib/fairy/share/hash-md5.rb +28 -0
  145. data/lib/fairy/share/hash-murmur.rb +69 -0
  146. data/lib/fairy/share/hash-rb18.rb +20 -0
  147. data/lib/fairy/share/hash-simple-hash.rb +28 -0
  148. data/lib/fairy/share/inspector.rb +16 -0
  149. data/lib/fairy/share/lc/exceptions.rb +82 -0
  150. data/lib/fairy/share/lc/ja/exceptions.rb +81 -0
  151. data/lib/fairy/share/locale.rb +17 -0
  152. data/lib/fairy/share/log.rb +215 -0
  153. data/lib/fairy/share/pool-dictionary.rb +53 -0
  154. data/lib/fairy/share/port-marshaled-queue.rb +347 -0
  155. data/lib/fairy/share/port.rb +1697 -0
  156. data/lib/fairy/share/reference.rb +45 -0
  157. data/lib/fairy/share/stdout.rb +56 -0
  158. data/lib/fairy/share/tr.rb +16 -0
  159. data/lib/fairy/share/varray.rb +147 -0
  160. data/lib/fairy/share/vfile.rb +183 -0
  161. data/lib/fairy/version.rb +8 -0
  162. data/lib/fairy.rb +206 -0
  163. data/sample/grep.rb +46 -0
  164. data/sample/ping.rb +19 -0
  165. data/sample/sort.rb +102 -0
  166. data/sample/wordcount.rb +61 -0
  167. data/spec/README +12 -0
  168. data/spec/fairy1_spec.rb +31 -0
  169. data/spec/fairy2_spec.rb +42 -0
  170. data/spec/fairy3_spec.rb +126 -0
  171. data/spec/fairy4_spec.rb +63 -0
  172. data/spec/fairy5_spec.rb +45 -0
  173. data/spec/fairy6_spec.rb +52 -0
  174. data/spec/fairy7_spec.rb +58 -0
  175. data/spec/fairy8_spec.rb +48 -0
  176. data/spec/mkdat.rb +148 -0
  177. data/spec/run_all.sh +65 -0
  178. data/test/testc.rb +7111 -0
  179. data/tools/cap_recipe/Capfile +144 -0
  180. data/tools/cap_recipe/cluster.yml.sample +14 -0
  181. data/tools/fairy_perf_graph.rb +444 -0
  182. data/tools/git-tag +44 -0
  183. data/tools/log-analysis.rb +62 -0
  184. data/tools/svn-ls-diff +38 -0
  185. data/tools/svn-tags +37 -0
  186. metadata +298 -0
@@ -0,0 +1,118 @@
1
+ # -*- ruby -*-
2
+ # encoding: UTF-8
3
+
4
+ module Fairy
5
+
6
+ ## A setting commented out with a double sharp (##) shows the system default value.
7
+
8
+ ##CONF.RUBY_BIN = ENV["FAIRY_RUBY"] || "ruby"
9
+
10
+ ##CONF.MASTER_HOST = #NO SYSTEM DEFAULT VALUE
11
+ CONF.MASTER_HOST = NO SYSTEM DEFAULT VALUE
12
+
13
+ ##CONF.MASTER_PORT = "19999"
14
+
15
+ ##CONF.HOME = ENV["FAIRY_HOME"]
16
+ ##CONF.BIN = CONF.HOME+"/bin"
17
+ ##CONF.LIB = CONF.HOME+"/lib"
18
+ ##CONF.SUBCMD = CONF.BIN+"/subcmd"
19
+ ##CONF.CONTROLLER_BIN = CONF.SUBCMD+"/controller"
20
+ ##CONF.PROCESSOR_BIN = CONF.SUBCMD+"/processor"
21
+
22
+ ##CONF.DEFAULT_EXTERNAL = nil
23
+ ##CONF.DEFAULT_INTERNAL = nil
24
+
25
+ ##CONF.IGNORE_EXCEPTION_ON_FILTER = false
26
+
27
+ ##CONF.MASTER_MAX_ACTIVE_PROCESSORS = 4
28
+ ##CONF.CONTROLLER_INPUT_PROCESSOR_N = 10
29
+
30
+ ## Upper limit on the number of processors that are created. nil means unlimited.
31
+ ##CONF.CONTROLLER_MAX_ACTIVE_TASKS_IN_PROCESSOR = 4
32
+
33
+ ##CONF.CONTROLLER_ASSIGN_NEW_PROCESSOR_N_FACTOR = 1
34
+
35
+ ## CONF.POSTMAPPING_POLICY = nil
36
+
37
+ ##CONF.PREQUEUING_POLICY = {:queuing_class => :FileMarshaledQueue}
38
+ ##CONF.POSTQUEUING_POLICY = {:queuing_class => :FileMarshaledQueue}
39
+
40
+ ##CONF.POSTQUEUE_MAX_TRANSFER_SIZE = 100000
41
+ ##CONF.POOLQUEUE_POOL_THRESHOLD = 10000
42
+
43
+ ##CONF.ONMEMORY_SIZEDQUEUE_SIZE = 10000
44
+ ##CONF.FILEBUFFEREDQUEUE_THRESHOLD = 10000/2
45
+
46
+ ##CONF.MARSHAL_QUEUE_CHUNK_SIZE = DEFAULT_CONF.POOLQUEUE_POOL_THRESHOLD
47
+ ##CONF.MARSHAL_QUEUE_MIN_CHUNK_NO = DEFAULT_CONF.POOLQUEUE_POOL_THRESHOLD
48
+ ##CONF.SIZEDMARSHAL_QUEUE_MAX_CHUNK_NO = 10
49
+
50
+ ##CONF.SORTEDQUEUE_POOL_THRESHOLD = CONF.POOLQUEUE_POOL_THRESHOLD
51
+ ##CONF.SORTEDQUEUE_THRESHOLD = 10000/2
52
+ ##CONF.SORTEDQUEUE_SORTBY = %{|v| v}
53
+
54
+ ##CONF.INPUT_LOCAL_FILE_BUFFER_SIZE = 1024*1024
55
+ ##CONF.HERE_POOL_THRESHOLD = 32000
56
+
57
+ ##CONF.GROUP_BY_NO_SEGMENT = 4
58
+
59
+ ##CONF.GROUP_BY_HASH_MODULE = "fairy/share/hash-md5"
60
+ ##CONF.GROUP_BY_GROUPING_OPTIMIZE = false
61
+
62
+ ##CONF.GROUP_BY_BUFFERING_POLICY = {:buffering_class => :DirectMergeSortBuffer}
63
+ ##CONF.GROUP_BY_CMSB_THRESHOLD = 400_000
64
+ ##CONF.GROUP_BY_CMSB_CHUNK_SIZE = 1000
65
+
66
+ ##CONF.BARRIER_MEMORY_BUFFERING_POLICY = {:queuing_class => :PoolQueue}
67
+ ##CONF.SORT_BUFFERING_POLICY = {:buffering_class => "PGroupBy::DirectMergeSortBuffer"}
68
+ ##CONF.SORT_SAMPLING_MIN = 100
69
+ ##CONF.SORT_SAMPLING_MAX = 10000
70
+ ##CONF.SORT_SAMPLING_RATIO_1_TO = 100
71
+ ##CONF.SORT_NO_SEGMENT = CONF.GROUP_BY_NO_SEGMENT
72
+ ##CONF.SORT_CMP_OPTIMIZE = false
73
+
74
+ ##CONF.IOTA_SPLIT_NO = 4
75
+
76
+ ##CONF.TRANSFAR_MARSHAL_STRING_ARRAY_OPTIMIZE = false
77
+
78
+ ##CONF.VF_ROOT = CONF.HOME+"/Repos"
79
+ ##CONF.VF_PREFIX = `hostname`.chomp
80
+ #- CONF.VF_PREFIX uses the client's setting.
81
+ # (ja_JP.utf-8: CONF.VF_PREFIXはクライアントのものが使われる)
82
+
83
+ ##CONF.VF_SPLIT_SIZE = 64*1024*1024
84
+
85
+ ##CONF.TMP_DIR = "/tmp/fairy/tmpbuf"
86
+
87
+ ##CONF.LOG_FILE = "/tmp/fairy/log"
88
+ ##CONF.LOG_FLUSH_INTERVAL = 1
89
+ ##CONF.LOG_MARK_INTERVAL = 300
90
+ ##CONF.LOG_LEVEL = :INFO
91
+ ##CONF.LOG_IMPORT_NTIMES_POP = 100000
92
+ ##CONF.LOG_LOCAL_OUTPUT_DEV = :$stderr
93
+ ##CONF.LOG_ROTATE_INTERVAL = 60*60*24
94
+ ##CONF.LOG_ROTATE_N = 7
95
+
96
+ ##CONF.SUBCMD_EXEC_TIMEOUT = 60
97
+
98
+ ## CONF.PROCESSOR_MON_ON = false
99
+ ## CONF.PROCESSOR_MON_INTERVAL = 300
100
+ ## CONF.PROCESSOR_MON_PSFORMAT = "stat,vsz,rss,sz,pmem,pcpu,nlwp,time,wchan"
101
+ ## CONF.PROCESSOR_MON_OBJECTSPACE_INSPECT_ON = false
102
+
103
+ ##CONF.SOCK_DO_NOT_REVERSE_LOOKUP = true
104
+ ##CONF.USE_RESOLV_REPLACE = false
105
+
106
+ ##CONF.BLOCK_USE_STDOUT = true
107
+
108
+ ##CONF.DEBUG_PORT_WAIT = false
109
+ ##CONF.DEBUG_FULL_BACKTRACE = false
110
+ ##CONF.DEBUG_THREAD_ABORT_ON_EXCEPTION = false
111
+ ##CONF.DEBUG_MONITOR_ON = false
112
+ ##CONF.DEBUG_PROCESSOR_TRACE_ON = false
113
+ ##CONF.DEBUG_BUG49 = false
114
+
115
+ ##CONF.BUG234 = false
116
+
117
+ ##CONF.PROCESS_LIFE_MANAGE_INTERVAL = nil
118
+ end
@@ -0,0 +1,4 @@
1
+ require 'mkmf'
2
+
3
+ create_makefile("simple_hash")
4
+
@@ -0,0 +1,42 @@
1
+ /*
2
+ * Copyright (C) 2007-2010 Rakuten, Inc.
3
+ */
4
+
5
+ #include <ruby.h>
6
+
7
+ #define MULTIPLIER 137
8
+
9
+ static VALUE simple_hash(VALUE self, VALUE vstr);
10
+
11
+
12
+ static VALUE mFairy;
13
+ static VALUE mSimpleHash;
14
+
15
+
16
+ static VALUE simple_hash(VALUE self, VALUE vstr) {
17
+ VALUE vh;
18
+ char *str;
19
+ int len;
20
+ char *p;
21
+ unsigned int h = 0;
22
+
23
+ str = StringValuePtr(vstr);
24
+ len = RSTRING_LEN(vstr);
25
+
26
+ for (p = str; p - str < len; p++) {
27
+ h = h * MULTIPLIER + *p;
28
+ }
29
+
30
+ /* vh = UINT2NUM(h); */
31
+ vh = INT2FIX(h);
32
+ return vh;
33
+ }
34
+
35
+ void Init_simple_hash(void) {
36
+ mFairy = rb_define_module("Fairy");
37
+ mSimpleHash = rb_define_module_under(mFairy, "SimpleHash");
38
+
39
+ rb_define_module_function(mSimpleHash, "hash", simple_hash, 1);
40
+ }
41
+
42
+
data/fairy.gemspec ADDED
@@ -0,0 +1,60 @@
1
+
2
require "rubygems"

# Fairy::Version is printed by a child ruby process (with ./lib on the load
# path).  The regexp below expects it to look like "N.N.N-N"; the part after
# the dash is appended to the gem version only when it is greater than 1.
raw_version = `ruby -Ilib -e 'require "fairy/version"; print Fairy::Version'`
v, patch = raw_version.scan(/^([0-9]+\.[0-9]+\.[0-9]+)-([0-9]+)/).first
unless v
  # Without this guard a failed lookup silently produced a spec whose
  # version is nil.
  raise "fairy.gemspec: cannot determine version from Fairy::Version " +
        "(got #{raw_version.inspect}, expected N.N.N-N)"
end
v += "." + patch if patch.to_i > 1

Gem::Specification.new do |s|
  s.name = "fairy"
  s.authors = "Rakuten, Inc."
  s.email = "hajime.masuda@mail.rakuten.co.jp"
  s.platform = Gem::Platform::RUBY
  s.summary = "fairy is a framework for distributed processing in Ruby, originally designed at Rakuten Institute of Technology with Yukihiro Matsumoto, the founder of Ruby."
  s.rubyforge_project = s.name
  s.homepage = "http://code.google.com/p/fairy-prj/"
  s.version = v
  s.require_path = "lib"
  s.test_file = "spec/run_all.sh"
  s.executables = ["fairy", "fairy-cat", "fairy-cp", "fairy-rm"]
  s.default_executable = "fairy"

  # Packaged files: fixed top-level files first, then everything picked up
  # by the glob patterns below.
  s.files = ["Makefile", "README", "LICENSE", "fairy.gemspec", "lib/fairy.rb" ]
  s.files.concat Dir.glob("lib/fairy/**/*.rb")
  s.files.concat Dir.glob("lib/fairy/template/*.tmpl")
  s.files.concat ["etc/fairy.conf.tmpl"]
  s.files.concat Dir.glob("bin/{#{s.executables.grep(/.*[a-z]$/).join(",")}}")
  s.files.concat Dir.glob("bin/subcmd/*[A-Za-z]")
  s.files.concat Dir.glob("ext/**/{Makefile,*.rb,*.c}")
  s.files.concat Dir.glob("doc/*.{rd,html}")
  s.files.concat Dir.glob("spec/{README,*.rb,run_all.sh}")
  s.files.concat Dir.glob("sample/*.rb")
  s.files.concat ["test/testc.rb"]
  s.files.concat Dir.glob("tools/**/*[a-z]")

  s.add_dependency("DeepConnect", ">= 0.4.06")
  s.add_dependency("fiber-mon", ">= 0.1.0")

  s.description = <<EOF
fairy is a framework for distributed processing in Ruby, originally
designed at Rakuten Institute of Technology with Yukihiro Matsumoto,
the founder of Ruby.

Although fairy was inspired by MapReduce model, a well-known
programming model for distributed processing, it's more flexible and
suitable for wider use. That's due to fairy's programming model,
called filter IF, and various built-in filters.

fairy is implemented in Ruby and inherits its high productivity and
simplicity. fairy's API is quite similar to Ruby. Therefore most
programmers who know Ruby can easily understand and use it.
EOF
end
55
+
56
+ # Editor settings
57
+ # - Emacs -
58
+ # local variables:
59
+ # mode: Ruby
60
+ # end:
@@ -0,0 +1,20 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
# Requires every *.rb file that sits in the same directory as this file,
# using the load-path-relative name "<parent dir>/<this dir>/<file>", and
# then calls Fairy::post_initialize.
#
# Files whose names end in "18.rb" are skipped when RUBY_VERSION >= "1.9.0";
# files whose names end in "19.rb" are skipped otherwise.
job_dir = File.dirname(__FILE__)
subdir = File.basename(File.dirname(job_dir))
job_name = File.basename(job_dir)
ruby19_or_later = RUBY_VERSION >= "1.9.0"

Dir.glob("#{job_dir}/*.rb").each do |job|
  base = File.basename(job)
  case base
  when /18\.rb\z/ # the dot is escaped so only a real ".rb" suffix matches
    next if ruby19_or_later
  when /19\.rb\z/
    next unless ruby19_or_later
  end
  require [subdir, job_name, base].join("/")
end

Fairy::post_initialize
@@ -0,0 +1,29 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/io-filter"
7
+
8
+ require "fairy/share/block-source"
9
+
10
module Fairy
  module Interface
    # Appends a Barrier filter to this filter and returns it.
    #
    # opts:: option Hash, or nil (the default).  When opts[:cond] is a
    #        String it is wrapped in a BlockSource before being handed to
    #        Barrier.new; any other value is passed through untouched.
    #
    # The caller's Hash is not modified: a copy carrying the wrapped :cond
    # is passed on instead.
    def barrier(opts = nil)
      # opts defaults to nil, so it must be checked before it is indexed.
      if opts && opts[:cond].kind_of?(String)
        opts = opts.merge(:cond => BlockSource.new(opts[:cond]))
      end
      barrier = Barrier.new(@fairy, opts)
      barrier.input = self
      barrier
    end
  end
  Fairy::def_filter_interface Interface


  # Client-side filter whose backend class is named "CBarrier".
  class Barrier<IOFilter
    def backend_class_name
      "CBarrier"
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/io-filter"
7
+
8
module Fairy
  # Client-side filter whose backend class is named "CBasicGroupBy".
  class BasicGroupBy<IOFilter

    module Interface
      # Wraps hash_block in a BlockSource, builds a BasicGroupBy that takes
      # this filter as its input, and returns the new filter.
      def basic_group_by(hash_block, opts = nil)
        filter = BasicGroupBy.new(@fairy, opts, BlockSource.new(hash_block))
        filter.input = self
        filter
      end
    end
    Fairy::def_filter_interface Interface

    # Forwards all three arguments to the superclass and keeps the block
    # source in @block_source.
    def initialize(fairy, opts, block_source)
      super(fairy, opts, block_source)
      @block_source = block_source
    end

    def backend_class_name
      "CBasicGroupBy"
    end
  end

  # Client-side filter whose backend class is named "CBasicMGroupBy".
  class BasicMGroupBy<IOFilter

    module Interface
      # Wraps hash_block in a BlockSource, builds a BasicMGroupBy that takes
      # this filter as its input, and returns the new filter.
      def basic_mgroup_by(hash_block, opts = nil)
        filter = BasicMGroupBy.new(@fairy, opts, BlockSource.new(hash_block))
        filter.input = self
        filter
      end
    end
    Fairy::def_filter_interface Interface

    # Forwards all three arguments to the superclass and keeps the block
    # source in @block_source.
    def initialize(fairy, opts, block_source)
      super(fairy, opts, block_source)
      @block_source = block_source
    end

    def backend_class_name
      "CBasicMGroupBy"
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/io-filter"
7
+
8
module Fairy
  # Client-side filter whose backend class is named "CCat".
  class Cat<IOFilter

    module Interface
      # job.cat(opts,...,filter,...,opts,...)
      #
      # Arguments that are Filters become the other inputs of the new Cat
      # filter.  Among the remaining arguments, a Hash in last position is
      # used as the option Hash and every other one is recorded in it as
      # a flag (key => true).  Returns the new Cat filter, whose input is
      # this filter.
      def cat(*others)
        others, flags = others.partition{|e| e.kind_of?(Filter)}
        # Work on a copy so that the caller's Hash is not modified when
        # flags are merged in.
        h = flags.last.kind_of?(Hash) ? flags.pop.dup : {}
        flags.each{|flag| h[flag] = true}

        filter = Cat.new(@fairy, h, others)
        filter.input = self
        filter
      end
    end
    Fairy::def_filter_interface Interface

    # The superclass receives the backends of the other filters; the client
    # side filters themselves are kept in @others.
    def initialize(fairy, opts, others)
      super(fairy, opts, others.collect{|o| o.backend})
      @others = others
      @opts = opts
    end

    def backend_class_name
      "CCat"
    end
  end
end
41
+
@@ -0,0 +1,51 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/filter"
7
+
8
module Fairy
  # Client-side filter whose backend class is named "CDirectProduct".
  class DirectProduct<IOFilter

    module Interface
      # job.direct_product(opts,...,filter,...,block_source, opts,...)
      #
      # Arguments that are Filters become the other inputs of the new
      # DirectProduct filter.  A String is taken as the block source only
      # when it is the final argument.  Among the remaining arguments,
      # a Hash in last position is used as the option Hash and every other
      # one is recorded in it as a flag (key => true).  Returns the new
      # filter, whose input is this filter.
      #
      # NOTE(review): when no String is given, BlockSource.new receives
      # nil -- confirm that BlockSource accepts that.
      def direct_product(*others)
        block_source = nil
        if others.last.kind_of?(String)
          block_source = others.pop
        end
        others, flags = others.partition{|e| e.kind_of?(Filter)}
        # Work on a copy so that the caller's Hash is not modified when
        # flags are merged in.
        h = flags.last.kind_of?(Hash) ? flags.pop.dup : {}
        flags.each{|flag| h[flag] = true}

        block_source = BlockSource.new(block_source)
        dp = DirectProduct.new(@fairy, h, others, block_source)
        dp.input = self
        dp
      end
      alias product direct_product

      # self * other: direct product using the block source "|e| e".
      def *(other)
        direct_product(other, %{|e| e})
      end
    end
    Fairy::def_filter_interface Interface

    # The superclass receives the backends of the other filters plus the
    # block source; the client side objects are kept in instance variables.
    def initialize(fairy, opts, others, block_source)
      super(fairy, opts, others.collect{|o| o.backend}, block_source)
      @others = others
      # Previously a bare "@block_source" expression, which had no effect.
      @block_source = block_source
      @opts = opts
    end

    def backend_class_name
      "CDirectProduct"
    end
  end
end
51
+
@@ -0,0 +1,79 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/basic-group-by"
7
+ require "fairy/client/seg-join"
8
+
9
+ require "fairy/client/group-by"
10
+
11
# equijoin_on_memory(other, no1[, no2])
#
# Joins the elements of the input with those of other on equal column
# values: column no1 of the input elements against column no2 of the other
# elements (no2 defaults to no1).  Each matching pair is emitted as
# [input_element, other_element].
#
# Both sides are split with basic_group_by on
# "hash of the join column % CONF.GROUP_BY_NO_SEGMENT", passed through
# a barrier, and then combined segment by segment with seg_join; inside
# a segment the elements of both sides are converted to arrays and grouped
# by the join column.
#
# NOTE(review): the BEGIN blocks read @Pool[:HASH_SEED], but the code that
# defined that pool variable is commented out below -- confirm it is
# defined elsewhere.
Fairy.def_filter(:equijoin_on_memory) do |fairy, input, other, *no|
  # Column indexes (the debugging "puts" around these assignments is gone).
  no1 = no2 = no[0]
  no2 = no[1] if no[1]

#   mod = Fairy::CONF.GROUP_BY_HASH_MODULE
#   require mod
#   seed = Fairy::HValueGenerator.create_seed
#   fairy.def_pool_variable(:HASH_SEED, seed)

  main = input.basic_group_by(%{|e| @hgen.value(e[#{no1}]) % CONF.GROUP_BY_NO_SEGMENT},
                              :BEGIN=>%{
      mod = CONF.GROUP_BY_HASH_MODULE
      require mod
      @hgen = Fairy::HValueGenerator.new(@Pool[:HASH_SEED])
    }).barrier(:mode=>:NODE_CREATION, :cond=>:NODE_ARRIVED, :buffer=>:MEMORY)
  other2 = other.basic_group_by(%{|e| @hgen.value(e[#{no2}]) % CONF.GROUP_BY_NO_SEGMENT},
                                :BEGIN=>%{
      mod = CONF.GROUP_BY_HASH_MODULE
      require mod
      @hgen = Fairy::HValueGenerator.new(@Pool[:HASH_SEED])
    }).barrier(:mode=>:NODE_CREATION, :cond=>:NODE_ARRIVED, :buffer=>:MEMORY)


  main.seg_join(other2, %{|in0, in1, out_block|

    next unless in0 && in1

    ary_m = in0.to_a.group_by{|e| e[#{no1}]}
    ary_o = in1.to_a.group_by{|e| e[#{no2}]}

    ary_m.each do |key, values|
      o_values = ary_o[key]
      next unless o_values
      values.each do |value|
        o_values.each do |o_value|
          out_block.call([value, o_value])
        end
      end
    end
  }, :by => :key)
end
52
+
53
# equijoin(other, no1[, no2])
#
# Tags every input element as [join column value, 0, element] and every
# element of other as [join column value, 1, element], concatenates both
# streams, groups them by the join column value and, for every group that
# holds elements of both sides, produces the product of the original
# elements of side 0 with those of side 1.  Groups lacking one side
# produce an empty array.
Fairy.def_filter(:equijoin) do |fairy, input, other, *no|
  no1 = no[0]
  no2 = no[1] || no1

  tagged_main = input.map(%{|e| [e[#{no1}], 0, e]})
  tagged_other = other.map(%{|e| [e[#{no2}], 1, e]})

  joined = tagged_main.cat(tagged_other)
  joined.group_by(%{|e| e[0]}).mapf(%{|values|
    parted = values.group_by{|value| value[1]}
    if parted[0] && parted[1]
      parted[0].collect{|e| e[2]}.product(parted[1].collect{|e| e[2]})
    else
      []
    end
  })

#   main.cat(other).mod_group_by(%{|e| e[0]}).emap(%{|key, values|
#   puts "XXXX: \#{key.inspect}"
#   puts "XXXS: \#{values.inspect}"

#     parted = values.group_by{|value| value[1]}
#     parted[0].product(parted[1])
#   })
end
77
+
78
+
79
+