fairy 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. data/LICENSE +674 -0
  2. data/Makefile +116 -0
  3. data/README +15 -0
  4. data/bin/fairy +582 -0
  5. data/bin/fairy-cat +74 -0
  6. data/bin/fairy-cp +128 -0
  7. data/bin/fairy-rm +122 -0
  8. data/bin/subcmd/controller +41 -0
  9. data/bin/subcmd/inspector +81 -0
  10. data/bin/subcmd/master +43 -0
  11. data/bin/subcmd/node +47 -0
  12. data/bin/subcmd/processor +54 -0
  13. data/doc/programming-interface.html +240 -0
  14. data/doc/programming-interface.rd +300 -0
  15. data/etc/fairy.conf.tmpl +118 -0
  16. data/ext/simple_hash/extconf.rb +4 -0
  17. data/ext/simple_hash/simple_hash.c +42 -0
  18. data/fairy.gemspec +60 -0
  19. data/lib/fairy/client/addins.rb +20 -0
  20. data/lib/fairy/client/barrier.rb +29 -0
  21. data/lib/fairy/client/basic-group-by.rb +52 -0
  22. data/lib/fairy/client/cat.rb +41 -0
  23. data/lib/fairy/client/direct-product.rb +51 -0
  24. data/lib/fairy/client/equijoin.rb +79 -0
  25. data/lib/fairy/client/exec.rb +54 -0
  26. data/lib/fairy/client/filter.rb +62 -0
  27. data/lib/fairy/client/find.rb +35 -0
  28. data/lib/fairy/client/group-by.rb +194 -0
  29. data/lib/fairy/client/here.rb +84 -0
  30. data/lib/fairy/client/inject.rb +70 -0
  31. data/lib/fairy/client/input-file.rb +53 -0
  32. data/lib/fairy/client/input-iota.rb +49 -0
  33. data/lib/fairy/client/input-local-file.rb +188 -0
  34. data/lib/fairy/client/input-varray.rb +30 -0
  35. data/lib/fairy/client/input.rb +42 -0
  36. data/lib/fairy/client/io-filter.rb +26 -0
  37. data/lib/fairy/client/junction.rb +31 -0
  38. data/lib/fairy/client/map.rb +34 -0
  39. data/lib/fairy/client/merge-group-by.rb +71 -0
  40. data/lib/fairy/client/output-file.rb +64 -0
  41. data/lib/fairy/client/output-local-file.rb +60 -0
  42. data/lib/fairy/client/output-null.rb +47 -0
  43. data/lib/fairy/client/output-varray.rb +50 -0
  44. data/lib/fairy/client/output.rb +29 -0
  45. data/lib/fairy/client/roma-put.rb +62 -0
  46. data/lib/fairy/client/roma.rb +156 -0
  47. data/lib/fairy/client/seg-join.rb +61 -0
  48. data/lib/fairy/client/seg-map.rb +78 -0
  49. data/lib/fairy/client/seg-shuffle.rb +35 -0
  50. data/lib/fairy/client/seg-split.rb +27 -0
  51. data/lib/fairy/client/seg-zip.rb +60 -0
  52. data/lib/fairy/client/select.rb +38 -0
  53. data/lib/fairy/client/sort.rb +48 -0
  54. data/lib/fairy/client/sort18.rb +56 -0
  55. data/lib/fairy/client/sort19.rb +61 -0
  56. data/lib/fairy/client/there.rb +47 -0
  57. data/lib/fairy/client/top_n_into_roma.rb +34 -0
  58. data/lib/fairy/client/wc.rb +92 -0
  59. data/lib/fairy/controller.rb +1103 -0
  60. data/lib/fairy/logger.rb +107 -0
  61. data/lib/fairy/master/addins.rb +20 -0
  62. data/lib/fairy/master/atom.rb +17 -0
  63. data/lib/fairy/master/c-barrier.rb +283 -0
  64. data/lib/fairy/master/c-basic-group-by.rb +250 -0
  65. data/lib/fairy/master/c-cat.rb +159 -0
  66. data/lib/fairy/master/c-direct-product.rb +203 -0
  67. data/lib/fairy/master/c-exec.rb +68 -0
  68. data/lib/fairy/master/c-filter.rb +422 -0
  69. data/lib/fairy/master/c-find.rb +138 -0
  70. data/lib/fairy/master/c-group-by.rb +64 -0
  71. data/lib/fairy/master/c-here.rb +80 -0
  72. data/lib/fairy/master/c-inject.rb +119 -0
  73. data/lib/fairy/master/c-input-file.rb +46 -0
  74. data/lib/fairy/master/c-input-iota.rb +66 -0
  75. data/lib/fairy/master/c-input-local-file.rb +117 -0
  76. data/lib/fairy/master/c-input-varray.rb +53 -0
  77. data/lib/fairy/master/c-input.rb +24 -0
  78. data/lib/fairy/master/c-inputtable.rb +31 -0
  79. data/lib/fairy/master/c-inputtable18.rb +36 -0
  80. data/lib/fairy/master/c-inputtable19.rb +35 -0
  81. data/lib/fairy/master/c-io-filter.rb +28 -0
  82. data/lib/fairy/master/c-junction.rb +54 -0
  83. data/lib/fairy/master/c-map.rb +27 -0
  84. data/lib/fairy/master/c-merge-group-by.rb +241 -0
  85. data/lib/fairy/master/c-output-file.rb +84 -0
  86. data/lib/fairy/master/c-output-local-file.rb +19 -0
  87. data/lib/fairy/master/c-output-null.rb +45 -0
  88. data/lib/fairy/master/c-output-varray.rb +57 -0
  89. data/lib/fairy/master/c-output.rb +20 -0
  90. data/lib/fairy/master/c-seg-join.rb +141 -0
  91. data/lib/fairy/master/c-seg-map.rb +26 -0
  92. data/lib/fairy/master/c-seg-shuffle.rb +87 -0
  93. data/lib/fairy/master/c-seg-split.rb +110 -0
  94. data/lib/fairy/master/c-seg-zip.rb +132 -0
  95. data/lib/fairy/master/c-select.rb +27 -0
  96. data/lib/fairy/master/c-sort.rb +108 -0
  97. data/lib/fairy/master/c-there.rb +57 -0
  98. data/lib/fairy/master/c-wc.rb +232 -0
  99. data/lib/fairy/master/job-interpriter.rb +19 -0
  100. data/lib/fairy/master/scheduler.rb +24 -0
  101. data/lib/fairy/master.rb +329 -0
  102. data/lib/fairy/node/addins.rb +19 -0
  103. data/lib/fairy/node/p-barrier.rb +95 -0
  104. data/lib/fairy/node/p-basic-group-by.rb +252 -0
  105. data/lib/fairy/node/p-direct-product.rb +153 -0
  106. data/lib/fairy/node/p-exec.rb +30 -0
  107. data/lib/fairy/node/p-filter.rb +363 -0
  108. data/lib/fairy/node/p-find.rb +111 -0
  109. data/lib/fairy/node/p-group-by.rb +1534 -0
  110. data/lib/fairy/node/p-here.rb +21 -0
  111. data/lib/fairy/node/p-identity.rb +24 -0
  112. data/lib/fairy/node/p-inject.rb +127 -0
  113. data/lib/fairy/node/p-input-file.rb +108 -0
  114. data/lib/fairy/node/p-input-iota.rb +39 -0
  115. data/lib/fairy/node/p-input-local-file.rb +61 -0
  116. data/lib/fairy/node/p-input-varray.rb +26 -0
  117. data/lib/fairy/node/p-io-filter.rb +28 -0
  118. data/lib/fairy/node/p-map.rb +40 -0
  119. data/lib/fairy/node/p-merger-group-by.rb +48 -0
  120. data/lib/fairy/node/p-output-file.rb +104 -0
  121. data/lib/fairy/node/p-output-local-file.rb +14 -0
  122. data/lib/fairy/node/p-output-null.rb +32 -0
  123. data/lib/fairy/node/p-output-varray.rb +41 -0
  124. data/lib/fairy/node/p-seg-join.rb +82 -0
  125. data/lib/fairy/node/p-seg-map.rb +34 -0
  126. data/lib/fairy/node/p-seg-split.rb +61 -0
  127. data/lib/fairy/node/p-seg-zip.rb +79 -0
  128. data/lib/fairy/node/p-select.rb +40 -0
  129. data/lib/fairy/node/p-single-exportable.rb +90 -0
  130. data/lib/fairy/node/p-sort.rb +195 -0
  131. data/lib/fairy/node/p-task.rb +113 -0
  132. data/lib/fairy/node/p-there.rb +44 -0
  133. data/lib/fairy/node/p-wc.rb +266 -0
  134. data/lib/fairy/node.rb +187 -0
  135. data/lib/fairy/processor.rb +510 -0
  136. data/lib/fairy/share/base-app.rb +114 -0
  137. data/lib/fairy/share/block-source.rb +234 -0
  138. data/lib/fairy/share/conf.rb +396 -0
  139. data/lib/fairy/share/debug.rb +21 -0
  140. data/lib/fairy/share/encoding.rb +17 -0
  141. data/lib/fairy/share/fast-tempfile.rb +93 -0
  142. data/lib/fairy/share/file-place.rb +176 -0
  143. data/lib/fairy/share/hash-1.rb +20 -0
  144. data/lib/fairy/share/hash-md5.rb +28 -0
  145. data/lib/fairy/share/hash-murmur.rb +69 -0
  146. data/lib/fairy/share/hash-rb18.rb +20 -0
  147. data/lib/fairy/share/hash-simple-hash.rb +28 -0
  148. data/lib/fairy/share/inspector.rb +16 -0
  149. data/lib/fairy/share/lc/exceptions.rb +82 -0
  150. data/lib/fairy/share/lc/ja/exceptions.rb +81 -0
  151. data/lib/fairy/share/locale.rb +17 -0
  152. data/lib/fairy/share/log.rb +215 -0
  153. data/lib/fairy/share/pool-dictionary.rb +53 -0
  154. data/lib/fairy/share/port-marshaled-queue.rb +347 -0
  155. data/lib/fairy/share/port.rb +1697 -0
  156. data/lib/fairy/share/reference.rb +45 -0
  157. data/lib/fairy/share/stdout.rb +56 -0
  158. data/lib/fairy/share/tr.rb +16 -0
  159. data/lib/fairy/share/varray.rb +147 -0
  160. data/lib/fairy/share/vfile.rb +183 -0
  161. data/lib/fairy/version.rb +8 -0
  162. data/lib/fairy.rb +206 -0
  163. data/sample/grep.rb +46 -0
  164. data/sample/ping.rb +19 -0
  165. data/sample/sort.rb +102 -0
  166. data/sample/wordcount.rb +61 -0
  167. data/spec/README +12 -0
  168. data/spec/fairy1_spec.rb +31 -0
  169. data/spec/fairy2_spec.rb +42 -0
  170. data/spec/fairy3_spec.rb +126 -0
  171. data/spec/fairy4_spec.rb +63 -0
  172. data/spec/fairy5_spec.rb +45 -0
  173. data/spec/fairy6_spec.rb +52 -0
  174. data/spec/fairy7_spec.rb +58 -0
  175. data/spec/fairy8_spec.rb +48 -0
  176. data/spec/mkdat.rb +148 -0
  177. data/spec/run_all.sh +65 -0
  178. data/test/testc.rb +7111 -0
  179. data/tools/cap_recipe/Capfile +144 -0
  180. data/tools/cap_recipe/cluster.yml.sample +14 -0
  181. data/tools/fairy_perf_graph.rb +444 -0
  182. data/tools/git-tag +44 -0
  183. data/tools/log-analysis.rb +62 -0
  184. data/tools/svn-ls-diff +38 -0
  185. data/tools/svn-tags +37 -0
  186. metadata +298 -0
@@ -0,0 +1,118 @@
1
+ # -*- ruby -*-
2
+ # encoding: UTF-8
3
+
4
+ module Fairy
5
+
6
+ ## Lines commented out with a double sharp (##) show the system default values.
7
+
8
+ ##CONF.RUBY_BIN = ENV["FAIRY_RUBY"] || "ruby"
9
+
10
+ ##CONF.MASTER_HOST = #NO SYSTEM DEFAULT VALUE
11
+ CONF.MASTER_HOST = NO SYSTEM DEFAULT VALUE
12
+
13
+ ##CONF.MASTER_PORT = "19999"
14
+
15
+ ##CONF.HOME = ENV["FAIRY_HOME"]
16
+ ##CONF.BIN = CONF.HOME+"/bin"
17
+ ##CONF.LIB = CONF.HOME+"/lib"
18
+ ##CONF.SUBCMD = CONF.BIN+"/subcmd"
19
+ ##CONF.CONTROLLER_BIN = CONF.SUBCMD+"/controller"
20
+ ##CONF.PROCESSOR_BIN = CONF.SUBCMD+"/processor"
21
+
22
+ ##CONF.DEFAULT_EXTERNAL = nil
23
+ ##CONF.DEFAULT_INTERNAL = nil
24
+
25
+ ##CONF.IGNORE_EXCEPTION_ON_FILTER = false
26
+
27
+ ##CONF.MASTER_MAX_ACTIVE_PROCESSORS = 4
28
+ ##CONF.CONTROLLER_INPUT_PROCESSOR_N = 10
29
+
30
+ ## Upper limit on the number of processors created; nil means unlimited. (ja_JP.utf-8: 生成されるプロセッサの上限. nilなら無制限.)
31
+ ##CONF.CONTROLLER_MAX_ACTIVE_TASKS_IN_PROCESSOR = 4
32
+
33
+ ##CONF.CONTROLLER_ASSIGN_NEW_PROCESSOR_N_FACTOR = 1
34
+
35
+ ## CONF.POSTMAPPING_POLICY = nil
36
+
37
+ ##CONF.PREQUEUING_POLICY = {:queuing_class => :FileMarshaledQueue}
38
+ ##CONF.POSTQUEUING_POLICY = {:queuing_class => :FileMarshaledQueue}
39
+
40
+ ##CONF.POSTQUEUE_MAX_TRANSFER_SIZE = 100000
41
+ ##CONF.POOLQUEUE_POOL_THRESHOLD = 10000
42
+
43
+ ##CONF.ONMEMORY_SIZEDQUEUE_SIZE = 10000
44
+ ##CONF.FILEBUFFEREDQUEUE_THRESHOLD = 10000/2
45
+
46
+ ##CONF.MARSHAL_QUEUE_CHUNK_SIZE = DEFAULT_CONF.POOLQUEUE_POOL_THRESHOLD
47
+ ##CONF.MARSHAL_QUEUE_MIN_CHUNK_NO = DEFAULT_CONF.POOLQUEUE_POOL_THRESHOLD
48
+ ##CONF.SIZEDMARSHAL_QUEUE_MAX_CHUNK_NO = 10
49
+
50
+ ##CONF.SORTEDQUEUE_POOL_THRESHOLD = CONF.POOLQUEUE_POOL_THRESHOLD
51
+ ##CONF.SORTEDQUEUE_THRESHOLD = 10000/2
52
+ ##CONF.SORTEDQUEUE_SORTBY = %{|v| v}
53
+
54
+ ##CONF.INPUT_LOCAL_FILE_BUFFER_SIZE = 1024*1024
55
+ ##CONF.HERE_POOL_THRESHOLD = 32000
56
+
57
+ ##CONF.GROUP_BY_NO_SEGMENT = 4
58
+
59
+ ##CONF.GROUP_BY_HASH_MODULE = "fairy/share/hash-md5"
60
+ ##CONF.GROUP_BY_GROUPING_OPTIMIZE = false
61
+
62
+ ##CONF.GROUP_BY_BUFFERING_POLICY = {:buffering_class => :DirectMergeSortBuffer}
63
+ ##CONF.GROUP_BY_CMSB_THRESHOLD = 400_000
64
+ ##CONF.GROUP_BY_CMSB_CHUNK_SIZE = 1000
65
+
66
+ ##CONF.BARRIER_MEMORY_BUFFERING_POLICY = {:queuing_class => :PoolQueue}
67
+ ##CONF.SORT_BUFFERING_POLICY = {:buffering_class => "PGroupBy::DirectMergeSortBuffer"}
68
+ ##CONF.SORT_SAMPLING_MIN = 100
69
+ ##CONF.SORT_SAMPLING_MAX = 10000
70
+ ##CONF.SORT_SAMPLING_RATIO_1_TO = 100
71
+ ##CONF.SORT_NO_SEGMENT = CONF.GROUP_BY_NO_SEGMENT
72
+ ##CONF.SORT_CMP_OPTIMIZE = false
73
+
74
+ ##CONF.IOTA_SPLIT_NO = 4
75
+
76
+ ##CONF.TRANSFAR_MARSHAL_STRING_ARRAY_OPTIMIZE = false
77
+
78
+ ##CONF.VF_ROOT = CONF.HOME+"/Repos"
79
+ ##CONF.VF_PREFIX = `hostname`.chomp
80
+ #- For CONF.VF_PREFIX, the client's setting is used.
81
+ # (ja_JP.utf-8: CONF.VF_PREFIXはクライアントのものが使われる)
82
+
83
+ ##CONF.VF_SPLIT_SIZE = 64*1024*1024
84
+
85
+ ##CONF.TMP_DIR = "/tmp/fairy/tmpbuf"
86
+
87
+ ##CONF.LOG_FILE = "/tmp/fairy/log"
88
+ ##CONF.LOG_FLUSH_INTERVAL = 1
89
+ ##CONF.LOG_MARK_INTERVAL = 300
90
+ ##CONF.LOG_LEVEL = :INFO
91
+ ##CONF.LOG_IMPORT_NTIMES_POP = 100000
92
+ ##CONF.LOG_LOCAL_OUTPUT_DEV = :$stderr
93
+ ##CONF.LOG_ROTATE_INTERVAL = 60*60*24
94
+ ##CONF.LOG_ROTATE_N = 7
95
+
96
+ ##CONF.SUBCMD_EXEC_TIMEOUT = 60
97
+
98
+ ## CONF.PROCESSOR_MON_ON = false
99
+ ## CONF.PROCESSOR_MON_INTERVAL = 300
100
+ ## CONF.PROCESSOR_MON_PSFORMAT = "stat,vsz,rss,sz,pmem,pcpu,nlwp,time,wchan"
101
+ ## CONF.PROCESSOR_MON_OBJECTSPACE_INSPECT_ON = false
102
+
103
+ ##CONF.SOCK_DO_NOT_REVERSE_LOOKUP = true
104
+ ##CONF.USE_RESOLV_REPLACE = false
105
+
106
+ ##CONF.BLOCK_USE_STDOUT = true
107
+
108
+ ##CONF.DEBUG_PORT_WAIT = false
109
+ ##CONF.DEBUG_FULL_BACKTRACE = false
110
+ ##CONF.DEBUG_THREAD_ABORT_ON_EXCEPTION = false
111
+ ##CONF.DEBUG_MONITOR_ON = false
112
+ ##CONF.DEBUG_PROCESSOR_TRACE_ON = false
113
+ ##CONF.DEBUG_BUG49 = false
114
+
115
+ ##CONF.BUG234 = false
116
+
117
+ ##CONF.PROCESS_LIFE_MANAGE_INTERVAL = nil
118
+ end
@@ -0,0 +1,4 @@
1
+ require 'mkmf'
2
+ # Generate the Makefile used to build the "simple_hash" extension.
3
+ create_makefile("simple_hash")
4
+
@@ -0,0 +1,42 @@
1
+ /*
2
+ * Copyright (C) 2007-2010 Rakuten, Inc.
3
+ */
4
+
5
+ #include <ruby.h>
6
+
7
+ #define MULTIPLIER 137
8
+
9
+ static VALUE simple_hash(VALUE self, VALUE vstr);
10
+
11
+
12
+ static VALUE mFairy;
13
+ static VALUE mSimpleHash;
14
+
15
+ /* Multiplicative hash of a Ruby string: h = h * MULTIPLIER + byte for each byte; the result is returned via INT2FIX. */
16
+ static VALUE simple_hash(VALUE self, VALUE vstr) {
17
+     VALUE vh;
18
+     char *str;
19
+     long len;  /* RSTRING_LEN() yields a long; storing it in an int could truncate it */
20
+     char *p;
21
+     unsigned int h = 0;
22
+
23
+     str = StringValuePtr(vstr);
24
+     len = RSTRING_LEN(vstr);
25
+
26
+     for (p = str; p - str < len; p++) {
27
+         h = h * MULTIPLIER + *p;  /* *p is a plain char: where char is signed, bytes >= 0x80 are added as negative values */
28
+     }
29
+
30
+     /* vh = UINT2NUM(h); */
31
+     vh = INT2FIX(h);  /* NOTE(review): INT2FIX does no range check; presumably lossy where long is 32 bits - confirm */
32
+     return vh;
33
+ }
34
+
35
+ void Init_simple_hash(void) {  /* defines the Fairy::SimpleHash module */
36
+ mFairy = rb_define_module("Fairy");
37
+ mSimpleHash = rb_define_module_under(mFairy, "SimpleHash");
38
+ /* Expose simple_hash() as the module function Fairy::SimpleHash.hash, taking one argument. */
39
+ rb_define_module_function(mSimpleHash, "hash", simple_hash, 1);
40
+ }
41
+
42
+
data/fairy.gemspec ADDED
@@ -0,0 +1,60 @@
1
+
2
+ require "rubygems"
3
+
4
+ v = `ruby -Ilib -e 'require "fairy/version"; print Fairy::Version'`
5
+ v, p = v.scan(/^([0-9]+\.[0-9]+\.[0-9]+)-([0-9]+)/).first
6
+ if p.to_i > 1
7
+ v += "."+p
8
+ end
9
+
10
+ Gem::Specification.new do |s|
11
+ s.name = "fairy"
12
+ s.authors = "Rakuten, Inc."
13
+ s.email = "hajime.masuda@mail.rakuten.co.jp"
14
+ s.platform = Gem::Platform::RUBY
15
+ s.summary = "fairy is a framework for distributed processing in Ruby, originally designed at Rakuten Institute of Technology with Yukihiro Matsumoto, the founder of Ruby."
16
+ s.rubyforge_project = s.name # NOTE(review): believed deprecated in recent RubyGems - confirm
17
+ s.homepage = "http://code.google.com/p/fairy-prj/"
18
+ s.version = v
19
+ s.require_path = "lib"
20
+ s.test_file = "spec/run_all.sh"
21
+ s.executables = ["fairy", "fairy-cat", "fairy-cp", "fairy-rm"]
22
+ s.default_executable = "fairy" # NOTE(review): believed deprecated in recent RubyGems - confirm
23
+ # Packaged files: a fixed list plus glob matches under lib, bin, ext, doc, spec, sample and tools.
24
+ s.files = ["Makefile", "README", "LICENSE", "fairy.gemspec", "lib/fairy.rb" ]
25
+ s.files.concat Dir.glob("lib/fairy/**/*.rb")
26
+ s.files.concat Dir.glob("lib/fairy/template/*.tmpl")
27
+ s.files.concat ["etc/fairy.conf.tmpl"]
28
+ s.files.concat Dir.glob("bin/{#{s.executables.grep(/.*[a-z]$/).join(",")}}")
29
+ s.files.concat Dir.glob("bin/subcmd/*[A-Za-z]")
30
+ s.files.concat Dir.glob("ext/**/{Makefile,*.rb,*.c}")
31
+ s.files.concat Dir.glob("doc/*.{rd,html}")
32
+ s.files.concat Dir.glob("spec/{README,*.rb,run_all.sh}")
33
+ s.files.concat Dir.glob("sample/*.rb")
34
+ s.files.concat ["test/testc.rb"]
35
+ s.files.concat Dir.glob("tools/**/*[a-z]")
36
+ # Gems this gem depends on, with minimum versions.
37
+ s.add_dependency("DeepConnect", ">= 0.4.06")
38
+ s.add_dependency("fiber-mon", ">= 0.1.0")
39
+ # Long description, given as a heredoc.
40
+ s.description = <<EOF
41
+ fairy is a framework for distributed processing in Ruby, originally
42
+ designed at Rakuten Institute of Technology with Yukihiro Matsumoto,
43
+ the founder of Ruby.
44
+
45
+ Although fairy was inspired by MapReduce model, a well-known
46
+ programming model for distributed processing, it's more flexible and
47
+ suitable for wider use. That's due to fairy's programming model,
48
+ called filter IF, and various built-in filters.
49
+
50
+ fairy is implemented in Ruby and inherits its high productivity and
51
+ simplicity. fairy's API is quite similar to Ruby. Therefore most
52
+ programmers who know Ruby can easily understand and use it.
53
+ EOF
54
+ end
55
+
56
+ # Editor settings
57
+ # - Emacs -
58
+ # local variables:
59
+ # mode: Ruby
60
+ # end:
@@ -0,0 +1,20 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ job_dir = File.dirname(__FILE__)
7
+ subdir = File.basename(File.dirname(job_dir))
8
+ job_name = File.basename(job_dir)
9
+ for job in Dir.glob("#{job_dir}/*.rb")
10
+ base = File.basename(job)
11
+ case base
12
+ when /18.rb$/
13
+ next if RUBY_VERSION >= "1.9.0"
14
+ when /19.rb$/
15
+ next unless RUBY_VERSION >= "1.9.0"
16
+ end
17
+ require [subdir, job_name, base].join("/")
18
+ end
19
+
20
+ Fairy::post_initialize
@@ -0,0 +1,29 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/io-filter"
7
+
8
+ require "fairy/share/block-source"
9
+
10
+ module Fairy
11
+ module Interface
12
+ def barrier(opts = nil)
13
+ if opts[:cond].kind_of?(String)
14
+ opts[:cond] = BlockSource.new(opts[:cond])
15
+ end
16
+ barrier = Barrier.new(@fairy, opts)
17
+ barrier.input = self
18
+ barrier
19
+ end
20
+ end
21
+ Fairy::def_filter_interface Interface
22
+
23
+
24
+ class Barrier<IOFilter
25
+ def backend_class_name
26
+ "CBarrier"
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,52 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/io-filter"
7
+
8
+ module Fairy
9
+ class BasicGroupBy<IOFilter
10
+
11
+ module Interface
12
+ def basic_group_by(hash_block, opts = nil)
13
+ hash_block = BlockSource.new(hash_block)
14
+ group_by = BasicGroupBy.new(@fairy, opts, hash_block)
15
+ group_by.input = self
16
+ group_by
17
+ end
18
+ end
19
+ Fairy::def_filter_interface Interface
20
+
21
+ def initialize(fairy, opts, block_source)
22
+ super
23
+ @block_source = block_source
24
+ end
25
+
26
+ def backend_class_name
27
+ "CBasicGroupBy"
28
+ end
29
+ end
30
+
31
+ class BasicMGroupBy<IOFilter
32
+
33
+ module Interface
34
+ def basic_mgroup_by(hash_block, opts = nil)
35
+ hash_block = BlockSource.new(hash_block)
36
+ mgroup_by = BasicMGroupBy.new(@fairy, opts, hash_block)
37
+ mgroup_by.input = self
38
+ mgroup_by
39
+ end
40
+ end
41
+ Fairy::def_filter_interface Interface
42
+
43
+ def initialize(fairy, opts, block_source)
44
+ super
45
+ @block_source = block_source
46
+ end
47
+
48
+ def backend_class_name
49
+ "CBasicMGroupBy"
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,41 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/io-filter"
7
+
8
+ module Fairy
9
+ class Cat<IOFilter
10
+
11
+ module Interface
12
+ # jpb.cat(opts,...,filter,...,opts,...)
13
+ def cat(*others)
14
+ others, opts = others.partition{|e| e.kind_of?(Filter)}
15
+ if opts.last.kind_of?(Hash)
16
+ h = opts.pop
17
+ else
18
+ h = {}
19
+ end
20
+ opts.each{|e| h[e] = true}
21
+
22
+ cat = Cat.new(@fairy, h, others)
23
+ cat.input = self
24
+ cat
25
+ end
26
+ end
27
+ Fairy::def_filter_interface Interface
28
+
29
+ def initialize(fairy, opts, others)
30
+ super(fairy, opts, others.collect{|o| o.backend})
31
+ @others = others
32
+ @block_source
33
+ @opts = opts
34
+ end
35
+
36
+ def backend_class_name
37
+ "CCat"
38
+ end
39
+ end
40
+ end
41
+
@@ -0,0 +1,51 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/filter"
7
+
8
+ module Fairy
9
+ class DirectProduct<IOFilter
10
+
11
+ module Interface
12
+ # jpb.direct_product(opts,...,filter,...,block_source, opts,...)
13
+ def direct_product(*others)
14
+ block_source = nil
15
+ if others.last.kind_of?(String)
16
+ block_source = others.pop
17
+ end
18
+ others, opts = others.partition{|e| e.kind_of?(Filter)}
19
+ if opts.last.kind_of?(Hash)
20
+ h = opts.pop
21
+ else
22
+ h = {}
23
+ end
24
+ opts.each{|e| h[e] = true}
25
+
26
+ block_source = BlockSource.new(block_source)
27
+ dp = DirectProduct.new(@fairy, h, others, block_source)
28
+ dp.input = self
29
+ dp
30
+ end
31
+ alias product direct_product
32
+
33
+ def *(other)
34
+ direct_product(other, %{|e| e})
35
+ end
36
+ end
37
+ Fairy::def_filter_interface Interface
38
+
39
+ def initialize(fairy, opts, others, block_source)
40
+ super(fairy, opts, others.collect{|o| o.backend}, block_source)
41
+ @others = others
42
+ @block_source
43
+ @opts = opts
44
+ end
45
+
46
+ def backend_class_name
47
+ "CDirectProduct"
48
+ end
49
+ end
50
+ end
51
+
@@ -0,0 +1,79 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/basic-group-by"
7
+ require "fairy/client/seg-join"
8
+
9
+ require "fairy/client/group-by"
10
+
11
+ Fairy.def_filter(:equijoin_on_memory) do |fairy, input, other, *no|
12
+ puts no1 = no2 = no[0]
13
+ puts no2 = no[1] if no[1]
14
+
15
+ # mod = Fairy::CONF.GROUP_BY_HASH_MODULE
16
+ # require mod
17
+ # seed = Fairy::HValueGenerator.create_seed
18
+ # fairy.def_pool_variable(:HASH_SEED, seed)
19
+
20
+ main = input.basic_group_by(%{|e| @hgen.value(e[#{no1}]) % CONF.GROUP_BY_NO_SEGMENT},
21
+ :BEGIN=>%{
22
+ mod = CONF.GROUP_BY_HASH_MODULE
23
+ require mod
24
+ @hgen = Fairy::HValueGenerator.new(@Pool[:HASH_SEED])
25
+ }).barrier(:mode=>:NODE_CREATION, :cond=>:NODE_ARRIVED, :buffer=>:MEMORY)
26
+ other2 = other.basic_group_by(%{|e| @hgen.value(e[#{no2}]) % CONF.GROUP_BY_NO_SEGMENT},
27
+ :BEGIN=>%{
28
+ mod = CONF.GROUP_BY_HASH_MODULE
29
+ require mod
30
+ @hgen = Fairy::HValueGenerator.new(@Pool[:HASH_SEED])
31
+ }).barrier(:mode=>:NODE_CREATION, :cond=>:NODE_ARRIVED, :buffer=>:MEMORY)
32
+
33
+
34
+ main.seg_join(other2, %{|in0, in1, out_block|
35
+
36
+ next unless in0 && in1
37
+
38
+ ary_m = in0.to_a.group_by{|e| e[#{no1}]}
39
+ ary_o = in1.to_a.group_by{|e| e[#{no2}]}
40
+
41
+ ary_m.each do |key, values|
42
+ o_values = ary_o[key]
43
+ next unless o_values
44
+ values.each do |value|
45
+ o_values.each do |o_value|
46
+ out_block.call([value, o_value])
47
+ end
48
+ end
49
+ end
50
+ }, :by => :key)
51
+ end
52
+
53
+ Fairy.def_filter(:equijoin) do |fairy, input, other, *no|
54
+ no1 = no2 = no[0]
55
+ no2 = no[1] if no[1]
56
+
57
+ main = input.map(%{|e| [e[#{no1}], 0, e]})
58
+ other = other.map(%{|e| [e[#{no2}], 1, e]})
59
+
60
+ main.cat(other).group_by(%{|e| e[0]}).mapf(%{|values|
61
+ parted = values.group_by{|value| value[1]}
62
+ if parted[0] && parted[1]
63
+ parted[0].collect{|e| e[2]}.product(parted[1].collect{|e| e[2]})
64
+ else
65
+ []
66
+ end
67
+ })
68
+
69
+ # main.cat(other).mod_group_by(%{|e| e[0]}).emap(%{|key, values|
70
+ # puts "XXXX: \#{key.inspect}"
71
+ # puts "XXXS: \#{values.inspect}"
72
+
73
+ # parted = values.group_by{|value| value[1]}
74
+ # parted[0].product(parted[1])
75
+ # })
76
+ end
77
+
78
+
79
+