fairy 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +674 -0
- data/Makefile +116 -0
- data/README +15 -0
- data/bin/fairy +582 -0
- data/bin/fairy-cat +74 -0
- data/bin/fairy-cp +128 -0
- data/bin/fairy-rm +122 -0
- data/bin/subcmd/controller +41 -0
- data/bin/subcmd/inspector +81 -0
- data/bin/subcmd/master +43 -0
- data/bin/subcmd/node +47 -0
- data/bin/subcmd/processor +54 -0
- data/doc/programming-interface.html +240 -0
- data/doc/programming-interface.rd +300 -0
- data/etc/fairy.conf.tmpl +118 -0
- data/ext/simple_hash/extconf.rb +4 -0
- data/ext/simple_hash/simple_hash.c +42 -0
- data/fairy.gemspec +60 -0
- data/lib/fairy/client/addins.rb +20 -0
- data/lib/fairy/client/barrier.rb +29 -0
- data/lib/fairy/client/basic-group-by.rb +52 -0
- data/lib/fairy/client/cat.rb +41 -0
- data/lib/fairy/client/direct-product.rb +51 -0
- data/lib/fairy/client/equijoin.rb +79 -0
- data/lib/fairy/client/exec.rb +54 -0
- data/lib/fairy/client/filter.rb +62 -0
- data/lib/fairy/client/find.rb +35 -0
- data/lib/fairy/client/group-by.rb +194 -0
- data/lib/fairy/client/here.rb +84 -0
- data/lib/fairy/client/inject.rb +70 -0
- data/lib/fairy/client/input-file.rb +53 -0
- data/lib/fairy/client/input-iota.rb +49 -0
- data/lib/fairy/client/input-local-file.rb +188 -0
- data/lib/fairy/client/input-varray.rb +30 -0
- data/lib/fairy/client/input.rb +42 -0
- data/lib/fairy/client/io-filter.rb +26 -0
- data/lib/fairy/client/junction.rb +31 -0
- data/lib/fairy/client/map.rb +34 -0
- data/lib/fairy/client/merge-group-by.rb +71 -0
- data/lib/fairy/client/output-file.rb +64 -0
- data/lib/fairy/client/output-local-file.rb +60 -0
- data/lib/fairy/client/output-null.rb +47 -0
- data/lib/fairy/client/output-varray.rb +50 -0
- data/lib/fairy/client/output.rb +29 -0
- data/lib/fairy/client/roma-put.rb +62 -0
- data/lib/fairy/client/roma.rb +156 -0
- data/lib/fairy/client/seg-join.rb +61 -0
- data/lib/fairy/client/seg-map.rb +78 -0
- data/lib/fairy/client/seg-shuffle.rb +35 -0
- data/lib/fairy/client/seg-split.rb +27 -0
- data/lib/fairy/client/seg-zip.rb +60 -0
- data/lib/fairy/client/select.rb +38 -0
- data/lib/fairy/client/sort.rb +48 -0
- data/lib/fairy/client/sort18.rb +56 -0
- data/lib/fairy/client/sort19.rb +61 -0
- data/lib/fairy/client/there.rb +47 -0
- data/lib/fairy/client/top_n_into_roma.rb +34 -0
- data/lib/fairy/client/wc.rb +92 -0
- data/lib/fairy/controller.rb +1103 -0
- data/lib/fairy/logger.rb +107 -0
- data/lib/fairy/master/addins.rb +20 -0
- data/lib/fairy/master/atom.rb +17 -0
- data/lib/fairy/master/c-barrier.rb +283 -0
- data/lib/fairy/master/c-basic-group-by.rb +250 -0
- data/lib/fairy/master/c-cat.rb +159 -0
- data/lib/fairy/master/c-direct-product.rb +203 -0
- data/lib/fairy/master/c-exec.rb +68 -0
- data/lib/fairy/master/c-filter.rb +422 -0
- data/lib/fairy/master/c-find.rb +138 -0
- data/lib/fairy/master/c-group-by.rb +64 -0
- data/lib/fairy/master/c-here.rb +80 -0
- data/lib/fairy/master/c-inject.rb +119 -0
- data/lib/fairy/master/c-input-file.rb +46 -0
- data/lib/fairy/master/c-input-iota.rb +66 -0
- data/lib/fairy/master/c-input-local-file.rb +117 -0
- data/lib/fairy/master/c-input-varray.rb +53 -0
- data/lib/fairy/master/c-input.rb +24 -0
- data/lib/fairy/master/c-inputtable.rb +31 -0
- data/lib/fairy/master/c-inputtable18.rb +36 -0
- data/lib/fairy/master/c-inputtable19.rb +35 -0
- data/lib/fairy/master/c-io-filter.rb +28 -0
- data/lib/fairy/master/c-junction.rb +54 -0
- data/lib/fairy/master/c-map.rb +27 -0
- data/lib/fairy/master/c-merge-group-by.rb +241 -0
- data/lib/fairy/master/c-output-file.rb +84 -0
- data/lib/fairy/master/c-output-local-file.rb +19 -0
- data/lib/fairy/master/c-output-null.rb +45 -0
- data/lib/fairy/master/c-output-varray.rb +57 -0
- data/lib/fairy/master/c-output.rb +20 -0
- data/lib/fairy/master/c-seg-join.rb +141 -0
- data/lib/fairy/master/c-seg-map.rb +26 -0
- data/lib/fairy/master/c-seg-shuffle.rb +87 -0
- data/lib/fairy/master/c-seg-split.rb +110 -0
- data/lib/fairy/master/c-seg-zip.rb +132 -0
- data/lib/fairy/master/c-select.rb +27 -0
- data/lib/fairy/master/c-sort.rb +108 -0
- data/lib/fairy/master/c-there.rb +57 -0
- data/lib/fairy/master/c-wc.rb +232 -0
- data/lib/fairy/master/job-interpriter.rb +19 -0
- data/lib/fairy/master/scheduler.rb +24 -0
- data/lib/fairy/master.rb +329 -0
- data/lib/fairy/node/addins.rb +19 -0
- data/lib/fairy/node/p-barrier.rb +95 -0
- data/lib/fairy/node/p-basic-group-by.rb +252 -0
- data/lib/fairy/node/p-direct-product.rb +153 -0
- data/lib/fairy/node/p-exec.rb +30 -0
- data/lib/fairy/node/p-filter.rb +363 -0
- data/lib/fairy/node/p-find.rb +111 -0
- data/lib/fairy/node/p-group-by.rb +1534 -0
- data/lib/fairy/node/p-here.rb +21 -0
- data/lib/fairy/node/p-identity.rb +24 -0
- data/lib/fairy/node/p-inject.rb +127 -0
- data/lib/fairy/node/p-input-file.rb +108 -0
- data/lib/fairy/node/p-input-iota.rb +39 -0
- data/lib/fairy/node/p-input-local-file.rb +61 -0
- data/lib/fairy/node/p-input-varray.rb +26 -0
- data/lib/fairy/node/p-io-filter.rb +28 -0
- data/lib/fairy/node/p-map.rb +40 -0
- data/lib/fairy/node/p-merger-group-by.rb +48 -0
- data/lib/fairy/node/p-output-file.rb +104 -0
- data/lib/fairy/node/p-output-local-file.rb +14 -0
- data/lib/fairy/node/p-output-null.rb +32 -0
- data/lib/fairy/node/p-output-varray.rb +41 -0
- data/lib/fairy/node/p-seg-join.rb +82 -0
- data/lib/fairy/node/p-seg-map.rb +34 -0
- data/lib/fairy/node/p-seg-split.rb +61 -0
- data/lib/fairy/node/p-seg-zip.rb +79 -0
- data/lib/fairy/node/p-select.rb +40 -0
- data/lib/fairy/node/p-single-exportable.rb +90 -0
- data/lib/fairy/node/p-sort.rb +195 -0
- data/lib/fairy/node/p-task.rb +113 -0
- data/lib/fairy/node/p-there.rb +44 -0
- data/lib/fairy/node/p-wc.rb +266 -0
- data/lib/fairy/node.rb +187 -0
- data/lib/fairy/processor.rb +510 -0
- data/lib/fairy/share/base-app.rb +114 -0
- data/lib/fairy/share/block-source.rb +234 -0
- data/lib/fairy/share/conf.rb +396 -0
- data/lib/fairy/share/debug.rb +21 -0
- data/lib/fairy/share/encoding.rb +17 -0
- data/lib/fairy/share/fast-tempfile.rb +93 -0
- data/lib/fairy/share/file-place.rb +176 -0
- data/lib/fairy/share/hash-1.rb +20 -0
- data/lib/fairy/share/hash-md5.rb +28 -0
- data/lib/fairy/share/hash-murmur.rb +69 -0
- data/lib/fairy/share/hash-rb18.rb +20 -0
- data/lib/fairy/share/hash-simple-hash.rb +28 -0
- data/lib/fairy/share/inspector.rb +16 -0
- data/lib/fairy/share/lc/exceptions.rb +82 -0
- data/lib/fairy/share/lc/ja/exceptions.rb +81 -0
- data/lib/fairy/share/locale.rb +17 -0
- data/lib/fairy/share/log.rb +215 -0
- data/lib/fairy/share/pool-dictionary.rb +53 -0
- data/lib/fairy/share/port-marshaled-queue.rb +347 -0
- data/lib/fairy/share/port.rb +1697 -0
- data/lib/fairy/share/reference.rb +45 -0
- data/lib/fairy/share/stdout.rb +56 -0
- data/lib/fairy/share/tr.rb +16 -0
- data/lib/fairy/share/varray.rb +147 -0
- data/lib/fairy/share/vfile.rb +183 -0
- data/lib/fairy/version.rb +8 -0
- data/lib/fairy.rb +206 -0
- data/sample/grep.rb +46 -0
- data/sample/ping.rb +19 -0
- data/sample/sort.rb +102 -0
- data/sample/wordcount.rb +61 -0
- data/spec/README +12 -0
- data/spec/fairy1_spec.rb +31 -0
- data/spec/fairy2_spec.rb +42 -0
- data/spec/fairy3_spec.rb +126 -0
- data/spec/fairy4_spec.rb +63 -0
- data/spec/fairy5_spec.rb +45 -0
- data/spec/fairy6_spec.rb +52 -0
- data/spec/fairy7_spec.rb +58 -0
- data/spec/fairy8_spec.rb +48 -0
- data/spec/mkdat.rb +148 -0
- data/spec/run_all.sh +65 -0
- data/test/testc.rb +7111 -0
- data/tools/cap_recipe/Capfile +144 -0
- data/tools/cap_recipe/cluster.yml.sample +14 -0
- data/tools/fairy_perf_graph.rb +444 -0
- data/tools/git-tag +44 -0
- data/tools/log-analysis.rb +62 -0
- data/tools/svn-ls-diff +38 -0
- data/tools/svn-tags +37 -0
- metadata +298 -0
data/etc/fairy.conf.tmpl
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
# encoding: UTF-8
|
3
|
+
|
4
|
+
module Fairy
|
5
|
+
|
6
|
+
## A line commented out with a double sharp (##) shows the system default value of that setting.
|
7
|
+
|
8
|
+
##CONF.RUBY_BIN = ENV["FAIRY_RUBY"] || "ruby"
|
9
|
+
|
10
|
+
##CONF.MASTER_HOST = #NO SYSTEM DEFAULT VALUE
|
11
|
+
CONF.MASTER_HOST = NO SYSTEM DEFAULT VALUE
|
12
|
+
|
13
|
+
##CONF.MASTER_PORT = "19999"
|
14
|
+
|
15
|
+
##CONF.HOME = ENV["FAIRY_HOME"]
|
16
|
+
##CONF.BIN = CONF.HOME+"/bin"
|
17
|
+
##CONF.LIB = CONF.HOME+"/lib"
|
18
|
+
##CONF.SUBCMD = CONF.BIN+"/subcmd"
|
19
|
+
##CONF.CONTROLLER_BIN = CONF.SUBCMD+"/controller"
|
20
|
+
##CONF.PROCESSOR_BIN = CONF.SUBCMD+"/processor"
|
21
|
+
|
22
|
+
##CONF.DEFAULT_EXTERNAL = nil
|
23
|
+
##CONF.DEFAULT_INTERNAL = nil
|
24
|
+
|
25
|
+
##CONF.IGNORE_EXCEPTION_ON_FILTER = false
|
26
|
+
|
27
|
+
##CONF.MASTER_MAX_ACTIVE_PROCESSORS = 4
|
28
|
+
##CONF.CONTROLLER_INPUT_PROCESSOR_N = 10
|
29
|
+
|
30
|
+
## Upper limit on the number of processors that are created. nil means unlimited.
|
31
|
+
##CONF.CONTROLLER_MAX_ACTIVE_TASKS_IN_PROCESSOR = 4
|
32
|
+
|
33
|
+
##CONF.CONTROLLER_ASSIGN_NEW_PROCESSOR_N_FACTOR = 1
|
34
|
+
|
35
|
+
## CONF.POSTMAPPING_POLICY = nil
|
36
|
+
|
37
|
+
##CONF.PREQUEUING_POLICY = {:queuing_class => :FileMarshaledQueue}
|
38
|
+
##CONF.POSTQUEUING_POLICY = {:queuing_class => :FileMarshaledQueue}
|
39
|
+
|
40
|
+
##CONF.POSTQUEUE_MAX_TRANSFER_SIZE = 100000
|
41
|
+
##CONF.POOLQUEUE_POOL_THRESHOLD = 10000
|
42
|
+
|
43
|
+
##CONF.ONMEMORY_SIZEDQUEUE_SIZE = 10000
|
44
|
+
##CONF.FILEBUFFEREDQUEUE_THRESHOLD = 10000/2
|
45
|
+
|
46
|
+
##CONF.MARSHAL_QUEUE_CHUNK_SIZE = DEFAULT_CONF.POOLQUEUE_POOL_THRESHOLD
|
47
|
+
##CONF.MARSHAL_QUEUE_MIN_CHUNK_NO = DEFAULT_CONF.POOLQUEUE_POOL_THRESHOLD
|
48
|
+
##CONF.SIZEDMARSHAL_QUEUE_MAX_CHUNK_NO = 10
|
49
|
+
|
50
|
+
##CONF.SORTEDQUEUE_POOL_THRESHOLD = CONF.POOLQUEUE_POOL_THRESHOLD
|
51
|
+
##CONF.SORTEDQUEUE_THRESHOLD = 10000/2
|
52
|
+
##CONF.SORTEDQUEUE_SORTBY = %{|v| v}
|
53
|
+
|
54
|
+
##CONF.INPUT_LOCAL_FILE_BUFFER_SIZE = 1024*1024
|
55
|
+
##CONF.HERE_POOL_THRESHOLD = 32000
|
56
|
+
|
57
|
+
##CONF.GROUP_BY_NO_SEGMENT = 4
|
58
|
+
|
59
|
+
##CONF.GROUP_BY_HASH_MODULE = "fairy/share/hash-md5"
|
60
|
+
##CONF.GROUP_BY_GROUPING_OPTIMIZE = false
|
61
|
+
|
62
|
+
##CONF.GROUP_BY_BUFFERING_POLICY = {:buffering_class => :DirectMergeSortBuffer}
|
63
|
+
##CONF.GROUP_BY_CMSB_THRESHOLD = 400_000
|
64
|
+
##CONF.GROUP_BY_CMSB_CHUNK_SIZE = 1000
|
65
|
+
|
66
|
+
##CONF.BARRIER_MEMORY_BUFFERING_POLICY = {:queuing_class => :PoolQueue}
|
67
|
+
##CONF.SORT_BUFFERING_POLICY = {:buffering_class => "PGroupBy::DirectMergeSortBuffer"}
|
68
|
+
##CONF.SORT_SAMPLING_MIN = 100
|
69
|
+
##CONF.SORT_SAMPLING_MAX = 10000
|
70
|
+
##CONF.SORT_SAMPLING_RATIO_1_TO = 100
|
71
|
+
##CONF.SORT_NO_SEGMENT = CONF.GROUP_BY_NO_SEGMENT
|
72
|
+
##CONF.SORT_CMP_OPTIMIZE = false
|
73
|
+
|
74
|
+
##CONF.IOTA_SPLIT_NO = 4
|
75
|
+
|
76
|
+
##CONF.TRANSFAR_MARSHAL_STRING_ARRAY_OPTIMIZE = false
|
77
|
+
|
78
|
+
##CONF.VF_ROOT = CONF.HOME+"/Repos"
|
79
|
+
##CONF.VF_PREFIX = `hostname`.chomp
|
80
|
+
#- CONF.VF_PREFIX uses the client's setting.
|
81
|
+
# (ja_JP.utf-8: CONF.VF_PREFIXはクライアントのものが使われる)
|
82
|
+
|
83
|
+
##CONF.VF_SPLIT_SIZE = 64*1024*1024
|
84
|
+
|
85
|
+
##CONF.TMP_DIR = "/tmp/fairy/tmpbuf"
|
86
|
+
|
87
|
+
##CONF.LOG_FILE = "/tmp/fairy/log"
|
88
|
+
##CONF.LOG_FLUSH_INTERVAL = 1
|
89
|
+
##CONF.LOG_MARK_INTERVAL = 300
|
90
|
+
##CONF.LOG_LEVEL = :INFO
|
91
|
+
##CONF.LOG_IMPORT_NTIMES_POP = 100000
|
92
|
+
##CONF.LOG_LOCAL_OUTPUT_DEV = :$stderr
|
93
|
+
##CONF.LOG_ROTATE_INTERVAL = 60*60*24
|
94
|
+
##CONF.LOG_ROTATE_N = 7
|
95
|
+
|
96
|
+
##CONF.SUBCMD_EXEC_TIMEOUT = 60
|
97
|
+
|
98
|
+
## CONF.PROCESSOR_MON_ON = false
|
99
|
+
## CONF.PROCESSOR_MON_INTERVAL = 300
|
100
|
+
## CONF.PROCESSOR_MON_PSFORMAT = "stat,vsz,rss,sz,pmem,pcpu,nlwp,time,wchan"
|
101
|
+
## CONF.PROCESSOR_MON_OBJECTSPACE_INSPECT_ON = false
|
102
|
+
|
103
|
+
##CONF.SOCK_DO_NOT_REVERSE_LOOKUP = true
|
104
|
+
##CONF.USE_RESOLV_REPLACE = false
|
105
|
+
|
106
|
+
##CONF.BLOCK_USE_STDOUT = true
|
107
|
+
|
108
|
+
##CONF.DEBUG_PORT_WAIT = false
|
109
|
+
##CONF.DEBUG_FULL_BACKTRACE = false
|
110
|
+
##CONF.DEBUG_THREAD_ABORT_ON_EXCEPTION = false
|
111
|
+
##CONF.DEBUG_MONITOR_ON = false
|
112
|
+
##CONF.DEBUG_PROCESSOR_TRACE_ON = false
|
113
|
+
##CONF.DEBUG_BUG49 = false
|
114
|
+
|
115
|
+
##CONF.BUG234 = false
|
116
|
+
|
117
|
+
##CONF.PROCESS_LIFE_MANAGE_INTERVAL = nil
|
118
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (C) 2007-2010 Rakuten, Inc.
|
3
|
+
*/
|
4
|
+
|
5
|
+
#include <ruby.h>
|
6
|
+
|
7
|
+
#define MULTIPLIER 137
|
8
|
+
|
9
|
+
static VALUE simple_hash(VALUE self, VALUE vstr);
|
10
|
+
|
11
|
+
|
12
|
+
static VALUE mFairy;
|
13
|
+
static VALUE mSimpleHash;
|
14
|
+
|
15
|
+
|
16
|
+
/*
 * Fairy::SimpleHash.hash(str)
 *
 * Folds every byte of vstr into an unsigned int using
 *   h = h * MULTIPLIER + byte
 * and returns the result converted with INT2FIX.
 *
 * self - receiver (unused).
 * vstr - value whose string bytes are hashed; obtained via StringValuePtr.
 */
static VALUE simple_hash(VALUE self, VALUE vstr) {
  VALUE vh;
  char *str;
  long len;   /* RSTRING_LEN yields a long; an int would truncate huge strings */
  char *p;
  unsigned int h = 0;

  str = StringValuePtr(vstr);
  len = RSTRING_LEN(vstr);

  /* Bytes are read through plain char, so how bytes >= 0x80 contribute
   * depends on the platform's char signedness. */
  for (p = str; p - str < len; p++) {
    h = h * MULTIPLIER + *p;
  }

  /* NOTE(review): INT2FIX performs no range check. UINT2NUM (kept below,
   * commented out as in the original) would, but could change the values
   * returned on some platforms -- confirm before switching. */
  /* vh = UINT2NUM(h); */
  vh = INT2FIX(h);
  return vh;
}
|
34
|
+
|
35
|
+
/*
 * Extension initializer: defines the Fairy module and the nested
 * Fairy::SimpleHash module, then registers simple_hash on it.
 */
void Init_simple_hash(void) {
  mFairy = rb_define_module("Fairy");
  mSimpleHash = rb_define_module_under(mFairy, "SimpleHash");

  /* Registered under the name "hash" as a module function taking one argument. */
  rb_define_module_function(mSimpleHash, "hash", simple_hash, 1);
}
|
41
|
+
|
42
|
+
|
data/fairy.gemspec
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
|
2
|
+
require "rubygems"
|
3
|
+
|
4
|
+
# Ask a child ruby process (with lib/ on the load path) to print Fairy::Version,
# then split it into the dotted version and the numeric suffix after the dash.
# The suffix is appended as an extra version segment only when it is above 1.
v = `ruby -Ilib -e 'require "fairy/version"; print Fairy::Version'`
v, patch = v.scan(/^([0-9]+\.[0-9]+\.[0-9]+)-([0-9]+)/).first
v += "." + patch if patch.to_i > 1
|
9
|
+
|
10
|
+
# Gem specification for fairy. The version comes from the local variable `v`
# computed earlier in this file.
Gem::Specification.new do |s|
  s.name = "fairy"
  s.authors = "Rakuten, Inc."
  s.email = "hajime.masuda@mail.rakuten.co.jp"
  s.platform = Gem::Platform::RUBY
  s.summary = "fairy is a framework for distributed processing in Ruby, originally designed at Rakuten Institute of Technology with Yukihiro Matsumoto, the founder of Ruby."
  s.rubyforge_project = s.name
  s.homepage = "http://code.google.com/p/fairy-prj/"
  s.version = v
  s.require_path = "lib"
  s.test_file = "spec/run_all.sh"
  s.executables = ["fairy", "fairy-cat", "fairy-cp", "fairy-rm"]
  s.default_executable = "fairy"

  # Packaged files: a fixed list of top-level files, extended below by globs
  # over lib/, bin/, ext/, doc/, spec/, sample/ and tools/.
  s.files = ["Makefile", "README", "LICENSE", "fairy.gemspec", "lib/fairy.rb" ]
  s.files.concat Dir.glob("lib/fairy/**/*.rb")
  s.files.concat Dir.glob("lib/fairy/template/*.tmpl")
  s.files.concat ["etc/fairy.conf.tmpl"]
  # Only executables whose name ends in a lowercase letter are globbed from bin/.
  s.files.concat Dir.glob("bin/{#{s.executables.grep(/.*[a-z]$/).join(",")}}")
  # Sub-commands: only names ending in a letter are picked up.
  s.files.concat Dir.glob("bin/subcmd/*[A-Za-z]")
  s.files.concat Dir.glob("ext/**/{Makefile,*.rb,*.c}")
  s.files.concat Dir.glob("doc/*.{rd,html}")
  s.files.concat Dir.glob("spec/{README,*.rb,run_all.sh}")
  s.files.concat Dir.glob("sample/*.rb")
  s.files.concat ["test/testc.rb"]
  # Tools: only paths ending in a lowercase letter are picked up.
  s.files.concat Dir.glob("tools/**/*[a-z]")

  # Runtime dependencies.
  s.add_dependency("DeepConnect", ">= 0.4.06")
  s.add_dependency("fiber-mon", ">= 0.1.0")

  # Long description (plain heredoc; the terminator must stay at column 0).
  s.description = <<EOF
fairy is a framework for distributed processing in Ruby, originally
designed at Rakuten Institute of Technology with Yukihiro Matsumoto,
the founder of Ruby.

Although fairy was inspired by MapReduce model, a well-known
programming model for distributed processing, it's more flexible and
suitable for wider use. That's due to fairy's programming model,
called filter IF, and various built-in filters.

fairy is implemented in Ruby and inherits its high productivity and
simplicity. fairy's API is quite similar to Ruby. Therefore most
programmers who know Ruby can easily understand and use it.
EOF
end
|
55
|
+
|
56
|
+
# Editor settings
|
57
|
+
# - Emacs -
|
58
|
+
# local variables:
|
59
|
+
# mode: Ruby
|
60
|
+
# end:
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Copyright (C) 2007-2010 Rakuten, Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
# Requires every .rb file that sits in this file's directory, using a
# "<parent dir name>/<this dir name>/<file name>" require path, and then
# calls Fairy::post_initialize.
#
# Files ending in "18.rb" are skipped when RUBY_VERSION >= "1.9.0";
# files ending in "19.rb" are skipped otherwise.
job_dir = File.dirname(__FILE__)
subdir = File.basename(File.dirname(job_dir))
job_name = File.basename(job_dir)
ruby19 = RUBY_VERSION >= "1.9.0"

Dir.glob("#{job_dir}/*.rb").each do |path|
  base = File.basename(path)
  next if base =~ /18.rb$/ && ruby19
  next if base =~ /19.rb$/ && !ruby19
  require "#{subdir}/#{job_name}/#{base}"
end

Fairy::post_initialize
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Copyright (C) 2007-2010 Rakuten, Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
require "fairy/client/io-filter"
|
7
|
+
|
8
|
+
require "fairy/share/block-source"
|
9
|
+
|
10
|
+
module Fairy
|
11
|
+
module Interface
|
12
|
+
# Appends a Barrier filter that takes the receiver as its input.
#
# opts - options handed to Barrier.new; may be nil (the default). When
#        opts[:cond] is a String it is replaced, in the given hash, by a
#        BlockSource built from that string.
#
# Returns the new Barrier.
def barrier(opts = nil)
  # opts defaults to nil, so it must be checked before it is indexed;
  # otherwise calling barrier without arguments raises NoMethodError.
  if opts && opts[:cond].kind_of?(String)
    opts[:cond] = BlockSource.new(opts[:cond])
  end
  barrier = Barrier.new(@fairy, opts)
  barrier.input = self
  barrier
end
|
20
|
+
end
|
21
|
+
Fairy::def_filter_interface Interface
|
22
|
+
|
23
|
+
|
24
|
+
# Client-side filter object created by Interface#barrier.
class Barrier < IOFilter
  # Name of the backend class that corresponds to this filter.
  def backend_class_name
    'CBarrier'
  end
end
|
29
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Copyright (C) 2007-2010 Rakuten, Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
require "fairy/client/io-filter"
|
7
|
+
|
8
|
+
module Fairy
|
9
|
+
# Client-side filter created by Interface#basic_group_by; its backend
# counterpart is named "CBasicGroupBy".
class BasicGroupBy < IOFilter

  # Methods registered through Fairy::def_filter_interface.
  module Interface
    # Appends a BasicGroupBy filter that takes the receiver as its input.
    #
    # hash_block - value wrapped in a BlockSource before use.
    # opts       - options passed on to BasicGroupBy.new (nil by default).
    #
    # Returns the new BasicGroupBy filter.
    def basic_group_by(hash_block, opts = nil)
      filter = BasicGroupBy.new(@fairy, opts, BlockSource.new(hash_block))
      filter.input = self
      filter
    end
  end
  Fairy::def_filter_interface Interface

  # Forwards all three arguments to IOFilter and remembers the block source.
  def initialize(fairy, opts, block_source)
    super(fairy, opts, block_source)
    @block_source = block_source
  end

  # Name of the backend class that corresponds to this filter.
  def backend_class_name
    'CBasicGroupBy'
  end
end
|
30
|
+
|
31
|
+
# Client-side filter created by Interface#basic_mgroup_by; its backend
# counterpart is named "CBasicMGroupBy".
class BasicMGroupBy < IOFilter

  # Methods registered through Fairy::def_filter_interface.
  module Interface
    # Appends a BasicMGroupBy filter that takes the receiver as its input.
    #
    # hash_block - value wrapped in a BlockSource before use.
    # opts       - options passed on to BasicMGroupBy.new (nil by default).
    #
    # Returns the new BasicMGroupBy filter.
    def basic_mgroup_by(hash_block, opts = nil)
      filter = BasicMGroupBy.new(@fairy, opts, BlockSource.new(hash_block))
      filter.input = self
      filter
    end
  end
  Fairy::def_filter_interface Interface

  # Forwards all three arguments to IOFilter and remembers the block source.
  def initialize(fairy, opts, block_source)
    super(fairy, opts, block_source)
    @block_source = block_source
  end

  # Name of the backend class that corresponds to this filter.
  def backend_class_name
    'CBasicMGroupBy'
  end
end
|
52
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Copyright (C) 2007-2010 Rakuten, Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
require "fairy/client/io-filter"
|
7
|
+
|
8
|
+
module Fairy
|
9
|
+
# Client-side filter created by Interface#cat; its backend counterpart is
# named "CCat".
class Cat<IOFilter

  module Interface
    # job.cat(opts,...,filter,...,opts,...)
    #
    # Arguments that are Filter instances become the additional inputs; all
    # other arguments are options. If the last option is a Hash it is used as
    # the option hash, otherwise a new Hash is created. Every remaining
    # non-filter argument is stored in that hash as a key mapped to true.
    #
    # Returns the new Cat filter, whose input is the receiver.
    def cat(*others)
      others, opts = others.partition{|e| e.kind_of?(Filter)}
      h = opts.last.kind_of?(Hash) ? opts.pop : {}
      opts.each{|e| h[e] = true}

      cat = Cat.new(@fairy, h, others)
      cat.input = self
      cat
    end
  end
  Fairy::def_filter_interface Interface

  # fairy  - passed through to IOFilter.
  # opts   - option hash, passed to IOFilter and kept in @opts.
  # others - the other filters; their backends are passed to IOFilter and
  #          the filters themselves are kept in @others.
  def initialize(fairy, opts, others)
    super(fairy, opts, others.collect{|o| o.backend})
    @others = others
    # A bare "@block_source" statement used to sit here; it had no effect
    # (this filter takes no block source) and was removed.
    @opts = opts
  end

  # Name of the backend class that corresponds to this filter.
  def backend_class_name
    "CCat"
  end
end
|
40
|
+
end
|
41
|
+
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Copyright (C) 2007-2010 Rakuten, Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
require "fairy/client/filter"
|
7
|
+
|
8
|
+
module Fairy
|
9
|
+
# Client-side filter created by Interface#direct_product (aliased as
# #product and available as #*); its backend counterpart is named
# "CDirectProduct".
class DirectProduct<IOFilter

  module Interface
    # job.direct_product(opts,...,filter,...,block_source, opts,...)
    #
    # If the last argument is a String it is taken as the block source.
    # Of the remaining arguments, Filter instances become the other inputs
    # and everything else is an option: a trailing Hash is used as the option
    # hash (a new one is created otherwise) and every remaining non-filter
    # argument is stored in it as a key mapped to true.
    #
    # Returns the new DirectProduct filter, whose input is the receiver.
    def direct_product(*others)
      block_source = nil
      if others.last.kind_of?(String)
        block_source = others.pop
      end
      others, opts = others.partition{|e| e.kind_of?(Filter)}
      h = opts.last.kind_of?(Hash) ? opts.pop : {}
      opts.each{|e| h[e] = true}

      # NOTE(review): block_source is still nil here when no String was
      # given; BlockSource.new receives nil in that case, as before.
      block_source = BlockSource.new(block_source)
      dp = DirectProduct.new(@fairy, h, others, block_source)
      dp.input = self
      dp
    end
    alias product direct_product

    # Product with a single other filter, using %{|e| e} as the block source.
    def *(other)
      direct_product(other, %{|e| e})
    end
  end
  Fairy::def_filter_interface Interface

  # fairy        - passed through to IOFilter.
  # opts         - option hash, passed to IOFilter and kept in @opts.
  # others       - the other filters; their backends are passed to IOFilter
  #                and the filters themselves are kept in @others.
  # block_source - passed to IOFilter and kept in @block_source.
  def initialize(fairy, opts, others, block_source)
    super(fairy, opts, others.collect{|o| o.backend}, block_source)
    @others = others
    # Previously a bare "@block_source" expression that discarded the value;
    # store it, as BasicGroupBy#initialize does.
    @block_source = block_source
    @opts = opts
  end

  # Name of the backend class that corresponds to this filter.
  def backend_class_name
    "CDirectProduct"
  end
end
|
50
|
+
end
|
51
|
+
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Copyright (C) 2007-2010 Rakuten, Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
require "fairy/client/basic-group-by"
|
7
|
+
require "fairy/client/seg-join"
|
8
|
+
|
9
|
+
require "fairy/client/group-by"
|
10
|
+
|
11
|
+
# Defines the :equijoin_on_memory filter.
#
# input - the main filter.
# other - the filter joined against input.
# no    - one or two element indexes: no[0] is the index used on elements of
#         input; no[1], when given, is the index used on elements of other
#         (otherwise no[0] is used for both).
#
# Both sides go through basic_group_by, keyed by the hash of the join element
# modulo CONF.GROUP_BY_NO_SEGMENT, followed by a barrier with :buffer=>:MEMORY.
# The two results are then combined with seg_join: elements of each side are
# grouped by their join element and every pair with an equal key is emitted
# as [value, o_value].
Fairy.def_filter(:equijoin_on_memory) do |fairy, input, other, *no|
  # These two assignments used to be wrapped in debug "puts" calls that
  # printed the indexes on every invocation; the output has been dropped.
  no1 = no2 = no[0]
  no2 = no[1] if no[1]

  # NOTE(review): the BEGIN sources below read @Pool[:HASH_SEED], while the
  # code that would define that pool variable is commented out here --
  # confirm it is set elsewhere.
  #  mod = Fairy::CONF.GROUP_BY_HASH_MODULE
  #  require mod
  #  seed = Fairy::HValueGenerator.create_seed
  #  fairy.def_pool_variable(:HASH_SEED, seed)

  main = input.basic_group_by(%{|e| @hgen.value(e[#{no1}]) % CONF.GROUP_BY_NO_SEGMENT},
                              :BEGIN=>%{
      mod = CONF.GROUP_BY_HASH_MODULE
      require mod
      @hgen = Fairy::HValueGenerator.new(@Pool[:HASH_SEED])
    }).barrier(:mode=>:NODE_CREATION, :cond=>:NODE_ARRIVED, :buffer=>:MEMORY)
  other2 = other.basic_group_by(%{|e| @hgen.value(e[#{no2}]) % CONF.GROUP_BY_NO_SEGMENT},
                                :BEGIN=>%{
      mod = CONF.GROUP_BY_HASH_MODULE
      require mod
      @hgen = Fairy::HValueGenerator.new(@Pool[:HASH_SEED])
    }).barrier(:mode=>:NODE_CREATION, :cond=>:NODE_ARRIVED, :buffer=>:MEMORY)


  main.seg_join(other2, %{|in0, in1, out_block|

    next unless in0 && in1

    ary_m = in0.to_a.group_by{|e| e[#{no1}]}
    ary_o = in1.to_a.group_by{|e| e[#{no2}]}

    ary_m.each do |key, values|
      o_values = ary_o[key]
      next unless o_values
      values.each do |value|
        o_values.each do |o_value|
          out_block.call([value, o_value])
        end
      end
    end
  }, :by => :key)
end
|
52
|
+
|
53
|
+
# Defines the :equijoin filter.
#
# input - the main filter.
# other - the filter joined against input.
# no    - one or two element indexes: no[0] is the index used on elements of
#         input; no[1], when given, is the index used on elements of other
#         (otherwise no[0] is used for both).
#
# Each element is mapped to [join element, side tag, element] (tag 0 for
# input, 1 for other). The two streams are concatenated and grouped by the
# join element; for every group containing both tags, the product of the
# original elements of both sides is produced, otherwise an empty array.
Fairy.def_filter(:equijoin) do |fairy, input, other, *no|
  no1 = no[0]
  no2 = no[1] || no1

  tagged_main = input.map(%{|e| [e[#{no1}], 0, e]})
  tagged_other = other.map(%{|e| [e[#{no2}], 1, e]})

  tagged_main.cat(tagged_other).group_by(%{|e| e[0]}).mapf(%{|values|
    parted = values.group_by{|value| value[1]}
    if parted[0] && parted[1]
      parted[0].collect{|e| e[2]}.product(parted[1].collect{|e| e[2]})
    else
      []
    end
  })

#   main.cat(other).mod_group_by(%{|e| e[0]}).emap(%{|key, values|
#     puts "XXXX: \#{key.inspect}"
#     puts "XXXS: \#{values.inspect}"

#     parted = values.group_by{|value| value[1]}
#     parted[0].product(parted[1])
#   })
end
|
77
|
+
|
78
|
+
|
79
|
+
|