fairy 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +674 -0
- data/Makefile +116 -0
- data/README +15 -0
- data/bin/fairy +582 -0
- data/bin/fairy-cat +74 -0
- data/bin/fairy-cp +128 -0
- data/bin/fairy-rm +122 -0
- data/bin/subcmd/controller +41 -0
- data/bin/subcmd/inspector +81 -0
- data/bin/subcmd/master +43 -0
- data/bin/subcmd/node +47 -0
- data/bin/subcmd/processor +54 -0
- data/doc/programming-interface.html +240 -0
- data/doc/programming-interface.rd +300 -0
- data/etc/fairy.conf.tmpl +118 -0
- data/ext/simple_hash/extconf.rb +4 -0
- data/ext/simple_hash/simple_hash.c +42 -0
- data/fairy.gemspec +60 -0
- data/lib/fairy/client/addins.rb +20 -0
- data/lib/fairy/client/barrier.rb +29 -0
- data/lib/fairy/client/basic-group-by.rb +52 -0
- data/lib/fairy/client/cat.rb +41 -0
- data/lib/fairy/client/direct-product.rb +51 -0
- data/lib/fairy/client/equijoin.rb +79 -0
- data/lib/fairy/client/exec.rb +54 -0
- data/lib/fairy/client/filter.rb +62 -0
- data/lib/fairy/client/find.rb +35 -0
- data/lib/fairy/client/group-by.rb +194 -0
- data/lib/fairy/client/here.rb +84 -0
- data/lib/fairy/client/inject.rb +70 -0
- data/lib/fairy/client/input-file.rb +53 -0
- data/lib/fairy/client/input-iota.rb +49 -0
- data/lib/fairy/client/input-local-file.rb +188 -0
- data/lib/fairy/client/input-varray.rb +30 -0
- data/lib/fairy/client/input.rb +42 -0
- data/lib/fairy/client/io-filter.rb +26 -0
- data/lib/fairy/client/junction.rb +31 -0
- data/lib/fairy/client/map.rb +34 -0
- data/lib/fairy/client/merge-group-by.rb +71 -0
- data/lib/fairy/client/output-file.rb +64 -0
- data/lib/fairy/client/output-local-file.rb +60 -0
- data/lib/fairy/client/output-null.rb +47 -0
- data/lib/fairy/client/output-varray.rb +50 -0
- data/lib/fairy/client/output.rb +29 -0
- data/lib/fairy/client/roma-put.rb +62 -0
- data/lib/fairy/client/roma.rb +156 -0
- data/lib/fairy/client/seg-join.rb +61 -0
- data/lib/fairy/client/seg-map.rb +78 -0
- data/lib/fairy/client/seg-shuffle.rb +35 -0
- data/lib/fairy/client/seg-split.rb +27 -0
- data/lib/fairy/client/seg-zip.rb +60 -0
- data/lib/fairy/client/select.rb +38 -0
- data/lib/fairy/client/sort.rb +48 -0
- data/lib/fairy/client/sort18.rb +56 -0
- data/lib/fairy/client/sort19.rb +61 -0
- data/lib/fairy/client/there.rb +47 -0
- data/lib/fairy/client/top_n_into_roma.rb +34 -0
- data/lib/fairy/client/wc.rb +92 -0
- data/lib/fairy/controller.rb +1103 -0
- data/lib/fairy/logger.rb +107 -0
- data/lib/fairy/master/addins.rb +20 -0
- data/lib/fairy/master/atom.rb +17 -0
- data/lib/fairy/master/c-barrier.rb +283 -0
- data/lib/fairy/master/c-basic-group-by.rb +250 -0
- data/lib/fairy/master/c-cat.rb +159 -0
- data/lib/fairy/master/c-direct-product.rb +203 -0
- data/lib/fairy/master/c-exec.rb +68 -0
- data/lib/fairy/master/c-filter.rb +422 -0
- data/lib/fairy/master/c-find.rb +138 -0
- data/lib/fairy/master/c-group-by.rb +64 -0
- data/lib/fairy/master/c-here.rb +80 -0
- data/lib/fairy/master/c-inject.rb +119 -0
- data/lib/fairy/master/c-input-file.rb +46 -0
- data/lib/fairy/master/c-input-iota.rb +66 -0
- data/lib/fairy/master/c-input-local-file.rb +117 -0
- data/lib/fairy/master/c-input-varray.rb +53 -0
- data/lib/fairy/master/c-input.rb +24 -0
- data/lib/fairy/master/c-inputtable.rb +31 -0
- data/lib/fairy/master/c-inputtable18.rb +36 -0
- data/lib/fairy/master/c-inputtable19.rb +35 -0
- data/lib/fairy/master/c-io-filter.rb +28 -0
- data/lib/fairy/master/c-junction.rb +54 -0
- data/lib/fairy/master/c-map.rb +27 -0
- data/lib/fairy/master/c-merge-group-by.rb +241 -0
- data/lib/fairy/master/c-output-file.rb +84 -0
- data/lib/fairy/master/c-output-local-file.rb +19 -0
- data/lib/fairy/master/c-output-null.rb +45 -0
- data/lib/fairy/master/c-output-varray.rb +57 -0
- data/lib/fairy/master/c-output.rb +20 -0
- data/lib/fairy/master/c-seg-join.rb +141 -0
- data/lib/fairy/master/c-seg-map.rb +26 -0
- data/lib/fairy/master/c-seg-shuffle.rb +87 -0
- data/lib/fairy/master/c-seg-split.rb +110 -0
- data/lib/fairy/master/c-seg-zip.rb +132 -0
- data/lib/fairy/master/c-select.rb +27 -0
- data/lib/fairy/master/c-sort.rb +108 -0
- data/lib/fairy/master/c-there.rb +57 -0
- data/lib/fairy/master/c-wc.rb +232 -0
- data/lib/fairy/master/job-interpriter.rb +19 -0
- data/lib/fairy/master/scheduler.rb +24 -0
- data/lib/fairy/master.rb +329 -0
- data/lib/fairy/node/addins.rb +19 -0
- data/lib/fairy/node/p-barrier.rb +95 -0
- data/lib/fairy/node/p-basic-group-by.rb +252 -0
- data/lib/fairy/node/p-direct-product.rb +153 -0
- data/lib/fairy/node/p-exec.rb +30 -0
- data/lib/fairy/node/p-filter.rb +363 -0
- data/lib/fairy/node/p-find.rb +111 -0
- data/lib/fairy/node/p-group-by.rb +1534 -0
- data/lib/fairy/node/p-here.rb +21 -0
- data/lib/fairy/node/p-identity.rb +24 -0
- data/lib/fairy/node/p-inject.rb +127 -0
- data/lib/fairy/node/p-input-file.rb +108 -0
- data/lib/fairy/node/p-input-iota.rb +39 -0
- data/lib/fairy/node/p-input-local-file.rb +61 -0
- data/lib/fairy/node/p-input-varray.rb +26 -0
- data/lib/fairy/node/p-io-filter.rb +28 -0
- data/lib/fairy/node/p-map.rb +40 -0
- data/lib/fairy/node/p-merger-group-by.rb +48 -0
- data/lib/fairy/node/p-output-file.rb +104 -0
- data/lib/fairy/node/p-output-local-file.rb +14 -0
- data/lib/fairy/node/p-output-null.rb +32 -0
- data/lib/fairy/node/p-output-varray.rb +41 -0
- data/lib/fairy/node/p-seg-join.rb +82 -0
- data/lib/fairy/node/p-seg-map.rb +34 -0
- data/lib/fairy/node/p-seg-split.rb +61 -0
- data/lib/fairy/node/p-seg-zip.rb +79 -0
- data/lib/fairy/node/p-select.rb +40 -0
- data/lib/fairy/node/p-single-exportable.rb +90 -0
- data/lib/fairy/node/p-sort.rb +195 -0
- data/lib/fairy/node/p-task.rb +113 -0
- data/lib/fairy/node/p-there.rb +44 -0
- data/lib/fairy/node/p-wc.rb +266 -0
- data/lib/fairy/node.rb +187 -0
- data/lib/fairy/processor.rb +510 -0
- data/lib/fairy/share/base-app.rb +114 -0
- data/lib/fairy/share/block-source.rb +234 -0
- data/lib/fairy/share/conf.rb +396 -0
- data/lib/fairy/share/debug.rb +21 -0
- data/lib/fairy/share/encoding.rb +17 -0
- data/lib/fairy/share/fast-tempfile.rb +93 -0
- data/lib/fairy/share/file-place.rb +176 -0
- data/lib/fairy/share/hash-1.rb +20 -0
- data/lib/fairy/share/hash-md5.rb +28 -0
- data/lib/fairy/share/hash-murmur.rb +69 -0
- data/lib/fairy/share/hash-rb18.rb +20 -0
- data/lib/fairy/share/hash-simple-hash.rb +28 -0
- data/lib/fairy/share/inspector.rb +16 -0
- data/lib/fairy/share/lc/exceptions.rb +82 -0
- data/lib/fairy/share/lc/ja/exceptions.rb +81 -0
- data/lib/fairy/share/locale.rb +17 -0
- data/lib/fairy/share/log.rb +215 -0
- data/lib/fairy/share/pool-dictionary.rb +53 -0
- data/lib/fairy/share/port-marshaled-queue.rb +347 -0
- data/lib/fairy/share/port.rb +1697 -0
- data/lib/fairy/share/reference.rb +45 -0
- data/lib/fairy/share/stdout.rb +56 -0
- data/lib/fairy/share/tr.rb +16 -0
- data/lib/fairy/share/varray.rb +147 -0
- data/lib/fairy/share/vfile.rb +183 -0
- data/lib/fairy/version.rb +8 -0
- data/lib/fairy.rb +206 -0
- data/sample/grep.rb +46 -0
- data/sample/ping.rb +19 -0
- data/sample/sort.rb +102 -0
- data/sample/wordcount.rb +61 -0
- data/spec/README +12 -0
- data/spec/fairy1_spec.rb +31 -0
- data/spec/fairy2_spec.rb +42 -0
- data/spec/fairy3_spec.rb +126 -0
- data/spec/fairy4_spec.rb +63 -0
- data/spec/fairy5_spec.rb +45 -0
- data/spec/fairy6_spec.rb +52 -0
- data/spec/fairy7_spec.rb +58 -0
- data/spec/fairy8_spec.rb +48 -0
- data/spec/mkdat.rb +148 -0
- data/spec/run_all.sh +65 -0
- data/test/testc.rb +7111 -0
- data/tools/cap_recipe/Capfile +144 -0
- data/tools/cap_recipe/cluster.yml.sample +14 -0
- data/tools/fairy_perf_graph.rb +444 -0
- data/tools/git-tag +44 -0
- data/tools/log-analysis.rb +62 -0
- data/tools/svn-ls-diff +38 -0
- data/tools/svn-tags +37 -0
- metadata +298 -0
@@ -0,0 +1,61 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Copyright (C) 2007-2010 Rakuten, Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
require "fairy/client/io-filter"
|
7
|
+
|
8
|
+
module Fairy
  # Client-side filter that joins the receiver's stream with one or more
  # other filters. Each other filter is wrapped in a PreJoinedFilter, and
  # the backends of those wrappers are handed to the superclass constructor.
  class SegJoin<IOFilter

    module Interface
      # job.seg_join(opts,...,filter,...,block_source, opts,...)
      #
      # Arguments may be mixed:
      # * Filter instances are the streams to join with the receiver.
      # * A String given last, or second to last when the last argument is
      #   a Hash, is taken as the block source.
      # * A trailing Hash (after the filters are removed) supplies options;
      #   every other remaining argument is stored in it as a key mapped
      #   to true.
      #
      # Returns the new SegJoin filter whose input is the receiver.
      def seg_join(*others)
        block_source = nil
        if others.last.kind_of?(String)
          block_source = others.pop
        elsif others.last.kind_of?(Hash) && others[-2].kind_of?(String)
          block_source = others.delete_at(-2)
        end

        # Separate the filters to join from the option-like arguments.
        others, opts = others.partition{|e| e.kind_of?(Filter)}
        if opts.last.kind_of?(Hash)
          h = opts.pop
        else
          h = {}
        end
        opts.each{|e| h[e] = true}

        # Wrap every joined filter in its own pre-join stage.
        pres = others.collect{|o|
          p = PreJoinedFilter.new(@fairy, h)
          p.input = o
          p
        }

        block_source = BlockSource.new(block_source)
        join = SegJoin.new(@fairy, h, pres, block_source)
        join.input = self
        join
      end
    end
    Fairy::def_filter_interface Interface

    # fairy::        passed through to the superclass constructor
    # opts::         option hash
    # others::       pre-join filters; their backends are passed to super
    # block_source:: BlockSource wrapping the user-supplied source
    def initialize(fairy, opts, others, block_source)
      super(fairy, opts, others.collect{|o| o.backend}, block_source)
      @others = others
      # Store the block source on the client object as well.
      @block_source = block_source
      @opts = opts
    end

    # Name of the backend class that implements this filter.
    def backend_class_name
      "CSegJoin"
    end

    # Pre-join stage placed in front of every joined filter.
    class PreJoinedFilter<IOFilter
      def backend_class_name
        "CSegJoin::CPreSegJoinFilter"
      end
    end

  end
end
|
61
|
+
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Copyright (C) 2007-2010 Rakuten, Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
require "fairy/client/io-filter"
|
7
|
+
|
8
|
+
module Fairy

  # Client-side filter whose block source receives an input segment together
  # with an output block (see the generated sources in #emap and
  # #map_flatten).
  class SegMap<IOFilter
    module Interface

      # Retired entry point; always raises.
      def smap(block_source, opts = nil)
        raise "No compatibility after fairy-0.5"
      end

      # Retired entry point; always raises.
      def smap2(block_source, opts = nil)
        raise "No compatibility after fairy-0.6"
      end

      # Builds a SegMap filter from +block_source+ and attaches it to the
      # receiver. A Ruby block is rejected via ERR::CantAcceptBlock.
      def seg_map(block_source, opts = nil)
        if block_given?
          ERR::Raise(ERR::CantAcceptBlock)
        end
        filter = SegMap.new(@fairy, opts, BlockSource.new(block_source))
        filter.input = self
        filter
      end

      # emap(%{|input| input.collect{..}})
      #
      # Wraps +block_source+ so that every element of the enumerable it
      # returns for a segment is passed to the output block.
      def emap(block_source, opts = nil)
        if block_given?
          ERR::Raise(ERR::CantAcceptBlock)
        end
        seg_map(%{|i, block| proc{#{block_source}}.call(i).each{|e| block.call e}}, opts)
      end

      # Applies +block_source+ to each element and emits the members of the
      # result. opts[:N] (default 1) selects the generated emit code:
      # 1 emits each member as is, 2 iterates members that respond to
      # :each, anything else calls flatten(N - 2) on members that respond
      # to :flatten.
      def map_flatten(block_source, opts = nil)
        if block_given?
          ERR::Raise(ERR::CantAcceptBlock)
        end

        depth = opts && opts[:N]
        depth ||= 1
        emit = case depth
               when 1
                 "block.call f"
               when 2
                 "if f.respond_to?(:each)
                    f.each{|g| block.call(g)}
                  else
                    block.call f
                  end"
               else
                 "if f.respond_to?(:flatten)
                    f.flatten(#{opts[:N]} - 2).each{|g| block.call(g)}
                  else
                    block.call f
                  end"
               end

        generated = %{|i, block|
          i.each do |e|
            enum = proc{#{block_source}}.call(e)
            enum.each do |f|
              #{emit}
            end
          end
        }
        seg_map(generated, opts)
      end
      alias mapf map_flatten

    end
    Fairy::def_filter_interface Interface

    # Forwards all arguments to the superclass and keeps the block source.
    def initialize(fairy, opts, block_source)
      super
      @block_source = block_source
    end

    # Name of the backend class that implements this filter.
    def backend_class_name
      "CSegMap"
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Copyright (C) 2007-2010 Rakuten, Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
require "fairy/client/io-filter"
|
7
|
+
|
8
|
+
module Fairy
  # Client-side filter built from a block source; the source generated by
  # #seg_eshuffle takes two parameters (|i, o|) and pushes results onto o.
  class SegShuffle<IOFilter
    module Interface
      # Builds a SegShuffle filter from +block_source+ and attaches it to
      # the receiver.
      def seg_shuffle(block_source, opts = nil)
        filter = SegShuffle.new(@fairy, opts, BlockSource.new(block_source))
        filter.input = self
        filter
      end

      # Wraps +block_source+ so that every element of the enumerable it
      # returns is pushed onto the output, then delegates to #seg_shuffle.
      def seg_eshuffle(block_source, opts = nil)
        seg_shuffle(%{|i, o| proc{#{block_source}}.call(i).each{|e| o.push e}}, opts)
      end
    end
    Fairy::def_filter_interface Interface

    # Forwards all arguments to the superclass, then records the block
    # source and options.
    def initialize(fairy, opts, block_source)
      super
      @block_source = block_source
      @opts = opts
    end

    # Name of the backend class that implements this filter.
    def backend_class_name
      "CSegShuffle"
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require "fairy/client/io-filter"
|
4
|
+
|
5
|
+
module Fairy
  # Client-side filter parameterized by a split count +n+.
  class SegSplit<IOFilter
    module Interface
      # Builds a SegSplit filter for +n+ and attaches it to the receiver.
      def seg_split(n, opts=nil)
        filter = SegSplit.new(@fairy, opts, n)
        filter.input = self
        filter
      end
    end
    Fairy::def_filter_interface Interface


    # Forwards all arguments to the superclass, then records the split
    # count and options.
    def initialize(fairy, opts, n)
      super
      @no_split = n
      @opts = opts
    end

    # Name of the backend class that implements this filter.
    def backend_class_name
      "CSegSplit"
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Copyright (C) 2007-2010 Rakuten, Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
require "fairy/client/io-filter"
|
7
|
+
|
8
|
+
module Fairy
  # Client-side filter that zips the receiver's stream with one or more
  # other filters. Each other filter is wrapped in a PreSegZipFilter, and
  # the backends of those wrappers are handed to the superclass constructor.
  class SegZip<IOFilter

    module Interface
      # job.seg_zip(opts,...,filter,...,block_source, opts,...)
      #
      # Arguments may be mixed:
      # * Filter instances are the streams to zip with the receiver.
      # * A String given last, or second to last when the last argument is
      #   a Hash, is taken as the block source.
      # * A trailing Hash (after the filters are removed) supplies options;
      #   every other remaining argument is stored in it as a key mapped
      #   to true.
      #
      # Returns the new SegZip filter whose input is the receiver.
      def seg_zip(*others)
        block_source = nil
        if others.last.kind_of?(String)
          block_source = others.pop
        elsif others.last.kind_of?(Hash) && others[-2].kind_of?(String)
          # Accept "block_source, {opts}" as the usage line advertises, so
          # the source string is not mistaken for an option flag.
          block_source = others.delete_at(-2)
        end

        # Separate the filters to zip from the option-like arguments.
        others, opts = others.partition{|e| e.kind_of?(Filter)}
        if opts.last.kind_of?(Hash)
          h = opts.pop
        else
          h = {}
        end
        opts.each{|e| h[e] = true}

        # Wrap every zipped filter in its own pre-zip stage.
        pres = others.collect{|o|
          p = PreSegZipFilter.new(@fairy, h)
          p.input = o
          p
        }

        block_source = BlockSource.new(block_source)
        zip = SegZip.new(@fairy, h, pres, block_source)
        zip.input = self
        zip
      end
    end
    Fairy::def_filter_interface Interface

    # ZIP_BY_SEGMENT = :ZIP_BY_SEGMENT

    # fairy::        passed through to the superclass constructor
    # opts::         option hash
    # others::       pre-zip filters; their backends are passed to super
    # block_source:: BlockSource wrapping the user-supplied source
    def initialize(fairy, opts, others, block_source)
      super(fairy, opts, others.collect{|o| o.backend}, block_source)
      @others = others
      # Store the block source on the client object as well.
      @block_source = block_source
      @opts = opts
    end

    # Name of the backend class that implements this filter.
    def backend_class_name
      "CSegZip"
    end

    # Pre-zip stage placed in front of every zipped filter.
    class PreSegZipFilter<IOFilter
      def backend_class_name
        "CSegZip::CPreSegZipFilter"
      end
    end
  end
end
|
60
|
+
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Copyright (C) 2007-2010 Rakuten, Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
require "fairy/client/io-filter"
|
7
|
+
|
8
|
+
module Fairy

  # Client-side filter built from a predicate block source.
  class Select<IOFilter
    module Interface
      # Builds a Select filter from +block_source+ and attaches it to the
      # receiver. A Ruby block is rejected via ERR::CantAcceptBlock.
      def select(block_source, opts = nil)
        ERR::Raise ERR::CantAcceptBlock if block_given?
        block_source = BlockSource.new(block_source)
        mapper = Select.new(@fairy, opts, block_source)
        mapper.input=self
        mapper
      end

      alias find_all select

      # Selects the elements for which +regexp+ === element holds.
      #
      # The regexp is embedded through Regexp#inspect, which yields a
      # complete literal: flags such as /i are preserved and any "/" in
      # the pattern is escaped. Splicing only the pattern source between
      # slashes would lose both.
      def grep(regexp, opts = nil)
        select(%{|e| #{regexp.inspect} === e}, opts)
      end
    end
    Fairy::def_filter_interface Interface

    # Forwards all arguments to the superclass and keeps the block source.
    def initialize(fairy, opts, block_source)
      super
      @block_source = block_source
    end

    # Name of the backend class that implements this filter.
    def backend_class_name
      "CSelect"
    end
  end

end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Copyright (C) 2007-2010 Rakuten, Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
require "fairy/client/basic-group-by"
|
7
|
+
|
8
|
+
module Fairy

  # Namespace for the two client-side stages used by sort_by.
  module Sort

    module Interface
      # Chains a PreSort and a PostSort stage, both built from the same
      # block source and options, and returns the PostSort stage.
      def sort_by(cmp_proc, opts=nil)
        source = BlockSource.new(cmp_proc)

        first_stage = Sort::PreSort.new(@fairy, opts, source)
        first_stage.input = self

        second_stage = Sort::PostSort.new(@fairy, opts, source)
        second_stage.input = first_stage
        second_stage
      end
    end
    Fairy::def_filter_interface Interface

    # First stage; reads from the filter sort_by was called on.
    class PreSort<IOFilter
      # Forwards all arguments to the superclass and keeps the block source.
      def initialize(fairy, opts, block_source)
        super
        @block_source = block_source
      end

      # Name of the backend class that implements this stage.
      def backend_class_name
        "CSort::CPreSort"
      end
    end

    # Second stage; reads from the PreSort stage.
    class PostSort<IOFilter
      # Forwards all arguments to the superclass and keeps the block source.
      def initialize(fairy, opts, block_source)
        super
        @block_source = block_source
      end

      # Name of the backend class that implements this stage.
      def backend_class_name
        "CSort::CPostSort"
      end
    end

  end
end
|
48
|
+
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require "fairy/client/merge-group-by"
|
4
|
+
|
5
|
+
# Defines the :sort_by_with_va filter. Steps:
# 1. pair each element with its sort key, sort every segment locally and
#    materialize the result with to_va;
# 2. sample the keys and pick pvn pivot values;
# 3. group elements by pivot with merge_group_by;
# 4. merge the sorted streams of each group;
# 5. order the resulting segments by key.
#
# Recognized options: :sampling_ratio (keep one element in N when
# sampling) and :pvn (number of pivots); both fall back to Fairy::CONF.
Fairy.def_filter(:sort_by_with_va) do |fairy, input, block_source, *rests|

  # Options, when supplied, are the last extra argument.
  opts = {}
  if !rests.empty?
    opts = rests.last || {}
  end

  sampling_ratio_1_to = opts[:sampling_ratio]
  sampling_ratio_1_to ||= Fairy::CONF.SORT_SAMPLING_RATIO_1_TO
  pvn = opts[:pvn]
  pvn ||= Fairy::CONF.SORT_NO_SEGMENT

  va = input.emap(%{|i|
    sort_proc = proc{#{block_source}}
    i.to_a.collect{|e| [sort_proc.call(e), e]}.sort_by{|e| e.first}}).to_va

  # Clamp the ratio so the sample size stays between SORT_SAMPLING_MIN and
  # SORT_SAMPLING_MAX. The ratio is size / target; dividing the other way
  # round yields 0 for large inputs and then raises ZeroDivisionError.
  if va.size/sampling_ratio_1_to < Fairy::CONF.SORT_SAMPLING_MIN
    sampling_ratio_1_to = va.size.div(Fairy::CONF.SORT_SAMPLING_MIN)
    sampling_ratio_1_to = 1 if sampling_ratio_1_to.zero?
  end
  if va.size/sampling_ratio_1_to > Fairy::CONF.SORT_SAMPLING_MAX
    sampling_ratio_1_to = va.size.div(Fairy::CONF.SORT_SAMPLING_MAX)
  end

  Fairy::Log::debug(self, "SAMPLING: RATIO: 1/#{sampling_ratio_1_to}")
  # Keep every sampling_ratio_1_to-th pair, fetch the sample locally and
  # reduce it to its sorted keys.
  sample = fairy.input(va).select(%{|e| (i += 1) % #{sampling_ratio_1_to} == 0},
                                  :BEGIN=>%{i = 0}).here.sort_by{|e| e.first}.map{|e| e.first}

  # Evenly spaced positions in the sample, plus the last sampled key.
  idxes = (1...pvn).collect{|i| (sample.size*i).div(pvn)}
  idxes.push(-1)
  pvs = sample.values_at(*idxes)
  Fairy::Log::debug(self, "PVS: #{pvs.inspect}")
  fairy.def_pool_variable(:pvs, pvs)

  # Group key: the first pivot not smaller than the element's key, or the
  # last pivot when the key exceeds them all.
  div = fairy.input(va).merge_group_by(%{|e|
    key = @Pool.pvs.find{|pv| e.first <= pv}
    key ? key : @Pool.pvs.last},
    :postqueuing_policy => {:queuing_class => :OnMemoryQueue})

  # Merge the streams of one group: emit the smallest head, then re-insert
  # that stream's next element at its sorted position in the buffer.
  # NOTE: the "#" comment inside this literal is part of the block source.
  msort = div.seg_map(%{|i, o|
    buf = i.map{|st| [st, st.pop.dc_deep_copy]}.select{|st, v|!v.nil?}.sort_by{|st, v| v.first}
    while st_min = buf.shift
      st, min = st_min
      o.push min.last
      next unless v = st.pop.dc_deep_copy # 取りあえずの対応
      idx = buf.rindex{|st0, v0| v0.first <= v.first}
      idx ? buf.insert(idx+1, [st, v]) : buf.unshift([st, v])
    end})

  # The block's value is the final filter: segments ordered by their key.
  msort.seg_eshuffle(%{|i| i.sort{|s1, s2| s1.key <=> s2.key}})
end
|
56
|
+
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Copyright (C) 2007-2010 Rakuten, Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
require "fairy/client/merge-group-by"
|
7
|
+
|
8
|
+
# Defines the :sort_by_with_va filter. Steps:
# 1. pair each element with its sort key, sort every segment locally and
#    materialize the result with to_va;
# 2. sample the keys and pick pvn pivot values;
# 3. group elements by pivot with merge_group_by;
# 4. merge the sorted streams of each group;
# 5. order the resulting segments by key.
#
# Recognized options: :sampling_ratio (keep one element in N when
# sampling) and :pvn (number of pivots); both fall back to Fairy::CONF.
Fairy.def_filter(:sort_by_with_va, :sub => true) do |fairy, input, block_source, opts = {}|

  # Tolerate an explicit nil, which the other filter interfaces accept.
  opts ||= {}

  sampling_ratio_1_to = opts[:sampling_ratio]
  sampling_ratio_1_to ||= Fairy::CONF.SORT_SAMPLING_RATIO_1_TO
  pvn = opts[:pvn]
  pvn ||= Fairy::CONF.SORT_NO_SEGMENT

  va = input.emap(%{|i|
    sort_proc = proc{#{block_source}}
    i.to_a.collect{|e| [sort_proc.call(e), e]}.sort_by{|e| e.first}}).to_va

  # Clamp the ratio so the sample size stays between SORT_SAMPLING_MIN and
  # SORT_SAMPLING_MAX; the ratio is size / target and never drops below 1.
  if va.size/sampling_ratio_1_to < Fairy::CONF.SORT_SAMPLING_MIN
    sampling_ratio_1_to = va.size.div(Fairy::CONF.SORT_SAMPLING_MIN)
    sampling_ratio_1_to = 1 if sampling_ratio_1_to.zero?
  end
  if va.size/sampling_ratio_1_to > Fairy::CONF.SORT_SAMPLING_MAX
    sampling_ratio_1_to = va.size.div(Fairy::CONF.SORT_SAMPLING_MAX)
  end

  Fairy::Log::debug(self, "SAMPLING: RATIO: 1/#{sampling_ratio_1_to}")
  Fairy::Log::debug(self, "SAMPLING: VA SIZE: #{va.size}")
  # Keep every sampling_ratio_1_to-th pair, fetch the sample locally and
  # reduce it to its sorted keys.
  sample = fairy.input(va).select(%{|e| (i += 1) % #{sampling_ratio_1_to} == 0},
                                  :BEGIN=>%{i = 0}).here.sort_by{|e| e.first}.map{|e| e.first}

  Fairy::Log::debugf(self, "SAMPLING: SAMPLE: %s", sample.inspect)

  # Evenly spaced positions in the sample, plus the last sampled key.
  idxes = (1...pvn).collect{|i| (sample.size*i).div(pvn)}
  idxes.push(-1)
  pvs = sample.values_at(*idxes)
  Fairy::Log::debug(self, "PVS: #{pvs.inspect}")
  fairy.def_pool_variable(:pvs, pvs)

  # Group key: the first pivot not smaller than the element's key, or the
  # last pivot when the key exceeds them all.
  div = fairy.input(va).merge_group_by(%{|e|
    key = @Pool.pvs.find{|pv| e.first <= pv}
    key ? key : @Pool.pvs.last},
    :postqueuing_policy => {:queuing_class => :PoolQueue}
  )

  # Merge the streams of one group: emit the smallest head, then re-insert
  # that stream's next element at its sorted position in the buffer.
  # NOTE: the "#" comment inside this literal is part of the block source.
  msort = div.seg_map(%{|i, block|
    buf = i.map{|st| [st, st.pop.dc_deep_copy]}.select{|st, v|!v.nil?}.sort_by{|st, v| v.first}
    while st_min = buf.shift
      st, min = st_min
      block.call min.last
      next unless v = st.pop.dc_deep_copy # 取りあえずの対応
      idx = buf.rindex{|st0, v0| v0.first <= v.first}
      idx ? buf.insert(idx+1, [st, v]) : buf.unshift([st, v])
    end})

  # The block's value is the final filter: segments ordered by their key.
  msort.seg_eshuffle(%{|i| i.sort{|s1, s2| s1.key <=> s2.key}})
end
|
61
|
+
|