fairy 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (186) hide show
  1. data/LICENSE +674 -0
  2. data/Makefile +116 -0
  3. data/README +15 -0
  4. data/bin/fairy +582 -0
  5. data/bin/fairy-cat +74 -0
  6. data/bin/fairy-cp +128 -0
  7. data/bin/fairy-rm +122 -0
  8. data/bin/subcmd/controller +41 -0
  9. data/bin/subcmd/inspector +81 -0
  10. data/bin/subcmd/master +43 -0
  11. data/bin/subcmd/node +47 -0
  12. data/bin/subcmd/processor +54 -0
  13. data/doc/programming-interface.html +240 -0
  14. data/doc/programming-interface.rd +300 -0
  15. data/etc/fairy.conf.tmpl +118 -0
  16. data/ext/simple_hash/extconf.rb +4 -0
  17. data/ext/simple_hash/simple_hash.c +42 -0
  18. data/fairy.gemspec +60 -0
  19. data/lib/fairy/client/addins.rb +20 -0
  20. data/lib/fairy/client/barrier.rb +29 -0
  21. data/lib/fairy/client/basic-group-by.rb +52 -0
  22. data/lib/fairy/client/cat.rb +41 -0
  23. data/lib/fairy/client/direct-product.rb +51 -0
  24. data/lib/fairy/client/equijoin.rb +79 -0
  25. data/lib/fairy/client/exec.rb +54 -0
  26. data/lib/fairy/client/filter.rb +62 -0
  27. data/lib/fairy/client/find.rb +35 -0
  28. data/lib/fairy/client/group-by.rb +194 -0
  29. data/lib/fairy/client/here.rb +84 -0
  30. data/lib/fairy/client/inject.rb +70 -0
  31. data/lib/fairy/client/input-file.rb +53 -0
  32. data/lib/fairy/client/input-iota.rb +49 -0
  33. data/lib/fairy/client/input-local-file.rb +188 -0
  34. data/lib/fairy/client/input-varray.rb +30 -0
  35. data/lib/fairy/client/input.rb +42 -0
  36. data/lib/fairy/client/io-filter.rb +26 -0
  37. data/lib/fairy/client/junction.rb +31 -0
  38. data/lib/fairy/client/map.rb +34 -0
  39. data/lib/fairy/client/merge-group-by.rb +71 -0
  40. data/lib/fairy/client/output-file.rb +64 -0
  41. data/lib/fairy/client/output-local-file.rb +60 -0
  42. data/lib/fairy/client/output-null.rb +47 -0
  43. data/lib/fairy/client/output-varray.rb +50 -0
  44. data/lib/fairy/client/output.rb +29 -0
  45. data/lib/fairy/client/roma-put.rb +62 -0
  46. data/lib/fairy/client/roma.rb +156 -0
  47. data/lib/fairy/client/seg-join.rb +61 -0
  48. data/lib/fairy/client/seg-map.rb +78 -0
  49. data/lib/fairy/client/seg-shuffle.rb +35 -0
  50. data/lib/fairy/client/seg-split.rb +27 -0
  51. data/lib/fairy/client/seg-zip.rb +60 -0
  52. data/lib/fairy/client/select.rb +38 -0
  53. data/lib/fairy/client/sort.rb +48 -0
  54. data/lib/fairy/client/sort18.rb +56 -0
  55. data/lib/fairy/client/sort19.rb +61 -0
  56. data/lib/fairy/client/there.rb +47 -0
  57. data/lib/fairy/client/top_n_into_roma.rb +34 -0
  58. data/lib/fairy/client/wc.rb +92 -0
  59. data/lib/fairy/controller.rb +1103 -0
  60. data/lib/fairy/logger.rb +107 -0
  61. data/lib/fairy/master/addins.rb +20 -0
  62. data/lib/fairy/master/atom.rb +17 -0
  63. data/lib/fairy/master/c-barrier.rb +283 -0
  64. data/lib/fairy/master/c-basic-group-by.rb +250 -0
  65. data/lib/fairy/master/c-cat.rb +159 -0
  66. data/lib/fairy/master/c-direct-product.rb +203 -0
  67. data/lib/fairy/master/c-exec.rb +68 -0
  68. data/lib/fairy/master/c-filter.rb +422 -0
  69. data/lib/fairy/master/c-find.rb +138 -0
  70. data/lib/fairy/master/c-group-by.rb +64 -0
  71. data/lib/fairy/master/c-here.rb +80 -0
  72. data/lib/fairy/master/c-inject.rb +119 -0
  73. data/lib/fairy/master/c-input-file.rb +46 -0
  74. data/lib/fairy/master/c-input-iota.rb +66 -0
  75. data/lib/fairy/master/c-input-local-file.rb +117 -0
  76. data/lib/fairy/master/c-input-varray.rb +53 -0
  77. data/lib/fairy/master/c-input.rb +24 -0
  78. data/lib/fairy/master/c-inputtable.rb +31 -0
  79. data/lib/fairy/master/c-inputtable18.rb +36 -0
  80. data/lib/fairy/master/c-inputtable19.rb +35 -0
  81. data/lib/fairy/master/c-io-filter.rb +28 -0
  82. data/lib/fairy/master/c-junction.rb +54 -0
  83. data/lib/fairy/master/c-map.rb +27 -0
  84. data/lib/fairy/master/c-merge-group-by.rb +241 -0
  85. data/lib/fairy/master/c-output-file.rb +84 -0
  86. data/lib/fairy/master/c-output-local-file.rb +19 -0
  87. data/lib/fairy/master/c-output-null.rb +45 -0
  88. data/lib/fairy/master/c-output-varray.rb +57 -0
  89. data/lib/fairy/master/c-output.rb +20 -0
  90. data/lib/fairy/master/c-seg-join.rb +141 -0
  91. data/lib/fairy/master/c-seg-map.rb +26 -0
  92. data/lib/fairy/master/c-seg-shuffle.rb +87 -0
  93. data/lib/fairy/master/c-seg-split.rb +110 -0
  94. data/lib/fairy/master/c-seg-zip.rb +132 -0
  95. data/lib/fairy/master/c-select.rb +27 -0
  96. data/lib/fairy/master/c-sort.rb +108 -0
  97. data/lib/fairy/master/c-there.rb +57 -0
  98. data/lib/fairy/master/c-wc.rb +232 -0
  99. data/lib/fairy/master/job-interpriter.rb +19 -0
  100. data/lib/fairy/master/scheduler.rb +24 -0
  101. data/lib/fairy/master.rb +329 -0
  102. data/lib/fairy/node/addins.rb +19 -0
  103. data/lib/fairy/node/p-barrier.rb +95 -0
  104. data/lib/fairy/node/p-basic-group-by.rb +252 -0
  105. data/lib/fairy/node/p-direct-product.rb +153 -0
  106. data/lib/fairy/node/p-exec.rb +30 -0
  107. data/lib/fairy/node/p-filter.rb +363 -0
  108. data/lib/fairy/node/p-find.rb +111 -0
  109. data/lib/fairy/node/p-group-by.rb +1534 -0
  110. data/lib/fairy/node/p-here.rb +21 -0
  111. data/lib/fairy/node/p-identity.rb +24 -0
  112. data/lib/fairy/node/p-inject.rb +127 -0
  113. data/lib/fairy/node/p-input-file.rb +108 -0
  114. data/lib/fairy/node/p-input-iota.rb +39 -0
  115. data/lib/fairy/node/p-input-local-file.rb +61 -0
  116. data/lib/fairy/node/p-input-varray.rb +26 -0
  117. data/lib/fairy/node/p-io-filter.rb +28 -0
  118. data/lib/fairy/node/p-map.rb +40 -0
  119. data/lib/fairy/node/p-merger-group-by.rb +48 -0
  120. data/lib/fairy/node/p-output-file.rb +104 -0
  121. data/lib/fairy/node/p-output-local-file.rb +14 -0
  122. data/lib/fairy/node/p-output-null.rb +32 -0
  123. data/lib/fairy/node/p-output-varray.rb +41 -0
  124. data/lib/fairy/node/p-seg-join.rb +82 -0
  125. data/lib/fairy/node/p-seg-map.rb +34 -0
  126. data/lib/fairy/node/p-seg-split.rb +61 -0
  127. data/lib/fairy/node/p-seg-zip.rb +79 -0
  128. data/lib/fairy/node/p-select.rb +40 -0
  129. data/lib/fairy/node/p-single-exportable.rb +90 -0
  130. data/lib/fairy/node/p-sort.rb +195 -0
  131. data/lib/fairy/node/p-task.rb +113 -0
  132. data/lib/fairy/node/p-there.rb +44 -0
  133. data/lib/fairy/node/p-wc.rb +266 -0
  134. data/lib/fairy/node.rb +187 -0
  135. data/lib/fairy/processor.rb +510 -0
  136. data/lib/fairy/share/base-app.rb +114 -0
  137. data/lib/fairy/share/block-source.rb +234 -0
  138. data/lib/fairy/share/conf.rb +396 -0
  139. data/lib/fairy/share/debug.rb +21 -0
  140. data/lib/fairy/share/encoding.rb +17 -0
  141. data/lib/fairy/share/fast-tempfile.rb +93 -0
  142. data/lib/fairy/share/file-place.rb +176 -0
  143. data/lib/fairy/share/hash-1.rb +20 -0
  144. data/lib/fairy/share/hash-md5.rb +28 -0
  145. data/lib/fairy/share/hash-murmur.rb +69 -0
  146. data/lib/fairy/share/hash-rb18.rb +20 -0
  147. data/lib/fairy/share/hash-simple-hash.rb +28 -0
  148. data/lib/fairy/share/inspector.rb +16 -0
  149. data/lib/fairy/share/lc/exceptions.rb +82 -0
  150. data/lib/fairy/share/lc/ja/exceptions.rb +81 -0
  151. data/lib/fairy/share/locale.rb +17 -0
  152. data/lib/fairy/share/log.rb +215 -0
  153. data/lib/fairy/share/pool-dictionary.rb +53 -0
  154. data/lib/fairy/share/port-marshaled-queue.rb +347 -0
  155. data/lib/fairy/share/port.rb +1697 -0
  156. data/lib/fairy/share/reference.rb +45 -0
  157. data/lib/fairy/share/stdout.rb +56 -0
  158. data/lib/fairy/share/tr.rb +16 -0
  159. data/lib/fairy/share/varray.rb +147 -0
  160. data/lib/fairy/share/vfile.rb +183 -0
  161. data/lib/fairy/version.rb +8 -0
  162. data/lib/fairy.rb +206 -0
  163. data/sample/grep.rb +46 -0
  164. data/sample/ping.rb +19 -0
  165. data/sample/sort.rb +102 -0
  166. data/sample/wordcount.rb +61 -0
  167. data/spec/README +12 -0
  168. data/spec/fairy1_spec.rb +31 -0
  169. data/spec/fairy2_spec.rb +42 -0
  170. data/spec/fairy3_spec.rb +126 -0
  171. data/spec/fairy4_spec.rb +63 -0
  172. data/spec/fairy5_spec.rb +45 -0
  173. data/spec/fairy6_spec.rb +52 -0
  174. data/spec/fairy7_spec.rb +58 -0
  175. data/spec/fairy8_spec.rb +48 -0
  176. data/spec/mkdat.rb +148 -0
  177. data/spec/run_all.sh +65 -0
  178. data/test/testc.rb +7111 -0
  179. data/tools/cap_recipe/Capfile +144 -0
  180. data/tools/cap_recipe/cluster.yml.sample +14 -0
  181. data/tools/fairy_perf_graph.rb +444 -0
  182. data/tools/git-tag +44 -0
  183. data/tools/log-analysis.rb +62 -0
  184. data/tools/svn-ls-diff +38 -0
  185. data/tools/svn-tags +37 -0
  186. metadata +298 -0
@@ -0,0 +1,61 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/io-filter"
7
+
8
module Fairy
  # Client-side filter that joins the receiver with one or more other
  # filters using a block source. The backend counterpart is named by
  # #backend_class_name.
  class SegJoin<IOFilter

    module Interface
      # job.seg_join(opts,...,filter,...,block_source, opts,...)
      #
      # Arguments are classified by type:
      # * Filter instances are the other inputs of the join;
      # * a String in last position, or directly before a trailing Hash,
      #   is taken as the block source;
      # * a Hash that is the last non-filter argument supplies the options;
      # * every remaining non-filter argument becomes an option key set to true.
      #
      # Returns the new SegJoin filter whose input is the receiver.
      def seg_join(*others)
        block_source = nil
        if others.last.kind_of?(String)
          block_source = others.pop
        elsif others.last.kind_of?(Hash) && others[-2].kind_of?(String)
          block_source = others.delete_at(-2)
        end
        others, opts = others.partition{|e| e.kind_of?(Filter)}
        if opts.last.kind_of?(Hash)
          h = opts.pop
        else
          h = {}
        end
        opts.each{|e| h[e] = true}

        # Wrap every other input in a pre-join filter of its own.
        pres = others.collect{|o|
          p = PreJoinedFilter.new(@fairy, h)
          p.input = o
          p
        }

        block_source = BlockSource.new(block_source)
        join = SegJoin.new(@fairy, h, pres, block_source)
        join.input = self
        join
      end
    end
    Fairy::def_filter_interface Interface

    # fairy::        the fairy this filter belongs to
    # opts::         option hash
    # others::       pre-join filters; their backends are handed to the superclass
    # block_source:: BlockSource describing the join block
    def initialize(fairy, opts, others, block_source)
      super(fairy, opts, others.collect{|o| o.backend}, block_source)
      @others = others
      # The original had a bare `@block_source` here, which evaluated the
      # ivar and discarded it instead of assigning it.
      @block_source = block_source
      @opts = opts
    end

    def backend_class_name
      "CSegJoin"
    end

    # Filter placed in front of each additional join input.
    class PreJoinedFilter<IOFilter
      def backend_class_name
        "CSegJoin::CPreSegJoinFilter"
      end
    end

  end
end
61
+
@@ -0,0 +1,78 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/io-filter"
7
+
8
module Fairy

  # Client-side filter that applies a block source to whole input segments.
  # The backend counterpart is named by #backend_class_name.
  class SegMap < IOFilter
    module Interface

      # Removed API; always raises.
      def smap(block_source, opts = nil)
        raise "No compatibility after fairy-0.5"
      end

      # Removed API; always raises.
      def smap2(block_source, opts = nil)
        raise "No compatibility after fairy-0.6"
      end

      # Builds a SegMap filter fed by the receiver.
      # A Ruby block is rejected; the code must be given as a source string.
      def seg_map(block_source, opts = nil)
        ERR::Raise ERR::CantAcceptBlock if block_given?
        filter = SegMap.new(@fairy, opts, BlockSource.new(block_source))
        filter.input = self
        filter
      end

      # emap(%{|input| input.collect{..})
      #
      # Wraps block_source so that each element of the enumerable it
      # returns is handed to the seg_map output block.
      def emap(block_source, opts = nil)
        ERR::Raise ERR::CantAcceptBlock if block_given?
        seg_map(%{|i, block| proc{#{block_source}}.call(i).each{|e| block.call e}}, opts)
      end

      # Maps each element with block_source and emits the members of the
      # result. opts[:N] (default 1) selects how the members are unpacked:
      # 1 emits them as they are, 2 iterates one level deeper, and any
      # other value calls flatten(N - 2) on them first.
      def map_flatten(block_source, opts = nil)
        ERR::Raise ERR::CantAcceptBlock if block_given?

        depth = (opts && opts[:N]) || 1
        emit =
          case depth
          when 1
            "block.call f"
          when 2
            "if f.respond_to?(:each)
                f.each{|g| block.call(g)}
              else
                block.call f
              end"
          else
            "if f.respond_to?(:flatten)
                f.flatten(#{depth} - 2).each{|g| block.call(g)}
              else
                block.call f
              end"
          end

        map_source = %{|i, block|
          i.each do |e|
            enum = proc{#{block_source}}.call(e)
            enum.each do |f|
              #{emit}
            end
          end
        }
        seg_map(map_source, opts)
      end
      alias mapf map_flatten

    end
    Fairy::def_filter_interface Interface

    def initialize(fairy, opts, block_source)
      super
      @block_source = block_source
    end

    def backend_class_name
      "CSegMap"
    end
  end
end
@@ -0,0 +1,35 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/io-filter"
7
+
8
module Fairy
  # Client-side filter that applies a block source taking an input and an
  # output argument. The backend counterpart is named by #backend_class_name.
  class SegShuffle < IOFilter
    module Interface
      # Builds a SegShuffle filter fed by the receiver.
      def seg_shuffle(block_source, opts = nil)
        filter = SegShuffle.new(@fairy, opts, BlockSource.new(block_source))
        filter.input = self
        filter
      end

      # Wraps block_source so that each element of the enumerable it
      # returns is pushed to the output.
      def seg_eshuffle(block_source, opts = nil)
        seg_shuffle(%{|i, o| proc{#{block_source}}.call(i).each{|e| o.push e}}, opts)
      end
    end
    Fairy::def_filter_interface Interface

    def initialize(fairy, opts, block_source)
      super
      @opts = opts
      @block_source = block_source
    end

    def backend_class_name
      "CSegShuffle"
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # encoding: UTF-8
2
+
3
+ require "fairy/client/io-filter"
4
+
5
module Fairy
  # Client-side filter parameterised by a split count n.
  # The backend counterpart is named by #backend_class_name.
  class SegSplit < IOFilter
    module Interface
      # Builds a SegSplit filter fed by the receiver.
      def seg_split(n, opts=nil)
        filter = SegSplit.new(@fairy, opts, n)
        filter.input = self
        filter
      end
    end
    Fairy::def_filter_interface Interface

    def initialize(fairy, opts, n)
      super
      @opts = opts
      @no_split = n
    end

    def backend_class_name
      "CSegSplit"
    end
  end
end
@@ -0,0 +1,60 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/io-filter"
7
+
8
module Fairy
  # Client-side filter that zips the receiver with one or more other
  # filters, optionally through a block source. The backend counterpart is
  # named by #backend_class_name.
  class SegZip<IOFilter

    module Interface
      # job.seg_zip(opts,...,filter,...,block_source, opts,...)
      #
      # Arguments are classified by type:
      # * Filter instances are the other inputs of the zip;
      # * a String in last position, or directly before a trailing Hash,
      #   is taken as the block source;
      # * a Hash that is the last non-filter argument supplies the options;
      # * every remaining non-filter argument becomes an option key set to true.
      #
      # Returns the new SegZip filter whose input is the receiver.
      def seg_zip(*others)
        block_source = nil
        if others.last.kind_of?(String)
          block_source = others.pop
        elsif others.last.kind_of?(Hash) && others[-2].kind_of?(String)
          # Documented form "block_source, opts": without this branch the
          # source string was left in the option list and became a hash key.
          block_source = others.delete_at(-2)
        end
        others, opts = others.partition{|e| e.kind_of?(Filter)}
        if opts.last.kind_of?(Hash)
          h = opts.pop
        else
          h = {}
        end
        opts.each{|e| h[e] = true}

        # Wrap every other input in a pre-zip filter of its own.
        pres = others.collect{|o|
          p = PreSegZipFilter.new(@fairy, h)
          p.input = o
          p
        }

        block_source = BlockSource.new(block_source)
        zip = SegZip.new(@fairy, h, pres, block_source)
        zip.input = self
        zip
      end
    end
    Fairy::def_filter_interface Interface

    # ZIP_BY_SEGMENT = :ZIP_BY_SEGMENT

    # fairy::        the fairy this filter belongs to
    # opts::         option hash
    # others::       pre-zip filters; their backends are handed to the superclass
    # block_source:: BlockSource describing the zip block
    def initialize(fairy, opts, others, block_source)
      super(fairy, opts, others.collect{|o| o.backend}, block_source)
      @others = others
      # The original had a bare `@block_source` here, which evaluated the
      # ivar and discarded it instead of assigning it.
      @block_source = block_source
      @opts = opts
    end

    def backend_class_name
      "CSegZip"
    end

    # Filter placed in front of each additional zip input.
    class PreSegZipFilter<IOFilter
      def backend_class_name
        "CSegZip::CPreSegZipFilter"
      end
    end
  end
end
60
+
@@ -0,0 +1,38 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/io-filter"
7
+
8
module Fairy

  # Client-side filter built from a predicate given as a block source.
  # The backend counterpart is named by #backend_class_name.
  class Select<IOFilter
    module Interface
      # Builds a Select filter fed by the receiver.
      #
      # block_source:: predicate as a source string
      # opts::         optional option hash
      #
      # A Ruby block is rejected (ERR::CantAcceptBlock).
      def select(block_source, opts = nil)
        ERR::Raise ERR::CantAcceptBlock if block_given?
        block_source = BlockSource.new(block_source)
        mapper = Select.new(@fairy, opts, block_source)
        mapper.input=self
        mapper
      end

      alias find_all select

      # Selects the elements matched by regexp (tested with ===).
      #
      # The literal is rebuilt with Regexp#inspect rather than
      # Regexp#source so that flags such as /i, /m and /x survive the trip
      # through the generated source, and "/" inside the pattern is escaped.
      def grep(regexp, opts = nil)
        select(%{|e| #{regexp.inspect} === e}, opts)
      end
    end
    Fairy::def_filter_interface Interface

    def initialize(fairy, opts, block_source)
      super
      @block_source = block_source
    end

    def backend_class_name
      "CSelect"
    end
  end

end
@@ -0,0 +1,48 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/basic-group-by"
7
+
8
module Fairy

  # sort_by built as a chain of two filters: PreSort feeding PostSort.
  module Sort

    module Interface
      # Chains receiver -> PreSort -> PostSort, both configured with the
      # same block source and options, and returns the PostSort filter.
      def sort_by(cmp_proc, opts=nil)
        source = BlockSource.new(cmp_proc)

        pre = Sort::PreSort.new(@fairy, opts, source)
        pre.input = self

        post = Sort::PostSort.new(@fairy, opts, source)
        post.input = pre
        post
      end
    end
    Fairy::def_filter_interface Interface

    # First stage of sort_by.
    class PreSort < IOFilter
      def initialize(fairy, opts, block_source)
        super
        @block_source = block_source
      end

      def backend_class_name
        "CSort::CPreSort"
      end
    end

    # Second stage of sort_by; its input is the PreSort filter.
    class PostSort < IOFilter
      def initialize(fairy, opts, block_source)
        super
        @block_source = block_source
      end

      def backend_class_name
        "CSort::CPostSort"
      end
    end

  end
end
48
+
@@ -0,0 +1,56 @@
1
+ # encoding: UTF-8
2
+
3
+ require "fairy/client/merge-group-by"
4
+
5
# Defines the sort_by_with_va filter.
#
# Steps, as written below:
# 1. every input segment is turned into [key, element] pairs sorted by key
#    and stored through to_va;
# 2. every Nth pair is sampled and the sampled keys are sorted;
# 3. pvn - 1 evenly spaced sample keys plus the last one become the pivots,
#    published as the pool variable :pvs;
# 4. pairs are grouped by the first pivot that is >= their key;
# 5. within each group the incoming streams are merged by key;
# 6. the resulting segments are ordered by their key.
#
# Options (last extra argument, a Hash):
# :sampling_ratio:: sample one pair out of this many
#                   (default Fairy::CONF.SORT_SAMPLING_RATIO_1_TO)
# :pvn::            number of pivots (default Fairy::CONF.SORT_NO_SEGMENT)
Fairy.def_filter(:sort_by_with_va) do |fairy, input, block_source, *rests|

  opts = {}
  if !rests.empty?
    opts = rests.last
  end

  sampling_ratio_1_to = opts[:sampling_ratio]
  sampling_ratio_1_to ||= Fairy::CONF.SORT_SAMPLING_RATIO_1_TO
  pvn = opts[:pvn]
  pvn ||= Fairy::CONF.SORT_NO_SEGMENT

  va = input.emap(%{|i|
    sort_proc = proc{#{block_source}}
    i.to_a.collect{|e| [sort_proc.call(e), e]}.sort_by{|e| e.first}}).to_va

  # Keep the sample count (va.size / ratio) between SORT_SAMPLING_MIN and
  # SORT_SAMPLING_MAX. The ratio is "one out of N", so the N that yields a
  # given count is va.size / count. The previous code computed
  # count / va.size, which could become 0 (making the generated `% 0`
  # below raise) and divided by zero for an empty input.
  if va.size/sampling_ratio_1_to < Fairy::CONF.SORT_SAMPLING_MIN
    sampling_ratio_1_to = va.size.div(Fairy::CONF.SORT_SAMPLING_MIN)
    sampling_ratio_1_to = 1 if sampling_ratio_1_to.zero?
  end
  if va.size/sampling_ratio_1_to > Fairy::CONF.SORT_SAMPLING_MAX
    sampling_ratio_1_to = va.size.div(Fairy::CONF.SORT_SAMPLING_MAX)
  end

  Fairy::Log::debug(self, "SAMPLING: RATIO: 1/#{sampling_ratio_1_to}")
  sample = fairy.input(va).select(%{|e| (i += 1) % #{sampling_ratio_1_to} == 0},
                                  :BEGIN=>%{i = 0}).here.sort_by{|e| e.first}.map{|e| e.first}

  # Evenly spaced pivot positions, with the last sample as the final pivot.
  idxes = (1...pvn).collect{|i| (sample.size*i).div(pvn)}
  idxes.push(-1)
  pvs = sample.values_at(*idxes)
  Fairy::Log::debug(self, "PVS: #{pvs.inspect}")
  fairy.def_pool_variable(:pvs, pvs)

  div = fairy.input(va).merge_group_by(%{|e|
    key = @Pool.pvs.find{|pv| e.first <= pv}
    key ? key : @Pool.pvs.last},
    :postqueuing_policy => {:queuing_class => :OnMemoryQueue})

  # Merge of the incoming streams: buf holds [stream, head] pairs ordered by
  # the head's key; the smallest head is emitted and replaced by the next
  # value popped from the same stream.
  # NOTE(review): fairy/client/sort19.rb emits with `block.call` instead of
  # `o.push` here -- confirm which form the seg_map backend expects.
  msort = div.seg_map(%{|i, o|
    buf = i.map{|st| [st, st.pop.dc_deep_copy]}.select{|st, v|!v.nil?}.sort_by{|st, v| v.first}
    while st_min = buf.shift
      st, min = st_min
      o.push min.last
      next unless v = st.pop.dc_deep_copy # 取りあえずの対応
      idx = buf.rindex{|st0, v0| v0.first <= v.first}
      idx ? buf.insert(idx+1, [st, v]) : buf.unshift([st, v])
    end})

  shuffle = msort.seg_eshuffle(%{|i| i.sort{|s1, s2| s1.key <=> s2.key}})
  # shuffle = msort.eshuffle(%{|i| i.sort_by{|s1| Log::debug(self, s1.key.inspect); s1.key}})
end
56
+
@@ -0,0 +1,61 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/merge-group-by"
7
+
8
# Defines the sort_by_with_va sub-filter.
#
# The input is converted to key-sorted [key, element] pairs, sampled to
# choose pvn pivots (published as the pool variable :pvs), grouped by
# pivot, merged by key inside every group, and finally ordered by segment
# key.
#
# opts[:sampling_ratio]:: sample one pair out of this many
#                         (default Fairy::CONF.SORT_SAMPLING_RATIO_1_TO)
# opts[:pvn]::            number of pivots (default Fairy::CONF.SORT_NO_SEGMENT)
Fairy.def_filter(:sort_by_with_va, :sub => true) do |fairy, input, block_source, opts = {}|

  ratio = opts[:sampling_ratio] || Fairy::CONF.SORT_SAMPLING_RATIO_1_TO
  pvn = opts[:pvn] || Fairy::CONF.SORT_NO_SEGMENT

  va = input.emap(%{|i|
    sort_proc = proc{#{block_source}}
    i.to_a.collect{|e| [sort_proc.call(e), e]}.sort_by{|e| e.first}}).to_va

  # Keep the sample count (va.size / ratio) within the configured bounds.
  if va.size/ratio < Fairy::CONF.SORT_SAMPLING_MIN
    ratio = va.size.div(Fairy::CONF.SORT_SAMPLING_MIN)
    ratio = 1 if ratio.zero?
  end
  if va.size/ratio > Fairy::CONF.SORT_SAMPLING_MAX
    ratio = va.size.div(Fairy::CONF.SORT_SAMPLING_MAX)
  end

  Fairy::Log::debug(self, "SAMPLING: RATIO: 1/#{ratio}")
  Fairy::Log::debug(self, "SAMPLING: VA SIZE: #{va.size}")
  sample = fairy.input(va).select(%{|e| (i += 1) % #{ratio} == 0},
                                  :BEGIN=>%{i = 0}).here.sort_by{|e| e.first}.map{|e| e.first}

  Fairy::Log::debugf(self, "SAMPLING: SAMPLE: %s", sample.inspect)

  # Evenly spaced pivot positions, with the last sample as the final pivot.
  positions = (1...pvn).collect{|i| (sample.size*i).div(pvn)}
  positions.push(-1)
  pivots = sample.values_at(*positions)
  Fairy::Log::debug(self, "PVS: #{pivots.inspect}")
  fairy.def_pool_variable(:pvs, pivots)

  grouped = fairy.input(va).merge_group_by(%{|e|
    key = @Pool.pvs.find{|pv| e.first <= pv}
    key ? key : @Pool.pvs.last},
    :postqueuing_policy => {:queuing_class => :PoolQueue}

  )

  # Merge of the incoming streams: buf holds [stream, head] pairs ordered by
  # the head's key; the smallest head is emitted and replaced by the next
  # value popped from the same stream.
  merged = grouped.seg_map(%{|i, block|
    buf = i.map{|st| [st, st.pop.dc_deep_copy]}.select{|st, v|!v.nil?}.sort_by{|st, v| v.first}
    while st_min = buf.shift
      st, min = st_min
      block.call min.last
      next unless v = st.pop.dc_deep_copy # 取りあえずの対応
      idx = buf.rindex{|st0, v0| v0.first <= v.first}
      idx ? buf.insert(idx+1, [st, v]) : buf.unshift([st, v])
    end})

  # The value of this last expression is the value of the definition block.
  merged.seg_eshuffle(%{|i| i.sort{|s1, s2| s1.key <=> s2.key}})
end
61
+