fairy 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. data/LICENSE +674 -0
  2. data/Makefile +116 -0
  3. data/README +15 -0
  4. data/bin/fairy +582 -0
  5. data/bin/fairy-cat +74 -0
  6. data/bin/fairy-cp +128 -0
  7. data/bin/fairy-rm +122 -0
  8. data/bin/subcmd/controller +41 -0
  9. data/bin/subcmd/inspector +81 -0
  10. data/bin/subcmd/master +43 -0
  11. data/bin/subcmd/node +47 -0
  12. data/bin/subcmd/processor +54 -0
  13. data/doc/programming-interface.html +240 -0
  14. data/doc/programming-interface.rd +300 -0
  15. data/etc/fairy.conf.tmpl +118 -0
  16. data/ext/simple_hash/extconf.rb +4 -0
  17. data/ext/simple_hash/simple_hash.c +42 -0
  18. data/fairy.gemspec +60 -0
  19. data/lib/fairy/client/addins.rb +20 -0
  20. data/lib/fairy/client/barrier.rb +29 -0
  21. data/lib/fairy/client/basic-group-by.rb +52 -0
  22. data/lib/fairy/client/cat.rb +41 -0
  23. data/lib/fairy/client/direct-product.rb +51 -0
  24. data/lib/fairy/client/equijoin.rb +79 -0
  25. data/lib/fairy/client/exec.rb +54 -0
  26. data/lib/fairy/client/filter.rb +62 -0
  27. data/lib/fairy/client/find.rb +35 -0
  28. data/lib/fairy/client/group-by.rb +194 -0
  29. data/lib/fairy/client/here.rb +84 -0
  30. data/lib/fairy/client/inject.rb +70 -0
  31. data/lib/fairy/client/input-file.rb +53 -0
  32. data/lib/fairy/client/input-iota.rb +49 -0
  33. data/lib/fairy/client/input-local-file.rb +188 -0
  34. data/lib/fairy/client/input-varray.rb +30 -0
  35. data/lib/fairy/client/input.rb +42 -0
  36. data/lib/fairy/client/io-filter.rb +26 -0
  37. data/lib/fairy/client/junction.rb +31 -0
  38. data/lib/fairy/client/map.rb +34 -0
  39. data/lib/fairy/client/merge-group-by.rb +71 -0
  40. data/lib/fairy/client/output-file.rb +64 -0
  41. data/lib/fairy/client/output-local-file.rb +60 -0
  42. data/lib/fairy/client/output-null.rb +47 -0
  43. data/lib/fairy/client/output-varray.rb +50 -0
  44. data/lib/fairy/client/output.rb +29 -0
  45. data/lib/fairy/client/roma-put.rb +62 -0
  46. data/lib/fairy/client/roma.rb +156 -0
  47. data/lib/fairy/client/seg-join.rb +61 -0
  48. data/lib/fairy/client/seg-map.rb +78 -0
  49. data/lib/fairy/client/seg-shuffle.rb +35 -0
  50. data/lib/fairy/client/seg-split.rb +27 -0
  51. data/lib/fairy/client/seg-zip.rb +60 -0
  52. data/lib/fairy/client/select.rb +38 -0
  53. data/lib/fairy/client/sort.rb +48 -0
  54. data/lib/fairy/client/sort18.rb +56 -0
  55. data/lib/fairy/client/sort19.rb +61 -0
  56. data/lib/fairy/client/there.rb +47 -0
  57. data/lib/fairy/client/top_n_into_roma.rb +34 -0
  58. data/lib/fairy/client/wc.rb +92 -0
  59. data/lib/fairy/controller.rb +1103 -0
  60. data/lib/fairy/logger.rb +107 -0
  61. data/lib/fairy/master/addins.rb +20 -0
  62. data/lib/fairy/master/atom.rb +17 -0
  63. data/lib/fairy/master/c-barrier.rb +283 -0
  64. data/lib/fairy/master/c-basic-group-by.rb +250 -0
  65. data/lib/fairy/master/c-cat.rb +159 -0
  66. data/lib/fairy/master/c-direct-product.rb +203 -0
  67. data/lib/fairy/master/c-exec.rb +68 -0
  68. data/lib/fairy/master/c-filter.rb +422 -0
  69. data/lib/fairy/master/c-find.rb +138 -0
  70. data/lib/fairy/master/c-group-by.rb +64 -0
  71. data/lib/fairy/master/c-here.rb +80 -0
  72. data/lib/fairy/master/c-inject.rb +119 -0
  73. data/lib/fairy/master/c-input-file.rb +46 -0
  74. data/lib/fairy/master/c-input-iota.rb +66 -0
  75. data/lib/fairy/master/c-input-local-file.rb +117 -0
  76. data/lib/fairy/master/c-input-varray.rb +53 -0
  77. data/lib/fairy/master/c-input.rb +24 -0
  78. data/lib/fairy/master/c-inputtable.rb +31 -0
  79. data/lib/fairy/master/c-inputtable18.rb +36 -0
  80. data/lib/fairy/master/c-inputtable19.rb +35 -0
  81. data/lib/fairy/master/c-io-filter.rb +28 -0
  82. data/lib/fairy/master/c-junction.rb +54 -0
  83. data/lib/fairy/master/c-map.rb +27 -0
  84. data/lib/fairy/master/c-merge-group-by.rb +241 -0
  85. data/lib/fairy/master/c-output-file.rb +84 -0
  86. data/lib/fairy/master/c-output-local-file.rb +19 -0
  87. data/lib/fairy/master/c-output-null.rb +45 -0
  88. data/lib/fairy/master/c-output-varray.rb +57 -0
  89. data/lib/fairy/master/c-output.rb +20 -0
  90. data/lib/fairy/master/c-seg-join.rb +141 -0
  91. data/lib/fairy/master/c-seg-map.rb +26 -0
  92. data/lib/fairy/master/c-seg-shuffle.rb +87 -0
  93. data/lib/fairy/master/c-seg-split.rb +110 -0
  94. data/lib/fairy/master/c-seg-zip.rb +132 -0
  95. data/lib/fairy/master/c-select.rb +27 -0
  96. data/lib/fairy/master/c-sort.rb +108 -0
  97. data/lib/fairy/master/c-there.rb +57 -0
  98. data/lib/fairy/master/c-wc.rb +232 -0
  99. data/lib/fairy/master/job-interpriter.rb +19 -0
  100. data/lib/fairy/master/scheduler.rb +24 -0
  101. data/lib/fairy/master.rb +329 -0
  102. data/lib/fairy/node/addins.rb +19 -0
  103. data/lib/fairy/node/p-barrier.rb +95 -0
  104. data/lib/fairy/node/p-basic-group-by.rb +252 -0
  105. data/lib/fairy/node/p-direct-product.rb +153 -0
  106. data/lib/fairy/node/p-exec.rb +30 -0
  107. data/lib/fairy/node/p-filter.rb +363 -0
  108. data/lib/fairy/node/p-find.rb +111 -0
  109. data/lib/fairy/node/p-group-by.rb +1534 -0
  110. data/lib/fairy/node/p-here.rb +21 -0
  111. data/lib/fairy/node/p-identity.rb +24 -0
  112. data/lib/fairy/node/p-inject.rb +127 -0
  113. data/lib/fairy/node/p-input-file.rb +108 -0
  114. data/lib/fairy/node/p-input-iota.rb +39 -0
  115. data/lib/fairy/node/p-input-local-file.rb +61 -0
  116. data/lib/fairy/node/p-input-varray.rb +26 -0
  117. data/lib/fairy/node/p-io-filter.rb +28 -0
  118. data/lib/fairy/node/p-map.rb +40 -0
  119. data/lib/fairy/node/p-merger-group-by.rb +48 -0
  120. data/lib/fairy/node/p-output-file.rb +104 -0
  121. data/lib/fairy/node/p-output-local-file.rb +14 -0
  122. data/lib/fairy/node/p-output-null.rb +32 -0
  123. data/lib/fairy/node/p-output-varray.rb +41 -0
  124. data/lib/fairy/node/p-seg-join.rb +82 -0
  125. data/lib/fairy/node/p-seg-map.rb +34 -0
  126. data/lib/fairy/node/p-seg-split.rb +61 -0
  127. data/lib/fairy/node/p-seg-zip.rb +79 -0
  128. data/lib/fairy/node/p-select.rb +40 -0
  129. data/lib/fairy/node/p-single-exportable.rb +90 -0
  130. data/lib/fairy/node/p-sort.rb +195 -0
  131. data/lib/fairy/node/p-task.rb +113 -0
  132. data/lib/fairy/node/p-there.rb +44 -0
  133. data/lib/fairy/node/p-wc.rb +266 -0
  134. data/lib/fairy/node.rb +187 -0
  135. data/lib/fairy/processor.rb +510 -0
  136. data/lib/fairy/share/base-app.rb +114 -0
  137. data/lib/fairy/share/block-source.rb +234 -0
  138. data/lib/fairy/share/conf.rb +396 -0
  139. data/lib/fairy/share/debug.rb +21 -0
  140. data/lib/fairy/share/encoding.rb +17 -0
  141. data/lib/fairy/share/fast-tempfile.rb +93 -0
  142. data/lib/fairy/share/file-place.rb +176 -0
  143. data/lib/fairy/share/hash-1.rb +20 -0
  144. data/lib/fairy/share/hash-md5.rb +28 -0
  145. data/lib/fairy/share/hash-murmur.rb +69 -0
  146. data/lib/fairy/share/hash-rb18.rb +20 -0
  147. data/lib/fairy/share/hash-simple-hash.rb +28 -0
  148. data/lib/fairy/share/inspector.rb +16 -0
  149. data/lib/fairy/share/lc/exceptions.rb +82 -0
  150. data/lib/fairy/share/lc/ja/exceptions.rb +81 -0
  151. data/lib/fairy/share/locale.rb +17 -0
  152. data/lib/fairy/share/log.rb +215 -0
  153. data/lib/fairy/share/pool-dictionary.rb +53 -0
  154. data/lib/fairy/share/port-marshaled-queue.rb +347 -0
  155. data/lib/fairy/share/port.rb +1697 -0
  156. data/lib/fairy/share/reference.rb +45 -0
  157. data/lib/fairy/share/stdout.rb +56 -0
  158. data/lib/fairy/share/tr.rb +16 -0
  159. data/lib/fairy/share/varray.rb +147 -0
  160. data/lib/fairy/share/vfile.rb +183 -0
  161. data/lib/fairy/version.rb +8 -0
  162. data/lib/fairy.rb +206 -0
  163. data/sample/grep.rb +46 -0
  164. data/sample/ping.rb +19 -0
  165. data/sample/sort.rb +102 -0
  166. data/sample/wordcount.rb +61 -0
  167. data/spec/README +12 -0
  168. data/spec/fairy1_spec.rb +31 -0
  169. data/spec/fairy2_spec.rb +42 -0
  170. data/spec/fairy3_spec.rb +126 -0
  171. data/spec/fairy4_spec.rb +63 -0
  172. data/spec/fairy5_spec.rb +45 -0
  173. data/spec/fairy6_spec.rb +52 -0
  174. data/spec/fairy7_spec.rb +58 -0
  175. data/spec/fairy8_spec.rb +48 -0
  176. data/spec/mkdat.rb +148 -0
  177. data/spec/run_all.sh +65 -0
  178. data/test/testc.rb +7111 -0
  179. data/tools/cap_recipe/Capfile +144 -0
  180. data/tools/cap_recipe/cluster.yml.sample +14 -0
  181. data/tools/fairy_perf_graph.rb +444 -0
  182. data/tools/git-tag +44 -0
  183. data/tools/log-analysis.rb +62 -0
  184. data/tools/svn-ls-diff +38 -0
  185. data/tools/svn-tags +37 -0
  186. metadata +298 -0
@@ -0,0 +1,61 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/io-filter"
7
+
8
+ module Fairy
9
+ class SegJoin<IOFilter
10
+
11
+ module Interface
12
+ # jpb.seg_join(opts,...,filter,...,block_source, opts,...)
13
+ def seg_join(*others)
14
+ block_source = nil
15
+ if others.last.kind_of?(String)
16
+ block_source = others.pop
17
+ elsif others.last.kind_of?(Hash) and others[-2].kind_of?(String)
18
+ block_source = others.delete_at(-2)
19
+ end
20
+ others, opts = others.partition{|e| e.kind_of?(Filter)}
21
+ if opts.last.kind_of?(Hash)
22
+ h = opts.pop
23
+ else
24
+ h = {}
25
+ end
26
+ opts.each{|e| h[e] = true}
27
+
28
+ pres = others.collect{|o|
29
+ p = PreJoinedFilter.new(@fairy, h)
30
+ p.input = o
31
+ p
32
+ }
33
+
34
+ block_source = BlockSource.new(block_source)
35
+ join = SegJoin.new(@fairy, h, pres, block_source)
36
+ join.input = self
37
+ join
38
+ end
39
+ end
40
+ Fairy::def_filter_interface Interface
41
+
42
+ def initialize(fairy, opts, others, block_source)
43
+ super(fairy, opts, others.collect{|o| o.backend}, block_source)
44
+ @others = others
45
+ @block_source
46
+ @opts = opts
47
+ end
48
+
49
+ def backend_class_name
50
+ "CSegJoin"
51
+ end
52
+
53
+ class PreJoinedFilter<IOFilter
54
+ def backend_class_name
55
+ "CSegJoin::CPreSegJoinFilter"
56
+ end
57
+ end
58
+
59
+ end
60
+ end
61
+
@@ -0,0 +1,78 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/io-filter"
7
+
8
+ module Fairy
9
+
10
+ class SegMap<IOFilter
11
+ module Interface
12
+ # smap and smap2 are retired entry points: both only raise.
13
+ def smap(block_source, opts = nil)
14
+ raise "No compatibility after fairy-0.5"
15
+ end
16
+
17
+ def smap2(block_source, opts = nil)
18
+ raise "No compatibility after fairy-0.6"
19
+ end
20
+ # Builds a SegMap from block_source with the receiver as its input; a Ruby block is rejected.
21
+ def seg_map(block_source, opts = nil)
22
+ ERR::Raise ERR::CantAcceptBlock if block_given?
23
+ block_source = BlockSource.new(block_source)
24
+ mapper = SegMap.new(@fairy, opts, block_source)
25
+ mapper.input=self
26
+ mapper
27
+ end
28
+ # emap wraps block_source so it is called with the input i and each element of its result goes to block.call.
29
+ # emap(%{|input| input.collect{..})
30
+ def emap(block_source, opts = nil)
31
+ ERR::Raise ERR::CantAcceptBlock if block_given?
32
+ map_source = %{|i, block| proc{#{block_source}}.call(i).each{|e| block.call e}}
33
+ seg_map(map_source, opts)
34
+ end
35
+ # map_flatten calls block_source per element and emits the result's items; opts[:N] (default 1) selects the generated flattening code.
36
+ def map_flatten(block_source, opts = nil)
37
+ ERR::Raise ERR::CantAcceptBlock if block_given?
38
+ map_source = %{|i, block|
39
+ i.each do |e|
40
+ enum = proc{#{block_source}}.call(e)
41
+ enum.each do |f|
42
+ #{n = opts && opts[:N]; n ||= 1
43
+ case n
44
+ when 1
45
+ "block.call f"
46
+ when 2
47
+ "if f.respond_to?(:each)
48
+ f.each{|g| block.call(g)}
49
+ else
50
+ block.call f
51
+ end"
52
+ else
53
+ "if f.respond_to?(:flatten)
54
+ f.flatten(#{opts[:N]} - 2).each{|g| block.call(g)}
55
+ else
56
+ block.call f
57
+ end"
58
+ end}
59
+ end
60
+ end
61
+ }
62
+ seg_map(map_source, opts)
63
+ end
64
+ alias mapf map_flatten
65
+
66
+ end
67
+ Fairy::def_filter_interface Interface
68
+ # Bare super forwards all three arguments to the superclass initializer; the block source is then kept here.
69
+ def initialize(fairy, opts, block_source)
70
+ super
71
+ @block_source = block_source
72
+ end
73
+ # Returns the string "CSegMap" (presumably the name of the matching backend class; confirm in IOFilter).
74
+ def backend_class_name
75
+ "CSegMap"
76
+ end
77
+ end
78
+ end
@@ -0,0 +1,35 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/io-filter"
7
+
8
+ module Fairy
9
+ class SegShuffle<IOFilter
10
+ module Interface
11
+ def seg_shuffle(block_source, opts = nil)
12
+ block_source = BlockSource.new(block_source)
13
+ shuffle = SegShuffle.new(@fairy, opts, block_source)
14
+ shuffle.input = self
15
+ shuffle
16
+ end
17
+
18
+ def seg_eshuffle(block_source, opts = nil)
19
+ map_source = %{|i, o| proc{#{block_source}}.call(i).each{|e| o.push e}}
20
+ seg_shuffle(map_source, opts)
21
+ end
22
+ end
23
+ Fairy::def_filter_interface Interface
24
+
25
+ def initialize(fairy, opts, block_source)
26
+ super
27
+ @block_source = block_source
28
+ @opts = opts
29
+ end
30
+
31
+ def backend_class_name
32
+ "CSegShuffle"
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,27 @@
1
+ # encoding: UTF-8
2
+
3
+ require "fairy/client/io-filter"
4
+
5
+ module Fairy
6
+ class SegSplit<IOFilter
7
+ module Interface
8
+ def seg_split(n, opts=nil)
9
+ splitter = SegSplit.new(@fairy, opts, n)
10
+ splitter.input = self
11
+ splitter
12
+ end
13
+ end
14
+ Fairy::def_filter_interface Interface
15
+
16
+
17
+ def initialize(fairy, opts, n)
18
+ super
19
+ @no_split = n
20
+ @opts = opts
21
+ end
22
+
23
+ def backend_class_name
24
+ "CSegSplit"
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,60 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/io-filter"
7
+
8
+ module Fairy
9
+ class SegZip<IOFilter
10
+
11
+ module Interface
12
+ # jpb.seg_zip(opts,...,filter,...,block_source, opts,...)
13
+ def seg_zip(*others)
14
+ block_source = nil
15
+ if others.last.kind_of?(String)
16
+ block_source = others.pop
17
+ end
18
+ others, opts = others.partition{|e| e.kind_of?(Filter)}
19
+ if opts.last.kind_of?(Hash)
20
+ h = opts.pop
21
+ else
22
+ h = {}
23
+ end
24
+ opts.each{|e| h[e] = true}
25
+
26
+ pres = others.collect{|o|
27
+ p = PreSegZipFilter.new(@fairy, h)
28
+ p.input = o
29
+ p
30
+ }
31
+
32
+ block_source = BlockSource.new(block_source)
33
+ zip = SegZip.new(@fairy, h, pres, block_source)
34
+ zip.input = self
35
+ zip
36
+ end
37
+ end
38
+ Fairy::def_filter_interface Interface
39
+
40
+ # ZIP_BY_SEGMENT = :ZIP_BY_SEGMENT
41
+
42
+ def initialize(fairy, opts, others, block_source)
43
+ super(fairy, opts, others.collect{|o| o.backend}, block_source)
44
+ @others = others
45
+ @block_source
46
+ @opts = opts
47
+ end
48
+
49
+ def backend_class_name
50
+ "CSegZip"
51
+ end
52
+
53
+ class PreSegZipFilter<IOFilter
54
+ def backend_class_name
55
+ "CSegZip::CPreSegZipFilter"
56
+ end
57
+ end
58
+ end
59
+ end
60
+
@@ -0,0 +1,38 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/io-filter"
7
+
8
+ module Fairy
9
+
10
+ class Select<IOFilter
11
+ module Interface
12
+ def select(block_source, opts = nil)
13
+ ERR::Raise ERR::CantAcceptBlock if block_given?
14
+ block_source = BlockSource.new(block_source)
15
+ mapper = Select.new(@fairy, opts, block_source)
16
+ mapper.input=self
17
+ mapper
18
+ end
19
+
20
+ alias find_all select
21
+
22
+ def grep(regexp, opts = nil)
23
+ select(%{|e| /#{regexp.source}/ === e}, opts)
24
+ end
25
+ end
26
+ Fairy::def_filter_interface Interface
27
+
28
+ def initialize(fairy, opts, block_source)
29
+ super
30
+ @block_source = block_source
31
+ end
32
+
33
+ def backend_class_name
34
+ "CSelect"
35
+ end
36
+ end
37
+
38
+ end
@@ -0,0 +1,48 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/basic-group-by"
7
+
8
+ module Fairy
9
+
10
+ module Sort
11
+
12
+ module Interface
13
+ def sort_by(cmp_proc, opts=nil)
14
+ cmp_proc = BlockSource.new(cmp_proc)
15
+ pre_sort = Sort::PreSort.new(@fairy, opts, cmp_proc)
16
+ pre_sort.input = self
17
+ post_sort = Sort::PostSort.new(@fairy, opts, cmp_proc)
18
+ post_sort.input = pre_sort
19
+ post_sort
20
+ end
21
+ end
22
+ Fairy::def_filter_interface Interface
23
+
24
+ class PreSort<IOFilter
25
+ def initialize(fairy, opts, block_source)
26
+ super
27
+ @block_source = block_source
28
+ end
29
+
30
+ def backend_class_name
31
+ "CSort::CPreSort"
32
+ end
33
+ end
34
+
35
+ class PostSort<IOFilter
36
+ def initialize(fairy, opts, block_source)
37
+ super
38
+ @block_source = block_source
39
+ end
40
+
41
+ def backend_class_name
42
+ "CSort::CPostSort"
43
+ end
44
+ end
45
+
46
+ end
47
+ end
48
+
@@ -0,0 +1,56 @@
1
+ # encoding: UTF-8
2
+
3
+ require "fairy/client/merge-group-by"
4
+
5
+ Fairy.def_filter(:sort_by_with_va) do |fairy, input, block_source, *rests|
6
+
7
+ opts = {}
8
+ if !rests.empty?
9
+ opts = rests.last
10
+ end
11
+
12
+ sampling_ratio_1_to = opts[:sampling_ratio]
13
+ sampling_ratio_1_to ||= Fairy::CONF.SORT_SAMPLING_RATIO_1_TO
14
+ pvn = opts[:pvn]
15
+ pvn ||= Fairy::CONF.SORT_NO_SEGMENT
16
+
17
+ va = input.emap(%{|i|
18
+ sort_proc = proc{#{block_source}}
19
+ i.to_a.collect{|e| [sort_proc.call(e), e]}.sort_by{|e| e.first}}).to_va
20
+
21
+ if va.size/sampling_ratio_1_to < Fairy::CONF.SORT_SAMPLING_MIN
22
+ sampling_ratio_1_to = Fairy::CONF.SORT_SAMPLING_MIN.div(va.size)
23
+ end
24
+ if va.size/sampling_ratio_1_to > Fairy::CONF.SORT_SAMPLING_MAX
25
+ sampling_ratio_1_to = Fairy::CONF.SORT_SAMPLING_MAX.div(va.size)
26
+ end
27
+
28
+ Fairy::Log::debug(self, "SAMPLING: RATIO: 1/#{sampling_ratio_1_to}")
29
+ sample = fairy.input(va).select(%{|e| (i += 1) % #{sampling_ratio_1_to} == 0},
30
+ :BEGIN=>%{i = 0}).here.sort_by{|e| e.first}.map{|e| e.first}
31
+
32
+ idxes = (1...pvn).collect{|i| (sample.size*i).div(pvn)}
33
+ idxes.push -1
34
+ pvs = sample.values_at(*idxes)
35
+ Fairy::Log::debug(self, "PVS: #{pvs.inspect}")
36
+ fairy.def_pool_variable(:pvs, pvs)
37
+
38
+ div = fairy.input(va).merge_group_by(%{|e|
39
+ key = @Pool.pvs.find{|pv| e.first <= pv}
40
+ key ? key : @Pool.pvs.last},
41
+ :postqueuing_policy => {:queuing_class => :OnMemoryQueue})
42
+
43
+ msort = div.seg_map(%{|i, o|
44
+ buf = i.map{|st| [st, st.pop.dc_deep_copy]}.select{|st, v|!v.nil?}.sort_by{|st, v| v.first}
45
+ while st_min = buf.shift
46
+ st, min = st_min
47
+ o.push min.last
48
+ next unless v = st.pop.dc_deep_copy # 取りあえずの対応
49
+ idx = buf.rindex{|st0, v0| v0.first <= v.first}
50
+ idx ? buf.insert(idx+1, [st, v]) : buf.unshift([st, v])
51
+ end})
52
+
53
+ shuffle = msort.seg_eshuffle(%{|i| i.sort{|s1, s2| s1.key <=> s2.key}})
54
+ # shuffle = msort.eshuffle(%{|i| i.sort_by{|s1| Log::debug(self, s1.key.inspect); s1.key}})
55
+ end
56
+
@@ -0,0 +1,61 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "fairy/client/merge-group-by"
7
+
8
+ Fairy.def_filter(:sort_by_with_va, :sub => true) do |fairy, input, block_source, opts = {}|
9
+ # Registers the :sort_by_with_va filter; :sampling_ratio and :pvn in opts fall back to the Fairy::CONF defaults.
10
+ sampling_ratio_1_to = opts[:sampling_ratio]
11
+ sampling_ratio_1_to ||= Fairy::CONF.SORT_SAMPLING_RATIO_1_TO
12
+ pvn = opts[:pvn]
13
+ pvn ||= Fairy::CONF.SORT_NO_SEGMENT
14
+ # Pair each element with its sort key, sort by key, and convert the result with to_va.
15
+ va = input.emap(%{|i|
16
+ sort_proc = proc{#{block_source}}
17
+ i.to_a.collect{|e| [sort_proc.call(e), e]}.sort_by{|e| e.first}}).to_va
18
+ # Adjust the 1-in-N ratio so that va.size/N lands between SORT_SAMPLING_MIN and SORT_SAMPLING_MAX; N is kept at 1 or more.
19
+ if va.size/sampling_ratio_1_to < Fairy::CONF.SORT_SAMPLING_MIN
20
+ #sampling_ratio_1_to = Fairy::CONF.SORT_SAMPLING_MIN.div(va.size)
21
+ sampling_ratio_1_to = va.size.div(Fairy::CONF.SORT_SAMPLING_MIN)
22
+ sampling_ratio_1_to = 1 if sampling_ratio_1_to.zero?
23
+ end
24
+ if va.size/sampling_ratio_1_to > Fairy::CONF.SORT_SAMPLING_MAX
25
+ sampling_ratio_1_to = va.size.div(Fairy::CONF.SORT_SAMPLING_MAX)
26
+ end
27
+ # Sample every N-th pair (counter i starts at 0 via :BEGIN), fetch with here, and keep the sorted keys.
28
+ Fairy::Log::debug(self, "SAMPLING: RATIO: 1/#{sampling_ratio_1_to}")
29
+ Fairy::Log::debug(self, "SAMPLING: VA SIZE: #{va.size}")
30
+ sample = fairy.input(va).select(%{|e| (i += 1) % #{sampling_ratio_1_to} == 0},
31
+ :BEGIN=>%{i = 0}).here.sort_by{|e| e.first}.map{|e| e.first}
32
+
33
+ Fairy::Log::debugf(self, "SAMPLING: SAMPLE: %s", sample.inspect)
34
+ # Choose pvn pivot keys: pvn-1 evenly spaced sample indexes plus the last sample (-1).
35
+ idxes = (1...pvn).collect{|i| (sample.size*i).div(pvn)}
36
+ idxes.push -1
37
+ pvs = sample.values_at(*idxes)
38
+ Fairy::Log::debug(self, "PVS: #{pvs.inspect}")
39
+ fairy.def_pool_variable(:pvs, pvs)
40
+ # Group pairs under the first pivot >= their key, or under the last pivot when none is.
41
+ div = fairy.input(va).merge_group_by(%{|e|
42
+ key = @Pool.pvs.find{|pv| e.first <= pv}
43
+ key ? key : @Pool.pvs.last},
44
+ :postqueuing_policy => {:queuing_class => :PoolQueue}
45
+
46
+ )
47
+ # NOTE(review): the block below looks like a merge of already-sorted streams, emitting the smallest key first; confirm st.pop semantics.
48
+ msort = div.seg_map(%{|i, block|
49
+ buf = i.map{|st| [st, st.pop.dc_deep_copy]}.select{|st, v|!v.nil?}.sort_by{|st, v| v.first}
50
+ while st_min = buf.shift
51
+ st, min = st_min
52
+ block.call min.last
53
+ next unless v = st.pop.dc_deep_copy # 取りあえずの対応
54
+ idx = buf.rindex{|st0, v0| v0.first <= v.first}
55
+ idx ? buf.insert(idx+1, [st, v]) : buf.unshift([st, v])
56
+ end})
57
+ # Finally sort the items handed to seg_eshuffle by their key; this is the value the block returns.
58
+ shuffle = msort.seg_eshuffle(%{|i| i.sort{|s1, s2| s1.key <=> s2.key}})
59
+ # shuffle = msort.eshuffle(%{|i| i.sort_by{|s1| Log::debug(self, s1.key.inspect); s1.key}})
60
+ end
61
+