fairy 0.6.0 → 0.6.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70)
  1. data/Makefile +1 -0
  2. data/bin/fairy +35 -5
  3. data/ext/extconf.rb +3 -0
  4. data/ext/fairy.c +180 -0
  5. data/ext/fairy.h +94 -0
  6. data/ext/fiber_mon.h +32 -0
  7. data/ext/fixnum-buffer.c +483 -0
  8. data/ext/p-group-by.c +529 -0
  9. data/ext/p-xgroup-by.c +467 -0
  10. data/ext/simple-hash.c +44 -0
  11. data/ext/string-buffer.c +286 -0
  12. data/ext/xmarshaled-queue.c +699 -0
  13. data/ext/xsized-queue.c +528 -0
  14. data/ext/xthread.h +65 -0
  15. data/fairy.gemspec +5 -2
  16. data/lib/fairy.rb +10 -1
  17. data/lib/fairy/client/group-by.rb +57 -2
  18. data/lib/fairy/client/here.rb +2 -1
  19. data/lib/fairy/controller.rb +25 -4
  20. data/lib/fairy/master.rb +17 -3
  21. data/lib/fairy/master/c-basic-group-by.rb +4 -2
  22. data/lib/fairy/master/c-cat.rb +3 -2
  23. data/lib/fairy/master/c-direct-product.rb +5 -3
  24. data/lib/fairy/master/c-filter.rb +5 -3
  25. data/lib/fairy/master/c-group-by.rb +13 -0
  26. data/lib/fairy/master/c-junction.rb +3 -2
  27. data/lib/fairy/master/c-seg-join.rb +3 -1
  28. data/lib/fairy/master/c-seg-shuffle.rb +3 -2
  29. data/lib/fairy/master/c-seg-split.rb +1 -1
  30. data/lib/fairy/master/c-seg-zip.rb +3 -1
  31. data/lib/fairy/master/c-sort.rb +7 -2
  32. data/lib/fairy/master/c-wc.rb +5 -3
  33. data/lib/fairy/node.rb +13 -2
  34. data/lib/fairy/node/p-barrier.rb +1 -1
  35. data/lib/fairy/node/p-basic-group-by.rb +22 -12
  36. data/lib/fairy/node/p-direct-product.rb +4 -2
  37. data/lib/fairy/node/p-filter.rb +8 -7
  38. data/lib/fairy/node/p-find.rb +2 -1
  39. data/lib/fairy/node/p-group-by.rb +17 -6
  40. data/lib/fairy/node/p-inject.rb +3 -2
  41. data/lib/fairy/node/p-output-file.rb +1 -1
  42. data/lib/fairy/node/p-seg-join.rb +2 -1
  43. data/lib/fairy/node/p-seg-zip.rb +2 -1
  44. data/lib/fairy/node/p-single-exportable.rb +3 -1
  45. data/lib/fairy/node/p-sort.rb +4 -2
  46. data/lib/fairy/node/p-task.rb +1 -1
  47. data/lib/fairy/node/p-wc.rb +5 -2
  48. data/lib/fairy/processor.rb +25 -18
  49. data/lib/fairy/share/block-source.rb +12 -2
  50. data/lib/fairy/share/conf.rb +35 -5
  51. data/lib/fairy/share/hash-simple-hash.rb +1 -1
  52. data/lib/fairy/share/log.rb +11 -4
  53. data/lib/fairy/share/pool-dictionary.rb +2 -1
  54. data/lib/fairy/share/port-marshaled-queue.rb +8 -1
  55. data/lib/fairy/share/port.rb +55 -45
  56. data/lib/fairy/share/reference.rb +2 -1
  57. data/lib/fairy/share/varray.rb +3 -1
  58. data/lib/fairy/share/vfile.rb +4 -2
  59. data/lib/fairy/version.rb +1 -1
  60. data/sample/sort.rb +69 -3
  61. data/spec/fairy8_spec.rb +1 -1
  62. data/test/testc.rb +380 -2
  63. data/tools/cap_recipe/Capfile +3 -3
  64. data/tools/fairy_conf_wizard.rb +375 -0
  65. data/tools/fairy_perf_graph.rb +15 -3
  66. data/tools/git-tag +1 -0
  67. data/tools/log-analysis.rb +59 -11
  68. metadata +33 -34
  69. data/ext/simple_hash/extconf.rb +0 -4
  70. data/ext/simple_hash/simple_hash.c +0 -42
data/lib/fairy/version.rb CHANGED
@@ -3,6 +3,6 @@
3
3
  # This file is auto generation.
4
4
  #
5
5
  module Fairy
6
- Version = "0.6.0-001"
6
+ Version = "0.6.5-001"
7
7
  end
8
8
 
data/sample/sort.rb CHANGED
@@ -7,6 +7,46 @@
7
7
  require 'rubygems'
8
8
  require 'fairy'
9
9
  require 'optparse'
10
+ require 'json'
11
+ require 'pp'
12
+
13
+
14
+ def json2hash(str_or_hash)
15
+ if str_or_hash.is_a?(String)
16
+ h = JSON.parse(str_or_hash)
17
+ else
18
+ h = str_or_hash
19
+ end
20
+
21
+ ret = {}
22
+
23
+ h.each{|key,val|
24
+ key = key.to_sym
25
+ if val.is_a?(String) && val.start_with?(":")
26
+ v = val.to_sym
27
+ elsif val.is_a?(Hash)
28
+ v = json2hash(val)
29
+ else
30
+ v = val
31
+ end
32
+ ret[key.to_sym] = v
33
+ }
34
+
35
+ ret
36
+ end
37
+
38
+ def recursive_merge!(hash_a, hash_b)
39
+ hash_a.merge!(hash_b){|key,val_a,val_b|
40
+ if val_a.is_a?(Hash) && val_b.is_a?(Hash)
41
+ recursive_merge!(val_a, val_b)
42
+ val_a
43
+ elsif val_a.class == val_b.class
44
+ val_b
45
+ else
46
+ raise "Incompatible types: left=#{val_a.class}, right=#{val_b.class}"
47
+ end
48
+ }
49
+ end
10
50
 
11
51
 
12
52
  opt = {:k => 0, :t => /\s+/}
@@ -16,6 +56,7 @@ op.on('-k', '--key=POS', Integer){|v| opt[:k] = v}
16
56
  op.on('-n', '--numeric-sort'){|v| opt[:n] = v}
17
57
  op.on('-r', '--reverse', nil, "This must be used with -n."){|v| opt[:r] = v}
18
58
  op.on('-t', '--separator=SEPARATOR'){|v| opt[:t] = v}
59
+ op.on('-c', '--config=JSON'){|v| opt[:c] = json2hash(v)}
19
60
  op.parse!(ARGV)
20
61
 
21
62
 
@@ -59,6 +100,7 @@ puts "[#{$$}] key: #{opt[:k]}"
59
100
  puts "[#{$$}] separator: #{sep}"
60
101
  puts "[#{$$}] num-sort: ON" if opt[:n]
61
102
  puts "[#{$$}] reverse: ON" if opt[:r]
103
+ puts "[#{$$}] sort config: #{opt[:c].inspect}" if opt[:c]
62
104
 
63
105
 
64
106
  fairy = Fairy::Fairy.new
@@ -77,14 +119,38 @@ maped = input.map(%{|ln|
77
119
  end
78
120
  })
79
121
 
122
+ sort_config = {
123
+ :postqueuing_policy => {
124
+ :queuing_class => :XMarshaledQueue,
125
+ :chunk_size => 10_000,
126
+ :buffers_cache_limit => 10
127
+ },
128
+ :postfilter_prequeuing_policy => {
129
+ :queuing_class => :XSizedQueue,
130
+ :queues_limit => 10
131
+ },
132
+ :buffering_policy => {
133
+ :buffering_class => "PGroupBy::XDirectMergeSortBuffer",
134
+ :threshold => 200_000,
135
+ :chunk_size => 1000
136
+ }
137
+ }
138
+
139
+ recursive_merge!(sort_config, opt[:c]) if opt[:c]
140
+
141
+ #pp sort_config
142
+ #exit
143
+
80
144
  if opt[:n] && opt[:r]
81
- sorted = maped.sort_by(%{|ary| -ary[0].to_i})
145
+ sort_proc = %{|ary| -ary[0].to_i}
82
146
  elsif opt[:n]
83
- sorted = maped.sort_by(%{|ary| ary[0].to_i})
147
+ sort_proc = %{|ary| ary[0].to_i}
84
148
  else
85
- sorted = maped.sort_by(%{|ary| ary[0]})
149
+ sort_proc = %{|ary| ary[0]}
86
150
  end
87
151
 
152
+ sorted = maped.sort_by(sort_proc, sort_config)
153
+
88
154
  formatted = sorted.map(%{|ary| ary[1]})
89
155
  formatted.output output_path
90
156
 
data/spec/fairy8_spec.rb CHANGED
@@ -30,7 +30,7 @@ describe Fairy do
30
30
 
31
31
  # exec
32
32
  it 'should print all node-names' do
33
- answer = @cluster["nodes"].sort
33
+ answer = @cluster["nodes"].map{|n| (n == "localhost") ? `hostname`.chomp : n}.sort
34
34
 
35
35
  result = []
36
36
  @fairy.exec(@cluster["nodes"].map{|n| "file://#{n}"}).map(%q{|uri|
data/test/testc.rb CHANGED
@@ -2635,6 +2635,7 @@ when "55.1.4"
2635
2635
  # "file://giant/home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_30M.txt"])
2636
2636
  f = f.mapf(%{|ln| begin
2637
2637
  ln.chomp.split
2638
+
2638
2639
  rescue
2639
2640
  []
2640
2641
  end
@@ -6328,8 +6329,8 @@ when "94.1"
6328
6329
 
6329
6330
  when "95"
6330
6331
 
6331
- f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_960M.txt"]*1)
6332
- # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
6332
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_960M.txt"]*1)
6333
+ f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
6333
6334
  # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
6334
6335
  f = f.mapf(%{|ln| begin
6335
6336
  ln.chomp.split
@@ -7106,6 +7107,383 @@ when "113.0"
7106
7107
  f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*2)
7107
7108
  f.here.each{|e| puts e}
7108
7109
 
7110
+ when "114"
7111
+ f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_960M.txt"])
7112
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_120M.txt"])
7113
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"])
7114
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
7115
+ f = f.mapf(%{|ln| begin
7116
+ ln.chomp.split
7117
+ rescue
7118
+ []
7119
+ end
7120
+ })
7121
+ f = f.group_by(%{|w| w},
7122
+ :no_segment => 1,
7123
+ :postqueuing_policy => {
7124
+ :queuing_class => :XMarshaledQueue,
7125
+ :chunk_size => 10000},
7126
+ :postfilter_prequeuing_policy => {
7127
+ :queuing_class => :XMarshaledQueue,
7128
+ :chunk_size => 10000},)
7129
+ f = f.map(%{|values| [values.key, values.size].join(" ")})
7130
+ # f.here.each{|e| puts e}
7131
+ f.output("test/test-114.vf")
7132
+
7133
+ when "114.NS"
7134
+ f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_960M.txt"])
7135
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_120M.txt"])
7136
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
7137
+ f = f.mapf(%{|ln| begin
7138
+ ln.chomp.split
7139
+ rescue
7140
+ []
7141
+ end
7142
+ })
7143
+ f = f.group_by(%{|w| w},
7144
+ :no_segment => 1,
7145
+ :postqueuing_policy => {:queuing_class => :XMarshaledQueue},
7146
+ :postfilter_prequeuing_policy => {:queuing_class => :XMarshaledQueue},
7147
+ :use_string_buffer => false)
7148
+ f = f.map(%{|values| [values.key, values.size].join(" ")})
7149
+ # f.here.each{|e| puts e}
7150
+ f.output("test/test-114.vf")
7151
+
7152
+
7153
+ when "114.F"
7154
+ f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"])
7155
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_120M.txt"])
7156
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
7157
+ f = f.mapf(%{|ln| begin
7158
+ ln.chomp.split
7159
+ rescue
7160
+ []
7161
+ end
7162
+ })
7163
+ f = f.group_by(%{|w| w},
7164
+ :no_segment => 1,
7165
+ :postqueuing_policy => {:queuing_class => :FileMarshaledQueue},
7166
+ :postfilter_prequeuing_policy => {:queuing_class => :FileMarshaledQueue},)
7167
+ f = f.map(%{|values| [values.key, values.size].join(" ")})
7168
+ # f.here.each{|e| puts e}
7169
+ f.output("test/test-114.vf")
7170
+
7171
+ when "114.0"
7172
+ f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"])
7173
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
7174
+ f = f.mapf(%{|ln| begin
7175
+ ln.chomp.split
7176
+ rescue
7177
+ []
7178
+ end
7179
+ },
7180
+ :postmapping_policy => :MPNewProcessorN,
7181
+ :postqueuing_policy => {:queuing_class => :XMarshaledQueue},
7182
+ )
7183
+ # f.here.each{|e| puts e}
7184
+ f.output("test/test-114.vf",
7185
+ :prequeuing_policy => {:queuing_class => :XMarshaledQueue},
7186
+ )
7187
+
7188
+ when "114.0.F"
7189
+ f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"])
7190
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
7191
+ f = f.mapf(%{|ln| begin
7192
+ ln.chomp.split
7193
+ rescue
7194
+ []
7195
+ end
7196
+ },
7197
+ :postmapping_policy => :MPNewProcessorN,
7198
+ :postqueuing_policy => {:queuing_class => :FileMarshaledQueue},
7199
+ )
7200
+ # f.here.each{|e| puts e}
7201
+ f.output("test/test-114.vf")
7202
+
7203
+ when "114.0.H"
7204
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"])
7205
+ f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
7206
+ f = f.mapf(%{|ln| begin
7207
+ ln.chomp.split
7208
+ rescue
7209
+ []
7210
+ end
7211
+ },
7212
+ :postmapping_policy => :MPNewProcessorN,
7213
+ :postqueuing_policy => {:queuing_class => :FileMarshaledQueue},
7214
+ )
7215
+ f.here.each{|e| puts e}
7216
+ # f.output("test/test-114.vf")
7217
+
7218
+ when "115"
7219
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_240M.txt"]*1)
7220
+ f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
7221
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
7222
+ f = f.mapf(%{|ln| begin
7223
+ ln.chomp.split
7224
+ rescue
7225
+ []
7226
+ end
7227
+ })
7228
+ f = f.group_by(%{|w| w},
7229
+ :no_segment => 1,
7230
+ :postqueuing_policy => {
7231
+ :queuing_class => :XMarshaledQueue,
7232
+ :chunk_size => 10000,
7233
+ :log_mstore => true,
7234
+ :buffers_cache_limit => 100},
7235
+ :postfilter_prequeuing_policy => {
7236
+ :queuing_class => :XMarshaledQueue,
7237
+ :chunk_size => 10000,
7238
+ :log_mstore => true,
7239
+ :buffers_cache_limit => 100},)
7240
+ f = f.map(%{|values| [values.key, values.size].join(" ")})
7241
+ f.output("test/test-pf.vf")
7242
+ # f.here.each{|e| puts e.join(" ")}
7243
+
7244
+ when "116"
7245
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_240M.txt"]*1)
7246
+ f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
7247
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
7248
+ f = f.mapf(%{|ln| begin
7249
+ ln.chomp.split
7250
+ rescue
7251
+ []
7252
+ end
7253
+ })
7254
+ f = f.xgroup_by(%{|w| w},
7255
+ :no_segment => 1,
7256
+ :postqueuing_policy => {
7257
+ :queuing_class => :XMarshaledQueue,
7258
+ :chunk_size => 10000,
7259
+ :log_mstore => true,
7260
+ :buffers_cache_limit => 100},
7261
+ :postfilter_prequeuing_policy => {
7262
+ :queuing_class => :XMarshaledQueue,
7263
+ :chunk_size => 10000,
7264
+ :log_mstore => true,
7265
+ :buffers_cache_limit => 100},)
7266
+ f = f.map(%{|values| [values.key, values.size].join(" ")})
7267
+ f.output("test/test-pf.vf")
7268
+ # f.here.each{|e| puts e.join(" ")}
7269
+
7270
+ when "117"
7271
+ f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_240M.txt"]*1)
7272
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
7273
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
7274
+ f = f.mapf(%{|ln| begin
7275
+ ln.chomp.split
7276
+ rescue
7277
+ []
7278
+ end
7279
+ })
7280
+ f = f.group_by(%{|w| w},
7281
+ :group_by => :XGroupBy,
7282
+ :no_segment => 1,
7283
+ :postqueuing_policy => {
7284
+ :queuing_class => :XMarshaledQueue,
7285
+ :chunk_size => 10000,
7286
+ :log_mstore => true,
7287
+ :buffers_cache_limit => 100},
7288
+ :buffering_policy => {
7289
+ :buffering_class => :DirectMergeSortBuffer,
7290
+ :threshold => 400_000},
7291
+ :postfilter_prequeuing_policy => {
7292
+ :queuing_class => :XMarshaledQueue,
7293
+ :chunk_size => 10000,
7294
+ :log_mstore => true,
7295
+ :buffers_cache_limit => 100},)
7296
+ f = f.map(%{|values| [values.key, values.size].join(" ")})
7297
+ f.output("test/test-pf.vf")
7298
+ # f.here.each{|e| puts e.join(" ")}
7299
+
7300
+ when "117.0"
7301
+ f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_240M.txt"]*1)
7302
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
7303
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
7304
+ f = f.mapf(%{|ln| begin
7305
+ ln.chomp.split
7306
+ rescue
7307
+ []
7308
+ end
7309
+ })
7310
+ f = f.group_by(%{|w| w}, :no_segment => 1)
7311
+ f = f.map(%{|values| [values.key, values.size].join(" ")})
7312
+ f.output("test/test-pf.vf")
7313
+ # f.here.each{|e| puts e.join(" ")}
7314
+
7315
+ when "118.0"
7316
+ f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_240M.txt"]*2)
7317
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
7318
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
7319
+ f = f.mapf(%{|ln| begin
7320
+ ln.chomp.split
7321
+ rescue
7322
+ []
7323
+ end
7324
+ })
7325
+ f = f.group_by(%{|w| w}, :no_segment => 2)
7326
+ f = f.map(%{|values| [values.key, values.size].join(" ")})
7327
+ f.output("test/test-pf.vf")
7328
+ # f.here.each{|e| puts e.join(" ")}
7329
+
7330
+ when "119"
7331
+ f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_240M.txt"]*1)
7332
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
7333
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
7334
+ f = f.mapf(%{|ln| begin
7335
+ ln.chomp.split
7336
+ rescue
7337
+ []
7338
+ end
7339
+ })
7340
+ f = f.group_by(%{|w| w},
7341
+ :group_by => :XGroupBy,
7342
+ :no_segment => 1,
7343
+ :postqueuing_policy => {
7344
+ :queuing_class => :XMarshaledQueue,
7345
+ :chunk_size => 10000,
7346
+ :buffers_cache_limit => 100},
7347
+ :buffering_policy => {
7348
+ :buffering_class => :XDirectMergeSortBuffer,
7349
+ :threshold => 400_000},
7350
+ :postfilter_prequeuing_policy => {
7351
+ :queuing_class => :XSizedQueue,
7352
+ :chunk_size => 10000,
7353
+ :cache_limit => 100},)
7354
+ f = f.map(%{|values| [values.key, values.size].join(" ")})
7355
+ f.output("test/test-pf.vf")
7356
+ # f.here.each{|e| puts e.join(" ")}
7357
+
7358
+
7359
+ when "200"
7360
+ f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
7361
+ f.mapf(%{|ln|
7362
+ nums = ln.split.map{|s| s.to_i}
7363
+ nums
7364
+ }).group_by(%{|n| n.to_s}).map(%q{|bag|
7365
+ "#{bag.key}\t#{bag.size}"
7366
+ }).output("/tmp/fairy_spec_testdata_multi.txt")
7367
+
7368
+ when "201"
7369
+
7370
+ f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_960M.txt"]*1)
7371
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
7372
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
7373
+ f = f.map(%{|ln| begin
7374
+ ln.chomp.split
7375
+ rescue
7376
+ []
7377
+ end
7378
+ })
7379
+ f.sort_by(%{|l| l[0] || ""},
7380
+ :no_segment=>12,
7381
+ :log_mstore=>true).map(%{|l| l.join("+")}).output("test/test.vf")
7382
+
7383
+ when "201.1"
7384
+
7385
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_960M.txt"]*1)
7386
+ f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
7387
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
7388
+ f = f.mapf(%{|ln| begin
7389
+ ln.chomp.split
7390
+ rescue
7391
+ []
7392
+ end
7393
+ })
7394
+ f.sort_by(%{|l| l}, :no_segment=>24).output("test/test.vf")
7395
+
7396
+ when "201.2"
7397
+
7398
+ f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_960M.txt"]*1)
7399
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
7400
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
7401
+ f = f.map(%{|ln| begin
7402
+ ln.chomp.split
7403
+ rescue
7404
+ []
7405
+ end
7406
+ })
7407
+ f.sort_by(%{|l| l[0] || ""},
7408
+ :no_segment => 12,
7409
+ :postqueuing_policy => {
7410
+ :queuing_class => :XMarshaledQueue,
7411
+ :chunk_size => 10_000,
7412
+ :buffers_cache_limit => 10,
7413
+ },
7414
+ :postfilter_prequeuing_policy => {
7415
+ :queuing_class => :XSizedQueue,
7416
+ :queues_limit => 10,
7417
+ },
7418
+ :buffering_policy => {
7419
+ :buffering_class => "PGroupBy::DirectMergeSortBuffer",
7420
+ :threshold => 400_000,
7421
+ :chunk_size => 1000,
7422
+ }).output("test/test-pf.vf")
7423
+
7424
+ when "201.3"
7425
+ f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_960M.txt"]*1)
7426
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
7427
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
7428
+ f = f.map(%{|ln| begin
7429
+ ln.chomp.split
7430
+ rescue
7431
+ []
7432
+ end
7433
+ })
7434
+ f.sort_by(%{|l| l[0] || ""},
7435
+ :no_segment => 12,
7436
+ :postqueuing_policy => {
7437
+ :queuing_class => :XMarshaledQueue,
7438
+ :chunk_size => 10_000,
7439
+ :buffers_cache_limit => 10,
7440
+ },
7441
+ :postfilter_prequeuing_policy => {
7442
+ :queuing_class => :XSizedQueue,
7443
+ :queues_limit => 10,
7444
+ },
7445
+ :buffering_policy => {
7446
+ :buffering_class => "PGroupBy::XDirectMergeSortBuffer",
7447
+ :threshold => 200_000,
7448
+ :chunk_size => 1000,
7449
+ }).output("test/test-pf.vf")
7450
+
7451
+ when "202", "BUG#316"
7452
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_960M.txt"]*1)
7453
+ f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
7454
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
7455
+ f = f.map(%{|ln| begin
7456
+ ln.chomp.split.size
7457
+ rescue
7458
+ 0
7459
+ end
7460
+ })
7461
+ f.sort_by(%{|n| n.to_i}).map(%q{|n| "#{n}"}).output("/tmp/fairy-test202.txt")
7462
+
7463
+ when "202.1", "BUG#316"
7464
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_960M.txt"]*1)
7465
+ f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
7466
+ # f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
7467
+ f = f.map(%{|ln| begin
7468
+ ln.chomp.split.size
7469
+ rescue
7470
+ 0
7471
+ end
7472
+ })
7473
+ f.sort_by(%{|n| -(n.to_i)}).map(%q{|n| "#{n}"}).output("/tmp/fairy-test202.txt")
7474
+
7475
+ when "203"
7476
+
7477
+ f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_240M.txt"]*10)
7478
+ #f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*10)
7479
+ i = 0
7480
+ f.here(:prequeuing_policy => {
7481
+ :queuing_class => :XSizedQueue,
7482
+ :queues_limit => 1, }).each{i += 1}
7483
+ p i
7484
+
7109
7485
  end
7110
7486
 
7111
7487
  # test
7488
+
7489
+