fairy 0.6.0 → 0.6.5
Sign up to get free protection for your applications and to get access to all the features.
- data/Makefile +1 -0
- data/bin/fairy +35 -5
- data/ext/extconf.rb +3 -0
- data/ext/fairy.c +180 -0
- data/ext/fairy.h +94 -0
- data/ext/fiber_mon.h +32 -0
- data/ext/fixnum-buffer.c +483 -0
- data/ext/p-group-by.c +529 -0
- data/ext/p-xgroup-by.c +467 -0
- data/ext/simple-hash.c +44 -0
- data/ext/string-buffer.c +286 -0
- data/ext/xmarshaled-queue.c +699 -0
- data/ext/xsized-queue.c +528 -0
- data/ext/xthread.h +65 -0
- data/fairy.gemspec +5 -2
- data/lib/fairy.rb +10 -1
- data/lib/fairy/client/group-by.rb +57 -2
- data/lib/fairy/client/here.rb +2 -1
- data/lib/fairy/controller.rb +25 -4
- data/lib/fairy/master.rb +17 -3
- data/lib/fairy/master/c-basic-group-by.rb +4 -2
- data/lib/fairy/master/c-cat.rb +3 -2
- data/lib/fairy/master/c-direct-product.rb +5 -3
- data/lib/fairy/master/c-filter.rb +5 -3
- data/lib/fairy/master/c-group-by.rb +13 -0
- data/lib/fairy/master/c-junction.rb +3 -2
- data/lib/fairy/master/c-seg-join.rb +3 -1
- data/lib/fairy/master/c-seg-shuffle.rb +3 -2
- data/lib/fairy/master/c-seg-split.rb +1 -1
- data/lib/fairy/master/c-seg-zip.rb +3 -1
- data/lib/fairy/master/c-sort.rb +7 -2
- data/lib/fairy/master/c-wc.rb +5 -3
- data/lib/fairy/node.rb +13 -2
- data/lib/fairy/node/p-barrier.rb +1 -1
- data/lib/fairy/node/p-basic-group-by.rb +22 -12
- data/lib/fairy/node/p-direct-product.rb +4 -2
- data/lib/fairy/node/p-filter.rb +8 -7
- data/lib/fairy/node/p-find.rb +2 -1
- data/lib/fairy/node/p-group-by.rb +17 -6
- data/lib/fairy/node/p-inject.rb +3 -2
- data/lib/fairy/node/p-output-file.rb +1 -1
- data/lib/fairy/node/p-seg-join.rb +2 -1
- data/lib/fairy/node/p-seg-zip.rb +2 -1
- data/lib/fairy/node/p-single-exportable.rb +3 -1
- data/lib/fairy/node/p-sort.rb +4 -2
- data/lib/fairy/node/p-task.rb +1 -1
- data/lib/fairy/node/p-wc.rb +5 -2
- data/lib/fairy/processor.rb +25 -18
- data/lib/fairy/share/block-source.rb +12 -2
- data/lib/fairy/share/conf.rb +35 -5
- data/lib/fairy/share/hash-simple-hash.rb +1 -1
- data/lib/fairy/share/log.rb +11 -4
- data/lib/fairy/share/pool-dictionary.rb +2 -1
- data/lib/fairy/share/port-marshaled-queue.rb +8 -1
- data/lib/fairy/share/port.rb +55 -45
- data/lib/fairy/share/reference.rb +2 -1
- data/lib/fairy/share/varray.rb +3 -1
- data/lib/fairy/share/vfile.rb +4 -2
- data/lib/fairy/version.rb +1 -1
- data/sample/sort.rb +69 -3
- data/spec/fairy8_spec.rb +1 -1
- data/test/testc.rb +380 -2
- data/tools/cap_recipe/Capfile +3 -3
- data/tools/fairy_conf_wizard.rb +375 -0
- data/tools/fairy_perf_graph.rb +15 -3
- data/tools/git-tag +1 -0
- data/tools/log-analysis.rb +59 -11
- metadata +33 -34
- data/ext/simple_hash/extconf.rb +0 -4
- data/ext/simple_hash/simple_hash.c +0 -42
data/lib/fairy/version.rb
CHANGED
data/sample/sort.rb
CHANGED
@@ -7,6 +7,46 @@
|
|
7
7
|
require 'rubygems'
|
8
8
|
require 'fairy'
|
9
9
|
require 'optparse'
|
10
|
+
require 'json'
|
11
|
+
require 'pp'
|
12
|
+
|
13
|
+
|
14
|
+
# Converts a JSON configuration, given either as JSON text or as an
# already-parsed Hash, into a Hash whose keys are Symbols.
#
# Value handling:
# * a nested Hash is converted recursively;
# * a String written with a leading colon (e.g. ":XMarshaledQueue") becomes
#   the Symbol it spells (:XMarshaledQueue);
# * anything else (numbers, booleans, arrays, plain strings, a bare ":")
#   is kept unchanged.
#
# str_or_hash:: a String holding a JSON object, or a Hash.
#
# Returns a new Hash; the argument itself is not modified.
# A String argument that is not valid JSON makes JSON.parse raise.
def json2hash(str_or_hash)
  parsed = str_or_hash.is_a?(String) ? JSON.parse(str_or_hash) : str_or_hash

  ret = {}
  parsed.each{|key, val|
    ret[key.to_sym] =
      if val.is_a?(Hash)
        json2hash(val)
      elsif val.is_a?(String) && val.length > 1 && val.start_with?(":")
        # Strip the colon first: ":foo".to_sym would yield :":foo", not :foo.
        val[1..-1].to_sym
      else
        val
      end
  }
  ret
end
|
37
|
+
|
38
|
+
# Deep-merges hash_b into hash_a in place and returns hash_a.
#
# Keys present only in hash_b are copied over. For a key present in both:
# * when both values are Hashes they are merged recursively (the existing
#   Hash object in hash_a is kept and updated);
# * when both values are of a compatible type the value from hash_b wins.
#   Compatible means: same class, both booleans (true/false are instances
#   of different classes), or both Integers (covers interpreters that use
#   distinct integer classes for small and large values);
# * otherwise a RuntimeError "Incompatible types: ..." is raised, so that a
#   mistyped override cannot silently replace a default.
#
# hash_a:: the Hash to update (mutated).
# hash_b:: the Hash providing overrides (not modified).
def recursive_merge!(hash_a, hash_b)
  hash_a.merge!(hash_b){|_key, val_a, val_b|
    if val_a.is_a?(Hash) && val_b.is_a?(Hash)
      # merge! returns its receiver, i.e. the updated val_a.
      recursive_merge!(val_a, val_b)
    elsif val_a.class == val_b.class ||
          (val_a.is_a?(Integer) && val_b.is_a?(Integer)) ||
          ([true, false].include?(val_a) && [true, false].include?(val_b))
      val_b
    else
      raise "Incompatible types: left=#{val_a.class}, right=#{val_b.class}"
    end
  }
end
|
10
50
|
|
11
51
|
|
12
52
|
opt = {:k => 0, :t => /\s+/}
|
@@ -16,6 +56,7 @@ op.on('-k', '--key=POS', Integer){|v| opt[:k] = v}
|
|
16
56
|
op.on('-n', '--numeric-sort'){|v| opt[:n] = v}
|
17
57
|
op.on('-r', '--reverse', nil, "This must be used with -n."){|v| opt[:r] = v}
|
18
58
|
op.on('-t', '--separator=SEPARATOR'){|v| opt[:t] = v}
|
59
|
+
op.on('-c', '--config=JSON'){|v| opt[:c] = json2hash(v)}
|
19
60
|
op.parse!(ARGV)
|
20
61
|
|
21
62
|
|
@@ -59,6 +100,7 @@ puts "[#{$$}] key: #{opt[:k]}"
|
|
59
100
|
puts "[#{$$}] separator: #{sep}"
|
60
101
|
puts "[#{$$}] num-sort: ON" if opt[:n]
|
61
102
|
puts "[#{$$}] reverse: ON" if opt[:r]
|
103
|
+
puts "[#{$$}] sort config: #{opt[:c].inspect}" if opt[:c]
|
62
104
|
|
63
105
|
|
64
106
|
fairy = Fairy::Fairy.new
|
@@ -77,14 +119,38 @@ maped = input.map(%{|ln|
|
|
77
119
|
end
|
78
120
|
})
|
79
121
|
|
122
|
+
sort_config = {
|
123
|
+
:postqueuing_policy => {
|
124
|
+
:queuing_class => :XMarshaledQueue,
|
125
|
+
:chunk_size => 10_000,
|
126
|
+
:buffers_cache_limit => 10
|
127
|
+
},
|
128
|
+
:postfilter_prequeuing_policy => {
|
129
|
+
:queuing_class => :XSizedQueue,
|
130
|
+
:queues_limit => 10
|
131
|
+
},
|
132
|
+
:buffering_policy => {
|
133
|
+
:buffering_class => "PGroupBy::XDirectMergeSortBuffer",
|
134
|
+
:threshold => 200_000,
|
135
|
+
:chunk_size => 1000
|
136
|
+
}
|
137
|
+
}
|
138
|
+
|
139
|
+
recursive_merge!(sort_config, opt[:c]) if opt[:c]
|
140
|
+
|
141
|
+
#pp sort_config
|
142
|
+
#exit
|
143
|
+
|
80
144
|
if opt[:n] && opt[:r]
|
81
|
-
|
145
|
+
sort_proc = %{|ary| -ary[0].to_i}
|
82
146
|
elsif opt[:n]
|
83
|
-
|
147
|
+
sort_proc = %{|ary| ary[0].to_i}
|
84
148
|
else
|
85
|
-
|
149
|
+
sort_proc = %{|ary| ary[0]}
|
86
150
|
end
|
87
151
|
|
152
|
+
sorted = maped.sort_by(sort_proc, sort_config)
|
153
|
+
|
88
154
|
formatted = sorted.map(%{|ary| ary[1]})
|
89
155
|
formatted.output output_path
|
90
156
|
|
data/spec/fairy8_spec.rb
CHANGED
@@ -30,7 +30,7 @@ describe Fairy do
|
|
30
30
|
|
31
31
|
# exec
|
32
32
|
it 'should print all node-names' do
|
33
|
-
answer = @cluster["nodes"].sort
|
33
|
+
answer = @cluster["nodes"].map{|n| (n == "localhost") ? `hostname`.chomp : n}.sort
|
34
34
|
|
35
35
|
result = []
|
36
36
|
@fairy.exec(@cluster["nodes"].map{|n| "file://#{n}"}).map(%q{|uri|
|
data/test/testc.rb
CHANGED
@@ -2635,6 +2635,7 @@ when "55.1.4"
|
|
2635
2635
|
# "file://giant/home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_30M.txt"])
|
2636
2636
|
f = f.mapf(%{|ln| begin
|
2637
2637
|
ln.chomp.split
|
2638
|
+
|
2638
2639
|
rescue
|
2639
2640
|
[]
|
2640
2641
|
end
|
@@ -6328,8 +6329,8 @@ when "94.1"
|
|
6328
6329
|
|
6329
6330
|
when "95"
|
6330
6331
|
|
6331
|
-
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_960M.txt"]*1)
|
6332
|
-
|
6332
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_960M.txt"]*1)
|
6333
|
+
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
|
6333
6334
|
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
|
6334
6335
|
f = f.mapf(%{|ln| begin
|
6335
6336
|
ln.chomp.split
|
@@ -7106,6 +7107,383 @@ when "113.0"
|
|
7106
7107
|
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*2)
|
7107
7108
|
f.here.each{|e| puts e}
|
7108
7109
|
|
7110
|
+
when "114"
|
7111
|
+
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_960M.txt"])
|
7112
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_120M.txt"])
|
7113
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"])
|
7114
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
|
7115
|
+
f = f.mapf(%{|ln| begin
|
7116
|
+
ln.chomp.split
|
7117
|
+
rescue
|
7118
|
+
[]
|
7119
|
+
end
|
7120
|
+
})
|
7121
|
+
f = f.group_by(%{|w| w},
|
7122
|
+
:no_segment => 1,
|
7123
|
+
:postqueuing_policy => {
|
7124
|
+
:queuing_class => :XMarshaledQueue,
|
7125
|
+
:chunk_size => 10000},
|
7126
|
+
:postfilter_prequeuing_policy => {
|
7127
|
+
:queuing_class => :XMarshaledQueue,
|
7128
|
+
:chunk_size => 10000},)
|
7129
|
+
f = f.map(%{|values| [values.key, values.size].join(" ")})
|
7130
|
+
# f.here.each{|e| puts e}
|
7131
|
+
f.output("test/test-114.vf")
|
7132
|
+
|
7133
|
+
when "114.NS"
|
7134
|
+
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_960M.txt"])
|
7135
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_120M.txt"])
|
7136
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
|
7137
|
+
f = f.mapf(%{|ln| begin
|
7138
|
+
ln.chomp.split
|
7139
|
+
rescue
|
7140
|
+
[]
|
7141
|
+
end
|
7142
|
+
})
|
7143
|
+
f = f.group_by(%{|w| w},
|
7144
|
+
:no_segment => 1,
|
7145
|
+
:postqueuing_policy => {:queuing_class => :XMarshaledQueue},
|
7146
|
+
:postfilter_prequeuing_policy => {:queuing_class => :XMarshaledQueue},
|
7147
|
+
:use_string_buffer => false)
|
7148
|
+
f = f.map(%{|values| [values.key, values.size].join(" ")})
|
7149
|
+
# f.here.each{|e| puts e}
|
7150
|
+
f.output("test/test-114.vf")
|
7151
|
+
|
7152
|
+
|
7153
|
+
when "114.F"
|
7154
|
+
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"])
|
7155
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_120M.txt"])
|
7156
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
|
7157
|
+
f = f.mapf(%{|ln| begin
|
7158
|
+
ln.chomp.split
|
7159
|
+
rescue
|
7160
|
+
[]
|
7161
|
+
end
|
7162
|
+
})
|
7163
|
+
f = f.group_by(%{|w| w},
|
7164
|
+
:no_segment => 1,
|
7165
|
+
:postqueuing_policy => {:queuing_class => :FileMarshaledQueue},
|
7166
|
+
:postfilter_prequeuing_policy => {:queuing_class => :FileMarshaledQueue},)
|
7167
|
+
f = f.map(%{|values| [values.key, values.size].join(" ")})
|
7168
|
+
# f.here.each{|e| puts e}
|
7169
|
+
f.output("test/test-114.vf")
|
7170
|
+
|
7171
|
+
when "114.0"
|
7172
|
+
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"])
|
7173
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
|
7174
|
+
f = f.mapf(%{|ln| begin
|
7175
|
+
ln.chomp.split
|
7176
|
+
rescue
|
7177
|
+
[]
|
7178
|
+
end
|
7179
|
+
},
|
7180
|
+
:postmapping_policy => :MPNewProcessorN,
|
7181
|
+
:postqueuing_policy => {:queuing_class => :XMarshaledQueue},
|
7182
|
+
)
|
7183
|
+
# f.here.each{|e| puts e}
|
7184
|
+
f.output("test/test-114.vf",
|
7185
|
+
:prequeuing_policy => {:queuing_class => :XMarshaledQueue},
|
7186
|
+
)
|
7187
|
+
|
7188
|
+
when "114.0.F"
|
7189
|
+
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"])
|
7190
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
|
7191
|
+
f = f.mapf(%{|ln| begin
|
7192
|
+
ln.chomp.split
|
7193
|
+
rescue
|
7194
|
+
[]
|
7195
|
+
end
|
7196
|
+
},
|
7197
|
+
:postmapping_policy => :MPNewProcessorN,
|
7198
|
+
:postqueuing_policy => {:queuing_class => :FileMarshaledQueue},
|
7199
|
+
)
|
7200
|
+
# f.here.each{|e| puts e}
|
7201
|
+
f.output("test/test-114.vf")
|
7202
|
+
|
7203
|
+
when "114.0.H"
|
7204
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"])
|
7205
|
+
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
|
7206
|
+
f = f.mapf(%{|ln| begin
|
7207
|
+
ln.chomp.split
|
7208
|
+
rescue
|
7209
|
+
[]
|
7210
|
+
end
|
7211
|
+
},
|
7212
|
+
:postmapping_policy => :MPNewProcessorN,
|
7213
|
+
:postqueuing_policy => {:queuing_class => :FileMarshaledQueue},
|
7214
|
+
)
|
7215
|
+
f.here.each{|e| puts e}
|
7216
|
+
# f.output("test/test-114.vf")
|
7217
|
+
|
7218
|
+
when "115"
|
7219
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_240M.txt"]*1)
|
7220
|
+
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
|
7221
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
|
7222
|
+
f = f.mapf(%{|ln| begin
|
7223
|
+
ln.chomp.split
|
7224
|
+
rescue
|
7225
|
+
[]
|
7226
|
+
end
|
7227
|
+
})
|
7228
|
+
f = f.group_by(%{|w| w},
|
7229
|
+
:no_segment => 1,
|
7230
|
+
:postqueuing_policy => {
|
7231
|
+
:queuing_class => :XMarshaledQueue,
|
7232
|
+
:chunk_size => 10000,
|
7233
|
+
:log_mstore => true,
|
7234
|
+
:buffers_cache_limit => 100},
|
7235
|
+
:postfilter_prequeuing_policy => {
|
7236
|
+
:queuing_class => :XMarshaledQueue,
|
7237
|
+
:chunk_size => 10000,
|
7238
|
+
:log_mstore => true,
|
7239
|
+
:buffers_cache_limit => 100},)
|
7240
|
+
f = f.map(%{|values| [values.key, values.size].join(" ")})
|
7241
|
+
f.output("test/test-pf.vf")
|
7242
|
+
# f.here.each{|e| puts e.join(" ")}
|
7243
|
+
|
7244
|
+
when "116"
|
7245
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_240M.txt"]*1)
|
7246
|
+
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
|
7247
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
|
7248
|
+
f = f.mapf(%{|ln| begin
|
7249
|
+
ln.chomp.split
|
7250
|
+
rescue
|
7251
|
+
[]
|
7252
|
+
end
|
7253
|
+
})
|
7254
|
+
f = f.xgroup_by(%{|w| w},
|
7255
|
+
:no_segment => 1,
|
7256
|
+
:postqueuing_policy => {
|
7257
|
+
:queuing_class => :XMarshaledQueue,
|
7258
|
+
:chunk_size => 10000,
|
7259
|
+
:log_mstore => true,
|
7260
|
+
:buffers_cache_limit => 100},
|
7261
|
+
:postfilter_prequeuing_policy => {
|
7262
|
+
:queuing_class => :XMarshaledQueue,
|
7263
|
+
:chunk_size => 10000,
|
7264
|
+
:log_mstore => true,
|
7265
|
+
:buffers_cache_limit => 100},)
|
7266
|
+
f = f.map(%{|values| [values.key, values.size].join(" ")})
|
7267
|
+
f.output("test/test-pf.vf")
|
7268
|
+
# f.here.each{|e| puts e.join(" ")}
|
7269
|
+
|
7270
|
+
when "117"
|
7271
|
+
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_240M.txt"]*1)
|
7272
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
|
7273
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
|
7274
|
+
f = f.mapf(%{|ln| begin
|
7275
|
+
ln.chomp.split
|
7276
|
+
rescue
|
7277
|
+
[]
|
7278
|
+
end
|
7279
|
+
})
|
7280
|
+
f = f.group_by(%{|w| w},
|
7281
|
+
:group_by => :XGroupBy,
|
7282
|
+
:no_segment => 1,
|
7283
|
+
:postqueuing_policy => {
|
7284
|
+
:queuing_class => :XMarshaledQueue,
|
7285
|
+
:chunk_size => 10000,
|
7286
|
+
:log_mstore => true,
|
7287
|
+
:buffers_cache_limit => 100},
|
7288
|
+
:buffering_policy => {
|
7289
|
+
:buffering_class => :DirectMergeSortBuffer,
|
7290
|
+
:threshold => 400_000},
|
7291
|
+
:postfilter_prequeuing_policy => {
|
7292
|
+
:queuing_class => :XMarshaledQueue,
|
7293
|
+
:chunk_size => 10000,
|
7294
|
+
:log_mstore => true,
|
7295
|
+
:buffers_cache_limit => 100},)
|
7296
|
+
f = f.map(%{|values| [values.key, values.size].join(" ")})
|
7297
|
+
f.output("test/test-pf.vf")
|
7298
|
+
# f.here.each{|e| puts e.join(" ")}
|
7299
|
+
|
7300
|
+
when "117.0"
|
7301
|
+
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_240M.txt"]*1)
|
7302
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
|
7303
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
|
7304
|
+
f = f.mapf(%{|ln| begin
|
7305
|
+
ln.chomp.split
|
7306
|
+
rescue
|
7307
|
+
[]
|
7308
|
+
end
|
7309
|
+
})
|
7310
|
+
f = f.group_by(%{|w| w}, :no_segment => 1)
|
7311
|
+
f = f.map(%{|values| [values.key, values.size].join(" ")})
|
7312
|
+
f.output("test/test-pf.vf")
|
7313
|
+
# f.here.each{|e| puts e.join(" ")}
|
7314
|
+
|
7315
|
+
when "118.0"
|
7316
|
+
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_240M.txt"]*2)
|
7317
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
|
7318
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
|
7319
|
+
f = f.mapf(%{|ln| begin
|
7320
|
+
ln.chomp.split
|
7321
|
+
rescue
|
7322
|
+
[]
|
7323
|
+
end
|
7324
|
+
})
|
7325
|
+
f = f.group_by(%{|w| w}, :no_segment => 2)
|
7326
|
+
f = f.map(%{|values| [values.key, values.size].join(" ")})
|
7327
|
+
f.output("test/test-pf.vf")
|
7328
|
+
# f.here.each{|e| puts e.join(" ")}
|
7329
|
+
|
7330
|
+
when "119"
|
7331
|
+
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_240M.txt"]*1)
|
7332
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
|
7333
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
|
7334
|
+
f = f.mapf(%{|ln| begin
|
7335
|
+
ln.chomp.split
|
7336
|
+
rescue
|
7337
|
+
[]
|
7338
|
+
end
|
7339
|
+
})
|
7340
|
+
f = f.group_by(%{|w| w},
|
7341
|
+
:group_by => :XGroupBy,
|
7342
|
+
:no_segment => 1,
|
7343
|
+
:postqueuing_policy => {
|
7344
|
+
:queuing_class => :XMarshaledQueue,
|
7345
|
+
:chunk_size => 10000,
|
7346
|
+
:buffers_cache_limit => 100},
|
7347
|
+
:buffering_policy => {
|
7348
|
+
:buffering_class => :XDirectMergeSortBuffer,
|
7349
|
+
:threshold => 400_000},
|
7350
|
+
:postfilter_prequeuing_policy => {
|
7351
|
+
:queuing_class => :XSizedQueue,
|
7352
|
+
:chunk_size => 10000,
|
7353
|
+
:cache_limit => 100},)
|
7354
|
+
f = f.map(%{|values| [values.key, values.size].join(" ")})
|
7355
|
+
f.output("test/test-pf.vf")
|
7356
|
+
# f.here.each{|e| puts e.join(" ")}
|
7357
|
+
|
7358
|
+
|
7359
|
+
when "200"
|
7360
|
+
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
|
7361
|
+
f.mapf(%{|ln|
|
7362
|
+
nums = ln.split.map{|s| s.to_i}
|
7363
|
+
nums
|
7364
|
+
}).group_by(%{|n| n.to_s}).map(%q{|bag|
|
7365
|
+
"#{bag.key}\t#{bag.size}"
|
7366
|
+
}).output("/tmp/fairy_spec_testdata_multi.txt")
|
7367
|
+
|
7368
|
+
when "201"
|
7369
|
+
|
7370
|
+
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_960M.txt"]*1)
|
7371
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
|
7372
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
|
7373
|
+
f = f.map(%{|ln| begin
|
7374
|
+
ln.chomp.split
|
7375
|
+
rescue
|
7376
|
+
[]
|
7377
|
+
end
|
7378
|
+
})
|
7379
|
+
f.sort_by(%{|l| l[0] || ""},
|
7380
|
+
:no_segment=>12,
|
7381
|
+
:log_mstore=>true).map(%{|l| l.join("+")}).output("test/test.vf")
|
7382
|
+
|
7383
|
+
when "201.1"
|
7384
|
+
|
7385
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_960M.txt"]*1)
|
7386
|
+
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
|
7387
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
|
7388
|
+
f = f.mapf(%{|ln| begin
|
7389
|
+
ln.chomp.split
|
7390
|
+
rescue
|
7391
|
+
[]
|
7392
|
+
end
|
7393
|
+
})
|
7394
|
+
f.sort_by(%{|l| l}, :no_segment=>24).output("test/test.vf")
|
7395
|
+
|
7396
|
+
when "201.2"
|
7397
|
+
|
7398
|
+
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_960M.txt"]*1)
|
7399
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
|
7400
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
|
7401
|
+
f = f.map(%{|ln| begin
|
7402
|
+
ln.chomp.split
|
7403
|
+
rescue
|
7404
|
+
[]
|
7405
|
+
end
|
7406
|
+
})
|
7407
|
+
f.sort_by(%{|l| l[0] || ""},
|
7408
|
+
:no_segment => 12,
|
7409
|
+
:postqueuing_policy => {
|
7410
|
+
:queuing_class => :XMarshaledQueue,
|
7411
|
+
:chunk_size => 10_000,
|
7412
|
+
:buffers_cache_limit => 10,
|
7413
|
+
},
|
7414
|
+
:postfilter_prequeuing_policy => {
|
7415
|
+
:queuing_class => :XSizedQueue,
|
7416
|
+
:queues_limit => 10,
|
7417
|
+
},
|
7418
|
+
:buffering_policy => {
|
7419
|
+
:buffering_class => "PGroupBy::DirectMergeSortBuffer",
|
7420
|
+
:threshold => 400_000,
|
7421
|
+
:chunk_size => 1000,
|
7422
|
+
}).output("test/test-pf.vf")
|
7423
|
+
|
7424
|
+
when "201.3"
|
7425
|
+
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_960M.txt"]*1)
|
7426
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
|
7427
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
|
7428
|
+
f = f.map(%{|ln| begin
|
7429
|
+
ln.chomp.split
|
7430
|
+
rescue
|
7431
|
+
[]
|
7432
|
+
end
|
7433
|
+
})
|
7434
|
+
f.sort_by(%{|l| l[0] || ""},
|
7435
|
+
:no_segment => 12,
|
7436
|
+
:postqueuing_policy => {
|
7437
|
+
:queuing_class => :XMarshaledQueue,
|
7438
|
+
:chunk_size => 10_000,
|
7439
|
+
:buffers_cache_limit => 10,
|
7440
|
+
},
|
7441
|
+
:postfilter_prequeuing_policy => {
|
7442
|
+
:queuing_class => :XSizedQueue,
|
7443
|
+
:queues_limit => 10,
|
7444
|
+
},
|
7445
|
+
:buffering_policy => {
|
7446
|
+
:buffering_class => "PGroupBy::XDirectMergeSortBuffer",
|
7447
|
+
:threshold => 200_000,
|
7448
|
+
:chunk_size => 1000,
|
7449
|
+
}).output("test/test-pf.vf")
|
7450
|
+
|
7451
|
+
when "202", "BUG#316"
|
7452
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_960M.txt"]*1)
|
7453
|
+
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
|
7454
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
|
7455
|
+
f = f.map(%{|ln| begin
|
7456
|
+
ln.chomp.split.size
|
7457
|
+
rescue
|
7458
|
+
0
|
7459
|
+
end
|
7460
|
+
})
|
7461
|
+
f.sort_by(%{|n| n.to_i}).map(%q{|n| "#{n}"}).output("/tmp/fairy-test202.txt")
|
7462
|
+
|
7463
|
+
when "202.1", "BUG#316"
|
7464
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_960M.txt"]*1)
|
7465
|
+
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*1)
|
7466
|
+
# f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/fairy.cat"]*1)
|
7467
|
+
f = f.map(%{|ln| begin
|
7468
|
+
ln.chomp.split.size
|
7469
|
+
rescue
|
7470
|
+
0
|
7471
|
+
end
|
7472
|
+
})
|
7473
|
+
f.sort_by(%{|n| -(n.to_i)}).map(%q{|n| "#{n}"}).output("/tmp/fairy-test202.txt")
|
7474
|
+
|
7475
|
+
when "203"
|
7476
|
+
|
7477
|
+
f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_240M.txt"]*10)
|
7478
|
+
#f = fairy.input(["file://emperor//home/keiju/public/a.research/fairy/git/fairy/sample/wc/data/sample_10M.txt"]*10)
|
7479
|
+
i = 0
|
7480
|
+
f.here(:prequeuing_policy => {
|
7481
|
+
:queuing_class => :XSizedQueue,
|
7482
|
+
:queues_limit => 1, }).each{i += 1}
|
7483
|
+
p i
|
7484
|
+
|
7109
7485
|
end
|
7110
7486
|
|
7111
7487
|
# test
|
7488
|
+
|
7489
|
+
|