pmux 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +8 -0
- data/README.md +36 -0
- data/Rakefile +4 -0
- data/bin/pmux +5 -0
- data/lib/pmux/application.rb +166 -0
- data/lib/pmux/cleaner.rb +28 -0
- data/lib/pmux/fiber18.rb +64 -0
- data/lib/pmux/fixcmd.rb +25 -0
- data/lib/pmux/gatherer.rb +23 -0
- data/lib/pmux/handler.rb +262 -0
- data/lib/pmux/job.rb +101 -0
- data/lib/pmux/joblogger.rb +46 -0
- data/lib/pmux/mapper.rb +151 -0
- data/lib/pmux/mros.rb +207 -0
- data/lib/pmux/multi_session.rb +309 -0
- data/lib/pmux/pipeio.rb +19 -0
- data/lib/pmux/plugin.rb +23 -0
- data/lib/pmux/q.rb +3 -0
- data/lib/pmux/reducer.rb +90 -0
- data/lib/pmux/storage_adapter.rb +105 -0
- data/lib/pmux/task_dispatcher.rb +167 -0
- data/lib/pmux/task_queue.rb +11 -0
- data/lib/pmux/task_scheduler.rb +166 -0
- data/lib/pmux/util_daemon.rb +18 -0
- data/lib/pmux/util_logger.rb +137 -0
- data/lib/pmux/version.rb +3 -0
- data/lib/pmux/worker.rb +91 -0
- data/lib/pmux/writer.rb +19 -0
- data/lib/pmux.rb +27 -0
- data/pmux.gemspec +24 -0
- data/test/mock_mros.rb +284 -0
- data/test/mock_pipeio.rb +26 -0
- data/test/mock_world.rb +193 -0
- data/test/mock_xattr.rb +10 -0
- data/test/runner.rb +10 -0
- data/test/test_application.rb +13 -0
- data/test/test_fixcmd.rb +17 -0
- data/test/test_handler.rb +15 -0
- data/test/test_i_mapreduce.rb +169 -0
- data/test/test_i_mros.rb +28 -0
- data/test/test_i_msession.rb +27 -0
- data/test/test_job.rb +35 -0
- data/test/test_joblogger.rb +16 -0
- data/test/test_mapper.rb +60 -0
- data/test/test_pipeio.rb +24 -0
- data/test/test_storage_adapter.rb +63 -0
- data/test/test_task_queue.rb +87 -0
- data/test/test_task_scheduler.rb +39 -0
- data/test/txt/0.log +105 -0
- data/test/txt/1.log +105 -0
- data/test/txt/2.log +105 -0
- data/test/txt/3.log +105 -0
- data/test/txt/4.log +105 -0
- data/test/txt/5.log +105 -0
- data/test/txt/6.log +105 -0
- data/test/txt/7.log +105 -0
- data/test/txt/8.log +105 -0
- data/test/unittest_helper.rb +57 -0
- metadata +153 -0
data/lib/pmux/multi_session.rb
ADDED
@@ -0,0 +1,309 @@
require 'net/scp'

class MR::Future
  attr_accessor :addr
end

module Pmux
  class SessionWrapper
    attr_reader :addr
    attr_accessor :ssh, :scp_session_count

    def initialize addr
      @addr = addr
      @ssh = nil
      @scp = nil
      @scp_session_count = 0
    end

    def scp
      @ssh ? (@scp ||= @ssh.scp) : nil
    end
  end

  class MultiSession
    attr_reader :loop
    attr_accessor :timeout

    def initialize addrs, options={}, loop=nil
      @addrs = addrs
      @options = options
      @user = options[:user]
      @loop = loop || Coolio::Loop.default
      Net::SSH::Compat.coolio_loop = @loop
      @timeout = 3600 #FIXME

      @err_addrs = []
      @sessions = {}
      @channels = {}

      @scptable = {}
      @scpid = 0
      @scps = {}
      @scp_queue = {}
      @buffers = {}

      @on_error = nil
    end

    def connect_to_addr addr, cmd=nil
      return if @sessions[addr]
      @sessions[addr] = SessionWrapper.new addr
      @loop.start_ssh(addr, @user) {|ssh|
        if ssh.respond_to? :open_channel
          host = ssh.host
          @sessions[host].ssh = ssh
          if cmd
            channel = ssh.open_channel {|ch|
              ch.exec(cmd) {|ch, success|
                raise RuntimeError unless success
                @channels[host] = ch

                if (ary = @buffers[host]) and !ary.empty?
                  ch.send_data ary.join('')
                  ary.clear
                end

                setup_channel ch
              }
            }
          end

          if (queue = @scp_queue[host]) and !queue.empty?
            queue2 = queue.dup
            queue.clear
            scp = @sessions[host].scp
            queue2.each {|updown, qf, qaddr, qremote, qlocal, qoptions|
              scp_download_sub scp, qaddr, qf, qremote, qlocal, qoptions}
          end

          ssh.floop 0.1
        elsif ssh.kind_of? Exception
          e = ssh
          error_on_addr addr, e.inspect
        else
          error_on_addr addr, 'failed'
        end
      }
    end

    def setup_channel ch
      ch.on_data {|c, data|}
      ch.on_extended_data {|c, type, data|
        #STDERR.puts c.connection.host+': '+data
      }
      #ch.on_close {|c| error_on_addr ch.connection.host}
    end

    def error_on_addr addr, err=nil
      @err_addrs.push addr
      @addrs.delete addr
      @on_error.call addr, err if @on_error
    end

    def close_channel addr=nil
      if addr
        if (ch = @channels[addr])
          ch.close
          ch.wait
        else
        end
      else
        @channels.each {|addr, ch| ch.close}
      end
    end

    def on_error &block
      @on_error = block
    end

    def scp_upload_files addr, files, remote, options={}, &block
      mf = MR::MultiFuture.new
      for file in files
        future = scp_upload addr, file, remote, options
        mf.add future
      end
      mf.on_all &block if block
      mf
    end

    def scp_upload addr, local, remote, options={}
      future = MR::Future.new self, @loop
      queue = (@scp_queue[addr] ||= [])

      if (scp = @sessions[addr].scp)
        #if (scp = @scps[addr]) or
        #   (@sessions[addr] and @sessions[addr].ssh and
        #    scp = (@scps[addr] = @sessions[addr].scp))
        scp_upload_sub scp, addr, future, local, remote, options
      else
        queue.push [:up, future, addr, remote, local, options]
      end
      future
    end

    def scp_upload_sub scp, addr, future, local, remote, options
      scpid = @scpid
      @scpid += 1
      @scptable[scpid] = future
      scp.upload(local, remote, options) {|ch, name, sent, total|
        if sent >= total
          future.set_result nil, sent
          @scptable.delete scpid
        end
      }
    end

    def scp_download addr, remote, local, options={}
      future = MR::Future.new self, @loop
      queue = (@scp_queue[addr] ||= [])

      if (scp = @sessions[addr].scp)
        #if (scp = @scps[addr]) or
        #   (@sessions[addr] and @sessions[addr].ssh and
        #    scp = (@scps[addr] = @sessions[addr].scp))
        scp_download_sub scp, addr, future, remote, local, options
      else
        queue.push [:down, future, addr, remote, local, options]
      end
      future
    end

    def scp_download_sub scp, addr, future, remote, local, options
      session = @sessions[addr]
      if !session or session.scp_session_count > 5
        queue = (@scp_queue[addr] ||= [])
        queue.push [:down, future, addr, remote, local, options]
        return
      end
      session.scp_session_count += 1

      scpid = @scpid
      @scpid += 1
      @scptable[scpid] = future
      channel = scp.download(remote, local, options)
      channel.on_eof {|ch|
        session.scp_session_count -= 1
        @loop.set_timer(0) {process_scp_queue_once addr}

        future.set_result(nil, options[:set_result])
        @scptable.delete scpid
      }
      channel.on_open_failed {|ch, code, desc|
        Log.error "#{addr}: scp error: #{desc}"
        err = RuntimeError.new "scp error: #{desc}"
        @on_error.call addr, err
        session.scp_session_count -= 1
        @loop.set_timer(0) {process_scp_queue_once addr}

        future.set_result(nil, options[:set_result])
        @scptable.delete scpid
      }
    end

    def process_scp_queue_once addr
      #scp = @scps[addr]
      scp = @sessions[addr].scp
      queue = (@scp_queue[addr] ||= [])
      if scp and !queue.empty?
        updown, future, addr, remote, local, options = queue.shift
        case updown
        when :down
          scp_download_sub scp, addr, future, remote, local, options
        when :up
          scp_upload_sub scp, addr, future, local, remote, options
        end
      end
    end
  end

  class MRSession < MultiSession
    include MR::MessageReceiver

    def initialize addrs, options={}, loop=nil
      super

      @reqtable = {}
      @seqid = 0

      program_name = options[:program_name] || 'pmux'
      @cmd = "#{program_name} --server"
    end

    def connect
      for addr in @addrs
        connect_to_addr addr, @cmd + " --ipaddr=#{addr}"
      end
    end

    def setup_channel ch
      pac = MessagePack::Unpacker.new
      ch.on_data {|c, data|
        pac.feed_each(data) {|obj| on_message obj}
      }
      ch.on_extended_data {|c, type, data|
        #STDERR.puts c.connection.host+': '+data
      }
      ch.on_close {|c| error_on_addr ch.connection.host}
    end

    def error_on_addr addr, err=nil
      super
      err ||= 'closed'
      @reqtable.select {|msgid, f| f.addr == addr}.each {|msgid, f|
        f.set_result err, nil
      }
    end

    def call_async addr, method, *args
      send_request addr, method, args
    end

    def multicast_call_async method, *args
      mf = MR::MultiFuture.new
      for addr in @addrs
        future = send_request addr, method, args
        mf.add future
      end
      mf
    end

    def on_response msgid, error, result
      if (future = @reqtable.delete msgid)
        future.set_result error, result
      end
    end

    private
    def send_request addr, method, param
      method = method.to_s
      msgid = @seqid
      @seqid += 1; if @seqid >= 1<<31 then @seqid = 0 end
      data = [MR::REQUEST, msgid, method, param].to_msgpack
      if (ch = @channels[addr])
        ch.send_data data
      else
        (@buffers[addr] ||= []).push data
      end
      future = MR::Future.new self, @loop
      future.addr = addr
      @reqtable[msgid] = future
    end
  end
end

class MR::MultiFuture
  # monkey patch for MR::MultiFuture#join_all
  def join_all
    @not_joined.dup.each {|future|
      future.join
    }
    @all
  end
end

class MR::UNIXClientTransport::ClientSocket
  def on_close
    raise MR::TransportError, 'MR::UNIXClientTransport::ClientSocket on close'
  end
end
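The hunk above gives MultiSession/MRSession their public surface: connect, per-host call_async, multicast_call_async, and the queued scp_upload/scp_download helpers. A minimal usage sketch follows (hypothetical, not part of the gem; the host names and :user option are assumptions, and MR is the MessagePack-RPC namespace wired up in mros.rb):

require 'pmux'

addrs = %w(node1 node2)                             # assumed host names
msession = Pmux::MRSession.new addrs, :user => ENV['USER']
msession.on_error {|addr, err| warn "#{addr}: #{err}"}
msession.connect                                    # runs "pmux --server --ipaddr=<addr>" over SSH

mf = msession.multicast_call_async :init_job, 'j0'  # one MR::Future per connected host
mf.join_all                                         # relies on the MultiFuture#join_all patch above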
data/lib/pmux/pipeio.rb
ADDED
@@ -0,0 +1,19 @@
module Pmux
  class PipeIO < Coolio::IO
    def initialize cmd, mode='r'
      io = IO.popen cmd, mode
      super io
      @on_receive = nil
    end

    def on_receive &block
      @on_receive = block
    end

    def on_read data
      if @on_receive
        @on_receive.call data
      end
    end
  end
end
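PipeIO wraps IO.popen in a Cool.io watcher and hands every chunk read from the child process to the on_receive block. A hypothetical sketch of driving it directly (the command is an arbitrary example):

ev_loop = Coolio::Loop.default
pipeio = Pmux::PipeIO.new 'ls -l /tmp'        # read-mode popen
pipeio.on_receive {|data| print data}         # invoked from on_read for each chunk
ev_loop.attach pipeio
ev_loop.run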
data/lib/pmux/plugin.rb
ADDED
@@ -0,0 +1,23 @@
module Pmux
  class PluginClass
    def initialize
    end

    def load_plugins
      dir = File.join File.dirname(__FILE__), 'plugin'
      load_plugin_dir dir
    end

    def load_plugin_dir dir
      dir = File.expand_path dir
      return unless File.directory? dir
      Dir.entries(dir).sort.each {|fname|
        if fname =~ /\.rb$/
          require File.join(dir, fname)
        end
      }
    end
  end

  Plugin = PluginClass.new
end
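Plugin.load_plugins simply requires every *.rb file found under lib/pmux/plugin/, so a plugin is an ordinary Ruby file dropped into that directory. A hypothetical example (the extra directory path is an assumption):

Pmux::Plugin.load_plugins                      # requires lib/pmux/plugin/*.rb in sorted order
Pmux::Plugin.load_plugin_dir '~/.pmux/plugin'  # expanded via File.expand_path, skipped if absent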
data/lib/pmux/q.rb
ADDED
data/lib/pmux/reducer.rb
ADDED
@@ -0,0 +1,90 @@
module Pmux
  class Reducer
    attr_reader :exitstatus, :output_path
    attr_accessor :tmp_dir

    def initialize task, tmp_dir, loop=nil
      @task = task
      @tmp_dir = tmp_dir
      @loop = loop
      @paths = Dir.glob("#{tmp_dir}/t*-#{task['pindex']}")
      @output_path = nil
      @on_receive = nil
      @on_success = nil
      @on_error = nil
      raise RuntimeError, 'no input files' if @paths.empty?
    end

    def do_reduce_task; end
  end

  class StreamingReducer < Reducer
    include FixCmdLine

    def on_receive &block
      @on_receive = block
    end
    def on_success &block
      @on_success = block
    end
    def on_error &block
      @on_error = block
    end

    def do_reduce_task
      reducer_cmd = @task['reducer'] || 'cat'
      @output_path = "#{@tmp_dir}/r#{@task['pindex']}"
      err_path = "#{@tmp_dir}/.err.#{$$}"
      err_msg = nil
      cmd_line = fix_cmd_line reducer_cmd,
        @paths.join(' '), @output_path, err_path, tmp_dir
      Log.debug "system: #{cmd_line}"
      system cmd_line
      @exitstatus = $?.exitstatus
      if File.size? err_path
        err_msg = File.read(err_path).chomp!
        raise RuntimeError, err_msg
      end
      if @exitstatus > 1
        raise RuntimeError, "failed to execute reducer: #{cmd_line}"
      end
      @output_path
    end

    def do_streaming_reduce_task
      reducer_cmd = @task['reducer'] || 'cat'
      @output_path = "#{@tmp_dir}/r#{@task['pindex']}"
      err_path = "#{@tmp_dir}/.rerr.#{$$}"
      err_msg = nil
      cmd_line = fix_cmd_line reducer_cmd,
        @paths.join(' '), nil, err_path, tmp_dir
      Log.debug "popen: #{cmd_line}"
      pipeio = PipeIO.new cmd_line
      if @on_receive
        pipeio.on_receive &@on_receive
      else
        out = open(@output_path, 'a')
        pipeio.on_receive {|data|
          out.write data
        }
      end
      on_success = @on_success
      on_error = @on_error
      pipeio.on_close {
        if out
          out.close rescue nil
        end
        if File.size? err_path
          err_msg = File.read(err_path).chomp!
          #raise RuntimeError, err_msg
          e = RuntimeError.new err_msg
          e.set_backtrace ['reducer']
          on_error.call e if on_error
        else
          on_success.call self if on_success
        end
      }
      @loop.attach pipeio
    end
  end
end
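StreamingReducer globs the intermediate files t*-<pindex> out of the task's temporary directory and pipes them through the user-supplied reducer command, either synchronously (do_reduce_task) or via PipeIO on the event loop (do_streaming_reduce_task). A hypothetical sketch, assuming matching t*-0 files already exist under /tmp/pmux-w1 (the task hash and directory are assumptions):

task = {'pindex' => 0, 'reducer' => 'sort | uniq -c'}     # assumed task payload
reducer = Pmux::StreamingReducer.new task, '/tmp/pmux-w1', Coolio::Loop.default
reducer.on_success {|r| puts "reduced into #{r.output_path}"}
reducer.on_error {|e| warn "reduce failed: #{e.message}"}
reducer.do_streaming_reduce_task                          # popens the command through PipeIO
Coolio::Loop.default.run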
data/lib/pmux/storage_adapter.rb
ADDED
@@ -0,0 +1,105 @@
require 'socket'
require 'forwardable'

module Pmux
  class StorageAdapter
    Adapters = {}
    extend Forwardable
    def_delegators :@h, :each, :size, :[], :[]=, :update, :delete

    def self.create name, addrs, options={}
      adapter_class = Adapters[name] || GlusterFSAdapter
      adapter_class.new addrs, options
    end

    attr_reader :addrs

    def initialize addrs=[], options={}
      @addrs = addrs
      @options = options
      @h = init_node_hash addrs
    end

    def init_node_hash addrs
      Hash[*(addrs.map {|addr| [addr, {}]}).flatten]
    end

    def connect_to_storage locator_host, locator_port
    end
  end
end

module Pmux
  class LocalAdapter < StorageAdapter
    Adapters['local'] = self

    def get_files args, glob_flag=false
      if glob_flag
        Dir.glob args.join("\0")
      else
        args
      end
    end

    def _get_file_locations files, glob_flag=false
      files = Dir.glob files.join("\0") if glob_flag
      files.map {|file|
        a = addrs.map {|addr| [addr, file]}
        addrs.push addrs.shift
        [file, a]
      }
    end

    def lookup_file file
      res = addrs.map {|addr| [addr, file]}
      addrs.push addrs.shift
      res
    end
  end
end

module Pmux
  class GlusterFSAdapter < StorageAdapter
    Adapters['glusterfs'] = self

    def getaddr host
      sa = Socket.pack_sockaddr_in 0, host
      port, addr = Socket.unpack_sockaddr_in sa
      addr
    end

    def connect_to_storage locator_host, locator_port
      locator_port ||= 7076
      @client = MR::Client.new locator_host, locator_port
      @client.timeout = 3600 #FIXME
      @client
    end

    def get_files args, glob_flag=false
      raise RuntimeError, 'not connected' unless @client
      result = @client.call :get_locations, args, glob_flag
      @locations = result

      # hostname -> addr
      hosts = {}
      for vs in result.values
        for host, path in vs
          hosts[host] = true
        end
      end
      @host2addr = {}
      @addrs = hosts.keys.map {|host| addr = getaddr host
        @host2addr[host] = addr}
      @h = init_node_hash @addrs
      result.keys # files
    end

    def lookup_file file
      if (res = @locations[file])
        res.map {|host, path| [@host2addr[host], path]}
      else
        nil
      end
    end
  end
end
data/lib/pmux/task_dispatcher.rb
ADDED
@@ -0,0 +1,167 @@
module Pmux
  class TaskDispatcher
    attr_reader :options, :adapter
    attr_reader :msession, :scheduler, :gatherer
    attr_reader :mf_shuffle
    attr_reader :jl

    def initialize options, adapter, msession, gatherer=nil
      @options = options
      @adapter = adapter
      @msession = msession
      @gatherer = gatherer || Gatherer.new(STDOUTWriter.new)
      @scheduler = TaskScheduler.new adapter

      @verbose = options[:verbose]
      @on_error = proc {|r|
        $stderr.write "%s: %s, %s\n" %
          [r['error'], r['error_message'], r['backtrace']]
      }
    end

    def run job
      if job.num_r.nonzero?
        @mf_shuffle = MR::MultiFuture.new
      end
      @jl = Joblogger.new options[:log_dir], job
      scheduler.push_job job
      scheduler.attach_flush_callback {|node_addr, task|
        t = task.dup
        [:node_addrs, :alloc_time].each {|k| t.delete k}
        Log.info "send task #{t[:task_id]} to #{node_addr}"
        printf "send task %s to %s\n", t[:task_id], node_addr if @verbose
        future = msession.call_async node_addr, :exec_streaming_task, t
        future.attach_callback {|f| receive_result job, f.get}
        # err callback ?
      }

      # init job
      wdir = "#{options[:tmp_dir]}/w#{job.id}"
      Dir.mkdir wdir
      jl.dump_header
      jl.sep
      Log.init "#{options[:log_dir] or wdir}/dispatcher.log"

      puts 'send "init_job"' if @verbose
      mf_init = msession.multicast_call_async :init_job, job.id
      mf_init.on_success {|future|
        addr = future.addr
        res = future.get
        job_dir = adapter[addr][:job_dir] = res['job_dir'] # remote job_dir
        adapter[addr][:num_workers] = res['num_cpu'] || 2
        printf "%s: remote job_dir = %s\n", addr, res['job_dir'] if @verbose
        # scp ship_files to remote job_dir
        if (ship_files = options[:ship_files])
          mf_scp = msession.scp_upload_files addr, ship_files, job_dir
          mf_scp.on_all {
            scheduler.shipped[addr] = true
            scheduler.process_queue
          }
        else
          puts 'start scheduler' if @verbose
          scheduler.shipped[addr] = true
          scheduler.process_queue
        end
      }
      mf_init.on_error {job.taskhash.clear}

      # wait for all map tasks to finish
      until job.completed?
        msession.loop.run_once
      end
      if job.num_r.zero?
        gatherer.join_all
      else
        mf_shuffle.join_all
        # reduce phase
        job.mk_reduce_tasks
        scheduler.inject_tasks job.tasks
        scheduler.process_queue
        # wait for all reduce tasks to finish
        until job.completed?
          msession.loop.run_once
        end
        gatherer.join_all
      end

      Log.info "END"
      job[:end_time] = Time.now
      jl.dump_footer
      jl.close

      mf_quit = msession.multicast_call_async :quit
      mf_quit.join_all

      cleaner = Cleaner.new "#{options[:tmp_dir]}/w*"
      cleaner.run
    end

    def on_error &block
      @on_error = block
    end

    def receive_result job, result
      task_id, node_addr, ifbase =
        result.values_at 'task_id', 'node_addr', 'ifbase'
      Log.info "receive result #{task_id} from #{node_addr}"
      jl.add task_id, :node_addr=>node_addr, :ifbase=>ifbase,
        :welapse=>result['welapse']
      puts "receive result #{task_id} from #{node_addr}" if @verbose
      if result['error']
        if @on_error
          @on_error.call result
        else
        end
      end

      if (ifbase = result['ifbase'])
        if job.num_r.zero?
          # no reducers; get the intermediate file from the mapper node
          ifpath = ifbase + '-0'
          local = "#{@options[:tmp_dir]}/w#{job.id}/#{File.basename(ifpath).sub /^[mr]/, 'w'}"
          if (body = result['result_body'])
            if body.size > 0
              open(local, 'w') {|f| f.write body}
              gatherer.writer.write local
            end
          else
            puts "gather #{node_addr}:#{ifpath} -> #{local}" if @verbose
            Log.info "gather #{node_addr}:#{ifpath} -> #{local}"
            gatherer.gather msession, node_addr, ifpath, local
          end
        else
          # send 'notify_reduce' message to the reducer node
          job.reducers.each_with_index {|reducer_addr, pindex|
            puts "send notify_reduce #{task_id} to #{reducer_addr}" if @verbose
            future = msession.call_async reducer_addr, :notify_reduce,
              :job_id=>job.id, :task_id=>task_id, :pindex=>pindex,
              :node_addr=>node_addr, :ifbase=>ifbase
            mf_shuffle.add future
          }
        end
      elsif (output_path = result['output_path'])
        # reduced result
        local = "#{@options[:tmp_dir]}/w#{job.id}/#{File.basename(output_path).sub /^[mr]/, 'w'}"
        gatherer.gather msession, node_addr, output_path, local
      end

      task = job.get_task_by_id task_id
      task[:elapse] = Time.now - task[:alloc_time] if task[:alloc_time]
      task[:welapse] = result['welapse']

      # delete task
      scheduler.delete_task_from_job job, task, node_addr
      if (task_keys = result['task_keys'])
        for tid in task_keys.keys
          job.delete_task_by_id tid
        end
      end

      scheduler.process_queue

      perc = 100 * (job.tasks.size - job.taskhash.size) / job.tasks.size
      task_type = result['map'] ? 'map' : 'reduce'
      Log.info "done #{task_type} task #{result['task_id']} (#{perc}%)"
    end
  end
end
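TaskDispatcher#run ties the scheduler's flush callback to exec_streaming_task calls, ships files, waits for the map phase, then either gathers mapper output directly (num_r == 0) or fans out notify_reduce and gathers the reduced output. A hypothetical sketch of how application.rb might assemble the pieces (the Job construction is not shown in this diff, so it is left abstract; all option values are assumptions):

options  = {:tmp_dir => '/tmp', :log_dir => '/tmp', :verbose => true}
adapter  = Pmux::StorageAdapter.create 'local', %w(node1 node2)
msession = Pmux::MRSession.new adapter.addrs, :user => ENV['USER']
msession.connect

dispatcher = Pmux::TaskDispatcher.new options, adapter, msession
dispatcher.on_error {|r| warn "#{r['error']}: #{r['error_message']}"}
dispatcher.run job   # job: a Pmux::Job built from the mapper/reducer command line (see data/lib/pmux/job.rb)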