pmux 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +8 -0
- data/README.md +36 -0
- data/Rakefile +4 -0
- data/bin/pmux +5 -0
- data/lib/pmux/application.rb +166 -0
- data/lib/pmux/cleaner.rb +28 -0
- data/lib/pmux/fiber18.rb +64 -0
- data/lib/pmux/fixcmd.rb +25 -0
- data/lib/pmux/gatherer.rb +23 -0
- data/lib/pmux/handler.rb +262 -0
- data/lib/pmux/job.rb +101 -0
- data/lib/pmux/joblogger.rb +46 -0
- data/lib/pmux/mapper.rb +151 -0
- data/lib/pmux/mros.rb +207 -0
- data/lib/pmux/multi_session.rb +309 -0
- data/lib/pmux/pipeio.rb +19 -0
- data/lib/pmux/plugin.rb +23 -0
- data/lib/pmux/q.rb +3 -0
- data/lib/pmux/reducer.rb +90 -0
- data/lib/pmux/storage_adapter.rb +105 -0
- data/lib/pmux/task_dispatcher.rb +167 -0
- data/lib/pmux/task_queue.rb +11 -0
- data/lib/pmux/task_scheduler.rb +166 -0
- data/lib/pmux/util_daemon.rb +18 -0
- data/lib/pmux/util_logger.rb +137 -0
- data/lib/pmux/version.rb +3 -0
- data/lib/pmux/worker.rb +91 -0
- data/lib/pmux/writer.rb +19 -0
- data/lib/pmux.rb +27 -0
- data/pmux.gemspec +24 -0
- data/test/mock_mros.rb +284 -0
- data/test/mock_pipeio.rb +26 -0
- data/test/mock_world.rb +193 -0
- data/test/mock_xattr.rb +10 -0
- data/test/runner.rb +10 -0
- data/test/test_application.rb +13 -0
- data/test/test_fixcmd.rb +17 -0
- data/test/test_handler.rb +15 -0
- data/test/test_i_mapreduce.rb +169 -0
- data/test/test_i_mros.rb +28 -0
- data/test/test_i_msession.rb +27 -0
- data/test/test_job.rb +35 -0
- data/test/test_joblogger.rb +16 -0
- data/test/test_mapper.rb +60 -0
- data/test/test_pipeio.rb +24 -0
- data/test/test_storage_adapter.rb +63 -0
- data/test/test_task_queue.rb +87 -0
- data/test/test_task_scheduler.rb +39 -0
- data/test/txt/0.log +105 -0
- data/test/txt/1.log +105 -0
- data/test/txt/2.log +105 -0
- data/test/txt/3.log +105 -0
- data/test/txt/4.log +105 -0
- data/test/txt/5.log +105 -0
- data/test/txt/6.log +105 -0
- data/test/txt/7.log +105 -0
- data/test/txt/8.log +105 -0
- data/test/unittest_helper.rb +57 -0
- metadata +153 -0
data/lib/pmux/job.rb
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
|
3
|
+
module Pmux
|
4
|
+
# One map/reduce job: builds a map task per input file, optionally builds
# reduce tasks, and tracks which tasks are still outstanding.
#
# Hash-style access (job[:key], job[:key] = v, job.delete(:key)) is delegated
# to the internal @h hash, which holds :start_time, :map_tasks and
# :reduce_tasks (callers may add more keys, e.g. :end_time).
class Job
  extend Forwardable
  def_delegators :@h, :[], :[]=, :delete
  attr_reader :tasks, :num_t, :num_r
  attr_reader :taskhash
  attr_reader :reducers

  # params:: Hash of job parameters (:mapper, :reducer, :num_r, :ff,
  #          :separator, :storage_name, :job_name are read by this class).
  #          NOTE: params[:job_name] is filled in (in place) with the quoted
  #          mapper command when it is not already set.
  # files::  list of input files; one map task is created per entry.
  def initialize params, files
    @params = params
    @files = files

    @task_id = 0
    @num_r = @params[:num_r] || 0
    @params[:job_name] ||= %Q{"#{@params[:mapper]}"}

    @taskhash = {}
    @done_taskhash = {}
    @tasks = mk_tasks files
    # total task count = map tasks + requested reduce tasks
    @num_t = @tasks.size + @num_r
    @h = {:start_time=>Time.now,
      :map_tasks=>@tasks.size, :reduce_tasks=>@num_r,
    }
  end

  # Builds one map task Hash per file, registers each in @taskhash under a
  # freshly allocated task id, and returns the list of task Hashes.
  def mk_tasks files
    job_id = id
    files.map {|file|
      @task_id += 1
      @taskhash[@task_id] = {:job_id=>job_id, :task_id=>@task_id,
        :file=>file,
        :mapper=>@params[:mapper], #:reducer=>@params[:reducer],
        :num_r=>@num_r, :ff=>@params[:ff],
        :separator=>@params[:separator],
        :hist=>[],
      }
    }
  end

  # Picks num_r entries from addrs at evenly spaced positions, stores them in
  # @reducers and returns them. When num_r exceeds addrs.size the same
  # address is selected more than once.
  #
  # The position is computed with integer arithmetic; multiplying a Float
  # step can land just below the intended index (e.g. 2 addrs / 98 reducers,
  # index 49) and then truncate to the previous slot.
  def mk_reducer_addrs addrs, num_r=nil
    num_r ||= @num_r
    size = addrs.size
    @reducers = (0...num_r).map {|ind| addrs[ind * size / num_r]}
  end

  # Builds one reduce task per entry of #reducers (mk_reducer_addrs must have
  # been called first), registers each in @taskhash, replaces @tasks with the
  # new list and returns it. :pindex is the position of the reducer in
  # #reducers.
  def mk_reduce_tasks
    job_id = id
    @tasks = reducers.each_with_index.map {|reducer_node_addr, pindex|
      @task_id += 1
      @taskhash[@task_id] = {:pindex=>pindex, :job_id=>job_id,
        :task_id=>@task_id,
        :node_addr=>reducer_node_addr,
        :reducer=>@params[:reducer],
        :hist=>[],
      }
    }
  end

  # Job identifier: the absolute value of object_id, as a String.
  def id
    object_id.abs.to_s
  end

  # Returns the outstanding task registered under task_id, or nil.
  def get_task_by_id task_id
    @taskhash[task_id]
  end

  # Marks the task as :DONE and moves it from the outstanding table to the
  # done table. Returns the task, or nil when task_id is not outstanding.
  def delete_task_by_id task_id
    task = @taskhash[task_id]
    return nil unless task
    task[:status] = :DONE
    @done_taskhash[task_id] = task
    @taskhash.delete task_id
  end

  # True when no outstanding task remains.
  def completed?
    @taskhash.empty?
  end

  # Hash written at the top of the job log.
  def to_jlheader
    {:id=>id, :files_first=>@files.first, :tasksize=>@tasks.size,
      :params=>@params,
      :start_time=>@h[:start_time],
      :map_tasks=>@h[:map_tasks], :reduce_tasks=>@h[:reduce_tasks],
      :storage_name=>@params[:storage_name],
      :mapper=>@params[:mapper], :reducer=>@params[:reducer],
      :num_r=>@params[:num_r],
    }
  end

  # Hash written at the end of the job log; :end_time is whatever the caller
  # stored via job[:end_time] = ... (nil if never set).
  def to_jlfooter
    {:end_time=>@h[:end_time]}
  end
end
|
101
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module Pmux
|
4
|
+
# Writes a per-job log file "<dir>/<job.id>.yml".
#
# Logging is optional: when dir is nil or is not an existing directory no
# file is opened, #path is nil and every writer method is a no-op.
class Joblogger
  attr_reader :path

  # dir:: directory that receives the log file (may be nil)
  # job:: object responding to #id, #to_jlheader and #to_jlfooter
  def initialize dir, job
    @dir = dir
    @job = job
    @path = nil
    @f = nil
    return unless dir && File.directory?(dir)
    @path = "#{dir}/#{job.id}.yml"
    # File.open rather than Kernel#open: the path must never be interpreted
    # as a "|command" pipe specification.
    @f = File.open(@path, 'w')
  end

  # Dumps the job header (job.to_jlheader) as YAML.
  def dump_header
    dump @job.to_jlheader
  end

  # Dumps the job footer (job.to_jlfooter) as YAML.
  def dump_footer
    dump @job.to_jlfooter
  end

  # Writes obj as a YAML document.
  def dump obj
    @f.print YAML.dump(obj) if @f
  end

  # Writes a "---" separator line and flushes the file.
  def sep
    return unless @f
    @f.puts '---'
    @f.flush
  end

  # Writes "key:" followed by one indented "k: v.inspect" line for each pair
  # yielded by obj.each (typically a Hash).
  def add key, obj
    return unless @f
    @f.puts "#{key}:"
    obj.each {|k, v|
      @f.puts " #{k}: #{v.inspect}"
    }
  end

  # Closes the log file; safe to call more than once.
  def close
    @f.close if @f && !@f.closed?
  end
end
|
46
|
+
end
|
data/lib/pmux/mapper.rb
ADDED
@@ -0,0 +1,151 @@
|
|
1
|
+
module Pmux
|
2
|
+
# Base class for map-task executors. It only captures the task parameters;
# subclasses implement do_map_task / result_size / result_body.
class Mapper
  attr_accessor :tmp_dir
  attr_accessor :num_r
  attr_reader :ifbase
  attr_reader :exitstatus

  # task::    Hash with String keys; 'task_id', 'fusion_id', 'path' and
  #           'num_r' are read here.
  # tmp_dir:: directory under which intermediate files are named
  # loop::    optional event loop, stored for subclasses
  def initialize task, tmp_dir, loop=nil
    @task = task
    @tmp_dir = tmp_dir
    @exitstatus = nil
    @path = task['path']
    @num_r = task['num_r'] || 0
    @loop = loop
    @on_receive = nil
    @on_success = nil
    @on_error = nil
    # Intermediate-file base name: "<tmp_dir>/m<id>", where the id is the
    # task's fusion_id when present and its task_id otherwise.
    task_id, fusion_id = task.values_at 'task_id', 'fusion_id'
    @ifbase = "#{tmp_dir}/m#{fusion_id || task_id}"
  end

  # No-op hooks; overridden by subclasses.
  def do_map_task; end
  def result_size; end
  def result_body; end
end
|
30
|
+
|
31
|
+
# Mapper that runs the task's 'mapper' command (default: 'cat') over the
# task's input path and stores its output in intermediate files named
# "<ifbase>-<n>".
#
# With num_r <= 1 all output goes to "<ifbase>-0"; with num_r >= 2 the
# output is split line by line across num_r files by a TextPartitioner.
class StreamingMapper < Mapper
  include FixCmdLine
  CHUNK_SIZE = 8192

  # Registers the block called (without arguments) when
  # do_streaming_map_task finishes without error output.
  def on_success &block
    @on_success = block
  end

  # Registers the block called with an exception when
  # do_streaming_map_task finds error output from the mapper.
  def on_error &block
    @on_error = block
  end

  # Runs the mapper command synchronously.
  #
  # Returns the intermediate-file base name (#ifbase).
  # Raises RuntimeError when the command left anything in its error file
  # (the message is the file's content), when it exited with a status
  # greater than 1, or when it did not exit normally (e.g. it was killed by
  # a signal). Exit status 1 is deliberately not treated as a failure.
  def do_map_task
    mapper_cmd = @task['mapper'] || 'cat'
    err_path = "#{tmp_dir}/.err.#{$$}"
    if @num_r <= 1
      cmd_line = fix_cmd_line mapper_cmd,
        @path, "#{@ifbase}-0", err_path, tmp_dir
      Log.debug "system: #{cmd_line}"
      system cmd_line
    else # @num_r >= 2
      partitioner = TextPartitioner.new @ifbase, @num_r,
        :separator=>@task['separator']
      begin
        cmd_line = fix_cmd_line mapper_cmd, @path, nil, err_path, tmp_dir
        IO.popen(cmd_line, 'r') {|io|
          until io.eof?
            data = io.read CHUNK_SIZE
            partitioner.emit data
          end
        }
      ensure
        # always release the partition files, even if reading the pipe failed
        partitioner.close
      end
    end
    @exitstatus = $?.exitstatus
    if File.size? err_path
      # chomp, not chomp!: chomp! returns nil when there is no trailing
      # newline, which would discard the actual error message.
      raise RuntimeError, File.read(err_path).chomp
    end
    # exitstatus is nil when the child did not exit normally
    if @exitstatus.nil? || @exitstatus > 1
      raise RuntimeError, "failed to execute mapper: #{cmd_line}"
    end
    @ifbase
  end

  # Runs the mapper command asynchronously through a PipeIO attached to
  # @loop. When the pipe closes, the on_error block is called with a
  # RuntimeError if the command left anything in its error file; otherwise
  # the on_success block is called.
  #
  # Returns the result of @loop.attach.
  def do_streaming_map_task
    mapper_cmd = @task['mapper'] || 'cat'
    err_path = "#{tmp_dir}/.err.#{object_id}"
    out = nil
    partitioner = nil
    cmd_line = fix_cmd_line mapper_cmd, @path, nil, err_path, tmp_dir
    if @num_r <= 1
      Log.debug "pipe: #{cmd_line}"
      pipeio = PipeIO.new cmd_line
      out = File.open("#{@ifbase}-0", 'a')
      pipeio.on_receive {|data| out.write data}
    else # @num_r >= 2
      partitioner = TextPartitioner.new @ifbase, @num_r,
        :separator=>@task['separator']
      pipeio = PipeIO.new cmd_line
      pipeio.on_receive {|data| partitioner.emit data}
    end
    # capture the callbacks registered at the time the task is started
    on_success = @on_success
    on_error = @on_error
    pipeio.on_close {
      if out
        out.close rescue nil # best effort; a failed close must not mask the result
      end
      if partitioner
        partitioner.close
      end
      #@exitstatus = $?.exitstatus
      if File.size? err_path
        # chomp, not chomp!: see do_map_task
        e = RuntimeError.new File.read(err_path).chomp
        e.set_backtrace ['mapper']
        on_error.call e if on_error
      else
        on_success.call if on_success
      end
    }
    @loop.attach pipeio
  end

  # Size of "<ifbase>-0" when num_r <= 1 (nil if missing or empty);
  # nil when the output is partitioned.
  def result_size
    @num_r <= 1 ? File.size?("#{@ifbase}-0") : nil
  end

  # Content of "<ifbase>-0" when num_r <= 1; nil when the output is
  # partitioned.
  def result_body
    @num_r <= 1 ? File.read("#{@ifbase}-0") : nil
  end
end
|
122
|
+
|
123
|
+
# Splits a stream of text into num_r files "<ifbase>-0" .. "<ifbase>-<n-1>".
# Each line is routed by the hash of its key, the key being the part of the
# line before the first separator match (default separator: a tab).
#
# NOTE(review): routing uses String#hash, which current Ruby versions seed
# per process. If partitions produced by different processes are expected to
# agree on key placement, a stable hash is needed -- confirm against the
# reducer side before changing.
class TextPartitioner
  # ifbase::  base path of the partition files
  # num_r::   number of partitions
  # options:: :separator => String or Regexp used to build the key separator
  def initialize ifbase, num_r, options={}
    @ifbase = ifbase
    @num_r = num_r
    @ifiles = (0..(num_r-1)).map {|n| File.open("#{ifbase}-#{n}", 'w')}
    @rbuf = ''.dup
    sep = options[:separator]
    @separator_re = sep ? Regexp.new(sep) : /\t/
  end

  # Appends data to the internal buffer and writes out every complete
  # (newline-terminated) line. An unterminated tail stays buffered until
  # more data arrives or #close is called. nil or empty data is ignored.
  def emit data
    return if data.nil? || data.empty?
    @rbuf << data
    while (eol = @rbuf.index("\n"))
      write_line @rbuf.slice!(0..eol)
    end
  end

  # Writes any buffered, unterminated last line (as is, without adding a
  # newline) so that it is not lost, then closes all partition files.
  # Safe to call more than once.
  def close
    write_line @rbuf.slice!(0..-1) unless @rbuf.empty?
  ensure
    @ifiles.each {|io| io.close unless io.closed?}
  end

  private

  # Routes one line to the partition file selected by its key.
  def write_line line
    key, = line.split @separator_re, 2
    @ifiles[key.hash % @num_r].write line
  end
end
|
151
|
+
end
|
data/lib/pmux/mros.rb
ADDED
@@ -0,0 +1,207 @@
|
|
1
|
+
# msgpack-rpc over ssh
|
2
|
+
|
3
|
+
require 'net/ssh'
|
4
|
+
if require 'msgpack/rpc'
|
5
|
+
require 'msgpack/rpc/transport/unix'
|
6
|
+
MR = MessagePack::RPC
|
7
|
+
end
|
8
|
+
if defined? Rev
|
9
|
+
Coolio = Rev
|
10
|
+
end
|
11
|
+
require 'net/scp'
|
12
|
+
|
13
|
+
# Reopens Net::SSH::Compat so that io_select, instead of blocking, registers
# Cool.io watchers for the requested IOs and suspends the calling Fiber until
# one of them fires.
class Net::SSH::Compat
  # IO watcher that, when its IO becomes readable or writable, detaches every
  # watcher created for the same io_select call and resumes the waiting fiber
  # with a select-style result.
  class Watcher < Coolio::IOWatcher
    def initialize(ruby_io, flags, fiber, watchers)
      @ruby_io = ruby_io
      @fiber = fiber
      @watchers = watchers
      super(ruby_io, flags)
    end

    # Resumes the fiber with [[io]] (readable set only).
    def on_readable
      cleanup
      @fiber.resume([[@ruby_io]])
    end

    # Resumes the fiber with [[], [io]] (empty readable set, writable set).
    def on_writable
      cleanup
      @fiber.resume([[], [@ruby_io]])
    end

    # Detaches all watchers of the shared list (this one included).
    def cleanup
      @watchers.each { |w| w.detach }
    end
  end

  # One-shot timer that implements the io_select timeout: it detaches all
  # watchers of the call and resumes the fiber with nil.
  class TimerWatcher < Coolio::TimerWatcher
    def initialize(interval, fiber, watchers)
      @fiber = fiber
      @watchers = watchers
      super(interval, false)
    end

    def on_timer
      @watchers.each { |w| w.detach }
      @fiber.resume(nil)
    end
  end

  class <<self
    @@loop = Coolio::Loop.default

    # Event loop the io_select watchers are attached to.
    def coolio_loop
      @@loop
    end

    def coolio_loop=(loop)
      @@loop = loop
    end

    alias :io_select0 :io_select

    # Fiber-aware replacement for io_select.
    #
    # A zero timeout is a plain poll and is passed to the original
    # implementation. Otherwise one Watcher per IO (plus a TimerWatcher when
    # a timeout is given) is attached to the loop and the current fiber
    # yields; the value returned is whatever the firing watcher passes to
    # Fiber#resume. excepts is only used on the zero-timeout path.
    def io_select(reads, writes = nil, excepts = nil, timeout = nil)
      return io_select0(reads, writes, excepts, 0) if timeout && timeout.zero?

      writes ||= []
      event_loop = @@loop
      fiber = Fiber.current

      watchers = []
      both = reads & writes
      (reads | writes).each do |io|
        flag =
          if both.include?(io)
            :rw
          elsif reads.include?(io)
            :r
          else
            :w
          end
        io_watcher = Watcher.new(io, flag, fiber, watchers)
        watchers.push io_watcher
        io_watcher.attach event_loop
      end

      if timeout
        timer = TimerWatcher.new(timeout, fiber, watchers)
        watchers.push timer
        timer.attach event_loop
      end

      Fiber.yield
    end
  end
end
|
92
|
+
|
93
|
+
# Adds a fiber-friendly processing loop to Net::SSH sessions.
class Net::SSH::Connection::Session
  # The event loop this session registers itself with; floop uses its
  # #waitings table.
  attr_accessor :coolio_loop

  # Until the session is closed: run the session's own #loop with the given
  # wait value, register the current fiber => this session in
  # coolio_loop.waitings, and yield the fiber.
  def floop(wait = nil)
    until closed?
      #break unless busy?
      loop wait # Session#loop, not Kernel#loop
      @coolio_loop.waitings[Fiber.current] = self
      Fiber.yield
    end
  end
end
|
106
|
+
|
107
|
+
module Coolio
  # Extensions to Coolio::Loop: SSH sessions running inside fibers, a #run
  # that wakes up fibers parked by Session#floop, and a timer helper.
  class Loop
    # Fiber => SSH session table of fibers that yielded in Session#floop.
    attr_reader :waitings

    # Starts Net::SSH.start(*args) inside a new fiber and calls the block
    # with the session. Any StandardError raised in the fiber (including one
    # raised by the block itself) is passed to the block instead.
    # Returns the value of the fiber's first resume.
    def start_ssh(*args, &block)
      @waitings ||= {}
      Fiber.new do
        begin
          session = Net::SSH.start(*args)
          session.coolio_loop = self
          block.call session
        rescue => err
          block.call err
        end
      end.resume
    end

    # Runs the loop while it is marked running and has active watchers.
    # After every iteration, each fiber in #waitings whose session reports
    # busy? is removed from the table and resumed.
    # Raises RuntimeError when no watcher is attached.
    def run
      raise RuntimeError, "no watchers for this loop" if @watchers.empty?

      @running = true
      while @running && !@active_watchers.zero?
        run_once
        next if !@waitings || @waitings.empty?

        @waitings.select { |_f, s| s.busy? }.each do |fiber, _session|
          @waitings.delete fiber
          fiber.resume
        end
      end
      @running = false
    end

    # Attaches a timer that calls the block after interval; unless repeating
    # is true, the timer detaches itself after the first call.
    # Returns the result of attaching the timer to this loop.
    def set_timer(interval, repeating=false, &block)
      watcher = TimerWatcher.new(interval, repeating)
      watcher.on_timer do
        block.call
        watcher.detach unless repeating
      end
      watcher.attach self
    end
  end
end
|
151
|
+
|
152
|
+
module MR
  # Server-side msgpack-rpc transport over three plain Ruby IO objects
  # (input, output, error) instead of a socket.
  class PipeTransport
    def initialize(ruby_in, ruby_out, ruby_err)
      @ruby_in = ruby_in
      @ruby_out = ruby_out
      @ruby_err = ruby_err
    end

    # Wraps the three IOs and attaches them to server.loop: requests read
    # from the input IO are dispatched to server, which is handed the output
    # wrapper for its replies.
    def listen(server)
      pout = PipeOut.new(@ruby_out)
      pin = PipeIn.new(@ruby_in, server, pout)
      perr = PipeOut.new(@ruby_err)
      server.loop.attach pout
      server.loop.attach pin
      server.loop.attach perr
    end

    # Nothing to release here.
    def close
    end

    # Output side: send_data writes directly to the wrapped IO; readable
    # events are ignored.
    class PipeOut < Coolio::IO
      def initialize(ruby_io)
        @ruby_io = ruby_io
        super(ruby_io)
      end

      def on_readable
      end

      def send_data(data)
        @ruby_io.write data
      end
    end

    # Input side: feeds received bytes to a MessagePack unpacker and hands
    # every decoded object to on_message (from MR::MessageReceiver).
    class PipeIn < Coolio::IO
      include MR::MessageReceiver

      def initialize(ruby_io, server, pout)
        @ruby_io = ruby_io
        super(ruby_io)
        @server = server
        @pout = pout
        @pac = MessagePack::Unpacker.new
      end

      def on_read(data)
        @pac.feed_each(data) { |obj| on_message obj }
      end

      # Forwards a request to the server, with the output wrapper as the
      # object replies are sent through.
      def on_request(msgid, method, param)
        @server.on_request @pout, msgid, method, param
      end

      # When the input closes, issue a 'quit' request (msgid 0, no
      # parameters) to the server.
      def on_close
        on_request 0, 'quit', []
      end
    end
  end
end
|