pmux 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +8 -0
- data/README.md +36 -0
- data/Rakefile +4 -0
- data/bin/pmux +5 -0
- data/lib/pmux/application.rb +166 -0
- data/lib/pmux/cleaner.rb +28 -0
- data/lib/pmux/fiber18.rb +64 -0
- data/lib/pmux/fixcmd.rb +25 -0
- data/lib/pmux/gatherer.rb +23 -0
- data/lib/pmux/handler.rb +262 -0
- data/lib/pmux/job.rb +101 -0
- data/lib/pmux/joblogger.rb +46 -0
- data/lib/pmux/mapper.rb +151 -0
- data/lib/pmux/mros.rb +207 -0
- data/lib/pmux/multi_session.rb +309 -0
- data/lib/pmux/pipeio.rb +19 -0
- data/lib/pmux/plugin.rb +23 -0
- data/lib/pmux/q.rb +3 -0
- data/lib/pmux/reducer.rb +90 -0
- data/lib/pmux/storage_adapter.rb +105 -0
- data/lib/pmux/task_dispatcher.rb +167 -0
- data/lib/pmux/task_queue.rb +11 -0
- data/lib/pmux/task_scheduler.rb +166 -0
- data/lib/pmux/util_daemon.rb +18 -0
- data/lib/pmux/util_logger.rb +137 -0
- data/lib/pmux/version.rb +3 -0
- data/lib/pmux/worker.rb +91 -0
- data/lib/pmux/writer.rb +19 -0
- data/lib/pmux.rb +27 -0
- data/pmux.gemspec +24 -0
- data/test/mock_mros.rb +284 -0
- data/test/mock_pipeio.rb +26 -0
- data/test/mock_world.rb +193 -0
- data/test/mock_xattr.rb +10 -0
- data/test/runner.rb +10 -0
- data/test/test_application.rb +13 -0
- data/test/test_fixcmd.rb +17 -0
- data/test/test_handler.rb +15 -0
- data/test/test_i_mapreduce.rb +169 -0
- data/test/test_i_mros.rb +28 -0
- data/test/test_i_msession.rb +27 -0
- data/test/test_job.rb +35 -0
- data/test/test_joblogger.rb +16 -0
- data/test/test_mapper.rb +60 -0
- data/test/test_pipeio.rb +24 -0
- data/test/test_storage_adapter.rb +63 -0
- data/test/test_task_queue.rb +87 -0
- data/test/test_task_scheduler.rb +39 -0
- data/test/txt/0.log +105 -0
- data/test/txt/1.log +105 -0
- data/test/txt/2.log +105 -0
- data/test/txt/3.log +105 -0
- data/test/txt/4.log +105 -0
- data/test/txt/5.log +105 -0
- data/test/txt/6.log +105 -0
- data/test/txt/7.log +105 -0
- data/test/txt/8.log +105 -0
- data/test/unittest_helper.rb +57 -0
- metadata +153 -0
@@ -0,0 +1,166 @@
|
|
1
|
+
|
2
|
+
module Pmux
  # Schedules map/reduce tasks onto worker nodes.
  #
  # Jobs are pushed in via #push_job; their tasks land on an internal
  # TaskQueue.  #process_queue drains the queue once, trying to allocate
  # each task to a node slot, and re-queues the tasks it could not place.
  # Successful allocations are batched per job/node in a "job_map" and
  # handed to the callback installed with #attach_flush_callback.
  class TaskScheduler
    # Hash keyed by node address; a truthy entry marks a node as usable
    # by #allocate_task_to_slot (set externally by the dispatcher —
    # NOTE(review): no writer for @shipped exists in this block).
    attr_reader :shipped

    # adapter :: storage adapter; doubles as the node table (it is
    # indexed by node address and responds to #size and #lookup_file).
    def initialize adapter=nil
      @adapter = adapter
      @node_table = adapter
      @job_table = {}                 # job_id => job
      @task_queue = TaskQueue.new     # tasks waiting for allocation
      @allocated_tasks = {}           # node_addr => [task, ...] (the "slots")
      @shipped = {}
    end

    # Register a job and enqueue all of its tasks.
    def push_job job
      @job_table[job.id] = job
      @task_queue.inject_tasks job.tasks
    end

    # Enqueue extra tasks (e.g. reduce tasks created after the map phase).
    def inject_tasks tasks
      @task_queue.inject_tasks tasks
    end

    # Remove a finished/cancelled task both from the node slot that holds
    # it and from its job.
    def delete_task_from_job job, task, node_addr=nil
      node_addr ||= task[:node_addr]
      remove_allocated_task node_addr, job.id, task[:task_id]
      job.delete_task_by_id task[:task_id]
    end

    # One scheduling pass: try to allocate every task currently queued,
    # then flush the per-job/per-node allocation map to the callback.
    # Tasks that could not be allocated are pushed back to the front of
    # the queue (before anything injected during this pass).
    def process_queue
      job_map = {}
      success_count = 0
      fail_count = 0
      # NOTE(review): fail_lim and success_count are computed/maintained
      # but never read in this method — dead bookkeeping, kept as-is.
      fail_lim = @node_table.size * 2
      tmp_queue = []

      # Snapshot the size so a task re-queued by another thread/path
      # cannot make this loop run forever; we visit at most the tasks
      # present when the pass started.
      task_queue_size = @task_queue.size
      loop_count = 0
      while task = @task_queue.shift
        # A task carrying both a partition index and a fixed node is a
        # reduce task pinned to that node; anything else is a map task.
        if task[:pindex] and task[:node_addr]
          allocated_p = allocate_reduce_task_to_node job_map, task
        else
          # Lazily resolve which nodes hold replicas of the input file.
          unless task[:node_addrs]
            task[:node_addrs] = @adapter.lookup_file task[:file]
          end
          allocated_p = allocate_map_task_to_node job_map, task
        end
        if allocated_p
          # success
          fail_count = 0
        else
          # fail
          tmp_queue.push task
          fail_count += 1
        end

        loop_count += 1

        break if loop_count >= task_queue_size
      end
      # Unallocated tasks go back ahead of whatever remains queued.
      @task_queue.replace tmp_queue + @task_queue

      flush_job_map job_map
    end

    # Install the block invoked once per allocated slot by #flush_job_map.
    def attach_flush_callback &block
      @flush_callback = block
    end

    # Deliver each allocation to @flush_callback.  A fusion slot holding
    # several tasks is collapsed into one task whose :task_keys maps
    # task_id => path for every fused member.
    def flush_job_map job_map
      for job_id, nt_map in job_map
        for node_addr, fslots in nt_map
          for fslot in fslots
            if fslot.size > 1
              task_keys =
                Hash[*(fslot.map {|t| [t[:task_id], t[:path]]}).flatten]
              task = fslot.first.merge :task_keys=>task_keys
              @flush_callback.call node_addr, task if @flush_callback
            else
              @flush_callback.call node_addr, fslot.first if @flush_callback
            end
          end
        end
      end
    end

    # Core allocation: first try to fuse the task into an existing,
    # not-yet-full fusion slot on this node; otherwise claim a fresh
    # worker slot and start a new (single-task) fusion group.
    # Returns true on success, false otherwise.
    def allocate_task_to_node job_map, task, node_addr
      if (ff = task[:ff]) and !task[:pindex]
        # task fusion: up to :ff map tasks share one dispatch
        job_id = task[:job_id]
        if (nt_map = job_map[job_id]) and (fslots = nt_map[node_addr])
          for fslot in fslots
            if fslot.size < ff
              fslot.push task
              return true
            end
          end
        end
      end

      #if @node_table.allocate_task node_addr, task
      if allocate_task_to_slot node_addr, task
        # success
        job_id = task[:job_id]
        nt_map = (job_map[job_id] ||= {})

        #task = task.dup #???
        task[:alloc_time] = Time.now
        (nt_map[node_addr] ||= []).push [task]

        return true
      end
      return false
    end

    # Claim a worker slot on a node.  Fails (returns false/nil) when the
    # node is unknown, not marked shipped, already at its worker limit,
    # or already holds this exact task.
    def allocate_task_to_slot node_addr, task
      if (node = @node_table[node_addr]) and @shipped[node_addr]
        slot = (@allocated_tasks[node_addr] ||= [])
        num_workers = node[:num_workers] || 2   # default: 2 workers/node
        if slot.size >= num_workers
          return false
        else
          return false if slot.include? task
          task[:node_addr] = node_addr
          slot.push task
          return true
        end
      end
    end

    # Drop a task from a node's slot list by (job_id, task_id).
    def remove_allocated_task node_addr, job_id, task_id
      if (slot = @allocated_tasks[node_addr])
        slot.delete_if {|t| t[:job_id] == job_id and t[:task_id] == task_id}
      end
    end

    # Try each replica location in turn; on success remove that replica
    # from the task's candidate list so a retry prefers another node.
    def allocate_map_task_to_node job_map, task
      node_addrs = task[:node_addrs].dup   # dup: we mutate the original on success
      for node_addr, path in node_addrs
        next unless @node_table[node_addr]
        next unless path
        task[:path] = path
        if allocate_task_to_node job_map, task, node_addr
          task[:node_addrs].delete [node_addr, path]
          return true
        end
      end
      return false
    end

    # Reduce tasks are pinned: allocate on the pre-assigned node or fail.
    def allocate_reduce_task_to_node job_map, task
      return false unless (node_addr = task[:node_addr])
      if allocate_task_to_node job_map, task, node_addr
        return true
      else
        return false
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Process
  class << self
    # Backport of Process.daemon (added to Ruby core in 1.9) for older
    # interpreters; defined only when the method is missing.
    unless method_defined?(:daemon)
      # Detach the current process from its controlling terminal and run
      # in the background, following the classic double-fork recipe.
      #   nochdir - keep the current working directory when truthy
      #   noclose - keep stdin/stdout/stderr when truthy
      # Returns 0, matching the core implementation.
      def daemon(nochdir = nil, noclose = nil)
        exit!(0) if Process.fork      # 1st fork: parent exits, child continues
        Process.setsid                # become session leader, drop the tty
        exit!(0) if Process.fork      # 2nd fork: ensure we can never reacquire a tty
        Dir.chdir("/") unless nochdir # avoid pinning a mounted filesystem
        unless noclose
          STDIN.reopen("/dev/null", "r")
          STDOUT.reopen("/dev/null", "w")
          STDERR.reopen("/dev/null", "w")
        end
        0
      end
    end
  end
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
require 'logger'

# Logging facade for pmux.  Two backends are supported:
#   * a stdlib Logger (installed by Log.init / Log.open), or
#   * raw IO objects registered per group symbol in Log::IOBUF
#     (used when no Logger has been configured).
# All methods are module functions (Log.debug, Log.puts, ...).
module Log
  # Group symbol => IO used when no Logger is installed.
  IOBUF = {}

  # Formatter producing "YYYY-mm-dd HH:MM:SS message\n" lines
  # (severity and progname are ignored).
  class Formatter
    Format = "%s %s\n"
    attr_accessor :datetime_format

    def call severity, time, progname, msg
      tstr = time.strftime("%Y-%m-%d %H:%M:%S")
      Format % [tstr, msg2str(msg)]
    end

    # Render a log argument: strings pass through, exceptions include
    # message, class and backtrace, everything else is inspected.
    def msg2str(msg)
      case msg
      when ::String
        msg
      when ::Exception
        "#{ msg.message } (#{ msg.class })\n" <<
        (msg.backtrace || []).join("\n")
      else
        msg.inspect
      end
    end
  end

  class << self
    # The configured stdlib Logger (nil until Log.init is called).
    attr_accessor :logger
  end

  module_function

  # Create and install a rotating Logger on +logdev+.
  # options: :log_shift_age (default 8), :log_shift_size (default 100MB),
  #          :log_level ('fatal'..'debug', default 'info').
  def init logdev, options={}
    shift_age = options[:log_shift_age] || 8
    shift_size = options[:log_shift_size] || 100_000_000
    logger = Logger.new logdev, shift_age, shift_size
    logger.formatter = Log::Formatter.new
    logger.level = {'fatal'=>Logger::FATAL, 'error'=>Logger::ERROR,
      'warn'=>Logger::WARN, 'info'=>Logger::INFO, 'debug'=>Logger::DEBUG,
    }[(options[:log_level] || '').downcase] || Logger::INFO
    Log.logger = logger
  end

  # Convenience: init with default options; '-' means log to stdout.
  def open logdev
    if logdev == '-'
      logdev = STDOUT
    end
    init logdev
  end

  # Close the Logger if one is installed; otherwise close the IOBUF
  # stream for +grp+, or every registered stream when grp is nil.
  def close grp=nil
    if @logger
      @logger.close
    else
      if grp
        if (io = IOBUF[grp])
          io.close
        end
      else
        for io in IOBUF.values
          io.close
        end
      end
    end
  end

  # Write one message.  With a Logger installed, +severity+ may be a
  # symbol (:DEBUG..:FATAL) or a Logger level int (default INFO); on
  # Logger::Error logging is disabled entirely.  Without a Logger the
  # message goes to IOBUF[grp]: Files get a timestamp and group prefix,
  # other IOs get the raw string.
  def write grp, str, severity=nil
    if @logger
      if severity.kind_of? Symbol
        severity = @logger.class.const_get severity
      else
        severity ||= @logger.class::INFO
      end
      begin
        logger.add severity, str.chomp
      rescue Logger::Error
        disable
      end
    else
      io = IOBUF[grp]
      if io
        if io.kind_of? File
          #mtime = io.mtime
          tstr = Time.now.strftime("%Y-%m-%d %H:%M:%S ")
          gstr = (grp == :default) ? '' : "#{grp}: "
          io.print tstr + gstr + str
        else
          io.print str
          io.print "\n" if severity #XXX
        end
      end
    end
  end

  # Kernel#puts-style helper; an optional leading Symbol selects the
  # output group (default :default).
  def puts(*args)
    if args.size >= 2 and args[0].kind_of?(Symbol)
      grp = args.shift
    else
      grp = :default
    end
    str = args.join('').chomp + "\n"
    self.write grp, str
  end

  # Inspect-and-puts helper, with the same optional leading group Symbol.
  def pp(*args)
    if args.size >= 2 and args[0].kind_of?(Symbol)
      self.puts args[0], args[1..-1].map {|e| e.inspect}.join(', ')
    else
      self.puts args.map {|e| e.inspect}.join(', ')
    end
  end

  # Severity shortcuts; only the first argument is logged, the rest and
  # any block are ignored (signature mirrors stdlib Logger for drop-in use).
  def debug(*args, &block)
    write :default, args.first, :DEBUG
  end
  def info(*args, &block)
    write :default, args.first, :INFO
  end
  def warn(*args, &block)
    write :default, args.first, :WARN
  end
  def error(*args, &block)
    write :default, args.first, :ERROR
  end
  def fatal(*args, &block)
    write :default, args.first, :FATAL
  end

  # No-op sink (usable as a stand-in for any severity method).
  def null(*args, &block)
  end

  # Drop the Logger and all registered IOs; subsequent writes are no-ops.
  def disable
    @logger = nil
    IOBUF.clear
  end
end
|
data/lib/pmux/version.rb
ADDED
data/lib/pmux/worker.rb
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'timeout'

module Pmux
  # Worker process loop: pulls tasks from the local handler over a
  # msgpack-rpc UNIX-domain socket, executes each map/reduce task and
  # pushes the result back, until the connection times out or closes.
  class Worker
    # options :: Hash; reads :sock_path (RPC socket) and :tmp_dir
    #            (scratch directory root, defaults to /var/tmp).
    # client  :: optional pre-built RPC client (e.g. a test double);
    #            when nil a client is created from options[:sock_path].
    #
    # Raises Timeout::Error if connecting takes more than 3 seconds.
    def initialize options, client=nil
      @options = options
      unless (@client = client)
        # Kernel#timeout was deprecated in Ruby 2.3 and later removed;
        # use the explicit module method instead.
        Timeout.timeout(3) {
          @client = init_client(options[:sock_path])
          @client.timeout = 3600    # a single task may legitimately run long
        }
      end
    end

    # Build the msgpack-rpc client for the handler's UNIX socket.
    def init_client sock_path
      transport = MR::UNIXTransport.new
      MR::Client.new transport, sock_path
    end

    # Main loop: get_task / exec / put_result forever; a request timeout
    # or transport close ends the loop (both are normal shutdown paths).
    def run
      Log.debug "W#{$$}: run"
      while true
        task = @client.call :get_task
        result = exec_task task
        @client.call :put_result, result
      end
      Log.debug "W#{$$}: end"
    rescue MR::TimeoutError
      Log.debug "W#{$$}: request timed out"
    rescue MR::TransportError
      Log.debug "W#{$$}: transport closed"
    end

    # Execute one (possibly fused) task and stamp the wall-clock elapse.
    # A fused task carries 'task_keys' (task_id => file); each member is
    # run in turn and the result is reported under the fusion id.
    # NOTE(review): only the last member's result survives the loop, as
    # in the original implementation.
    def exec_task task
      start_time = Time.now
      if task['task_keys']
        fusion_id = task['task_id']
        for task_id, file in task['task_keys']
          ntask = task.merge 'fusion_id'=>fusion_id,
            'task_id'=>task_id, 'path'=>file
          result = do_one_task ntask
          result.update :task_id=>fusion_id, :task_keys=>task['task_keys']
        end
      else
        result = do_one_task task
      end
      result[:welapse] = Time.now - start_time
      result
    end

    # Run a single map or reduce task (task['pindex'] present => reduce).
    # Returns a result hash; any StandardError is captured into the
    # result (:error/:error_message/:backtrace) instead of propagating,
    # so one bad task cannot kill the worker loop.
    def do_one_task task
      job_id, task_id, node_addr =
        task.values_at 'job_id', 'task_id', 'node_addr'
      num_r = task['num_r'].to_i
      result = {:job_id=>job_id, :task_id=>task_id, :node_addr=>node_addr}
      if @options and (otmp_dir = @options[:tmp_dir])
        tmp_dir = "#{otmp_dir}/#{job_id}"
      else
        tmp_dir = "/var/tmp/#{job_id}"
      end
      begin
        if task['pindex']
          # reduce task
          result[:reduce] = true
          reducer = StreamingReducer.new task, tmp_dir
          result[:output_path] = reducer.do_reduce_task
        else
          # map task
          result[:map] = true
          mapper = StreamingMapper.new task, tmp_dir
          result[:ifbase] = mapper.do_map_task
          # With no reduce phase, ship small map output inline.
          if num_r.zero?
            if (rsize = mapper.result_size)
              result[:result_body] = mapper.result_body if rsize < 1024
            else
              result[:result_body] = ''
            end
          end
        end
      rescue StandardError => e
        #q $!, $@[0] if $test
        result.update :error=>e.class.to_s,
          :error_message=>e.message, :backtrace=>e.backtrace[0]
      end
      result
    end
  end
end
|
data/lib/pmux/writer.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
module Pmux
  # Base class for job-output writers; subclasses deliver gathered
  # results to their destination.
  class Writer
    # Copy granularity in bytes for streaming writers.
    CHUNK_SIZE = 8192

    # Interface stubs overridden by subclasses.
    def write; end
    def finish; end
  end

  # Writer that streams a result file to standard output.
  class STDOUTWriter < Writer
    # Copy the file at +path+ to STDOUT in CHUNK_SIZE pieces so large
    # results are never slurped into memory at once.
    #
    # Uses File.open rather than Kernel#open: Kernel#open treats a
    # leading "|" in the path as a command to spawn, which is unsafe
    # for a runtime-supplied path.
    def write path
      File.open(path) {|f|
        until f.eof?
          data = f.read(CHUNK_SIZE)
          STDOUT.write data
        end
      }
    end
  end
end
|
data/lib/pmux.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# Top-level loader for the pmux gem: fixes process-wide defaults, then
# requires every component in dependency order.

File.umask 0022                 # predictable permissions for created files
ENV['LC_ALL'] = 'C'             # locale-independent tool/sort behavior
# Treat all I/O as raw bytes on 1.9+ (pmux processes arbitrary log data).
Encoding.default_external = 'ascii-8bit' if RUBY_VERSION > '1.9'

# Low-level utilities first ...
require 'pmux/version'
require 'pmux/q'
require 'pmux/util_logger'
require 'pmux/fiber18'
require 'pmux/plugin'
require 'pmux/mros'
require 'pmux/pipeio'
require 'pmux/storage_adapter'
# ... then job/task machinery ...
require 'pmux/job'
require 'pmux/joblogger'
require 'pmux/fixcmd'
require 'pmux/mapper'
require 'pmux/reducer'
require 'pmux/worker'
require 'pmux/handler'
require 'pmux/writer'
require 'pmux/gatherer'
require 'pmux/task_queue'
require 'pmux/task_scheduler'
require 'pmux/task_dispatcher'
require 'pmux/multi_session'
require 'pmux/cleaner'
# ... and finally the CLI application entry point.
require 'pmux/application'
|
data/pmux.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for pmux.  Version is read from lib/pmux/version.rb;
# file lists are derived from the git index at build time.
$:.push File.expand_path("../lib", __FILE__)
require "pmux/version"

Gem::Specification.new do |s|
  s.name        = "pmux"
  s.version     = Pmux::VERSION
  s.authors     = ["maebashi"]
  s.homepage    = ""
  s.summary     = %q{lightweight file-based MapReduce system}
  s.description = %q{lightweight file-based MapReduce system}

  s.rubyforge_project = "pmux"

  # Package everything git tracks except the tmp/ scratch area.
  s.files         = `git ls-files`.split("\n").select {|e| /^tmp/!~e}
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # specify any dependencies here; for example:

  s.add_runtime_dependency "msgpack-rpc"   # RPC between dispatcher/handler/worker
  s.add_runtime_dependency "net-scp"       # file shipping between nodes
end
|