pwrake 0.9.9.2 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/CHANGES_V2.md +90 -0
- data/{LICENSE.txt → MIT-LICENSE} +2 -3
- data/README +12 -0
- data/README.md +75 -52
- data/bin/gfwhere-pipe +23 -12
- data/bin/pwrake +22 -29
- data/bin/pwrake_branch +24 -0
- data/lib/pwrake/branch.rb +22 -0
- data/lib/pwrake/branch/branch.rb +213 -0
- data/lib/pwrake/branch/branch_application.rb +53 -0
- data/lib/pwrake/branch/fiber_queue.rb +36 -0
- data/lib/pwrake/branch/file_utils.rb +101 -0
- data/lib/pwrake/branch/shell.rb +231 -0
- data/lib/pwrake/{profiler.rb → branch/shell_profiler.rb} +28 -27
- data/lib/pwrake/branch/worker_communicator.rb +104 -0
- data/lib/pwrake/{gfarm_feature.rb → gfarm/gfarm_path.rb} +2 -100
- data/lib/pwrake/gfarm/gfarm_postprocess.rb +53 -0
- data/lib/pwrake/iomux/channel.rb +70 -0
- data/lib/pwrake/iomux/handler.rb +124 -0
- data/lib/pwrake/iomux/handler_set.rb +35 -0
- data/lib/pwrake/iomux/runner.rb +62 -0
- data/lib/pwrake/logger.rb +3 -150
- data/lib/pwrake/master.rb +30 -137
- data/lib/pwrake/master/fiber_pool.rb +69 -0
- data/lib/pwrake/master/idle_cores.rb +30 -0
- data/lib/pwrake/master/master.rb +345 -0
- data/lib/pwrake/master/master_application.rb +150 -0
- data/lib/pwrake/master/postprocess.rb +16 -0
- data/lib/pwrake/{graphviz.rb → misc/graphviz.rb} +0 -0
- data/lib/pwrake/{mcgp.rb → misc/mcgp.rb} +63 -42
- data/lib/pwrake/option/host_map.rb +158 -0
- data/lib/pwrake/option/option.rb +357 -0
- data/lib/pwrake/option/option_filesystem.rb +112 -0
- data/lib/pwrake/queue/locality_aware_queue.rb +158 -0
- data/lib/pwrake/queue/no_action_queue.rb +67 -0
- data/lib/pwrake/queue/queue_array.rb +366 -0
- data/lib/pwrake/queue/task_queue.rb +164 -0
- data/lib/pwrake/report.rb +1 -0
- data/lib/pwrake/report/parallelism.rb +9 -3
- data/lib/pwrake/report/report.rb +50 -103
- data/lib/pwrake/report/task_stat.rb +83 -0
- data/lib/pwrake/task/task_algorithm.rb +107 -0
- data/lib/pwrake/task/task_manager.rb +32 -0
- data/lib/pwrake/task/task_property.rb +98 -0
- data/lib/pwrake/task/task_rank.rb +48 -0
- data/lib/pwrake/task/task_wrapper.rb +296 -0
- data/lib/pwrake/version.rb +1 -1
- data/lib/pwrake/worker/executor.rb +169 -0
- data/lib/pwrake/worker/gfarm_directory.rb +90 -0
- data/lib/pwrake/worker/invoker.rb +199 -0
- data/lib/pwrake/worker/load.rb +14 -0
- data/lib/pwrake/worker/log_executor.rb +73 -0
- data/lib/pwrake/worker/shared_directory.rb +74 -0
- data/lib/pwrake/worker/worker_main.rb +14 -0
- data/lib/pwrake/worker/writer.rb +59 -0
- data/setup.rb +1212 -1502
- data/spec/003/Rakefile +2 -2
- data/spec/008/Rakefile +2 -1
- data/spec/009/Rakefile +1 -1
- data/spec/009/pwrake_conf.yaml +1 -3
- data/spec/hosts +0 -2
- data/spec/pwrake_spec.rb +9 -8
- metadata +50 -21
- data/lib/pwrake.rb +0 -19
- data/lib/pwrake/application.rb +0 -232
- data/lib/pwrake/counter.rb +0 -54
- data/lib/pwrake/file_utils.rb +0 -98
- data/lib/pwrake/gfwhere_pool.rb +0 -109
- data/lib/pwrake/host_list.rb +0 -88
- data/lib/pwrake/locality_aware_queue.rb +0 -413
- data/lib/pwrake/option.rb +0 -400
- data/lib/pwrake/rake_modify.rb +0 -14
- data/lib/pwrake/shell.rb +0 -186
- data/lib/pwrake/task_algorithm.rb +0 -475
- data/lib/pwrake/task_queue.rb +0 -633
- data/lib/pwrake/timer.rb +0 -22
data/lib/pwrake/version.rb
CHANGED
@@ -0,0 +1,169 @@
|
|
1
|
+
module Pwrake
|
2
|
+
|
3
|
+
class Executor
|
4
|
+
|
5
|
+
# Registry of executors keyed by id: an executor stores itself here in
# #initialize and removes itself in #join.
LIST = {}
|
6
|
+
# Alphabet from which the characters of the random terminator are drawn.
CHARS='0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
|
7
|
+
# Number of characters in the terminator string.
TLEN=32
|
8
|
+
|
9
|
+
# Prepares one executor: stores its id and shell settings, builds a random
# TLEN-character terminator from CHARS, creates the three pipes used for the
# shell's stdin/stdout/stderr, registers itself in LIST and starts the
# worker thread.
#
# dir_class :: class instantiated (without arguments) as the directory handler
# id        :: key under which this executor is stored in LIST
# shell_cmd :: shell to spawn; falls back to ENV['SHELL'], then '/bin/sh'
# shell_rc  :: commands fed through run_rc once the shell has been spawned
def initialize(dir_class,id,shell_cmd,shell_rc)
  @id = id
  @shell_cmd = shell_cmd || ENV['SHELL'] || '/bin/sh'
  @shell_rc = shell_rc
  @terminator = Array.new(TLEN){ CHARS[rand(CHARS.length)] }.join
  @out = Writer.instance
  @log = LogExecutor.instance
  @queue = Queue.new
  @dir = dir_class.new
  # IO.pipe returns [read end, write end]; the @spawn_* ends are the ones
  # passed to Kernel.spawn by the worker thread.
  @spawn_in, @sh_in = IO.pipe
  @sh_out, @spawn_out = IO.pipe
  @sh_err, @spawn_err = IO.pipe
  LIST[@id] = self
  @exec_thread = start_exec_thread
end
|
25
|
+
|
26
|
+
# Appends +cmd+ to this executor's queue; the worker thread dequeues and
# runs the entries in order.
def execute(cmd)
  @queue.push(cmd)
end
|
29
|
+
|
30
|
+
# Starts the worker thread that owns the shell process and returns the Thread.
#
# The thread opens the directory handler, spawns @shell_cmd connected to the
# three pipes, prints "<id>:open" on @out, feeds every @shell_rc entry to
# run_rc and then runs queued commands until a nil/false entry is dequeued.
# After that it sends "exit" to the shell.  A spawned shell is always reaped
# (INT is sent if it has not exited within 5 seconds) and the directory
# handler is always closed.  Any StandardError is reported as "<id>:exc:..."
# on @out and logged.
def start_exec_thread
  Thread.new do
    begin
      @dir.open
      @dir.open_messages.each{|m| @log.info(m)}
      begin
        @pid = Kernel.spawn(@shell_cmd,
                            :out=>@spawn_out,
                            :err=>@spawn_err,
                            :in=>@spawn_in,
                            :chdir=>@dir.current)
        @out.puts "#{@id}:open"
        @shell_rc.each{|cmd| run_rc(cmd)}
        while cmd = @queue.deq
          run(cmd)
        end
        @sh_in.puts("exit")
        @sh_in.flush
      ensure
        # @pid is still nil when Kernel.spawn itself failed.  Reaping then
        # would raise TypeError from inside this ensure and replace the real
        # spawn error that the outer rescue is supposed to report.
        if @pid
          status = nil
          begin
            # Timeout.timeout replaces the deprecated Object#timeout.
            Timeout.timeout(5){ _,status = Process.waitpid2(@pid) }
          rescue
            @log.info("#{@id}:kill INT sh @pid=#{@pid}")
            Process.kill("INT",@pid)
            _,status = Process.waitpid2(@pid)
          end
          @log.info("shell exit status: "+status.inspect)
        end
      end
    rescue => exc
      @out.puts "#{@id}:exc:#{exc}"
      @log.error exc
    ensure
      @dir.close_messages.each{|m| @log.info(m)}
      @dir.close
    end
  end
end
|
72
|
+
|
73
|
+
# Dispatches one queued entry:
#   Proc          - called directly
#   "cd"          - @dir.cd without argument, then "cd <@dir.current>" in the shell
#   "cd <path>"   - @dir.cd(path), then "cd <@dir.current>" in the shell
#   "exit..."     - close is called and "<id>:exit" is printed on @out
#   other String  - passed to run_command
# Anything else raises RuntimeError.
def run(cmd)
  case cmd
  when Proc then cmd.call
  when "cd"
    @dir.cd
    run_command("cd "+@dir.current)
  when /^cd\s+(.*)$/
    destination = Regexp.last_match(1)
    @dir.cd(destination)
    run_command("cd "+@dir.current)
  when /^exit\b/
    close
    @out.puts "#{@id}:exit"
  when String then run_command(cmd)
  else
    raise RuntimeError,"invalid cmd: #{cmd.inspect}"
  end
end
|
96
|
+
|
97
|
+
# Runs +cmd+ through run_command_main and sends every line it yields to
# @log.info, prefixed with "<" (nothing is logged when @log is unset).
def run_rc(cmd)
  run_command_main(cmd) do |line|
    @log.info "<"+line if @log
  end
end
|
100
|
+
|
101
|
+
# Runs +cmd+ through run_command_main and prints every line it yields on @out.
def run_command(cmd)
  run_command_main(cmd) do |line|
    @out.puts line
  end
end
|
104
|
+
|
105
|
+
# Sends +cmd+ to the shell and relays its output to the given block.
#
# A command ending in a backslash is only forwarded (the command line
# continues) and the method returns nil without reading anything.
# Otherwise two echo commands are appended so that the shell prints the
# terminator followed by ":<exit status>" on stdout and the bare terminator
# on stderr.  Output is then read from both pipes and yielded as
# "<id>:o:<line>" / "<id>:e:<line>" until both terminators were seen, and
# finally "<id>:z:<status>" is yielded.
#
# Fixes over the previous version:
# * The terminator is also recognised when it is not at the start of a line.
#   Output lacking a trailing newline (printf, echo -n, ...) is glued in
#   front of the terminator; formerly that line was relayed verbatim and the
#   loop waited for a terminator forever.
# * EOF on a pipe (gets returning nil) ends reading of that pipe instead of
#   raising NoMethodError.
#
# NOTE(review): a stderr line that merely ends with the terminator text (for
# example shell xtrace echoing the marker command) is taken as the end marker
# as well - confirm whether xtrace has to be supported.
def run_command_main(cmd)
  if /\\$/ =~ cmd # command line continues
    @sh_in.puts(cmd)
    @sh_in.flush
    return
  end
  term = "\necho '#{@terminator}':$? \necho '#{@terminator}' 1>&2"
  @sh_in.puts(cmd+term)
  @sh_in.flush
  tlen = @terminator.length
  status = ""
  io_set = [@sh_out,@sh_err]
  until io_set.empty?
    io_sel, = IO.select(io_set,nil,nil)
    io_sel.each do |io|
      line = io.gets
      if line.nil?
        # pipe closed: nothing more will arrive from this stream
        io_set.delete(io)
        next
      end
      s = line.chomp
      # Plain string search (no Regexp) so that output containing bytes that
      # are invalid in the current encoding is handled like before.
      pos = s.rindex(@terminator)
      if io.equal?(@sh_out)
        if pos && s[pos+tlen,1] == ":"
          yield "#{@id}:o:"+s[0,pos] if pos > 0
          status = s[pos+tlen+1..-1]
          io_set.delete(io)
        else
          yield "#{@id}:o:"+s
        end
      else
        if pos && pos+tlen == s.length
          yield "#{@id}:e:"+s[0,pos] if pos > 0
          io_set.delete(io)
        else
          yield "#{@id}:e:"+s
        end
      end
    end
  end
  yield "#{@id}:z:#{status}"
end
|
140
|
+
|
141
|
+
# Queues a nil entry; the worker thread's dequeue loop stops when it
# reaches it (threads end).
def close
  @queue.enq(nil)
end
|
144
|
+
|
145
|
+
# Removes this executor from LIST and waits up to 15 seconds for the worker
# thread, if one was started.
def join
  LIST.delete(@id)
  return unless @exec_thread
  @exec_thread.join(15)
end
|
149
|
+
|
150
|
+
# Drops all queued commands and sends +sig+ to every direct child process of
# the shell (as listed by `ps --ppid`), then flushes the shell-side pipe ends.
#
# sig :: signal name or number accepted by Process.kill
#
# Fixes over the previous version:
# * lines of ps output that do not yield a positive pid are skipped; a blank
#   line gave pid 0, and Process.kill(sig, 0) signals the worker's own
#   process group;
# * a child that exits between ps and kill (Errno::ESRCH) no longer aborts
#   the loop and propagates to the caller.
def kill(sig)
  @queue.clear
  if @pid
    # kill the children of the shell
    s = `ps ho pid --ppid=#{@pid}`
    killed = 0
    s.each_line do |x|
      pid = x.to_i
      next unless pid > 0
      begin
        Process.kill(sig,pid)
        killed += 1
        @log.warn "Executor(id=#{@id})#kill pid=#{pid} sig=#{sig}"
      rescue Errno::ESRCH
        @log.warn "Executor(id=#{@id})#kill pid=#{pid} already exited"
      end
    end
    if killed == 0
      @log.warn "Executor(id=#{@id})#kill nothing killed"
    end
  end
  @spawn_out.flush
  @spawn_err.flush
end
|
167
|
+
|
168
|
+
end
|
169
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
module Pwrake
|
2
|
+
|
3
|
+
class GfarmDirectory < SharedDirectory
|
4
|
+
# Base of every mount point path; set from opts[:base_dir] in init and
# combined with a per-instance suffix in #initialize.
@@prefix = nil
|
5
|
+
# Set from opts[:work_dir] in init.
@@work_dir = nil
|
6
|
+
# Set from opts[:log_dir] in init.
@@log_dir = nil
|
7
|
+
# Options inserted into the gfarm2fs command line in #open.
@@gfarm2fs_option = nil
|
8
|
+
# When truthy (and the logger has a path) #open runs gfarm2fs with -d and
# redirects its output to a file.
@@gfarm2fs_debug = nil
|
9
|
+
# Argument of the `sleep` that follows the backgrounded gfarm2fs in debug mode.
@@gfarm2fs_debug_wait = 1
|
10
|
+
# Counter that gives each instance its @id.
@@current_id = 0
|
11
|
+
# Output of `hostname`, captured once when the class is loaded.
@@hostname = `hostname`.chomp
|
12
|
+
|
13
|
+
# Copies the Gfarm related settings from +opts+ into class variables and
# changes the working directory to ENV['HOME'].
#
# opts :: Hash read with the keys :base_dir, :work_dir, :log_dir,
#         :gfarm2fs_option, :gfarm2fs_debug and :gfarm2fs_debug_wait
#
# The declared default of 1 for the debug wait is kept when the option is
# absent; overwriting it with nil would turn the debug mount command into a
# bare "sleep" without argument.
def self.init(opts)
  @@prefix = opts[:base_dir]
  @@work_dir = opts[:work_dir]
  @@log_dir = opts[:log_dir]
  @@gfarm2fs_option = opts[:gfarm2fs_option]
  @@gfarm2fs_debug = opts[:gfarm2fs_debug]
  @@gfarm2fs_debug_wait = opts[:gfarm2fs_debug_wait] || 1
  Dir.chdir(ENV['HOME'])
end
|
22
|
+
|
23
|
+
# Takes the next value of the class-wide counter as @id and derives the
# mount point path "<prefix>_<pid>_<id>" (pid zero-padded to 5 digits, id to
# 3) from it.
def initialize
  super
  @id = @@current_id
  @@current_id += 1
  @suffix = format("%05d_%03d", Process.pid, @id)
  @gfarm_mountpoint = @@prefix+"_"+@suffix
end
|
30
|
+
|
31
|
+
# Returns this instance's Gfarm mount point wrapped in a Pathname.
def home_path
  mountpoint = @gfarm_mountpoint
  Pathname.new(mountpoint)
end
|
34
|
+
|
35
|
+
# Runs +cmd+ with stdout and stderr captured through a pipe and waits for it.
#
# Returns the captured output as an Array of chomped lines.  On success the
# command (plus its output, if any) is logged with @log.info; on a non-zero
# exit status a RuntimeError carrying the command and its output is raised.
#
# The pipe is drained *before* the child is reaped: waiting first deadlocks
# as soon as the child writes more than the pipe buffer holds, because the
# child blocks on write while the parent blocks in waitpid.  Both pipe ends
# are closed on every path, including a failing spawn.
def spawn_cmd(cmd)
  @log.info "spawn_cmd: "+cmd
  r,w = IO.pipe
  begin
    pid = spawn(cmd,[:out,:err]=>w)
    w.close
    a = r.readlines.map{|s| s.chomp}
    _,status = Process.waitpid2(pid)
  ensure
    w.close unless w.closed?
    r.close
  end
  if status.success?
    msg = a.empty? ? cmd : cmd+" => #{a.join(',')}"
    @log.info msg
  else
    msg = "failed to execute `#{cmd}' => #{a.join(',')}"
    raise msg
  end
  a
end
|
54
|
+
|
55
|
+
# Creates the mount point directory, mounts Gfarm on it with gfarm2fs and
# then calls the superclass #open.
#
# In debug mode (@@gfarm2fs_debug set and the logger has a path) gfarm2fs is
# started in the background with -d, its output is redirected to a
# "gfarm2fs-<hostname>-<suffix>" file under the log path, and the command
# sleeps @@gfarm2fs_debug_wait afterwards.  A failing mount command is
# re-raised after a one second pause.
def open
  FileUtils.mkdir_p @gfarm_mountpoint
  path = @log.path
  begin
    if @@gfarm2fs_debug && path
      # reuse the host name cached in @@hostname (as #close does) instead of
      # running `hostname` again for every mount
      f = path+("gfarm2fs-"+@@hostname+"-"+@suffix)
      spawn_cmd "gfarm2fs #{@@gfarm2fs_option} -d #{@gfarm_mountpoint} > #{f} 2>&1 & sleep #{@@gfarm2fs_debug_wait}"
    else
      spawn_cmd "gfarm2fs #{@@gfarm2fs_option} #{@gfarm_mountpoint}"
    end
  rescue
    sleep 1
    raise
  end
  super
end
|
71
|
+
|
72
|
+
# Calls the superclass #close, then, if the mount point directory exists,
# unmounts it with fusermount, runs sync and removes the directory.
#
# Unmounting and removal stay best effort (no exception leaves this method
# from them), but a failing fusermount is now logged instead of being
# swallowed silently, so a later rmdir failure can be explained.
def close
  super
  if File.directory? @gfarm_mountpoint
    begin
      spawn_cmd "fusermount -u #{@gfarm_mountpoint}"
    rescue => exc
      @log.error "failed to unmount #{@gfarm_mountpoint} @#{@@hostname}: #{exc.message}"
    end
    system "sync"
    begin
      FileUtils.rmdir @gfarm_mountpoint
      @log.info "rmdir #{@gfarm_mountpoint} @#{@@hostname}"
    rescue
      @log.error "failed to rmdir #{@gfarm_mountpoint} @#{@@hostname}"
    end
  end
end
|
88
|
+
|
89
|
+
end
|
90
|
+
end
|
@@ -0,0 +1,199 @@
|
|
1
|
+
require "timeout"
|
2
|
+
|
3
|
+
module Pwrake
|
4
|
+
|
5
|
+
class Invoker
|
6
|
+
|
7
|
+
# Sets up the worker-side invoker: wires the Writer and LogExecutor
# singletons together, resolves the number of cores to use, reports it as
# "ncore:<n>" on @out and makes the process ignore SIGPIPE.
#
# dir_class :: directory handler class, passed on to the logger and executors
# ncore     :: nil (use every detected core), a positive Integer (use exactly
#              that many) or zero/negative Integer (detected count plus ncore)
# option    :: option Hash handed to the logger and read by setup_option
#
# Raises ArgumentError (after printing "ncore:<message>" on @out) when ncore
# is neither nil nor an Integer, or when the resulting count is not positive.
def initialize(dir_class, ncore, option)
  @dir_class = dir_class
  @option = option
  @out = Writer.instance # firstly replace $stderr
  @log = LogExecutor.instance
  @log.init(@option)
  @log.open(@dir_class)
  @out.add_logger(@log)
  ncore_max = processor_count()
  reject = lambda do |m|
    @out.puts "ncore:"+m
    raise ArgumentError,m
  end
  case ncore
  when Integer
    @ncore = ncore > 0 ? ncore : ncore_max + ncore
    reject.call("Out of range: ncore=#{ncore.inspect}") if @ncore <= 0
  when nil
    @ncore = ncore_max
  else
    reject.call("Invalid argument: ncore=#{ncore.inspect}")
  end
  @out.puts "ncore:#{@ncore}"
  # does NOT exit when writing to broken pipe
  Signal.trap("PIPE", "SIG_IGN")
end
|
38
|
+
|
39
|
+
# Reads the next line from $stdin, strips it, logs it as ">line" and returns
# it.  The process exits on end of input or when reading raises a
# StandardError.
def get_line
  line = $stdin.gets
  exit unless line
  line.chomp!
  line.strip!
  @log.info ">#{line}"
  line
rescue
  exit
end
|
51
|
+
|
52
|
+
# Main entry: reads the options, runs the setup phase and - only if that
# ended with "setup_end" - starts the heartbeat and the command loop.
# close_all is always run at the end.
def run
  setup_option
  return unless setup_loop
  start_heartbeat
  command_loop
ensure
  close_all
end
|
61
|
+
|
62
|
+
# Logs the option Hash and copies the settings used later: heartbeat
# interval, shell command and shell rc commands (empty list when absent).
# Every pair under :pass_env is exported into ENV.
def setup_option
  @log.info @option.inspect
  @heartbeat_interval = @option[:heartbeat]
  @shell_cmd = @option[:shell_command]
  @shell_rc = @option[:shell_rc] || []
  exported = @option[:pass_env] || {}
  exported.each{|name,value| ENV.store(name, value)}
end
|
71
|
+
|
72
|
+
# Setup phase of the protocol.  "<digits>:open" creates an Executor for that
# id, "setup_end" finishes the phase successfully, and every other line goes
# to common_line.
#
# Returns true after "setup_end"; false when common_line asks to stop or
# get_line returns nothing.
def setup_loop
  loop do
    line = get_line
    break unless line
    case line
    when /^(\d+):open$/o
      Regexp.last_match(1).split.each do |id|
        Executor.new(@dir_class,id,@shell_cmd,@shell_rc)
      end
    when "setup_end"
      return true
    else
      return false if common_line(line)
    end
  end
  false
end
|
87
|
+
|
88
|
+
# When a heartbeat interval is configured, starts a thread that prints
# "heartbeat" on @out and then sleeps for the interval, forever.  The thread
# is kept in @heartbeat_thread (close_all kills it).
def start_heartbeat
  return unless @heartbeat_interval
  @heartbeat_thread = Thread.new do
    loop do
      @out.puts "heartbeat"
      sleep @heartbeat_interval
    end
  end
end
|
98
|
+
|
99
|
+
# Command phase of the protocol.  A line "<digits>:<command>" is queued on
# the executor registered under that id; an unknown id gets a new Executor,
# except that "exit" for an unknown id is just answered with "<id>:end".
# Every other line goes to common_line, which may end the loop.
def command_loop
  while (line = get_line)
    m = /^(\d+):(.*)$/o.match(line)
    unless m
      break if common_line(line)
      next
    end
    id, cmd = m[1], m[2]
    ex = Executor::LIST[id]
    if ex.nil?
      if cmd=="exit"
        @out.puts "#{id}:end"
        next
      end
      ex = Executor.new(@dir_class,id,@shell_cmd,@shell_rc)
    end
    ex.execute(cmd)
  end
end
|
119
|
+
|
120
|
+
# Handles the lines shared by the setup and command phases.
#
# Returns true for "exit" (the caller should stop) and false after handling
# "kill:<signal>" (forwarded to kill_all) or "p" (prints Executor::LIST).
# Any other line is logged as fatal and raises RuntimeError.
def common_line(line)
  return true if /^exit$/o =~ line
  if /^kill:(.*)$/o =~ line
    kill_all(Regexp.last_match(1))
    return false
  end
  if /^p$/o =~ line
    puts "Executor::LIST = #{Executor::LIST.inspect}"
    return false
  end
  msg = "invalid line: #{line}"
  @log.fatal msg
  raise RuntimeError,msg
end
|
139
|
+
|
140
|
+
# Forwards +sig+ to every registered executor.  A signal made up of digits
# only is converted to an Integer first; anything else is passed on as is.
def kill_all(sig)
  sig = sig.to_i if /^\d+$/o =~ sig
  @log.warn "worker_killed:signal=#{sig}"
  Executor::LIST.each_value{|executor| executor.kill(sig)}
end
|
145
|
+
|
146
|
+
# Shuts the worker down: stops the heartbeat thread, changes to the home
# directory, asks every executor to close, joins them, closes the logger
# (giving it at most 20 seconds) and finally prints "exited" on @out.
#
# Changes over the previous version:
# * Timeout.timeout replaces the deprecated Object#timeout;
# * a join that raises is logged and the remaining executors are still
#   joined (formerly the first failure skipped all the others).
def close_all
  @log.info "close_all"
  @heartbeat_thread.kill if @heartbeat_thread
  Dir.chdir
  id_list = Executor::LIST.keys
  ex_list = Executor::LIST.values
  ex_list.each{|ex| ex.close}
  ex_list.each do |ex|
    begin
      ex.join
    rescue => e
      @log.error e
      @log.error e.backtrace.join("\n")
    end
  end
  @log.info "worker:end:#{id_list.inspect}"
  begin
    Timeout.timeout(20){@log.close}
  rescue => e
    $stdout.puts e
    $stdout.puts e.backtrace.join("\n")
  end
  @out.puts "exited"
end
|
168
|
+
|
169
|
+
# from Michael Grosser's parallel
|
170
|
+
# https://github.com/grosser/parallel
|
171
|
+
# Returns the number of processors of this host, chosen by host OS:
# "processor" entries of /proc/cpuinfo on Linux/Cygwin, hwprefs or sysctl on
# macOS, sysctl on Open/FreeBSD, WMI on Windows and psrinfo on Solaris.
# Raises RuntimeError for any other host_os.
#
# Changes over the previous version:
# * the darwin branch called hwprefs_available?, which this class does not
#   define; the availability check is now done inline;
# * /proc/cpuinfo is read with File.foreach, which closes the file (the
#   handle returned by open was never closed).
def processor_count
  host_os = RbConfig::CONFIG['host_os']
  case host_os
  when /linux|cygwin/
    File.foreach("/proc/cpuinfo").count{|l| /^processor\s+: \d+/=~l}
  when /darwin9/
    `hwprefs cpu_count`.to_i
  when /darwin/
    hwprefs = `which hwprefs 2>/dev/null`.chomp
    (hwprefs.empty? ? `sysctl -n hw.ncpu` : `hwprefs thread_count`).to_i
  when /(open|free)bsd/
    `sysctl -n hw.ncpu`.to_i
  when /mswin|mingw/
    require 'win32ole'
    wmi = WIN32OLE.connect("winmgmts://")
    cpu = wmi.ExecQuery("select NumberOfLogicalProcessors from Win32_Processor")
    cpu.to_enum.first.NumberOfLogicalProcessors
  when /solaris2/
    `psrinfo -p`.to_i # physical cpus
  else
    raise "Unknown architecture: #{host_os}"
  end
end
|
197
|
+
|
198
|
+
end
|
199
|
+
end
|