pwrake 0.9.9.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/CHANGES_V2.md +90 -0
- data/{LICENSE.txt → MIT-LICENSE} +2 -3
- data/README +12 -0
- data/README.md +75 -52
- data/bin/gfwhere-pipe +23 -12
- data/bin/pwrake +22 -29
- data/bin/pwrake_branch +24 -0
- data/lib/pwrake/branch.rb +22 -0
- data/lib/pwrake/branch/branch.rb +213 -0
- data/lib/pwrake/branch/branch_application.rb +53 -0
- data/lib/pwrake/branch/fiber_queue.rb +36 -0
- data/lib/pwrake/branch/file_utils.rb +101 -0
- data/lib/pwrake/branch/shell.rb +231 -0
- data/lib/pwrake/{profiler.rb → branch/shell_profiler.rb} +28 -27
- data/lib/pwrake/branch/worker_communicator.rb +104 -0
- data/lib/pwrake/{gfarm_feature.rb → gfarm/gfarm_path.rb} +2 -100
- data/lib/pwrake/gfarm/gfarm_postprocess.rb +53 -0
- data/lib/pwrake/iomux/channel.rb +70 -0
- data/lib/pwrake/iomux/handler.rb +124 -0
- data/lib/pwrake/iomux/handler_set.rb +35 -0
- data/lib/pwrake/iomux/runner.rb +62 -0
- data/lib/pwrake/logger.rb +3 -150
- data/lib/pwrake/master.rb +30 -137
- data/lib/pwrake/master/fiber_pool.rb +69 -0
- data/lib/pwrake/master/idle_cores.rb +30 -0
- data/lib/pwrake/master/master.rb +345 -0
- data/lib/pwrake/master/master_application.rb +150 -0
- data/lib/pwrake/master/postprocess.rb +16 -0
- data/lib/pwrake/{graphviz.rb → misc/graphviz.rb} +0 -0
- data/lib/pwrake/{mcgp.rb → misc/mcgp.rb} +63 -42
- data/lib/pwrake/option/host_map.rb +158 -0
- data/lib/pwrake/option/option.rb +357 -0
- data/lib/pwrake/option/option_filesystem.rb +112 -0
- data/lib/pwrake/queue/locality_aware_queue.rb +158 -0
- data/lib/pwrake/queue/no_action_queue.rb +67 -0
- data/lib/pwrake/queue/queue_array.rb +366 -0
- data/lib/pwrake/queue/task_queue.rb +164 -0
- data/lib/pwrake/report.rb +1 -0
- data/lib/pwrake/report/parallelism.rb +9 -3
- data/lib/pwrake/report/report.rb +50 -103
- data/lib/pwrake/report/task_stat.rb +83 -0
- data/lib/pwrake/task/task_algorithm.rb +107 -0
- data/lib/pwrake/task/task_manager.rb +32 -0
- data/lib/pwrake/task/task_property.rb +98 -0
- data/lib/pwrake/task/task_rank.rb +48 -0
- data/lib/pwrake/task/task_wrapper.rb +296 -0
- data/lib/pwrake/version.rb +1 -1
- data/lib/pwrake/worker/executor.rb +169 -0
- data/lib/pwrake/worker/gfarm_directory.rb +90 -0
- data/lib/pwrake/worker/invoker.rb +199 -0
- data/lib/pwrake/worker/load.rb +14 -0
- data/lib/pwrake/worker/log_executor.rb +73 -0
- data/lib/pwrake/worker/shared_directory.rb +74 -0
- data/lib/pwrake/worker/worker_main.rb +14 -0
- data/lib/pwrake/worker/writer.rb +59 -0
- data/setup.rb +1212 -1502
- data/spec/003/Rakefile +2 -2
- data/spec/008/Rakefile +2 -1
- data/spec/009/Rakefile +1 -1
- data/spec/009/pwrake_conf.yaml +1 -3
- data/spec/hosts +0 -2
- data/spec/pwrake_spec.rb +9 -8
- metadata +50 -21
- data/lib/pwrake.rb +0 -19
- data/lib/pwrake/application.rb +0 -232
- data/lib/pwrake/counter.rb +0 -54
- data/lib/pwrake/file_utils.rb +0 -98
- data/lib/pwrake/gfwhere_pool.rb +0 -109
- data/lib/pwrake/host_list.rb +0 -88
- data/lib/pwrake/locality_aware_queue.rb +0 -413
- data/lib/pwrake/option.rb +0 -400
- data/lib/pwrake/rake_modify.rb +0 -14
- data/lib/pwrake/shell.rb +0 -186
- data/lib/pwrake/task_algorithm.rb +0 -475
- data/lib/pwrake/task_queue.rb +0 -633
- data/lib/pwrake/timer.rb +0 -22
data/lib/pwrake/version.rb
CHANGED
@@ -0,0 +1,169 @@
|
|
1
|
+
require "timeout"

module Pwrake

  # Runs commands for one worker id inside a single long-lived shell process.
  #
  # An Executor spawns @shell_cmd connected through three pipes, then a
  # dedicated thread takes commands from a queue and writes them to the
  # shell's stdin.  Every line the shell prints is reported through the
  # block given to #run_command_main, prefixed with "<id>:o:" (stdout) or
  # "<id>:e:" (stderr); the end of a command is reported as
  # "<id>:z:<exit status>".
  class Executor

    # Registry of executors keyed by id; entries are removed by #join.
    LIST = {}
    # Alphabet used to build the random end-of-command marker.
    CHARS='0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    # Length of the end-of-command marker.
    TLEN=32

    # dir_class - class whose instance manages the working directory; the
    #             instance must respond to open, open_messages, current,
    #             cd, close_messages and close (the calls made below).
    # id        - identifier used as prefix of every reported line.
    # shell_cmd - shell to spawn; falls back to $SHELL, then /bin/sh.
    # shell_rc  - list of commands run in the shell before queued commands.
    def initialize(dir_class,id,shell_cmd,shell_rc)
      @id = id
      @shell_rc = shell_rc
      @shell_cmd = shell_cmd || ENV['SHELL'] || '/bin/sh'
      # Random marker echoed after each command so that its end can be
      # detected in the shell's output streams.
      @terminator = Array.new(TLEN){ CHARS[rand(CHARS.length)] }.join
      @out = Writer.instance
      @log = LogExecutor.instance
      @queue = Queue.new
      @dir = dir_class.new
      @spawn_in, @sh_in = IO.pipe
      @sh_out, @spawn_out = IO.pipe
      @sh_err, @spawn_err = IO.pipe
      LIST[@id] = self
      @exec_thread = start_exec_thread
    end

    # Queues a command (String or Proc) for the shell thread.
    # A nil entry makes the thread leave its command loop.
    def execute(cmd)
      @queue.enq(cmd)
    end

    # Starts the thread that opens the directory, spawns the shell, runs
    # the rc commands and then the queued commands until nil is dequeued.
    # Any StandardError is reported as "<id>:exc:<message>" and logged.
    def start_exec_thread
      Thread.new do
        begin
          @dir.open
          @dir.open_messages.each{|m| @log.info(m)}
          begin
            @pid = Kernel.spawn(@shell_cmd,
                                :out=>@spawn_out,
                                :err=>@spawn_err,
                                :in=>@spawn_in,
                                :chdir=>@dir.current)
            @out.puts "#{@id}:open"
            @shell_rc.each do |cmd|
              run_rc(cmd)
            end
            while cmd = @queue.deq
              run(cmd)
            end
            @sh_in.puts("exit")
            @sh_in.flush
          ensure
            # When spawn itself failed there is no child to reap; waiting
            # on a nil pid would raise and hide the original error.
            wait_shell if @pid
          end
        rescue => exc
          @out.puts "#{@id}:exc:#{exc}"
          @log.error exc
        ensure
          @dir.close_messages.each{|m| @log.info(m)}
          @dir.close
        end
      end
    end

    # Dispatches one queued command:
    #   Proc        - called directly
    #   "cd"        - @dir.cd, then cd of the shell to @dir.current
    #   "cd <path>" - @dir.cd(path), then cd of the shell to @dir.current
    #   "exit..."   - requests thread termination and reports "<id>:exit"
    #   String      - sent to the shell as is
    # Raises RuntimeError for anything else.
    def run(cmd)
      case cmd
      when Proc
        cmd.call
      when "cd"
        @dir.cd
        run_command("cd "+@dir.current)
      when /^cd\s+(.*)$/
        @dir.cd($1)
        run_command("cd "+@dir.current)
      when /^exit\b/
        close
        @out.puts "#{@id}:exit"
      when String
        run_command(cmd)
      else
        raise RuntimeError,"invalid cmd: #{cmd.inspect}"
      end
    end

    # Runs an rc command; its output goes to the log, prefixed with "<".
    def run_rc(cmd)
      run_command_main(cmd){|s| @log.info "<"+s if @log}
    end

    # Runs a command; its output goes to the writer (@out).
    def run_command(cmd)
      run_command_main(cmd){|s| @out.puts s}
    end

    # Sends cmd to the shell and yields each output line until the
    # terminator has been seen on both stdout and stderr, then yields
    # "<id>:z:<status>".  A line ending with a backslash is only forwarded
    # (the command continues on the next line) and nothing is yielded.
    def run_command_main(cmd)
      if /\\$/ =~ cmd  # command line continues
        @sh_in.puts(cmd)
        @sh_in.flush
        return
      end
      term = "\necho '#{@terminator}':$? \necho '#{@terminator}' 1>&2"
      @sh_in.puts(cmd+term)
      @sh_in.flush
      status = ""
      io_set = [@sh_out,@sh_err]
      loop do
        io_sel, = IO.select(io_set,nil,nil)
        io_sel.each do |io|
          line = io.gets
          if line.nil?
            # End of file: stop watching this stream instead of failing
            # on nil.chomp.
            io_set.delete(io)
            next
          end
          s = line.chomp
          case io
          when @sh_out
            if s[0,TLEN] == @terminator
              # stdout marker has the form "<terminator>:<exit status>"
              status = s[TLEN+1..-1]
              io_set.delete(@sh_out)
            else
              yield "#{@id}:o:"+s
            end
          when @sh_err
            if s[0,TLEN] == @terminator
              io_set.delete(@sh_err)
            else
              yield "#{@id}:e:"+s
            end
          end
        end
        break if io_set.empty?
      end
      yield "#{@id}:z:#{status}"
    end

    # Asks the shell thread to finish by queueing nil.
    def close
      execute(nil) # threads end
    end

    # Unregisters this executor and waits up to 15 seconds for its thread.
    def join
      LIST.delete(@id)
      @exec_thread.join(15) if @exec_thread
    end

    # Drops all pending commands and sends sig to every child process of
    # the shell, as listed by `ps --ppid`.
    def kill(sig)
      @queue.clear
      if @pid
        # kill the children of the shell
        s = `ps ho pid --ppid=#{@pid}`
        s.each_line do |x|
          pid = x.to_i
          # to_i gives 0 for a non-numeric line; signalling pid 0 would
          # hit the worker's own process group.
          next if pid <= 0
          begin
            Process.kill(sig,pid)
            @log.warn "Executor(id=#{@id})#kill pid=#{pid} sig=#{sig}"
          rescue Errno::ESRCH
            # The child exited between ps and kill; keep going.
            @log.warn "Executor(id=#{@id})#kill pid=#{pid} already exited"
          end
        end
        if s.empty?
          @log.warn "Executor(id=#{@id})#kill nothing killed"
        end
      end
      @spawn_out.flush
      @spawn_err.flush
    end

    # Reaps the shell: waits up to 5 seconds, and on timeout or any other
    # StandardError sends INT and waits again.  Logs the exit status.
    def wait_shell
      status = nil
      begin
        Timeout.timeout(5){
          _,status = Process.waitpid2(@pid)
        }
      rescue
        @log.info("#{@id}:kill INT sh @pid=#{@pid}")
        Process.kill("INT",@pid)
        _,status = Process.waitpid2(@pid)
      end
      @log.info("shell exit status: "+status.inspect)
    end
    private :wait_shell

  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
module Pwrake

  # Working directory backed by a per-instance gfarm2fs mount.
  #
  # Each instance gets its own mount point "<base_dir>_<pid>_<seq>", which
  # is mounted with gfarm2fs in #open and unmounted with fusermount and
  # removed in #close.
  # NOTE(review): @log is used here but not assigned in this class;
  # presumably SharedDirectory sets it - confirm.
  class GfarmDirectory < SharedDirectory
    @@prefix = nil
    @@work_dir = nil
    @@log_dir = nil
    @@gfarm2fs_option = nil
    @@gfarm2fs_debug = nil
    @@gfarm2fs_debug_wait = 1
    @@current_id = 0
    @@hostname = `hostname`.chomp

    # Stores the class-wide settings taken from opts (:base_dir, :work_dir,
    # :log_dir, :gfarm2fs_option, :gfarm2fs_debug, :gfarm2fs_debug_wait)
    # and changes the process working directory to $HOME.
    def self.init(opts)
      @@prefix = opts[:base_dir]
      @@work_dir = opts[:work_dir]
      @@log_dir = opts[:log_dir]
      @@gfarm2fs_option = opts[:gfarm2fs_option]
      @@gfarm2fs_debug = opts[:gfarm2fs_debug]
      # Keep the 1 second default when the option is not given; a nil
      # would otherwise end up as a bare "sleep " in the mount command.
      @@gfarm2fs_debug_wait = opts[:gfarm2fs_debug_wait] || 1
      Dir.chdir(ENV['HOME'])
    end

    # Assigns the next sequence number and derives the mount point name.
    def initialize
      super
      @id = @@current_id
      @@current_id += 1
      @suffix = "%05d_%03d" % [Process.pid,@id]
      @gfarm_mountpoint = @@prefix+"_"+@suffix
    end

    # Returns the mount point as a Pathname.
    def home_path
      Pathname.new(@gfarm_mountpoint)
    end

    # Runs cmd through spawn with stdout and stderr merged into one pipe.
    # Returns the output lines (chomped) as an Array.
    # Raises RuntimeError carrying the output when the command fails.
    def spawn_cmd(cmd)
      @log.info "spawn_cmd: "+cmd
      r,w = IO.pipe
      begin
        pid = spawn(cmd,[:out,:err]=>w)
      ensure
        # Close our copy of the write end so that reading sees EOF.
        w.close
      end
      # Drain the pipe before waiting: a child that fills the pipe buffer
      # would block forever if we waited for it first.
      a = r.readlines.map{|s| s.chomp}
      _,status = Process.waitpid2(pid)
      if status.success?
        msg = a.empty? ? cmd : cmd+" => #{a.join(',')}"
        @log.info msg
      else
        msg = "failed to execute `#{cmd}' => #{a.join(',')}"
        raise msg
      end
      a
    ensure
      r.close if r && !r.closed?
    end

    # Creates the mount point and mounts gfarm2fs on it, then calls super.
    # With the debug option set and a log path available, gfarm2fs is run
    # with -d in the background, its output redirected to a file under the
    # log path.  On failure, sleeps one second and re-raises.
    def open
      FileUtils.mkdir_p @gfarm_mountpoint
      path = @log.path
      begin
        if @@gfarm2fs_debug && path
          f = path+("gfarm2fs-"+@@hostname+"-"+@suffix)
          spawn_cmd "gfarm2fs #{@@gfarm2fs_option} -d #{@gfarm_mountpoint} > #{f} 2>&1 & sleep #{@@gfarm2fs_debug_wait}"
        else
          spawn_cmd "gfarm2fs #{@@gfarm2fs_option} #{@gfarm_mountpoint}"
        end
      rescue
        sleep 1
        raise
      end
      super
    end

    # Calls super, then unmounts and removes the mount point if it exists.
    # Both steps are best effort: failures are logged, never raised.
    def close
      super
      if File.directory? @gfarm_mountpoint
        begin
          spawn_cmd "fusermount -u #{@gfarm_mountpoint}"
        rescue => exc
          @log.error "failed to unmount #{@gfarm_mountpoint} @#{@@hostname}: #{exc}"
        end
        system "sync"
        begin
          FileUtils.rmdir @gfarm_mountpoint
          @log.info "rmdir #{@gfarm_mountpoint} @#{@@hostname}"
        rescue
          @log.error "failed to rmdir #{@gfarm_mountpoint} @#{@@hostname}"
        end
      end
    end

  end
end
|
@@ -0,0 +1,199 @@
|
|
1
|
+
require "timeout"
|
2
|
+
|
3
|
+
module Pwrake

  # Worker-side command dispatcher.
  #
  # Reads protocol lines from $stdin, creates one Executor per id and
  # forwards "<id>:<command>" lines to it.  Replies are written through
  # Writer.instance.
  class Invoker

    # dir_class - directory class handed to the log and to every Executor.
    # ncore     - number of cores to report: a positive Integer is used as
    #             is, zero or a negative Integer is added to the detected
    #             processor count, nil means the detected count.
    # option    - Hash of worker options (see #setup_option).
    #
    # Reports "ncore:<n>" on success.  Raises ArgumentError, after
    # reporting "ncore:<message>", when ncore is not an Integer or nil or
    # when the resulting count is not positive.
    def initialize(dir_class, ncore, option)
      @dir_class = dir_class
      @option = option
      @out = Writer.instance # firstly replace $stderr
      @log = LogExecutor.instance
      @log.init(@option)
      @log.open(@dir_class)
      @out.add_logger(@log)
      ncore_max = processor_count()
      if ncore.kind_of?(Integer)
        if ncore > 0
          @ncore = ncore
        else
          @ncore = ncore_max + ncore
        end
        if @ncore <= 0
          m = "Out of range: ncore=#{ncore.inspect}"
          @out.puts "ncore:"+m
          raise ArgumentError,m
        end
      elsif ncore.nil?
        @ncore = ncore_max
      else
        m = "Invalid argument: ncore=#{ncore.inspect}"
        @out.puts "ncore:"+m
        raise ArgumentError,m
      end
      @out.puts "ncore:#{@ncore}"
      # does NOT exit when writing to broken pipe
      Signal.trap("PIPE", "SIG_IGN")
    end

    # Reads one line from $stdin, strips it, logs it prefixed with ">" and
    # returns it.  Exits the process at end of input or on a read error.
    def get_line
      begin
        line = $stdin.gets
        exit if !line
        line.chomp!
        line.strip!
        @log.info ">#{line}"
        return line
      rescue
        exit
      end
    end

    # Main entry point: applies the options, runs the setup phase and, if
    # it ended with "setup_end", the heartbeat and the command loop.
    # close_all is always run on the way out.
    def run
      setup_option
      if setup_loop
        start_heartbeat
        command_loop
      end
    ensure
      close_all
    end

    # Copies :heartbeat, :shell_command and :shell_rc from the options and
    # exports every pair of :pass_env into ENV.
    def setup_option
      @log.info @option.inspect
      @heartbeat_interval = @option[:heartbeat]
      @shell_cmd = @option[:shell_command]
      @shell_rc = @option[:shell_rc] || []
      (@option[:pass_env]||{}).each do |k,v|
        ENV[k] = v
      end
    end

    # Setup phase: "<id>:open" creates an Executor, "setup_end" finishes
    # the phase with true.  Returns false when "exit" is received.
    def setup_loop
      while line = get_line
        case line
        when /^(\d+):open$/o
          # The capture holds digits only, so it is exactly one id.
          Executor.new(@dir_class,$1,@shell_cmd,@shell_rc)
        when "setup_end"
          return true
        else
          return false if common_line(line)
        end
      end
      false
    end

    # Starts a thread writing "heartbeat" every @heartbeat_interval
    # seconds; does nothing when no interval is configured.
    def start_heartbeat
      if @heartbeat_interval
        @heartbeat_thread = Thread.new do
          while true
            @out.puts "heartbeat"
            sleep @heartbeat_interval
          end
        end
      end
    end

    # Command phase: forwards "<id>:<cmd>" to the Executor registered
    # under id, creating it on demand.  "<id>:exit" for an unknown id is
    # answered with "<id>:end".  Leaves the loop when "exit" is received.
    def command_loop
      while line = get_line
        case line
        when /^(\d+):(.*)$/o
          id,cmd = $1,$2
          ex = Executor::LIST[id]
          if ex.nil?
            if cmd=="exit"
              @out.puts "#{id}:end"
              next
            else
              ex = Executor.new(@dir_class,id,@shell_cmd,@shell_rc)
            end
          end
          ex.execute(cmd)
        else
          break if common_line(line)
        end
      end
    end

    # Handles the lines shared by both phases.
    # Returns true for "exit", false for "kill:<sig>" and "p".
    # Logs and raises RuntimeError for any other line.
    def common_line(line)
      case line
      when /^exit$/o
        return true
        #
      when /^kill:(.*)$/o
        kill_all($1)
        return false
        #
      when /^p$/o
        puts "Executor::LIST = #{Executor::LIST.inspect}"
        return false
        #
      else
        msg = "invalid line: #{line}"
        @log.fatal msg
        raise RuntimeError,msg
      end
    end

    # Sends sig to every registered Executor; an all-digit sig is turned
    # into an Integer first.
    def kill_all(sig)
      sig = sig.to_i if /^\d+$/o =~ sig
      @log.warn "worker_killed:signal=#{sig}"
      Executor::LIST.each{|id,exc| exc.kill(sig)}
    end

    # Stops the heartbeat, closes and joins every Executor, closes the log
    # (giving it at most 20 seconds) and finally reports "exited".
    def close_all
      @log.info "close_all"
      @heartbeat_thread.kill if @heartbeat_thread
      Dir.chdir
      id_list = Executor::LIST.keys
      ex_list = Executor::LIST.values
      ex_list.each{|ex| ex.close}
      begin
        ex_list.each{|ex| ex.join}
      rescue => e
        @log.error e
        @log.error e.backtrace.join("\n")
      end
      @log.info "worker:end:#{id_list.inspect}"
      begin
        # Timeout.timeout instead of the deprecated bare timeout(), which
        # would raise NoMethodError where it no longer exists and leave
        # the log unclosed.
        Timeout.timeout(20){@log.close}
      rescue => e
        $stdout.puts e
        $stdout.puts e.backtrace.join("\n")
      end
      @out.puts "exited"
    end

    # from Michael Grosser's parallel
    # https://github.com/grosser/parallel
    #
    # Returns the number of processors detected for the host OS.
    # Raises RuntimeError for an unknown OS.
    def processor_count
      host_os = RbConfig::CONFIG['host_os']
      case host_os
      when /linux|cygwin/
        ncpu = 0
        # File.foreach closes the file when done, unlike open(...).each
        File.foreach("/proc/cpuinfo") do |l|
          ncpu += 1 if /^processor\s+: \d+/=~l
        end
        ncpu
      when /darwin9/
        `hwprefs cpu_count`.to_i
      when /darwin/
        (hwprefs_available? ? `hwprefs thread_count` : `sysctl -n hw.ncpu`).to_i
      when /(open|free)bsd/
        `sysctl -n hw.ncpu`.to_i
      when /mswin|mingw/
        require 'win32ole'
        wmi = WIN32OLE.connect("winmgmts://")
        cpu = wmi.ExecQuery("select NumberOfLogicalProcessors from Win32_Processor")
        cpu.to_enum.first.NumberOfLogicalProcessors
      when /solaris2/
        `psrinfo -p`.to_i # physical cpus
      else
        raise "Unknown architecture: #{host_os}"
      end
    end

    # True when the hwprefs command can be found with `which`.
    def hwprefs_available?
      `which hwprefs` != ''
    end
    private :hwprefs_available?

  end
end
|