pwrake 2.1.3 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +25 -12
- data/bin/pwrake-mpi +41 -0
- data/bin/pwrake-mpi-run +16 -0
- data/lib/pwrake/branch/branch.rb +17 -29
- data/lib/pwrake/branch/branch_application.rb +31 -41
- data/lib/pwrake/branch/communicator.rb +32 -11
- data/lib/pwrake/branch/communicator_set.rb +6 -0
- data/lib/pwrake/logger.rb +29 -1
- data/lib/pwrake/master/master.rb +51 -64
- data/lib/pwrake/master/master_application.rb +4 -9
- data/lib/pwrake/mpi/branch.rb +76 -0
- data/lib/pwrake/mpi/worker.rb +42 -0
- data/lib/pwrake/nbio.rb +60 -62
- data/lib/pwrake/option/host_map.rb +50 -9
- data/lib/pwrake/option/option.rb +55 -66
- data/lib/pwrake/option/option_default_filesystem.rb +48 -0
- data/lib/pwrake/option/option_gfarm.rb +1 -0
- data/lib/pwrake/option/option_gfarm2fs.rb +101 -0
- data/lib/pwrake/queue/locality_aware_queue.rb +7 -11
- data/lib/pwrake/report/task_stat.rb +4 -5
- data/lib/pwrake/task/task_wrapper.rb +57 -34
- data/lib/pwrake/version.rb +1 -1
- data/lib/pwrake/worker/executor.rb +32 -14
- data/lib/pwrake/worker/invoker.rb +61 -34
- data/lib/pwrake/worker/worker_main.rb +5 -5
- data/lib/pwrake/worker/writer.rb +27 -20
- metadata +11 -5
- data/lib/pwrake/option/option_filesystem.rb +0 -123
- data/lib/pwrake/worker/load.rb +0 -14
- data/lib/pwrake/worker/reader.rb +0 -73
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f781cdfe39747649da0fafb2ebb37e579d294483
|
4
|
+
data.tar.gz: 5985be277367367065e9fef64188fc43ec8a2de4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 847964dd1146902e2f6f5988a26188184845e1b7879ed43a311560266e90d33d3c5febe7bb6727cc2003075b540046028a6cbaea6c28451888866d1e43aa1cfe
|
7
|
+
data.tar.gz: 76af1e8e961e9ff22e1c6ca0626a416eb73f2f43a9ce83c0c4b72a311841fd319b6a99c1fbf9594cbf5a4efafe8dc00833dae772550b705837f58e40f48ad69a
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -3,9 +3,9 @@
|
|
3
3
|
Parallel Workflow extension for Rake, runs on multicores, clusters, clouds.
|
4
4
|
* Author: Masahiro Tanaka
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
[README in Japanese](https://github.com/masa16/pwrake/wiki/Pwrakeとは),
|
7
|
+
[GitHub Repository](https://github.com/masa16/pwrake),
|
8
|
+
[RubyGems](https://rubygems.org/gems/pwrake)
|
9
9
|
|
10
10
|
## Features
|
11
11
|
|
@@ -14,7 +14,7 @@ Parallel Workflow extension for Rake, runs on multicores, clusters, clouds.
|
|
14
14
|
* The tasks which do not have mutual dependencies are automatically executed in parallel.
|
15
15
|
* The `multitask`, which is Rake's parallel task definition, is no longer necessary.
|
16
16
|
* Parallel and distributed execution is possible using a computer cluster which consists of multiple compute nodes.
|
17
|
-
* Cluster settings: SSH login, and the directory sharing using a shared filesystem, e.g., NFS, Gfarm.
|
17
|
+
* Cluster settings: SSH login (or MPI), and the directory sharing using a shared filesystem, e.g., NFS, Gfarm.
|
18
18
|
* Pwrake automatically connects to remote hosts using SSH. You do not need to start a daemon.
|
19
19
|
* Remote host names and the number of cores to use are provided in a hostfile.
|
20
20
|
* [Gfarm file system](http://sourceforge.net/projects/gfarm/) utilizes the storage of compute nodes. It provides high-performance parallel I/O.
|
@@ -68,7 +68,15 @@ In this case, you need the rehash of command paths:
|
|
68
68
|
|
69
69
|
4. Run `pwrake` with an option `--hostfile` or `-F`:
|
70
70
|
|
71
|
-
$ pwrake
|
71
|
+
$ pwrake -F hosts
|
72
|
+
|
73
|
+
### Use MPI to start remote workers
|
74
|
+
|
75
|
+
1. Setup MPI on your cluster.
|
76
|
+
2. Install [MPipe gem](https://rubygems.org/gems/mpipe). (requires `mpicc`)
|
77
|
+
3. Run `pwrake-mpi` command.
|
78
|
+
|
79
|
+
$ pwrake-mpi -F hosts
|
72
80
|
|
73
81
|
## Options
|
74
82
|
|
@@ -115,8 +123,6 @@ In this case, you need the rehash of command paths:
|
|
115
123
|
WORK_DIR default=$PWD
|
116
124
|
FILESYSTEM default(autodetect)|gfarm
|
117
125
|
SSH_OPTION SSH option
|
118
|
-
SHELL_COMMAND default=$SHELL
|
119
|
-
SHELL_RC Run-Command when shell starts
|
120
126
|
PASS_ENV (Array) Environment variables passed to SSH
|
121
127
|
HEARTBEAT default=240 - Heartbeat interval in seconds
|
122
128
|
RETRY default=1 - The number of retries
|
@@ -177,7 +183,7 @@ Properties (The leftmost item is default):
|
|
177
183
|
|
178
184
|
gem install ffi
|
179
185
|
|
180
|
-
##
|
186
|
+
## Scheduling with Graph Partitioning
|
181
187
|
|
182
188
|
* Compile and Install METIS 5.1.0 (http://www.cs.umn.edu/~metis/). This requires CMake.
|
183
189
|
|
@@ -187,15 +193,22 @@ Properties (The leftmost item is default):
|
|
187
193
|
--with-metis-include=/usr/local/include \
|
188
194
|
--with-metis-lib=/usr/local/lib
|
189
195
|
|
196
|
+
* Option (`pwrake_conf.yaml`):
|
197
|
+
|
198
|
+
GRAPH_PARTITION: true
|
199
|
+
|
200
|
+
* See publication: [M. Tanaka and O. Tatebe, “Workflow Scheduling to Minimize Data Movement Using Multi-constraint Graph Partitioning,” in CCGrid 2012](http://ieeexplore.ieee.org/abstract/document/6217406/)
|
201
|
+
|
190
202
|
## Current version
|
191
203
|
|
192
|
-
* Pwrake version 2.
|
204
|
+
* Pwrake version 2.2.0
|
193
205
|
|
194
206
|
## Tested Platform
|
195
207
|
|
196
|
-
|
197
|
-
*
|
198
|
-
*
|
208
|
+
|
209
|
+
* Ruby 2.4.0
|
210
|
+
* Rake 12.0.0
|
211
|
+
* CentOS 7.3
|
199
212
|
|
200
213
|
## Acknowledgment
|
201
214
|
|
data/bin/pwrake-mpi
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
begin
|
4
|
+
require 'rake'
|
5
|
+
rescue LoadError
|
6
|
+
require 'rubygems'
|
7
|
+
require 'rake'
|
8
|
+
end
|
9
|
+
|
10
|
+
libpath = File.absolute_path(File.dirname(__FILE__))+"/../lib"
|
11
|
+
$LOAD_PATH.unshift libpath
|
12
|
+
|
13
|
+
require "pwrake/version"
|
14
|
+
require "pwrake/master/master_application"
|
15
|
+
require "shellwords"
|
16
|
+
|
17
|
+
module Pwrake
|
18
|
+
module MasterApplication
|
19
|
+
def run
|
20
|
+
standard_exception_handling do
|
21
|
+
init("pwrake") # <- parse options here
|
22
|
+
opts = Option.new
|
23
|
+
hosts = opts.host_map.map{|b,a| a.map{|h| h.name}}.flatten
|
24
|
+
if opts['MASTER_IS_FIRST_HOST']
|
25
|
+
[hosts[0],*hosts]
|
26
|
+
else
|
27
|
+
[Socket.gethostname,*hosts]
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end;end
|
32
|
+
|
33
|
+
class Rake::Application
|
34
|
+
prepend Pwrake::MasterApplication
|
35
|
+
end
|
36
|
+
|
37
|
+
hosts = Rake.application.run.join(',')
|
38
|
+
args = ARGV.map{|x| Shellwords.escape(x)}.join(" ")
|
39
|
+
|
40
|
+
cmd="mpirun -wdir \"$HOME\" -host #{hosts} pwrake-mpi-run \"$PWD\" #{args}"
|
41
|
+
exec cmd
|
data/bin/pwrake-mpi-run
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
libpath = File.absolute_path(File.dirname(__FILE__))+"/../lib"
|
4
|
+
$LOAD_PATH.unshift libpath
|
5
|
+
|
6
|
+
require "mpipe"
|
7
|
+
MPipe.init
|
8
|
+
|
9
|
+
wdir = ARGV.shift
|
10
|
+
|
11
|
+
if MPipe::Comm.rank == 0
|
12
|
+
Dir.chdir(wdir)
|
13
|
+
require "pwrake/mpi/branch"
|
14
|
+
else
|
15
|
+
require "pwrake/mpi/worker"
|
16
|
+
end
|
data/lib/pwrake/branch/branch.rb
CHANGED
@@ -9,6 +9,12 @@ module Pwrake
|
|
9
9
|
|
10
10
|
class Branch
|
11
11
|
|
12
|
+
@@io_class = IO
|
13
|
+
|
14
|
+
def self.io_class=(io_class)
|
15
|
+
@@io_class = io_class
|
16
|
+
end
|
17
|
+
|
12
18
|
def initialize(opts,r,w)
|
13
19
|
Thread.abort_on_exception = true
|
14
20
|
@option = opts
|
@@ -16,10 +22,17 @@ module Pwrake
|
|
16
22
|
@shells = []
|
17
23
|
@ior = r
|
18
24
|
@iow = w
|
19
|
-
@selector = NBIO::Selector.new
|
25
|
+
@selector = NBIO::Selector.new(@@io_class)
|
20
26
|
@master_rd = NBIO::Reader.new(@selector,@ior)
|
21
27
|
@master_wt = NBIO::Writer.new(@selector,@iow)
|
22
28
|
@shell_start_interval = @option['SHELL_START_INTERVAL']
|
29
|
+
|
30
|
+
# init_logger
|
31
|
+
Log.set_logger(@option)
|
32
|
+
if dir = @option['LOG_DIR']
|
33
|
+
fn = File.join(dir,@option["COMMAND_CSV_FILE"])
|
34
|
+
Shell.profiler.open(fn,@option['GNU_TIME'],@option['PLOT_PARALLELISM'])
|
35
|
+
end
|
23
36
|
end
|
24
37
|
|
25
38
|
# Rakefile is loaded after 'init' before 'run'
|
@@ -33,34 +46,6 @@ module Pwrake
|
|
33
46
|
Log.debug "Branch#run end"
|
34
47
|
end
|
35
48
|
|
36
|
-
attr_reader :logger
|
37
|
-
|
38
|
-
def init_logger
|
39
|
-
if dir = @option['LOG_DIR']
|
40
|
-
logfile = File.join(dir,@option['LOG_FILE'])
|
41
|
-
@logger = Logger.new(logfile)
|
42
|
-
else
|
43
|
-
if @option['DEBUG']
|
44
|
-
@logger = Logger.new($stderr)
|
45
|
-
else
|
46
|
-
@logger = Logger.new(File::NULL)
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
if @option['DEBUG']
|
51
|
-
@logger.level = Logger::DEBUG
|
52
|
-
elsif @option['TRACE']
|
53
|
-
@logger.level = Logger::INFO
|
54
|
-
else
|
55
|
-
@logger.level = Logger::WARN
|
56
|
-
end
|
57
|
-
|
58
|
-
if dir = @option['LOG_DIR']
|
59
|
-
fn = File.join(dir,@option["COMMAND_CSV_FILE"])
|
60
|
-
Shell.profiler.open(fn,@option['GNU_TIME'],@option['PLOT_PARALLELISM'])
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
49
|
def setup_worker
|
65
50
|
@cs = CommunicatorSet.new(@master_rd,@selector,@option.worker_option)
|
66
51
|
@cs.create_communicators
|
@@ -76,6 +61,9 @@ module Pwrake
|
|
76
61
|
@cs.each_value do |comm|
|
77
62
|
# set WorkerChannel#ncore at Master
|
78
63
|
@master_wt.put_line "ncore:#{comm.id}:#{comm.ncore}"
|
64
|
+
comm.ipaddr.each do |ipa|
|
65
|
+
@master_wt.put_line "ip:#{comm.id}:#{ipa}"
|
66
|
+
end
|
79
67
|
end
|
80
68
|
@master_wt.put_line "ncore:done"
|
81
69
|
end.resume
|
@@ -6,51 +6,41 @@ module Pwrake
|
|
6
6
|
# The BranchApplication module is a mixin for running a Pwrake branch.
|
7
7
|
module BranchApplication
|
8
8
|
|
9
|
-
def logger
|
10
|
-
@branch.logger
|
11
|
-
end
|
12
|
-
|
13
9
|
def run_branch(r,w)
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
@branch.finish
|
35
|
-
end
|
36
|
-
#end
|
10
|
+
init("pwrake_branch")
|
11
|
+
opts = Marshal.load(r)
|
12
|
+
if !opts.kind_of?(Hash)
|
13
|
+
raise "opts is not a Hash: opts=#{opts.inspect}"
|
14
|
+
end
|
15
|
+
@branch = Branch.new(opts,r,w)
|
16
|
+
opts.feedback_options
|
17
|
+
load_rakefile
|
18
|
+
w.puts "pwrake_branch start"
|
19
|
+
w.flush
|
20
|
+
begin
|
21
|
+
@branch.run
|
22
|
+
rescue => e
|
23
|
+
Log.fatal e
|
24
|
+
$stderr.puts e
|
25
|
+
$stderr.puts e.backtrace
|
26
|
+
@branch.kill
|
27
|
+
ensure
|
28
|
+
@branch.finish
|
29
|
+
end
|
37
30
|
end
|
38
31
|
|
39
32
|
def run_branch_in_thread(r,w,opts)
|
40
|
-
|
41
|
-
|
42
|
-
@branch.
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
@branch.finish
|
52
|
-
end
|
53
|
-
#end
|
33
|
+
@branch = Branch.new(opts,r,w)
|
34
|
+
begin
|
35
|
+
@branch.run
|
36
|
+
rescue => e
|
37
|
+
Log.fatal e
|
38
|
+
$stderr.puts e
|
39
|
+
$stderr.puts e.backtrace
|
40
|
+
@branch.kill
|
41
|
+
ensure
|
42
|
+
@branch.finish
|
43
|
+
end
|
54
44
|
end
|
55
45
|
|
56
46
|
end
|
@@ -38,6 +38,7 @@ class Communicator
|
|
38
38
|
attr_reader :id, :host, :ncore, :channel
|
39
39
|
attr_reader :reader, :writer, :handler
|
40
40
|
attr_reader :shells
|
41
|
+
attr_reader :ipaddr
|
41
42
|
|
42
43
|
def initialize(set,id,host,ncore,selector,option)
|
43
44
|
@set = set
|
@@ -47,6 +48,7 @@ class Communicator
|
|
47
48
|
@selector = selector
|
48
49
|
@option = option
|
49
50
|
@shells = {}
|
51
|
+
@ipaddr = []
|
50
52
|
end
|
51
53
|
|
52
54
|
def inspect
|
@@ -58,10 +60,9 @@ class Communicator
|
|
58
60
|
CommChannel.new(@host,i,q,@writer,[@ior,@iow,@ioe])
|
59
61
|
end
|
60
62
|
|
61
|
-
def
|
63
|
+
def setup_pipe(worker_code)
|
62
64
|
rb_cmd = "ruby -e 'eval ARGF.read(#{worker_code.size})'"
|
63
|
-
if [
|
64
|
-
#if /^localhost/ =~ @host
|
65
|
+
if %w[127.0.0.1 ::1].include?(IPSocket.getaddress(@host))
|
65
66
|
cmd = rb_cmd
|
66
67
|
else
|
67
68
|
cmd = "ssh -x -T #{@option[:ssh_option]} #{@host} \"#{rb_cmd}\""
|
@@ -74,18 +75,33 @@ class Communicator
|
|
74
75
|
w0.close
|
75
76
|
w1.close
|
76
77
|
r2.close
|
78
|
+
# send worker_code
|
79
|
+
@iow.write(worker_code)
|
80
|
+
end
|
81
|
+
|
82
|
+
def connect(worker_code)
|
83
|
+
setup_pipe(worker_code)
|
84
|
+
|
85
|
+
# send ncore and options
|
86
|
+
opts = Marshal.dump(@option)
|
87
|
+
s = [@ncore||0, opts.size].pack("V2")
|
88
|
+
@iow.write(s)
|
89
|
+
@iow.write(opts)
|
90
|
+
|
77
91
|
sel = @set.selector
|
78
92
|
@reader = NBIO::MultiReader.new(sel,@ior)
|
79
93
|
@rd_err = NBIO::Reader.new(sel,@ioe)
|
80
94
|
@writer = NBIO::Writer.new(sel,@iow)
|
81
95
|
@handler = NBIO::Handler.new(@reader,@writer,@host)
|
82
|
-
|
83
|
-
@writer.write(worker_code)
|
84
|
-
@writer.write(Marshal.dump(@ncore))
|
85
|
-
@writer.write(Marshal.dump(@option))
|
96
|
+
|
86
97
|
# read ncore
|
87
98
|
while s = @reader.get_line
|
88
|
-
|
99
|
+
case s
|
100
|
+
when /^ip:(.*)$/
|
101
|
+
a = $1
|
102
|
+
@ipaddr.push(a)
|
103
|
+
Log.debug "ip=#{a} @#{@host}"
|
104
|
+
when /^ncore:(.*)$/
|
89
105
|
a = $1
|
90
106
|
Log.debug "ncore=#{a} @#{@host}"
|
91
107
|
if /^(\d+)$/ =~ a
|
@@ -134,9 +150,14 @@ class Communicator
|
|
134
150
|
err_out = []
|
135
151
|
begin
|
136
152
|
finish_shells
|
137
|
-
@handler
|
138
|
-
|
139
|
-
|
153
|
+
if @handler
|
154
|
+
@handler.exit
|
155
|
+
@handler = nil
|
156
|
+
end
|
157
|
+
if @rd_err
|
158
|
+
while s = @rd_err.get_line
|
159
|
+
err_out << s
|
160
|
+
end
|
140
161
|
end
|
141
162
|
rescue => e
|
142
163
|
m = Log.bt(e)
|
@@ -11,10 +11,16 @@ class CommunicatorSet
|
|
11
11
|
@selector = selector
|
12
12
|
@option = option
|
13
13
|
@communicators = {}
|
14
|
+
@error_host = []
|
14
15
|
@initial_communicators = []
|
15
16
|
if hb = @option[:heartbeat]
|
16
17
|
@heartbeat_timeout = hb + 15
|
17
18
|
end
|
19
|
+
init_hosts
|
20
|
+
end
|
21
|
+
|
22
|
+
def init_hosts
|
23
|
+
# for pwrake-mpi
|
18
24
|
end
|
19
25
|
|
20
26
|
attr_reader :selector
|
data/lib/pwrake/logger.rb
CHANGED
@@ -4,10 +4,38 @@ module Pwrake
|
|
4
4
|
|
5
5
|
module Log
|
6
6
|
|
7
|
+
@@logger = nil
|
8
|
+
|
7
9
|
module_function
|
8
10
|
|
11
|
+
def set_logger(option)
|
12
|
+
unless @@logger
|
13
|
+
if logdir = option['LOG_DIR']
|
14
|
+
::FileUtils.mkdir_p(logdir)
|
15
|
+
logfile = File.join(logdir, option['LOG_FILE'])
|
16
|
+
@@logger = Logger.new(logfile)
|
17
|
+
else
|
18
|
+
if option['DEBUG']
|
19
|
+
@@logger = Logger.new($stderr)
|
20
|
+
else
|
21
|
+
@@logger = Logger.new(File::NULL)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
if option['DEBUG']
|
26
|
+
@@logger.level = Logger::DEBUG
|
27
|
+
else
|
28
|
+
@@logger.level = Logger::INFO
|
29
|
+
end
|
30
|
+
|
31
|
+
at_exit{@@logger.close}
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
9
35
|
def method_missing(meth_id,*args)
|
10
|
-
|
36
|
+
if @@logger
|
37
|
+
@@logger.send(meth_id,*args)
|
38
|
+
end
|
11
39
|
end
|
12
40
|
|
13
41
|
def bt(e)
|