pwrake 2.1.3 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +25 -12
- data/bin/pwrake-mpi +41 -0
- data/bin/pwrake-mpi-run +16 -0
- data/lib/pwrake/branch/branch.rb +17 -29
- data/lib/pwrake/branch/branch_application.rb +31 -41
- data/lib/pwrake/branch/communicator.rb +32 -11
- data/lib/pwrake/branch/communicator_set.rb +6 -0
- data/lib/pwrake/logger.rb +29 -1
- data/lib/pwrake/master/master.rb +51 -64
- data/lib/pwrake/master/master_application.rb +4 -9
- data/lib/pwrake/mpi/branch.rb +76 -0
- data/lib/pwrake/mpi/worker.rb +42 -0
- data/lib/pwrake/nbio.rb +60 -62
- data/lib/pwrake/option/host_map.rb +50 -9
- data/lib/pwrake/option/option.rb +55 -66
- data/lib/pwrake/option/option_default_filesystem.rb +48 -0
- data/lib/pwrake/option/option_gfarm.rb +1 -0
- data/lib/pwrake/option/option_gfarm2fs.rb +101 -0
- data/lib/pwrake/queue/locality_aware_queue.rb +7 -11
- data/lib/pwrake/report/task_stat.rb +4 -5
- data/lib/pwrake/task/task_wrapper.rb +57 -34
- data/lib/pwrake/version.rb +1 -1
- data/lib/pwrake/worker/executor.rb +32 -14
- data/lib/pwrake/worker/invoker.rb +61 -34
- data/lib/pwrake/worker/worker_main.rb +5 -5
- data/lib/pwrake/worker/writer.rb +27 -20
- metadata +11 -5
- data/lib/pwrake/option/option_filesystem.rb +0 -123
- data/lib/pwrake/worker/load.rb +0 -14
- data/lib/pwrake/worker/reader.rb +0 -73
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f781cdfe39747649da0fafb2ebb37e579d294483
|
4
|
+
data.tar.gz: 5985be277367367065e9fef64188fc43ec8a2de4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 847964dd1146902e2f6f5988a26188184845e1b7879ed43a311560266e90d33d3c5febe7bb6727cc2003075b540046028a6cbaea6c28451888866d1e43aa1cfe
|
7
|
+
data.tar.gz: 76af1e8e961e9ff22e1c6ca0626a416eb73f2f43a9ce83c0c4b72a311841fd319b6a99c1fbf9594cbf5a4efafe8dc00833dae772550b705837f58e40f48ad69a
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -3,9 +3,9 @@
|
|
3
3
|
Parallel Workflow extension for Rake, runs on multicores, clusters, clouds.
|
4
4
|
* Author: Masahiro Tanaka
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
[README in Japanese](https://github.com/masa16/pwrake/wiki/Pwrakeとは),
|
7
|
+
[GitHub Repository](https://github.com/masa16/pwrake),
|
8
|
+
[RubyGems](https://rubygems.org/gems/pwrake)
|
9
9
|
|
10
10
|
## Features
|
11
11
|
|
@@ -14,7 +14,7 @@ Parallel Workflow extension for Rake, runs on multicores, clusters, clouds.
|
|
14
14
|
* The tasks which do not have mutual dependencies are automatically executed in parallel.
|
15
15
|
* The `multitask`, which is Rake's parallel task definition, is no longer necessary.
|
16
16
|
* Parallel and distributed execution is possible using a computer cluster which consists of multiple compute nodes.
|
17
|
-
* Cluster settings: SSH login, and the directory sharing using a shared filesystem, e.g., NFS, Gfarm.
|
17
|
+
* Cluster settings: SSH login (or MPI), and directory sharing via a shared filesystem, e.g., NFS, Gfarm.
|
18
18
|
* Pwrake automatically connects to remote hosts using SSH. You do not need to start a daemon.
|
19
19
|
* Remote host names and the number of cores to use are provided in a hostfile.
|
20
20
|
* [Gfarm file system](http://sourceforge.net/projects/gfarm/) utilizes the storage of compute nodes. It provides high-performance parallel I/O.
|
@@ -68,7 +68,15 @@ In this case, you need the rehash of command paths:
|
|
68
68
|
|
69
69
|
4. Run `pwrake` with an option `--hostfile` or `-F`:
|
70
70
|
|
71
|
-
$ pwrake
|
71
|
+
$ pwrake -F hosts
|
72
|
+
|
73
|
+
### Use MPI to start remote worker
|
74
|
+
|
75
|
+
1. Set up MPI on your cluster.
|
76
|
+
2. Install [MPipe gem](https://rubygems.org/gems/mpipe). (requires `mpicc`)
|
77
|
+
3. Run the `pwrake-mpi` command.
|
78
|
+
|
79
|
+
$ pwrake-mpi -F hosts
|
72
80
|
|
73
81
|
## Options
|
74
82
|
|
@@ -115,8 +123,6 @@ In this case, you need the rehash of command paths:
|
|
115
123
|
WORK_DIR default=$PWD
|
116
124
|
FILESYSTEM default(autodetect)|gfarm
|
117
125
|
SSH_OPTION SSH option
|
118
|
-
SHELL_COMMAND default=$SHELL
|
119
|
-
SHELL_RC Run-Command when shell starts
|
120
126
|
PASS_ENV (Array) Environment variables passed to SSH
|
121
127
|
HEARTBEAT default=240 - Heartbeat interval in seconds
|
122
128
|
RETRY default=1 - The number of retries
|
@@ -177,7 +183,7 @@ Properties (The leftmost item is default):
|
|
177
183
|
|
178
184
|
gem install ffi
|
179
185
|
|
180
|
-
##
|
186
|
+
## Scheduling with Graph Partitioning
|
181
187
|
|
182
188
|
* Compile and Install METIS 5.1.0 (http://www.cs.umn.edu/~metis/). This requires CMake.
|
183
189
|
|
@@ -187,15 +193,22 @@ Properties (The leftmost item is default):
|
|
187
193
|
--with-metis-include=/usr/local/include \
|
188
194
|
--with-metis-lib=/usr/local/lib
|
189
195
|
|
196
|
+
* Option (`pwrake_conf.yaml`):
|
197
|
+
|
198
|
+
GRAPH_PARTITION: true
|
199
|
+
|
200
|
+
* See publication: [M. Tanaka and O. Tatebe, “Workflow Scheduling to Minimize Data Movement Using Multi-constraint Graph Partitioning,” in CCGrid 2012](http://ieeexplore.ieee.org/abstract/document/6217406/)
|
201
|
+
|
190
202
|
## Current version
|
191
203
|
|
192
|
-
* Pwrake version 2.
|
204
|
+
* Pwrake version 2.2.0
|
193
205
|
|
194
206
|
## Tested Platform
|
195
207
|
|
196
|
-
|
197
|
-
*
|
198
|
-
*
|
208
|
+
|
209
|
+
* Ruby 2.4.0
|
210
|
+
* Rake 12.0.0
|
211
|
+
* CentOS 7.3
|
199
212
|
|
200
213
|
## Acknowledgment
|
201
214
|
|
data/bin/pwrake-mpi
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
begin
|
4
|
+
require 'rake'
|
5
|
+
rescue LoadError
|
6
|
+
require 'rubygems'
|
7
|
+
require 'rake'
|
8
|
+
end
|
9
|
+
|
10
|
+
libpath = File.absolute_path(File.dirname(__FILE__))+"/../lib"
|
11
|
+
$LOAD_PATH.unshift libpath
|
12
|
+
|
13
|
+
require "pwrake/version"
|
14
|
+
require "pwrake/master/master_application"
|
15
|
+
require "shellwords"
|
16
|
+
|
17
|
+
module Pwrake
|
18
|
+
module MasterApplication
|
19
|
+
def run
|
20
|
+
standard_exception_handling do
|
21
|
+
init("pwrake") # <- parse options here
|
22
|
+
opts = Option.new
|
23
|
+
hosts = opts.host_map.map{|b,a| a.map{|h| h.name}}.flatten
|
24
|
+
if opts['MASTER_IS_FIRST_HOST']
|
25
|
+
[hosts[0],*hosts]
|
26
|
+
else
|
27
|
+
[Socket.gethostname,*hosts]
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end;end
|
32
|
+
|
33
|
+
class Rake::Application
|
34
|
+
prepend Pwrake::MasterApplication
|
35
|
+
end
|
36
|
+
|
37
|
+
hosts = Rake.application.run.join(',')
|
38
|
+
args = ARGV.map{|x| Shellwords.escape(x)}.join(" ")
|
39
|
+
|
40
|
+
cmd="mpirun -wdir \"$HOME\" -host #{hosts} pwrake-mpi-run \"$PWD\" #{args}"
|
41
|
+
exec cmd
|
data/bin/pwrake-mpi-run
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
libpath = File.absolute_path(File.dirname(__FILE__))+"/../lib"
|
4
|
+
$LOAD_PATH.unshift libpath
|
5
|
+
|
6
|
+
require "mpipe"
|
7
|
+
MPipe.init
|
8
|
+
|
9
|
+
wdir = ARGV.shift
|
10
|
+
|
11
|
+
if MPipe::Comm.rank == 0
|
12
|
+
Dir.chdir(wdir)
|
13
|
+
require "pwrake/mpi/branch"
|
14
|
+
else
|
15
|
+
require "pwrake/mpi/worker"
|
16
|
+
end
|
data/lib/pwrake/branch/branch.rb
CHANGED
@@ -9,6 +9,12 @@ module Pwrake
|
|
9
9
|
|
10
10
|
class Branch
|
11
11
|
|
12
|
+
@@io_class = IO
|
13
|
+
|
14
|
+
def self.io_class=(io_class)
|
15
|
+
@@io_class = io_class
|
16
|
+
end
|
17
|
+
|
12
18
|
def initialize(opts,r,w)
|
13
19
|
Thread.abort_on_exception = true
|
14
20
|
@option = opts
|
@@ -16,10 +22,17 @@ module Pwrake
|
|
16
22
|
@shells = []
|
17
23
|
@ior = r
|
18
24
|
@iow = w
|
19
|
-
@selector = NBIO::Selector.new
|
25
|
+
@selector = NBIO::Selector.new(@@io_class)
|
20
26
|
@master_rd = NBIO::Reader.new(@selector,@ior)
|
21
27
|
@master_wt = NBIO::Writer.new(@selector,@iow)
|
22
28
|
@shell_start_interval = @option['SHELL_START_INTERVAL']
|
29
|
+
|
30
|
+
# init_logger
|
31
|
+
Log.set_logger(@option)
|
32
|
+
if dir = @option['LOG_DIR']
|
33
|
+
fn = File.join(dir,@option["COMMAND_CSV_FILE"])
|
34
|
+
Shell.profiler.open(fn,@option['GNU_TIME'],@option['PLOT_PARALLELISM'])
|
35
|
+
end
|
23
36
|
end
|
24
37
|
|
25
38
|
# Rakefile is loaded after 'init' before 'run'
|
@@ -33,34 +46,6 @@ module Pwrake
|
|
33
46
|
Log.debug "Branch#run end"
|
34
47
|
end
|
35
48
|
|
36
|
-
attr_reader :logger
|
37
|
-
|
38
|
-
def init_logger
|
39
|
-
if dir = @option['LOG_DIR']
|
40
|
-
logfile = File.join(dir,@option['LOG_FILE'])
|
41
|
-
@logger = Logger.new(logfile)
|
42
|
-
else
|
43
|
-
if @option['DEBUG']
|
44
|
-
@logger = Logger.new($stderr)
|
45
|
-
else
|
46
|
-
@logger = Logger.new(File::NULL)
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
if @option['DEBUG']
|
51
|
-
@logger.level = Logger::DEBUG
|
52
|
-
elsif @option['TRACE']
|
53
|
-
@logger.level = Logger::INFO
|
54
|
-
else
|
55
|
-
@logger.level = Logger::WARN
|
56
|
-
end
|
57
|
-
|
58
|
-
if dir = @option['LOG_DIR']
|
59
|
-
fn = File.join(dir,@option["COMMAND_CSV_FILE"])
|
60
|
-
Shell.profiler.open(fn,@option['GNU_TIME'],@option['PLOT_PARALLELISM'])
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
49
|
def setup_worker
|
65
50
|
@cs = CommunicatorSet.new(@master_rd,@selector,@option.worker_option)
|
66
51
|
@cs.create_communicators
|
@@ -76,6 +61,9 @@ module Pwrake
|
|
76
61
|
@cs.each_value do |comm|
|
77
62
|
# set WorkerChannel#ncore at Master
|
78
63
|
@master_wt.put_line "ncore:#{comm.id}:#{comm.ncore}"
|
64
|
+
comm.ipaddr.each do |ipa|
|
65
|
+
@master_wt.put_line "ip:#{comm.id}:#{ipa}"
|
66
|
+
end
|
79
67
|
end
|
80
68
|
@master_wt.put_line "ncore:done"
|
81
69
|
end.resume
|
@@ -6,51 +6,41 @@ module Pwrake
|
|
6
6
|
# The BranchApplication module is a mixin for running a Pwrake branch.
|
7
7
|
module BranchApplication
|
8
8
|
|
9
|
-
def logger
|
10
|
-
@branch.logger
|
11
|
-
end
|
12
|
-
|
13
9
|
def run_branch(r,w)
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
@branch.finish
|
35
|
-
end
|
36
|
-
#end
|
10
|
+
init("pwrake_branch")
|
11
|
+
opts = Marshal.load(r)
|
12
|
+
if !opts.kind_of?(Hash)
|
13
|
+
raise "opts is not a Hash: opts=#{opts.inspect}"
|
14
|
+
end
|
15
|
+
@branch = Branch.new(opts,r,w)
|
16
|
+
opts.feedback_options
|
17
|
+
load_rakefile
|
18
|
+
w.puts "pwrake_branch start"
|
19
|
+
w.flush
|
20
|
+
begin
|
21
|
+
@branch.run
|
22
|
+
rescue => e
|
23
|
+
Log.fatal e
|
24
|
+
$stderr.puts e
|
25
|
+
$stderr.puts e.backtrace
|
26
|
+
@branch.kill
|
27
|
+
ensure
|
28
|
+
@branch.finish
|
29
|
+
end
|
37
30
|
end
|
38
31
|
|
39
32
|
def run_branch_in_thread(r,w,opts)
|
40
|
-
|
41
|
-
|
42
|
-
@branch.
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
@branch.finish
|
52
|
-
end
|
53
|
-
#end
|
33
|
+
@branch = Branch.new(opts,r,w)
|
34
|
+
begin
|
35
|
+
@branch.run
|
36
|
+
rescue => e
|
37
|
+
Log.fatal e
|
38
|
+
$stderr.puts e
|
39
|
+
$stderr.puts e.backtrace
|
40
|
+
@branch.kill
|
41
|
+
ensure
|
42
|
+
@branch.finish
|
43
|
+
end
|
54
44
|
end
|
55
45
|
|
56
46
|
end
|
@@ -38,6 +38,7 @@ class Communicator
|
|
38
38
|
attr_reader :id, :host, :ncore, :channel
|
39
39
|
attr_reader :reader, :writer, :handler
|
40
40
|
attr_reader :shells
|
41
|
+
attr_reader :ipaddr
|
41
42
|
|
42
43
|
def initialize(set,id,host,ncore,selector,option)
|
43
44
|
@set = set
|
@@ -47,6 +48,7 @@ class Communicator
|
|
47
48
|
@selector = selector
|
48
49
|
@option = option
|
49
50
|
@shells = {}
|
51
|
+
@ipaddr = []
|
50
52
|
end
|
51
53
|
|
52
54
|
def inspect
|
@@ -58,10 +60,9 @@ class Communicator
|
|
58
60
|
CommChannel.new(@host,i,q,@writer,[@ior,@iow,@ioe])
|
59
61
|
end
|
60
62
|
|
61
|
-
def
|
63
|
+
def setup_pipe(worker_code)
|
62
64
|
rb_cmd = "ruby -e 'eval ARGF.read(#{worker_code.size})'"
|
63
|
-
if [
|
64
|
-
#if /^localhost/ =~ @host
|
65
|
+
if %w[127.0.0.1 ::1].include?(IPSocket.getaddress(@host))
|
65
66
|
cmd = rb_cmd
|
66
67
|
else
|
67
68
|
cmd = "ssh -x -T #{@option[:ssh_option]} #{@host} \"#{rb_cmd}\""
|
@@ -74,18 +75,33 @@ class Communicator
|
|
74
75
|
w0.close
|
75
76
|
w1.close
|
76
77
|
r2.close
|
78
|
+
# send worker_code
|
79
|
+
@iow.write(worker_code)
|
80
|
+
end
|
81
|
+
|
82
|
+
def connect(worker_code)
|
83
|
+
setup_pipe(worker_code)
|
84
|
+
|
85
|
+
# send ncore and options
|
86
|
+
opts = Marshal.dump(@option)
|
87
|
+
s = [@ncore||0, opts.size].pack("V2")
|
88
|
+
@iow.write(s)
|
89
|
+
@iow.write(opts)
|
90
|
+
|
77
91
|
sel = @set.selector
|
78
92
|
@reader = NBIO::MultiReader.new(sel,@ior)
|
79
93
|
@rd_err = NBIO::Reader.new(sel,@ioe)
|
80
94
|
@writer = NBIO::Writer.new(sel,@iow)
|
81
95
|
@handler = NBIO::Handler.new(@reader,@writer,@host)
|
82
|
-
|
83
|
-
@writer.write(worker_code)
|
84
|
-
@writer.write(Marshal.dump(@ncore))
|
85
|
-
@writer.write(Marshal.dump(@option))
|
96
|
+
|
86
97
|
# read ncore
|
87
98
|
while s = @reader.get_line
|
88
|
-
|
99
|
+
case s
|
100
|
+
when /^ip:(.*)$/
|
101
|
+
a = $1
|
102
|
+
@ipaddr.push(a)
|
103
|
+
Log.debug "ip=#{a} @#{@host}"
|
104
|
+
when /^ncore:(.*)$/
|
89
105
|
a = $1
|
90
106
|
Log.debug "ncore=#{a} @#{@host}"
|
91
107
|
if /^(\d+)$/ =~ a
|
@@ -134,9 +150,14 @@ class Communicator
|
|
134
150
|
err_out = []
|
135
151
|
begin
|
136
152
|
finish_shells
|
137
|
-
@handler
|
138
|
-
|
139
|
-
|
153
|
+
if @handler
|
154
|
+
@handler.exit
|
155
|
+
@handler = nil
|
156
|
+
end
|
157
|
+
if @rd_err
|
158
|
+
while s = @rd_err.get_line
|
159
|
+
err_out << s
|
160
|
+
end
|
140
161
|
end
|
141
162
|
rescue => e
|
142
163
|
m = Log.bt(e)
|
@@ -11,10 +11,16 @@ class CommunicatorSet
|
|
11
11
|
@selector = selector
|
12
12
|
@option = option
|
13
13
|
@communicators = {}
|
14
|
+
@error_host = []
|
14
15
|
@initial_communicators = []
|
15
16
|
if hb = @option[:heartbeat]
|
16
17
|
@heartbeat_timeout = hb + 15
|
17
18
|
end
|
19
|
+
init_hosts
|
20
|
+
end
|
21
|
+
|
22
|
+
def init_hosts
|
23
|
+
# for pwrake-mpi
|
18
24
|
end
|
19
25
|
|
20
26
|
attr_reader :selector
|
data/lib/pwrake/logger.rb
CHANGED
@@ -4,10 +4,38 @@ module Pwrake
|
|
4
4
|
|
5
5
|
module Log
|
6
6
|
|
7
|
+
@@logger = nil
|
8
|
+
|
7
9
|
module_function
|
8
10
|
|
11
|
+
def set_logger(option)
|
12
|
+
unless @@logger
|
13
|
+
if logdir = option['LOG_DIR']
|
14
|
+
::FileUtils.mkdir_p(logdir)
|
15
|
+
logfile = File.join(logdir, option['LOG_FILE'])
|
16
|
+
@@logger = Logger.new(logfile)
|
17
|
+
else
|
18
|
+
if option['DEBUG']
|
19
|
+
@@logger = Logger.new($stderr)
|
20
|
+
else
|
21
|
+
@@logger = Logger.new(File::NULL)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
if option['DEBUG']
|
26
|
+
@@logger.level = Logger::DEBUG
|
27
|
+
else
|
28
|
+
@@logger.level = Logger::INFO
|
29
|
+
end
|
30
|
+
|
31
|
+
at_exit{@@logger.close}
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
9
35
|
def method_missing(meth_id,*args)
|
10
|
-
|
36
|
+
if @@logger
|
37
|
+
@@logger.send(meth_id,*args)
|
38
|
+
end
|
11
39
|
end
|
12
40
|
|
13
41
|
def bt(e)
|