pwrake 0.9.5 → 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +83 -4
- data/lib/pwrake/application.rb +34 -0
- data/lib/pwrake/file_utils.rb +43 -16
- data/lib/pwrake/gfarm_feature.rb +79 -40
- data/lib/pwrake/locality_aware_queue.rb +141 -111
- data/lib/pwrake/logger.rb +7 -1
- data/lib/pwrake/master.rb +21 -2
- data/lib/pwrake/option.rb +106 -56
- data/lib/pwrake/profiler.rb +44 -12
- data/lib/pwrake/report/parallelism.rb +262 -0
- data/lib/pwrake/report/report.rb +355 -0
- data/lib/pwrake/report/report_multi.rb +196 -0
- data/lib/pwrake/report/stat.rb +115 -0
- data/lib/pwrake/report.rb +6 -0
- data/lib/pwrake/shell.rb +30 -10
- data/lib/pwrake/task_algorithm.rb +174 -50
- data/lib/pwrake/task_queue.rb +130 -50
- data/lib/pwrake/version.rb +1 -1
- data/spec/004/Rakefile +1 -1
- data/spec/009/pwrake_conf.yaml +1 -0
- data/spec/011/Rakefile +15 -0
- data/spec/helper.rb +7 -3
- data/spec/pwrake_spec.rb +12 -0
- metadata +15 -9
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6779de017cf5e05367aecc0f5fef0f12bfd4c54d
|
4
|
+
data.tar.gz: f9aa0b823cf6d147ae595c86643859ebb283992c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: df46550084799df1303f17a4dafbe5ee7ed2e2589a165bb2c340f37d4972b37a195da8d3f03922a79cca1c21f9ddb794a4042fa085437820d4093a24cfccd8d3
|
7
|
+
data.tar.gz: 992425a4aef9202594598b66c18f42dbd3d7d06a50f77315fa4252f72e0e6c9e483965f28ad1c166c3475956758d2e17bbbd2700d7837423682e862a0f73e008
|
data/README.md
CHANGED
@@ -3,6 +3,9 @@
|
|
3
3
|
Parallel workflow extension for Rake
|
4
4
|
* Author: Masahiro Tanaka
|
5
5
|
|
6
|
+
([日本語README](https://github.com/masa16/pwrake/wiki/Pwrake.ja)),
|
7
|
+
([GitHub Repository](https://github.com/masa16/pwrake))
|
8
|
+
|
6
9
|
## Features
|
7
10
|
|
8
11
|
* Parallelize all tasks; no need to modify Rakefile, no need to use `multitask`.
|
@@ -47,11 +50,87 @@ Or, gem install:
|
|
47
50
|
|
48
51
|
$ pwrake --hostfile=hosts
|
49
52
|
|
53
|
+
## Options
|
54
|
+
|
55
|
+
### Command line option
|
56
|
+
|
57
|
+
-F, --hostfile FILE [Pw] Read hostnames from FILE
|
58
|
+
-j, --jobs [N] [Pw] Number of threads at localhost (default: # of processors)
|
59
|
+
-L, --logfile [FILE] [Pw] Write log to FILE
|
60
|
+
--ssh-opt, --ssh-option OPTION
|
61
|
+
[Pw] Option passed to SSH
|
62
|
+
--filesystem FILESYSTEM [Pw] Specify FILESYSTEM (nfs|gfarm)
|
63
|
+
--gfarm [Pw] FILESYSTEM=gfarm
|
64
|
+
-A, --disable-affinity [Pw] Turn OFF affinity (AFFINITY=off)
|
65
|
+
-S, --disable-steal [Pw] Turn OFF task steal
|
66
|
+
-d, --debug [Pw] Output Debug messages
|
67
|
+
--pwrake-conf [FILE] [Pw] Pwrake configuration file in YAML
|
68
|
+
--show-conf, --show-config [Pw] Show Pwrake configuration options
|
69
|
+
--report LOG [Pw] Report profile HTML from LOG and exit.
|
70
|
+
|
71
|
+
### pwrake_conf.yaml
|
72
|
+
|
73
|
+
* If `pwrake_conf.yaml` exists at current directory, Pwrake reads options from it.
|
74
|
+
* Example (in YAML form):
|
75
|
+
|
76
|
+
HOSTFILE : hosts
|
77
|
+
LOGFILE : true
|
78
|
+
TASKLOG : true
|
79
|
+
PROFILE : true
|
80
|
+
GNU_TIME : true
|
81
|
+
PLOT_PARALLELISM : true
|
82
|
+
DISABLE_AFFINITY: true
|
83
|
+
DISABLE_STEAL: true
|
84
|
+
FAILED_TARGET : delete
|
85
|
+
PASS_ENV :
|
86
|
+
- ENV1
|
87
|
+
- ENV2
|
88
|
+
|
89
|
+
* Option list:
|
90
|
+
|
91
|
+
HOSTFILE, HOSTS default=false
|
92
|
+
LOGFILE, LOG default=none, string=filename, true="Pwrake%Y%m%d-%H%M%S_%$.log"
|
93
|
+
TASKLOG default=none, string=filename, true="Pwrake%Y%m%d-%H%M%S_%$.task"
|
94
|
+
PROFILE default=none, string=filename, true="Pwrake%Y%m%d-%H%M%S_%$.csv"
|
95
|
+
WORK_DIR default=$PWD
|
96
|
+
FILESYSTEM default=nil (autodetect)
|
97
|
+
SSH_OPTION (String) SSH option
|
98
|
+
PASS_ENV (Array) Environment variables passed to SSH
|
99
|
+
GNU_TIME If true, obtains PROFILEs using GNU time
|
100
|
+
PLOT_PARALLELISM If true, plot parallelism using GNUPLOT
|
101
|
+
FAILED_TARGET ( rename(default) | delete | leave ) failed files
|
102
|
+
QUEUE_PRIORITY ( DFS(default) | FIFO )
|
103
|
+
|
104
|
+
for Gfarm system:
|
105
|
+
|
106
|
+
DISABLE_AFFINITY default=false
|
107
|
+
DISABLE_STEAL default=false
|
108
|
+
STEAL_WAIT default=0 (sec)
|
109
|
+
STEAL_WAIT_MAX default=10 (sec)
|
110
|
+
: Wait min(STEAL_WAIT*2**n, STEAL_WAIT_MAX) sec for task steal.
|
111
|
+
GFARM_BASEDIR default="/tmp"
|
112
|
+
GFARM_PREFIX default="pwrake_$USER"
|
113
|
+
GFARM_SUBDIR default='/'
|
114
|
+
|
115
|
+
## Note for Gfarm
|
116
|
+
|
117
|
+
* `gfwhere-pipe` command is required for file-affinity scheduling.
|
118
|
+
|
119
|
+
wget https://gist.github.com/masa16/5787473/raw/6df5deeb80a4cea6b9d1d1ce01f390f65d650717/gfwhere-pipe.patch
|
120
|
+
cd gfarm-2.5.8.1
|
121
|
+
patch -p1 < ../gfwhere-pipe.patch
|
122
|
+
./configure --prefix=...
|
123
|
+
make
|
124
|
+
make install
|
125
|
+
|
50
126
|
## Tested Platform
|
51
127
|
|
52
|
-
* Ruby
|
53
|
-
* Rake 0.9.
|
54
|
-
*
|
128
|
+
* Ruby 2.0.0
|
129
|
+
* Rake 0.9.6
|
130
|
+
* CentOS 6.4
|
55
131
|
|
56
|
-
##
|
132
|
+
## Acknowledgment
|
57
133
|
|
134
|
+
This work is supported by
|
135
|
+
* JST CREST, research area: "Development of System Software Technologies for Post-Peta Scale High Performance Computing," and
|
136
|
+
* MEXT Promotion of Research for Next Generation IT Infrastructure "Resources Linkage for e-Science (RENKEI)."
|
data/lib/pwrake/application.rb
CHANGED
@@ -17,10 +17,24 @@ module Pwrake
|
|
17
17
|
|
18
18
|
class Application < ::Rake::Application
|
19
19
|
|
20
|
+
def filesystem
|
21
|
+
@master.filesystem
|
22
|
+
end
|
23
|
+
|
20
24
|
def task_queue
|
21
25
|
@master.task_queue
|
22
26
|
end
|
23
27
|
|
28
|
+
def finish_queue
|
29
|
+
@master.finish_queue
|
30
|
+
end
|
31
|
+
|
32
|
+
def postprocess(t)
|
33
|
+
if @master.postprocess
|
34
|
+
@master.postprocess.postprocess(t)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
24
38
|
def thread_loop(*args)
|
25
39
|
@master.thread_loop(*args)
|
26
40
|
end
|
@@ -33,6 +47,18 @@ module Pwrake
|
|
33
47
|
@master.start
|
34
48
|
end
|
35
49
|
|
50
|
+
def core_list
|
51
|
+
@master.core_list
|
52
|
+
end
|
53
|
+
|
54
|
+
def task_logger
|
55
|
+
@master.task_logger
|
56
|
+
end
|
57
|
+
|
58
|
+
def task_id_counter
|
59
|
+
@master.task_id_counter
|
60
|
+
end
|
61
|
+
|
36
62
|
# Run the Pwrake application.
|
37
63
|
def run
|
38
64
|
standard_exception_handling do
|
@@ -144,7 +170,15 @@ module Pwrake
|
|
144
170
|
['--show-conf','--show-config',
|
145
171
|
"[Pw] Show Pwrake configuration options",
|
146
172
|
lambda {|value| options.show_conf = true }
|
173
|
+
],
|
174
|
+
['--report LOG', "[Pw] Report profile HTML from LOG and exit.",
|
175
|
+
lambda { |value|
|
176
|
+
require 'pwrake/report'
|
177
|
+
Report.new(File.basename(value.sub(/\.[^.]+$/,"")),[]).report_html
|
178
|
+
exit
|
179
|
+
}
|
147
180
|
]
|
181
|
+
|
148
182
|
])
|
149
183
|
opts
|
150
184
|
end
|
data/lib/pwrake/file_utils.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
+
module Pwrake
|
1
2
|
module FileUtils
|
2
|
-
|
3
|
-
alias sh_orig :sh
|
3
|
+
module_function
|
4
4
|
|
5
5
|
def sh(*cmd, &block)
|
6
6
|
options = (Hash === cmd.last) ? cmd.pop : {}
|
@@ -17,14 +17,38 @@ module FileUtils
|
|
17
17
|
options[:verbose] ||= RakeFileUtils.verbose_flag
|
18
18
|
end
|
19
19
|
options[:noop] ||= RakeFileUtils.nowrite_flag
|
20
|
-
rake_check_options options, :noop, :verbose
|
21
|
-
Pwrake::Log.
|
20
|
+
Rake.rake_check_options options, :noop, :verbose
|
21
|
+
Pwrake::Log.stderr_puts cmd.join(" ") if options[:verbose]
|
22
22
|
unless options[:noop]
|
23
|
-
res,status = pwrake_system(*cmd)
|
23
|
+
res,status = Pwrake::FileUtils.pwrake_system(*cmd)
|
24
24
|
block.call(res, status)
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
28
|
+
def bq(*cmd, &block)
|
29
|
+
options = (Hash === cmd.last) ? cmd.pop : {}
|
30
|
+
unless block_given?
|
31
|
+
show_command = cmd.join(" ")
|
32
|
+
show_command = show_command[0,42] + "..."
|
33
|
+
block = lambda { |ok, status|
|
34
|
+
ok or fail "Command failed with status (#{status.exitstatus}): [#{show_command}]"
|
35
|
+
}
|
36
|
+
end
|
37
|
+
if RakeFileUtils.verbose_flag == :default
|
38
|
+
options[:verbose] = true
|
39
|
+
else
|
40
|
+
options[:verbose] ||= RakeFileUtils.verbose_flag
|
41
|
+
end
|
42
|
+
options[:noop] ||= RakeFileUtils.nowrite_flag
|
43
|
+
Rake.rake_check_options options, :noop, :verbose
|
44
|
+
Pwrake::Log.stderr_puts cmd.join(" ") if options[:verbose]
|
45
|
+
unless options[:noop]
|
46
|
+
res,status = Pwrake::FileUtils.pwrake_backquote(*cmd)
|
47
|
+
block.call(res, status)
|
48
|
+
end
|
49
|
+
res
|
50
|
+
end
|
51
|
+
|
28
52
|
def pwrake_system(*cmd)
|
29
53
|
cmd_log = cmd.join(" ").inspect
|
30
54
|
tm = Pwrake::Timer.new("sh",cmd_log)
|
@@ -42,8 +66,6 @@ module FileUtils
|
|
42
66
|
tm.finish("status=%s cmd=%s"%[status.exitstatus,cmd_log])
|
43
67
|
[res,status]
|
44
68
|
end
|
45
|
-
private :pwrake_system
|
46
|
-
|
47
69
|
|
48
70
|
# Pwrake version of backquote command
|
49
71
|
def pwrake_backquote(cmd)
|
@@ -53,18 +75,23 @@ module FileUtils
|
|
53
75
|
conn = Pwrake.current_shell
|
54
76
|
if conn.kind_of?(Pwrake::Shell)
|
55
77
|
res = conn.backquote(*cmd)
|
56
|
-
status = conn.status
|
78
|
+
status = Rake::PseudoStatus.new(conn.status)
|
57
79
|
else
|
58
80
|
res = `#{cmd}`
|
59
|
-
|
60
|
-
|
61
|
-
else
|
62
|
-
status = $?.exitstatus
|
63
|
-
end
|
81
|
+
status = $?
|
82
|
+
status = Rake::PseudoStatus.new(1) if status.nil?
|
64
83
|
end
|
65
84
|
|
66
|
-
tm.finish("status=%s cmd=%s"%[status,cmd_log])
|
67
|
-
res
|
85
|
+
tm.finish("status=%s cmd=%s"%[status.exitstatus,cmd_log])
|
86
|
+
[res,status]
|
68
87
|
end
|
69
88
|
|
70
|
-
end # module FileUtils
|
89
|
+
end # module Pwrake::FileUtils
|
90
|
+
end
|
91
|
+
|
92
|
+
module Rake
|
93
|
+
module DSL
|
94
|
+
include Pwrake::FileUtils
|
95
|
+
private(*Pwrake::FileUtils.instance_methods(false))
|
96
|
+
end
|
97
|
+
end
|
data/lib/pwrake/gfarm_feature.rb
CHANGED
@@ -87,7 +87,20 @@ module Pwrake
|
|
87
87
|
x.to_s
|
88
88
|
end
|
89
89
|
|
90
|
+
def gfpath(file='.')
|
91
|
+
begin
|
92
|
+
IO.popen("gfstat '#{file}'") do |f|
|
93
|
+
if /File: "([^"]+)"/ =~ f.gets #"
|
94
|
+
return $1
|
95
|
+
end
|
96
|
+
end
|
97
|
+
rescue
|
98
|
+
end
|
99
|
+
nil
|
100
|
+
end
|
101
|
+
|
90
102
|
def gfwhere(list)
|
103
|
+
system "sync"
|
91
104
|
result = {}
|
92
105
|
count = 0
|
93
106
|
cmd = "gfwhere"
|
@@ -95,7 +108,7 @@ module Pwrake
|
|
95
108
|
if count==1
|
96
109
|
result[cmd[8..-1]] = x.split
|
97
110
|
else
|
98
|
-
x.scan(/^([^\n]+):\n([^\n]*)$/m) do |file,hosts|
|
111
|
+
x.scan(/^(?:gfarm:\/\/[^\/]+)?([^\n]+):\n([^\n]*)$/m) do |file,hosts|
|
99
112
|
h = hosts.split
|
100
113
|
result[file] = h if !h.empty?
|
101
114
|
end
|
@@ -106,7 +119,7 @@ module Pwrake
|
|
106
119
|
if a
|
107
120
|
path = local_to_fs(a)
|
108
121
|
if cmd.size + path.size + 1 > 20480 # 131000
|
109
|
-
x = `#{cmd}`
|
122
|
+
x = `#{cmd} 2> /dev/null`
|
110
123
|
parse_proc.call(x)
|
111
124
|
cmd = "gfwhere"
|
112
125
|
count = 0
|
@@ -117,7 +130,7 @@ module Pwrake
|
|
117
130
|
end
|
118
131
|
end
|
119
132
|
if count > 0
|
120
|
-
x = `#{cmd}`
|
133
|
+
x = `#{cmd} 2> /dev/null`
|
121
134
|
parse_proc.call(x)
|
122
135
|
end
|
123
136
|
result
|
@@ -153,16 +166,22 @@ module Pwrake
|
|
153
166
|
open(system_cmd)
|
154
167
|
cd
|
155
168
|
if not _system "test -d #{@remote_mountpoint}"
|
156
|
-
_system "mkdir -p #{@remote_mountpoint}"
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
_system "
|
169
|
+
_system "mkdir -p #{@remote_mountpoint}" or die
|
170
|
+
else
|
171
|
+
lines = _backquote("sync; mount")
|
172
|
+
if /#{@remote_mountpoint} (?:type )?(\S+)/om =~ lines
|
173
|
+
_system "sync; fusermount -u #{@remote_mountpoint}"
|
174
|
+
_system "sync"
|
162
175
|
end
|
163
176
|
end
|
177
|
+
subdir = GfarmPath.subdir
|
178
|
+
if ["/","",nil].include?(subdir)
|
179
|
+
_system "gfarm2fs #{@remote_mountpoint}"
|
180
|
+
else
|
181
|
+
_system "gfarm2fs -o modules=subdir,subdir=#{subdir} #{@remote_mountpoint}"
|
182
|
+
end
|
164
183
|
path = ENV['PATH'].gsub( /#{GfarmPath.mountpoint}/, @remote_mountpoint )
|
165
|
-
_system "export PATH=#{path}"
|
184
|
+
_system "export PATH=#{path}" or die
|
166
185
|
cd_work_dir
|
167
186
|
end
|
168
187
|
|
@@ -185,45 +204,65 @@ module Pwrake
|
|
185
204
|
end
|
186
205
|
|
187
206
|
|
188
|
-
class
|
207
|
+
class GfarmPostprocess
|
189
208
|
|
190
|
-
def
|
191
|
-
|
192
|
-
|
193
|
-
|
209
|
+
def initialize
|
210
|
+
@lock = Mutex.new
|
211
|
+
@io = IO.popen('gfwhere-pipe','r+')
|
212
|
+
@io.sync = true
|
213
|
+
end
|
214
|
+
|
215
|
+
def gfwhere(file)
|
216
|
+
return [] if file==''
|
217
|
+
@lock.synchronize do
|
218
|
+
@io.puts(file)
|
219
|
+
@io.flush
|
220
|
+
s = @io.gets
|
221
|
+
if s.nil?
|
222
|
+
raise "gfwhere: unexpected end"
|
223
|
+
end
|
224
|
+
s.chomp!
|
225
|
+
if s != file
|
226
|
+
raise "gfwhere: file=#{file}, result=#{s}"
|
227
|
+
end
|
228
|
+
while s = @io.gets
|
229
|
+
s.chomp!
|
230
|
+
case s
|
231
|
+
when ""
|
232
|
+
next
|
233
|
+
when /^gfarm:\/\//
|
234
|
+
next
|
235
|
+
when /^Error:/
|
236
|
+
return []
|
237
|
+
else
|
238
|
+
return s.split(/\s+/)
|
239
|
+
end
|
240
|
+
end
|
241
|
+
end
|
194
242
|
end
|
195
243
|
|
196
|
-
def
|
197
|
-
if
|
198
|
-
|
199
|
-
return tasks
|
244
|
+
def postprocess(t)
|
245
|
+
if t.kind_of? Rake::FileTask
|
246
|
+
t.location = gfwhere(t.name)
|
200
247
|
end
|
248
|
+
end
|
201
249
|
|
202
|
-
|
203
|
-
|
204
|
-
#if Pwrake.manager.gfarm and Pwrake.manager.affinity
|
205
|
-
gfwhere_result = {}
|
206
|
-
filenames = []
|
250
|
+
def postprocess_bulk(tasks)
|
251
|
+
list = []
|
207
252
|
tasks.each do |t|
|
208
|
-
|
209
|
-
name = t.prerequisites[0] and
|
210
|
-
!filenames.include?(name)
|
211
|
-
filenames << name
|
212
|
-
end
|
253
|
+
list << t.name if t.kind_of? Rake::FileTask
|
213
254
|
end
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
255
|
+
if !list.empty?
|
256
|
+
Log.info "-- after_check: size=#{list.size} #{list.inspect}"
|
257
|
+
gfwhere_result = GfarmPath.gfwhere(list)
|
258
|
+
tasks.each do |t|
|
259
|
+
if t.kind_of? Rake::FileTask
|
260
|
+
t.location = gfwhere_result[GfarmPath.local_to_fs(t.name)]
|
261
|
+
end
|
262
|
+
end
|
263
|
+
#puts "'#{self.name}' exist? => #{File.exist?(self.name)} loc => #{loc}"
|
222
264
|
end
|
223
|
-
Log.info "-- GfarmQueue#where %.6fs %s" % [Time.now-start_time,abr_msg(filenames)]
|
224
|
-
tasks
|
225
265
|
end
|
226
266
|
|
227
267
|
end
|
228
|
-
|
229
268
|
end
|