pwrake 2.1.3 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +25 -12
- data/bin/pwrake-mpi +41 -0
- data/bin/pwrake-mpi-run +16 -0
- data/lib/pwrake/branch/branch.rb +17 -29
- data/lib/pwrake/branch/branch_application.rb +31 -41
- data/lib/pwrake/branch/communicator.rb +32 -11
- data/lib/pwrake/branch/communicator_set.rb +6 -0
- data/lib/pwrake/logger.rb +29 -1
- data/lib/pwrake/master/master.rb +51 -64
- data/lib/pwrake/master/master_application.rb +4 -9
- data/lib/pwrake/mpi/branch.rb +76 -0
- data/lib/pwrake/mpi/worker.rb +42 -0
- data/lib/pwrake/nbio.rb +60 -62
- data/lib/pwrake/option/host_map.rb +50 -9
- data/lib/pwrake/option/option.rb +55 -66
- data/lib/pwrake/option/option_default_filesystem.rb +48 -0
- data/lib/pwrake/option/option_gfarm.rb +1 -0
- data/lib/pwrake/option/option_gfarm2fs.rb +101 -0
- data/lib/pwrake/queue/locality_aware_queue.rb +7 -11
- data/lib/pwrake/report/task_stat.rb +4 -5
- data/lib/pwrake/task/task_wrapper.rb +57 -34
- data/lib/pwrake/version.rb +1 -1
- data/lib/pwrake/worker/executor.rb +32 -14
- data/lib/pwrake/worker/invoker.rb +61 -34
- data/lib/pwrake/worker/worker_main.rb +5 -5
- data/lib/pwrake/worker/writer.rb +27 -20
- metadata +11 -5
- data/lib/pwrake/option/option_filesystem.rb +0 -123
- data/lib/pwrake/worker/load.rb +0 -14
- data/lib/pwrake/worker/reader.rb +0 -73
@@ -1,7 +1,16 @@
|
|
1
|
+
require "socket"
|
2
|
+
|
1
3
|
module Pwrake
|
2
4
|
|
3
5
|
class HostInfo
|
4
6
|
|
7
|
+
@@local_ip = nil
|
8
|
+
|
9
|
+
def self.local_ip
|
10
|
+
@@local_ip ||=
|
11
|
+
Socket.getifaddrs.select{|a| a.addr.ip?}.map{|a| a.addr.ip_address}
|
12
|
+
end
|
13
|
+
|
5
14
|
def initialize(name,id,ncore,weight,group=nil)
|
6
15
|
@name = name
|
7
16
|
@ncore = ncore
|
@@ -11,17 +20,28 @@ module Pwrake
|
|
11
20
|
@continuous_fail = 0
|
12
21
|
@total_fail = 0
|
13
22
|
@count_task = 0
|
23
|
+
@ipaddr = []
|
14
24
|
end
|
15
25
|
|
16
26
|
attr_reader :name, :ncore, :weight, :group, :id, :steal_flag
|
27
|
+
attr_reader :ipaddr
|
17
28
|
attr_accessor :idle_cores
|
18
29
|
|
30
|
+
def local?
|
31
|
+
ipa = IPSocket.getaddress(@name)
|
32
|
+
HostInfo.local_ip.include?(ipa)
|
33
|
+
end
|
34
|
+
|
19
35
|
def set_ncore(n)
|
20
36
|
@retire = 0
|
21
37
|
@busy_cores = 0
|
22
38
|
@ncore = @idle_cores = n
|
23
39
|
end
|
24
40
|
|
41
|
+
def set_ip(ipa)
|
42
|
+
@ipaddr.push(ipa)
|
43
|
+
end
|
44
|
+
|
25
45
|
def idle(n)
|
26
46
|
@busy_cores -= n
|
27
47
|
@idle_cores += n
|
@@ -70,11 +90,17 @@ module Pwrake
|
|
70
90
|
|
71
91
|
class HostMap < Hash
|
72
92
|
|
93
|
+
def self.ipmatch_for_name(name)
|
94
|
+
@@hostmap.ipmatch_for_name(name)
|
95
|
+
end
|
96
|
+
|
73
97
|
def initialize(arg=nil)
|
74
98
|
@host_map = {}
|
75
99
|
@by_id = []
|
76
100
|
@by_name = {}
|
77
|
-
|
101
|
+
@is_local = false
|
102
|
+
@ipmatch_for_name = {}
|
103
|
+
@@hostmap = self
|
78
104
|
case arg
|
79
105
|
when /\.yaml$/
|
80
106
|
read_yaml(arg)
|
@@ -86,12 +112,21 @@ module Pwrake
|
|
86
112
|
parse_hosts(["localhost 1"])
|
87
113
|
else
|
88
114
|
raise ArgumentError, "arg=#{arg.inspect}"
|
89
|
-
|
90
|
-
|
115
|
+
end
|
116
|
+
|
117
|
+
# local check
|
118
|
+
if @by_id.size == 1
|
119
|
+
if @by_id[0].local?
|
120
|
+
@is_local = true
|
121
|
+
end
|
91
122
|
end
|
92
123
|
end
|
93
124
|
attr_reader :by_id, :by_name
|
94
125
|
|
126
|
+
def local?
|
127
|
+
@is_local
|
128
|
+
end
|
129
|
+
|
95
130
|
def host_count
|
96
131
|
@by_id.size
|
97
132
|
end
|
@@ -120,6 +155,18 @@ module Pwrake
|
|
120
155
|
a
|
121
156
|
end
|
122
157
|
|
158
|
+
def ipmatch_for_name(node)
|
159
|
+
unless a = @ipmatch_for_name[node]
|
160
|
+
@ipmatch_for_name[node] = a = []
|
161
|
+
ip = IPSocket.getaddress(node)
|
162
|
+
@by_id.each_with_index do |h,id|
|
163
|
+
a << id if h.ipaddr.include?(ip)
|
164
|
+
end
|
165
|
+
Log.debug "node:#{node} hosts:#{a.map{|id|@by_id[id].name}.inspect}"
|
166
|
+
end
|
167
|
+
a
|
168
|
+
end
|
169
|
+
|
123
170
|
private
|
124
171
|
|
125
172
|
def read_host(file)
|
@@ -137,7 +184,6 @@ module Pwrake
|
|
137
184
|
end
|
138
185
|
|
139
186
|
def parse_hosts(hosts)
|
140
|
-
#p hosts
|
141
187
|
if hosts.kind_of? Array
|
142
188
|
hosts = {"localhost"=>hosts}
|
143
189
|
end
|
@@ -166,11 +212,6 @@ module Pwrake
|
|
166
212
|
hosts = [host]
|
167
213
|
end
|
168
214
|
hosts.each do |host|
|
169
|
-
begin
|
170
|
-
host = Socket.gethostbyname(host)[0]
|
171
|
-
rescue
|
172
|
-
Log.warn "FQDN not resoved : #{host}"
|
173
|
-
end
|
174
215
|
ncore &&= ncore.to_i
|
175
216
|
weitht &&= weight.to_i
|
176
217
|
#weight = (weight || 1).to_f
|
data/lib/pwrake/option/option.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require "pathname"
|
2
2
|
require "yaml"
|
3
|
+
require "parallel"
|
3
4
|
require "pwrake/option/host_map"
|
4
5
|
|
5
6
|
module Pwrake
|
@@ -10,6 +11,7 @@ module Pwrake
|
|
10
11
|
|
11
12
|
def initialize
|
12
13
|
load_pwrake_conf
|
14
|
+
init_filesystem
|
13
15
|
init_options
|
14
16
|
init_pass_env
|
15
17
|
if self['SHOW_CONF']
|
@@ -21,24 +23,11 @@ module Pwrake
|
|
21
23
|
Report.new(self,[]).report_html
|
22
24
|
exit
|
23
25
|
end
|
24
|
-
end
|
25
|
-
|
26
|
-
def init
|
27
|
-
Log.info "Options:"
|
28
|
-
self.each do |k,v|
|
29
|
-
Log.info " #{k} = #{v.inspect}"
|
30
|
-
end
|
31
|
-
#@counter = Counter.new
|
32
26
|
setup_hosts
|
33
|
-
|
34
|
-
#
|
35
|
-
if self['LOG_DIR'] && self['GC_LOG_FILE']
|
36
|
-
GC::Profiler.enable
|
37
|
-
end
|
27
|
+
set_filesystem_option
|
38
28
|
end
|
39
29
|
|
40
30
|
attr_reader :counter
|
41
|
-
attr_reader :logger
|
42
31
|
attr_accessor :total_cores
|
43
32
|
|
44
33
|
DEFAULT_CONFFILES = ["pwrake_conf.yaml","PwrakeConf.yaml"]
|
@@ -59,12 +48,52 @@ module Pwrake
|
|
59
48
|
if pwrake_conf.nil?
|
60
49
|
@yaml = {}
|
61
50
|
else
|
62
|
-
#Log.debug "load pwrake_conf=#{pwrake_conf}"
|
63
51
|
require "yaml"
|
64
52
|
@yaml = open(pwrake_conf){|f| YAML.load(f) }
|
65
53
|
end
|
66
54
|
end
|
67
55
|
|
56
|
+
# ----------------------------------------------------------
|
57
|
+
|
58
|
+
def init_filesystem
|
59
|
+
@filesystem = Rake.application.options.filesystem
|
60
|
+
@filesystem ||= mount_type.sub(/fuse\./,"")
|
61
|
+
begin
|
62
|
+
require "pwrake/option/option_#{@filesystem}"
|
63
|
+
rescue LoadError
|
64
|
+
require "pwrake/option/option_default_filesystem"
|
65
|
+
end
|
66
|
+
end
|
67
|
+
attr_reader :worker_progs
|
68
|
+
attr_reader :worker_option
|
69
|
+
attr_reader :queue_class
|
70
|
+
|
71
|
+
def mount_type(d=nil)
|
72
|
+
mtab = '/etc/mtab'
|
73
|
+
if File.exist?(mtab)
|
74
|
+
d ||= mountpoint_of_cwd
|
75
|
+
open(mtab,'r') do |f|
|
76
|
+
f.each_line do |l|
|
77
|
+
a = l.split
|
78
|
+
if a[1] == d
|
79
|
+
return a[2]
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
nil
|
85
|
+
end
|
86
|
+
|
87
|
+
def mountpoint_of_cwd
|
88
|
+
d = Pathname.pwd
|
89
|
+
while !d.mountpoint?
|
90
|
+
d = d.parent
|
91
|
+
end
|
92
|
+
d.to_s
|
93
|
+
end
|
94
|
+
|
95
|
+
# ----------------------------------------------------------
|
96
|
+
|
68
97
|
def init_options
|
69
98
|
option_data.each do |a|
|
70
99
|
prc = nil
|
@@ -109,14 +138,8 @@ module Pwrake
|
|
109
138
|
'TRACE_OUTPUT',
|
110
139
|
'TRACE_RULES',
|
111
140
|
|
112
|
-
'FILESYSTEM',
|
113
141
|
'SSH_OPTION',
|
114
142
|
'PASS_ENV',
|
115
|
-
['SHELL_COMMAND', proc{|v| v||ENV['SHELL']}],
|
116
|
-
['SHELL_RC','SHELLRC'],
|
117
|
-
'GFARM2FS_OPTION',
|
118
|
-
'GFARM2FS_DEBUG',
|
119
|
-
['GFARM2FS_DEBUG_WAIT', proc{|v| v ? v.to_i : 1}],
|
120
143
|
'GNU_TIME',
|
121
144
|
'DEBUG',
|
122
145
|
'PLOT_PARALLELISM',
|
@@ -127,7 +150,6 @@ module Pwrake
|
|
127
150
|
'FAILURE_TERMINATION', # wait, kill, continue
|
128
151
|
'QUEUE_PRIORITY', # RANK(default), FIFO, LIFO, DFS
|
129
152
|
'NOACTION_QUEUE_PRIORITY', # FIFO(default), LIFO, RAND
|
130
|
-
#'NUM_NOACTION_THREADS', # default=4 when gfarm, else 1
|
131
153
|
'GRAPH_PARTITION',
|
132
154
|
'PLOT_PARTITION',
|
133
155
|
|
@@ -185,18 +207,12 @@ module Pwrake
|
|
185
207
|
['SHELL_START_INTERVAL', proc{|v| (v || 0.012).to_f}],
|
186
208
|
['HEARTBEAT', proc{|v| (v || 240).to_i}],
|
187
209
|
['RETRY', proc{|v| (v || 1).to_i}],
|
188
|
-
['DISABLE_AFFINITY', proc{|v| v || ENV['AFFINITY']=='off'}],
|
189
|
-
['DISABLE_STEAL', proc{|v| v || ENV['STEAL']=='off'}],
|
190
|
-
['GFARM_BASEDIR', proc{|v| v || '/tmp'}],
|
191
|
-
['GFARM_PREFIX', proc{|v| v || "pwrake_#{ENV['USER']}"}],
|
192
|
-
['GFARM_SUBDIR', proc{|v| v || '/'}],
|
193
|
-
['MAX_GFWHERE_WORKER', proc{|v| (v || 8).to_i}],
|
194
210
|
['MASTER_HOSTNAME', proc{|v| (v || begin;`hostname -f`;rescue;end || '').chomp}],
|
195
211
|
['WORK_DIR', proc{|v|
|
196
212
|
v ||= '%CWD_RELATIVE_TO_HOME'
|
197
|
-
v.sub('%CWD_RELATIVE_TO_HOME',
|
213
|
+
v.sub('%CWD_RELATIVE_TO_HOME',cwd_relative_if_under_home)
|
198
214
|
}],
|
199
|
-
]
|
215
|
+
].concat(option_data_filesystem)
|
200
216
|
end
|
201
217
|
|
202
218
|
def format_time_pid(v)
|
@@ -281,7 +297,7 @@ module Pwrake
|
|
281
297
|
return pwd.relative_path_from(home).to_s
|
282
298
|
end
|
283
299
|
|
284
|
-
#
|
300
|
+
# ----------------------------------------------------------
|
285
301
|
|
286
302
|
def init_pass_env
|
287
303
|
if envs = self['PASS_ENV']
|
@@ -314,6 +330,7 @@ module Pwrake
|
|
314
330
|
end
|
315
331
|
end
|
316
332
|
|
333
|
+
# ----------------------------------------------------------
|
317
334
|
|
318
335
|
def setup_hosts
|
319
336
|
if @hostfile && @num_threads
|
@@ -323,44 +340,16 @@ module Pwrake
|
|
323
340
|
end
|
324
341
|
attr_reader :host_map
|
325
342
|
|
343
|
+
# ----------------------------------------------------------
|
326
344
|
|
327
|
-
def
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
for i in #{d}*; do
|
332
|
-
if [ -d \"$i\" ]; then
|
333
|
-
case \"$i\" in
|
334
|
-
*_000) ;;
|
335
|
-
*) fusermount -u $i; rmdir $i ;;
|
336
|
-
esac
|
337
|
-
fi
|
338
|
-
done
|
339
|
-
sleep 1
|
340
|
-
for i in #{d}*_000; do
|
341
|
-
if [ -d \"$i\" ]; then
|
342
|
-
fusermount -u $i; rmdir $i
|
343
|
-
fi
|
344
|
-
done
|
345
|
-
"
|
346
|
-
threads = []
|
347
|
-
@host_map.each do |k,hosts|
|
348
|
-
hosts.each do |info|
|
349
|
-
threads << Thread.new do
|
350
|
-
system "ssh #{info.name} '#{rcmd}'"
|
351
|
-
end
|
352
|
-
end
|
345
|
+
def put_log
|
346
|
+
Log.info "Options:"
|
347
|
+
self.each do |k,v|
|
348
|
+
Log.info " #{k} = #{v.inspect}"
|
353
349
|
end
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
# ----- finish -----
|
358
|
-
|
359
|
-
def finish_option
|
360
|
-
Log.close
|
350
|
+
Log.debug "@queue_class=#{@queue_class}"
|
351
|
+
Log.debug "@filesystem=#{@filesystem}"
|
361
352
|
end
|
362
353
|
|
363
354
|
end
|
364
355
|
end
|
365
|
-
|
366
|
-
require "pwrake/option/option_filesystem"
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require "pwrake/master/postprocess"
|
2
|
+
|
3
|
+
module Pwrake
|
4
|
+
|
5
|
+
module DefaultFileSystemOption
|
6
|
+
|
7
|
+
def option_data_filesystem
|
8
|
+
[]
|
9
|
+
end
|
10
|
+
|
11
|
+
def set_filesystem_option
|
12
|
+
@worker_progs = %w[
|
13
|
+
parallel/processor_count.rb
|
14
|
+
pwrake/nbio
|
15
|
+
pwrake/branch/fiber_queue
|
16
|
+
pwrake/worker/writer
|
17
|
+
pwrake/worker/log_executor
|
18
|
+
pwrake/worker/executor
|
19
|
+
pwrake/worker/invoker
|
20
|
+
pwrake/worker/shared_directory
|
21
|
+
pwrake/worker/worker_main
|
22
|
+
]
|
23
|
+
@worker_option = {
|
24
|
+
:base_dir => "",
|
25
|
+
:work_dir => self['WORK_DIR'],
|
26
|
+
:log_dir => self['LOG_DIR'],
|
27
|
+
:pass_env => self['PASS_ENV'],
|
28
|
+
:ssh_option => self['SSH_OPTION'],
|
29
|
+
:heartbeat => self['HEARTBEAT'],
|
30
|
+
:shared_directory => "SharedDirectory"
|
31
|
+
}
|
32
|
+
@filesystem = "default"
|
33
|
+
@queue_class = "TaskQueue"
|
34
|
+
end
|
35
|
+
|
36
|
+
def max_postprocess_pool
|
37
|
+
1
|
38
|
+
end
|
39
|
+
|
40
|
+
def postprocess(runner)
|
41
|
+
Postprocess.new(runner)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
class Option
|
46
|
+
include DefaultFileSystemOption
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
lib/pwrake/option/option_gfarm2fs.rb
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require "pwrake/queue/locality_aware_queue"
|
2
|
+
require "pwrake/gfarm/gfarm_path"
|
3
|
+
require "pwrake/gfarm/gfarm_postprocess"
|
4
|
+
|
5
|
+
module Pwrake
|
6
|
+
|
7
|
+
module GfarmFileSystemOption
|
8
|
+
|
9
|
+
def option_data_filesystem
|
10
|
+
[
|
11
|
+
'GFARM2FS_OPTION',
|
12
|
+
'GFARM2FS_DEBUG',
|
13
|
+
['GFARM2FS_DEBUG_WAIT', proc{|v| v ? v.to_i : 1}],
|
14
|
+
['DISABLE_AFFINITY', proc{|v| v || ENV['AFFINITY']=='off'}],
|
15
|
+
['DISABLE_STEAL', proc{|v| v || ENV['STEAL']=='off'}],
|
16
|
+
['GFARM_BASEDIR', proc{|v| v || '/tmp'}],
|
17
|
+
['GFARM_PREFIX', proc{|v| v || "pwrake_#{ENV['USER']}"}],
|
18
|
+
['GFARM_SUBDIR', proc{|v| v || '/'}],
|
19
|
+
['MAX_GFWHERE_WORKER', proc{|v| (v || 8).to_i}],
|
20
|
+
]
|
21
|
+
end
|
22
|
+
|
23
|
+
def set_filesystem_option
|
24
|
+
@filesystem = 'gfarm'
|
25
|
+
GfarmPath.subdir = self['GFARM_SUBDIR']
|
26
|
+
@worker_option = {
|
27
|
+
:log_dir => self['LOG_DIR'],
|
28
|
+
:pass_env => self['PASS_ENV'],
|
29
|
+
:ssh_option => self['SSH_OPTION'],
|
30
|
+
:heartbeat => self['HEARTBEAT'],
|
31
|
+
#
|
32
|
+
:shared_directory => "GfarmDirectory",
|
33
|
+
:base_dir => self['GFARM_BASEDIR']+"/"+self['GFARM_PREFIX'],
|
34
|
+
:work_dir => GfarmPath.pwd.to_s,
|
35
|
+
:gfarm2fs_option => self['GFARM2FS_OPTION'],
|
36
|
+
:gfarm2fs_debug => self['GFARM2FS_DEBUG'],
|
37
|
+
:gfarm2fs_debug_wait => self['GFARM2FS_DEBUG_WAIT'],
|
38
|
+
:single_mp => self['GFARM_SINGLE_MP']
|
39
|
+
}
|
40
|
+
@worker_progs = %w[
|
41
|
+
parallel/processor_count.rb
|
42
|
+
pwrake/nbio
|
43
|
+
pwrake/branch/fiber_queue
|
44
|
+
pwrake/worker/writer
|
45
|
+
pwrake/worker/log_executor
|
46
|
+
pwrake/worker/executor
|
47
|
+
pwrake/worker/invoker
|
48
|
+
pwrake/worker/shared_directory
|
49
|
+
pwrake/worker/gfarm_directory
|
50
|
+
pwrake/worker/worker_main
|
51
|
+
]
|
52
|
+
if self['DISABLE_AFFINITY']
|
53
|
+
@queue_class = "TaskQueue"
|
54
|
+
else
|
55
|
+
@queue_class = "LocalityAwareQueue"
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
def max_postprocess_pool
|
60
|
+
self['MAX_GFWHERE_WORKER']
|
61
|
+
end
|
62
|
+
|
63
|
+
def postprocess(runner)
|
64
|
+
GfarmPostprocess.new(runner)
|
65
|
+
end
|
66
|
+
|
67
|
+
def clear_gfarm2fs
|
68
|
+
setup_hosts
|
69
|
+
d = File.join(self['GFARM_BASEDIR'],self['GFARM_PREFIX'])
|
70
|
+
rcmd = "
|
71
|
+
for i in #{d}*; do
|
72
|
+
if [ -d \"$i\" ]; then
|
73
|
+
case \"$i\" in
|
74
|
+
*_000) ;;
|
75
|
+
*) fusermount -u $i; rmdir $i ;;
|
76
|
+
esac
|
77
|
+
fi
|
78
|
+
done
|
79
|
+
sleep 1
|
80
|
+
for i in #{d}*_000; do
|
81
|
+
if [ -d \"$i\" ]; then
|
82
|
+
fusermount -u $i; rmdir $i
|
83
|
+
fi
|
84
|
+
done
|
85
|
+
"
|
86
|
+
threads = []
|
87
|
+
@host_map.each do |k,hosts|
|
88
|
+
hosts.each do |info|
|
89
|
+
threads << Thread.new do
|
90
|
+
system "ssh #{info.name} '#{rcmd}'"
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
threads.each{|t| t.join}
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
class Option
|
99
|
+
include GfarmFileSystemOption
|
100
|
+
end
|
101
|
+
end
|