pwrake 2.1.3 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +25 -12
- data/bin/pwrake-mpi +41 -0
- data/bin/pwrake-mpi-run +16 -0
- data/lib/pwrake/branch/branch.rb +17 -29
- data/lib/pwrake/branch/branch_application.rb +31 -41
- data/lib/pwrake/branch/communicator.rb +32 -11
- data/lib/pwrake/branch/communicator_set.rb +6 -0
- data/lib/pwrake/logger.rb +29 -1
- data/lib/pwrake/master/master.rb +51 -64
- data/lib/pwrake/master/master_application.rb +4 -9
- data/lib/pwrake/mpi/branch.rb +76 -0
- data/lib/pwrake/mpi/worker.rb +42 -0
- data/lib/pwrake/nbio.rb +60 -62
- data/lib/pwrake/option/host_map.rb +50 -9
- data/lib/pwrake/option/option.rb +55 -66
- data/lib/pwrake/option/option_default_filesystem.rb +48 -0
- data/lib/pwrake/option/option_gfarm.rb +1 -0
- data/lib/pwrake/option/option_gfarm2fs.rb +101 -0
- data/lib/pwrake/queue/locality_aware_queue.rb +7 -11
- data/lib/pwrake/report/task_stat.rb +4 -5
- data/lib/pwrake/task/task_wrapper.rb +57 -34
- data/lib/pwrake/version.rb +1 -1
- data/lib/pwrake/worker/executor.rb +32 -14
- data/lib/pwrake/worker/invoker.rb +61 -34
- data/lib/pwrake/worker/worker_main.rb +5 -5
- data/lib/pwrake/worker/writer.rb +27 -20
- metadata +11 -5
- data/lib/pwrake/option/option_filesystem.rb +0 -123
- data/lib/pwrake/worker/load.rb +0 -14
- data/lib/pwrake/worker/reader.rb +0 -73
@@ -1,7 +1,16 @@
|
|
1
|
+
require "socket"
|
2
|
+
|
1
3
|
module Pwrake
|
2
4
|
|
3
5
|
class HostInfo
|
4
6
|
|
7
|
+
@@local_ip = nil
|
8
|
+
|
9
|
+
def self.local_ip
|
10
|
+
@@local_ip ||=
|
11
|
+
Socket.getifaddrs.select{|a| a.addr.ip?}.map{|a| a.addr.ip_address}
|
12
|
+
end
|
13
|
+
|
5
14
|
def initialize(name,id,ncore,weight,group=nil)
|
6
15
|
@name = name
|
7
16
|
@ncore = ncore
|
@@ -11,17 +20,28 @@ module Pwrake
|
|
11
20
|
@continuous_fail = 0
|
12
21
|
@total_fail = 0
|
13
22
|
@count_task = 0
|
23
|
+
@ipaddr = []
|
14
24
|
end
|
15
25
|
|
16
26
|
attr_reader :name, :ncore, :weight, :group, :id, :steal_flag
|
27
|
+
attr_reader :ipaddr
|
17
28
|
attr_accessor :idle_cores
|
18
29
|
|
30
|
+
def local?
|
31
|
+
ipa = IPSocket.getaddress(@name)
|
32
|
+
HostInfo.local_ip.include?(ipa)
|
33
|
+
end
|
34
|
+
|
19
35
|
def set_ncore(n)
|
20
36
|
@retire = 0
|
21
37
|
@busy_cores = 0
|
22
38
|
@ncore = @idle_cores = n
|
23
39
|
end
|
24
40
|
|
41
|
+
def set_ip(ipa)
|
42
|
+
@ipaddr.push(ipa)
|
43
|
+
end
|
44
|
+
|
25
45
|
def idle(n)
|
26
46
|
@busy_cores -= n
|
27
47
|
@idle_cores += n
|
@@ -70,11 +90,17 @@ module Pwrake
|
|
70
90
|
|
71
91
|
class HostMap < Hash
|
72
92
|
|
93
|
+
def self.ipmatch_for_name(name)
|
94
|
+
@@hostmap.ipmatch_for_name(name)
|
95
|
+
end
|
96
|
+
|
73
97
|
def initialize(arg=nil)
|
74
98
|
@host_map = {}
|
75
99
|
@by_id = []
|
76
100
|
@by_name = {}
|
77
|
-
|
101
|
+
@is_local = false
|
102
|
+
@ipmatch_for_name = {}
|
103
|
+
@@hostmap = self
|
78
104
|
case arg
|
79
105
|
when /\.yaml$/
|
80
106
|
read_yaml(arg)
|
@@ -86,12 +112,21 @@ module Pwrake
|
|
86
112
|
parse_hosts(["localhost 1"])
|
87
113
|
else
|
88
114
|
raise ArgumentError, "arg=#{arg.inspect}"
|
89
|
-
|
90
|
-
|
115
|
+
end
|
116
|
+
|
117
|
+
# local check
|
118
|
+
if @by_id.size == 1
|
119
|
+
if @by_id[0].local?
|
120
|
+
@is_local = true
|
121
|
+
end
|
91
122
|
end
|
92
123
|
end
|
93
124
|
attr_reader :by_id, :by_name
|
94
125
|
|
126
|
+
def local?
|
127
|
+
@is_local
|
128
|
+
end
|
129
|
+
|
95
130
|
def host_count
|
96
131
|
@by_id.size
|
97
132
|
end
|
@@ -120,6 +155,18 @@ module Pwrake
|
|
120
155
|
a
|
121
156
|
end
|
122
157
|
|
158
|
+
def ipmatch_for_name(node)
|
159
|
+
unless a = @ipmatch_for_name[node]
|
160
|
+
@ipmatch_for_name[node] = a = []
|
161
|
+
ip = IPSocket.getaddress(node)
|
162
|
+
@by_id.each_with_index do |h,id|
|
163
|
+
a << id if h.ipaddr.include?(ip)
|
164
|
+
end
|
165
|
+
Log.debug "node:#{node} hosts:#{a.map{|id|@by_id[id].name}.inspect}"
|
166
|
+
end
|
167
|
+
a
|
168
|
+
end
|
169
|
+
|
123
170
|
private
|
124
171
|
|
125
172
|
def read_host(file)
|
@@ -137,7 +184,6 @@ module Pwrake
|
|
137
184
|
end
|
138
185
|
|
139
186
|
def parse_hosts(hosts)
|
140
|
-
#p hosts
|
141
187
|
if hosts.kind_of? Array
|
142
188
|
hosts = {"localhost"=>hosts}
|
143
189
|
end
|
@@ -166,11 +212,6 @@ module Pwrake
|
|
166
212
|
hosts = [host]
|
167
213
|
end
|
168
214
|
hosts.each do |host|
|
169
|
-
begin
|
170
|
-
host = Socket.gethostbyname(host)[0]
|
171
|
-
rescue
|
172
|
-
Log.warn "FQDN not resoved : #{host}"
|
173
|
-
end
|
174
215
|
ncore &&= ncore.to_i
|
175
216
|
weitht &&= weight.to_i
|
176
217
|
#weight = (weight || 1).to_f
|
data/lib/pwrake/option/option.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require "pathname"
|
2
2
|
require "yaml"
|
3
|
+
require "parallel"
|
3
4
|
require "pwrake/option/host_map"
|
4
5
|
|
5
6
|
module Pwrake
|
@@ -10,6 +11,7 @@ module Pwrake
|
|
10
11
|
|
11
12
|
def initialize
|
12
13
|
load_pwrake_conf
|
14
|
+
init_filesystem
|
13
15
|
init_options
|
14
16
|
init_pass_env
|
15
17
|
if self['SHOW_CONF']
|
@@ -21,24 +23,11 @@ module Pwrake
|
|
21
23
|
Report.new(self,[]).report_html
|
22
24
|
exit
|
23
25
|
end
|
24
|
-
end
|
25
|
-
|
26
|
-
def init
|
27
|
-
Log.info "Options:"
|
28
|
-
self.each do |k,v|
|
29
|
-
Log.info " #{k} = #{v.inspect}"
|
30
|
-
end
|
31
|
-
#@counter = Counter.new
|
32
26
|
setup_hosts
|
33
|
-
|
34
|
-
#
|
35
|
-
if self['LOG_DIR'] && self['GC_LOG_FILE']
|
36
|
-
GC::Profiler.enable
|
37
|
-
end
|
27
|
+
set_filesystem_option
|
38
28
|
end
|
39
29
|
|
40
30
|
attr_reader :counter
|
41
|
-
attr_reader :logger
|
42
31
|
attr_accessor :total_cores
|
43
32
|
|
44
33
|
DEFAULT_CONFFILES = ["pwrake_conf.yaml","PwrakeConf.yaml"]
|
@@ -59,12 +48,52 @@ module Pwrake
|
|
59
48
|
if pwrake_conf.nil?
|
60
49
|
@yaml = {}
|
61
50
|
else
|
62
|
-
#Log.debug "load pwrake_conf=#{pwrake_conf}"
|
63
51
|
require "yaml"
|
64
52
|
@yaml = open(pwrake_conf){|f| YAML.load(f) }
|
65
53
|
end
|
66
54
|
end
|
67
55
|
|
56
|
+
# ----------------------------------------------------------
|
57
|
+
|
58
|
+
def init_filesystem
|
59
|
+
@filesystem = Rake.application.options.filesystem
|
60
|
+
@filesystem ||= mount_type.sub(/fuse\./,"")
|
61
|
+
begin
|
62
|
+
require "pwrake/option/option_#{@filesystem}"
|
63
|
+
rescue LoadError
|
64
|
+
require "pwrake/option/option_default_filesystem"
|
65
|
+
end
|
66
|
+
end
|
67
|
+
attr_reader :worker_progs
|
68
|
+
attr_reader :worker_option
|
69
|
+
attr_reader :queue_class
|
70
|
+
|
71
|
+
def mount_type(d=nil)
|
72
|
+
mtab = '/etc/mtab'
|
73
|
+
if File.exist?(mtab)
|
74
|
+
d ||= mountpoint_of_cwd
|
75
|
+
open(mtab,'r') do |f|
|
76
|
+
f.each_line do |l|
|
77
|
+
a = l.split
|
78
|
+
if a[1] == d
|
79
|
+
return a[2]
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
nil
|
85
|
+
end
|
86
|
+
|
87
|
+
def mountpoint_of_cwd
|
88
|
+
d = Pathname.pwd
|
89
|
+
while !d.mountpoint?
|
90
|
+
d = d.parent
|
91
|
+
end
|
92
|
+
d.to_s
|
93
|
+
end
|
94
|
+
|
95
|
+
# ----------------------------------------------------------
|
96
|
+
|
68
97
|
def init_options
|
69
98
|
option_data.each do |a|
|
70
99
|
prc = nil
|
@@ -109,14 +138,8 @@ module Pwrake
|
|
109
138
|
'TRACE_OUTPUT',
|
110
139
|
'TRACE_RULES',
|
111
140
|
|
112
|
-
'FILESYSTEM',
|
113
141
|
'SSH_OPTION',
|
114
142
|
'PASS_ENV',
|
115
|
-
['SHELL_COMMAND', proc{|v| v||ENV['SHELL']}],
|
116
|
-
['SHELL_RC','SHELLRC'],
|
117
|
-
'GFARM2FS_OPTION',
|
118
|
-
'GFARM2FS_DEBUG',
|
119
|
-
['GFARM2FS_DEBUG_WAIT', proc{|v| v ? v.to_i : 1}],
|
120
143
|
'GNU_TIME',
|
121
144
|
'DEBUG',
|
122
145
|
'PLOT_PARALLELISM',
|
@@ -127,7 +150,6 @@ module Pwrake
|
|
127
150
|
'FAILURE_TERMINATION', # wait, kill, continue
|
128
151
|
'QUEUE_PRIORITY', # RANK(default), FIFO, LIFO, DFS
|
129
152
|
'NOACTION_QUEUE_PRIORITY', # FIFO(default), LIFO, RAND
|
130
|
-
#'NUM_NOACTION_THREADS', # default=4 when gfarm, else 1
|
131
153
|
'GRAPH_PARTITION',
|
132
154
|
'PLOT_PARTITION',
|
133
155
|
|
@@ -185,18 +207,12 @@ module Pwrake
|
|
185
207
|
['SHELL_START_INTERVAL', proc{|v| (v || 0.012).to_f}],
|
186
208
|
['HEARTBEAT', proc{|v| (v || 240).to_i}],
|
187
209
|
['RETRY', proc{|v| (v || 1).to_i}],
|
188
|
-
['DISABLE_AFFINITY', proc{|v| v || ENV['AFFINITY']=='off'}],
|
189
|
-
['DISABLE_STEAL', proc{|v| v || ENV['STEAL']=='off'}],
|
190
|
-
['GFARM_BASEDIR', proc{|v| v || '/tmp'}],
|
191
|
-
['GFARM_PREFIX', proc{|v| v || "pwrake_#{ENV['USER']}"}],
|
192
|
-
['GFARM_SUBDIR', proc{|v| v || '/'}],
|
193
|
-
['MAX_GFWHERE_WORKER', proc{|v| (v || 8).to_i}],
|
194
210
|
['MASTER_HOSTNAME', proc{|v| (v || begin;`hostname -f`;rescue;end || '').chomp}],
|
195
211
|
['WORK_DIR', proc{|v|
|
196
212
|
v ||= '%CWD_RELATIVE_TO_HOME'
|
197
|
-
v.sub('%CWD_RELATIVE_TO_HOME',
|
213
|
+
v.sub('%CWD_RELATIVE_TO_HOME',cwd_relative_if_under_home)
|
198
214
|
}],
|
199
|
-
]
|
215
|
+
].concat(option_data_filesystem)
|
200
216
|
end
|
201
217
|
|
202
218
|
def format_time_pid(v)
|
@@ -281,7 +297,7 @@ module Pwrake
|
|
281
297
|
return pwd.relative_path_from(home).to_s
|
282
298
|
end
|
283
299
|
|
284
|
-
#
|
300
|
+
# ----------------------------------------------------------
|
285
301
|
|
286
302
|
def init_pass_env
|
287
303
|
if envs = self['PASS_ENV']
|
@@ -314,6 +330,7 @@ module Pwrake
|
|
314
330
|
end
|
315
331
|
end
|
316
332
|
|
333
|
+
# ----------------------------------------------------------
|
317
334
|
|
318
335
|
def setup_hosts
|
319
336
|
if @hostfile && @num_threads
|
@@ -323,44 +340,16 @@ module Pwrake
|
|
323
340
|
end
|
324
341
|
attr_reader :host_map
|
325
342
|
|
343
|
+
# ----------------------------------------------------------
|
326
344
|
|
327
|
-
def
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
for i in #{d}*; do
|
332
|
-
if [ -d \"$i\" ]; then
|
333
|
-
case \"$i\" in
|
334
|
-
*_000) ;;
|
335
|
-
*) fusermount -u $i; rmdir $i ;;
|
336
|
-
esac
|
337
|
-
fi
|
338
|
-
done
|
339
|
-
sleep 1
|
340
|
-
for i in #{d}*_000; do
|
341
|
-
if [ -d \"$i\" ]; then
|
342
|
-
fusermount -u $i; rmdir $i
|
343
|
-
fi
|
344
|
-
done
|
345
|
-
"
|
346
|
-
threads = []
|
347
|
-
@host_map.each do |k,hosts|
|
348
|
-
hosts.each do |info|
|
349
|
-
threads << Thread.new do
|
350
|
-
system "ssh #{info.name} '#{rcmd}'"
|
351
|
-
end
|
352
|
-
end
|
345
|
+
def put_log
|
346
|
+
Log.info "Options:"
|
347
|
+
self.each do |k,v|
|
348
|
+
Log.info " #{k} = #{v.inspect}"
|
353
349
|
end
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
# ----- finish -----
|
358
|
-
|
359
|
-
def finish_option
|
360
|
-
Log.close
|
350
|
+
Log.debug "@queue_class=#{@queue_class}"
|
351
|
+
Log.debug "@filesystem=#{@filesystem}"
|
361
352
|
end
|
362
353
|
|
363
354
|
end
|
364
355
|
end
|
365
|
-
|
366
|
-
require "pwrake/option/option_filesystem"
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require "pwrake/master/postprocess"
|
2
|
+
|
3
|
+
module Pwrake
|
4
|
+
|
5
|
+
module DefaultFileSystemOption
|
6
|
+
|
7
|
+
def option_data_filesystem
|
8
|
+
[]
|
9
|
+
end
|
10
|
+
|
11
|
+
def set_filesystem_option
|
12
|
+
@worker_progs = %w[
|
13
|
+
parallel/processor_count.rb
|
14
|
+
pwrake/nbio
|
15
|
+
pwrake/branch/fiber_queue
|
16
|
+
pwrake/worker/writer
|
17
|
+
pwrake/worker/log_executor
|
18
|
+
pwrake/worker/executor
|
19
|
+
pwrake/worker/invoker
|
20
|
+
pwrake/worker/shared_directory
|
21
|
+
pwrake/worker/worker_main
|
22
|
+
]
|
23
|
+
@worker_option = {
|
24
|
+
:base_dir => "",
|
25
|
+
:work_dir => self['WORK_DIR'],
|
26
|
+
:log_dir => self['LOG_DIR'],
|
27
|
+
:pass_env => self['PASS_ENV'],
|
28
|
+
:ssh_option => self['SSH_OPTION'],
|
29
|
+
:heartbeat => self['HEARTBEAT'],
|
30
|
+
:shared_directory => "SharedDirectory"
|
31
|
+
}
|
32
|
+
@filesystem = "default"
|
33
|
+
@queue_class = "TaskQueue"
|
34
|
+
end
|
35
|
+
|
36
|
+
def max_postprocess_pool
|
37
|
+
1
|
38
|
+
end
|
39
|
+
|
40
|
+
def postprocess(runner)
|
41
|
+
Postprocess.new(runner)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
class Option
|
46
|
+
include DefaultFileSystemOption
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
lib/pwrake/option/option_gfarm2fs.rb
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require "pwrake/queue/locality_aware_queue"
|
2
|
+
require "pwrake/gfarm/gfarm_path"
|
3
|
+
require "pwrake/gfarm/gfarm_postprocess"
|
4
|
+
|
5
|
+
module Pwrake
|
6
|
+
|
7
|
+
module GfarmFileSystemOption
|
8
|
+
|
9
|
+
def option_data_filesystem
|
10
|
+
[
|
11
|
+
'GFARM2FS_OPTION',
|
12
|
+
'GFARM2FS_DEBUG',
|
13
|
+
['GFARM2FS_DEBUG_WAIT', proc{|v| v ? v.to_i : 1}],
|
14
|
+
['DISABLE_AFFINITY', proc{|v| v || ENV['AFFINITY']=='off'}],
|
15
|
+
['DISABLE_STEAL', proc{|v| v || ENV['STEAL']=='off'}],
|
16
|
+
['GFARM_BASEDIR', proc{|v| v || '/tmp'}],
|
17
|
+
['GFARM_PREFIX', proc{|v| v || "pwrake_#{ENV['USER']}"}],
|
18
|
+
['GFARM_SUBDIR', proc{|v| v || '/'}],
|
19
|
+
['MAX_GFWHERE_WORKER', proc{|v| (v || 8).to_i}],
|
20
|
+
]
|
21
|
+
end
|
22
|
+
|
23
|
+
def set_filesystem_option
|
24
|
+
@filesystem = 'gfarm'
|
25
|
+
GfarmPath.subdir = self['GFARM_SUBDIR']
|
26
|
+
@worker_option = {
|
27
|
+
:log_dir => self['LOG_DIR'],
|
28
|
+
:pass_env => self['PASS_ENV'],
|
29
|
+
:ssh_option => self['SSH_OPTION'],
|
30
|
+
:heartbeat => self['HEARTBEAT'],
|
31
|
+
#
|
32
|
+
:shared_directory => "GfarmDirectory",
|
33
|
+
:base_dir => self['GFARM_BASEDIR']+"/"+self['GFARM_PREFIX'],
|
34
|
+
:work_dir => GfarmPath.pwd.to_s,
|
35
|
+
:gfarm2fs_option => self['GFARM2FS_OPTION'],
|
36
|
+
:gfarm2fs_debug => self['GFARM2FS_DEBUG'],
|
37
|
+
:gfarm2fs_debug_wait => self['GFARM2FS_DEBUG_WAIT'],
|
38
|
+
:single_mp => self['GFARM_SINGLE_MP']
|
39
|
+
}
|
40
|
+
@worker_progs = %w[
|
41
|
+
parallel/processor_count.rb
|
42
|
+
pwrake/nbio
|
43
|
+
pwrake/branch/fiber_queue
|
44
|
+
pwrake/worker/writer
|
45
|
+
pwrake/worker/log_executor
|
46
|
+
pwrake/worker/executor
|
47
|
+
pwrake/worker/invoker
|
48
|
+
pwrake/worker/shared_directory
|
49
|
+
pwrake/worker/gfarm_directory
|
50
|
+
pwrake/worker/worker_main
|
51
|
+
]
|
52
|
+
if self['DISABLE_AFFINITY']
|
53
|
+
@queue_class = "TaskQueue"
|
54
|
+
else
|
55
|
+
@queue_class = "LocalityAwareQueue"
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
def max_postprocess_pool
|
60
|
+
self['MAX_GFWHERE_WORKER']
|
61
|
+
end
|
62
|
+
|
63
|
+
def postprocess(runner)
|
64
|
+
GfarmPostprocess.new(runner)
|
65
|
+
end
|
66
|
+
|
67
|
+
def clear_gfarm2fs
|
68
|
+
setup_hosts
|
69
|
+
d = File.join(self['GFARM_BASEDIR'],self['GFARM_PREFIX'])
|
70
|
+
rcmd = "
|
71
|
+
for i in #{d}*; do
|
72
|
+
if [ -d \"$i\" ]; then
|
73
|
+
case \"$i\" in
|
74
|
+
*_000) ;;
|
75
|
+
*) fusermount -u $i; rmdir $i ;;
|
76
|
+
esac
|
77
|
+
fi
|
78
|
+
done
|
79
|
+
sleep 1
|
80
|
+
for i in #{d}*_000; do
|
81
|
+
if [ -d \"$i\" ]; then
|
82
|
+
fusermount -u $i; rmdir $i
|
83
|
+
fi
|
84
|
+
done
|
85
|
+
"
|
86
|
+
threads = []
|
87
|
+
@host_map.each do |k,hosts|
|
88
|
+
hosts.each do |info|
|
89
|
+
threads << Thread.new do
|
90
|
+
system "ssh #{info.name} '#{rcmd}'"
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
threads.each{|t| t.join}
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
class Option
|
99
|
+
include GfarmFileSystemOption
|
100
|
+
end
|
101
|
+
end
|