autoflow 0.3.5 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/autoflow.gemspec +2 -0
- data/bin/AutoFlow +163 -34
- data/lib/autoflow/batch.rb +342 -0
- data/lib/autoflow/program.rb +19 -16
- data/lib/autoflow/queue_manager.rb +75 -108
- data/lib/autoflow/queue_managers/bash_manager.rb +4 -4
- data/lib/autoflow/queue_managers/slurm_manager.rb +12 -14
- data/lib/autoflow/stack.rb +210 -146
- data/lib/autoflow/version.rb +1 -1
- metadata +19 -2
@@ -1,21 +1,10 @@
|
|
1
1
|
class QueueManager
|
2
2
|
|
3
|
-
########################################################################################
|
4
|
-
## SELECT AND PREPARE MANAGER
|
5
|
-
########################################################################################
|
6
3
|
def initialize(exec_folder, options, commands, persist_variables)
|
7
4
|
@exec_folder = exec_folder
|
8
5
|
@commands = commands
|
9
6
|
@persist_variables = persist_variables
|
10
7
|
@verbose = options[:verbose]
|
11
|
-
@cpus = options[:cpus]
|
12
|
-
@exp_cpu = options[:exp_cpu]
|
13
|
-
@count_cpu = 0
|
14
|
-
@time = options[:time]
|
15
|
-
@memory = options[:memory]
|
16
|
-
@node_type = options[:node_type]
|
17
|
-
@use_multinode = options[:use_multinode]
|
18
|
-
@use_ntasks = options[:use_ntasks]
|
19
8
|
@job_identifier = options[:identifier]
|
20
9
|
@files = {}
|
21
10
|
@remote = options[:remote]
|
@@ -23,11 +12,15 @@ class QueueManager
|
|
23
12
|
@external_dependencies = options[:external_dependencies]
|
24
13
|
end
|
25
14
|
|
15
|
+
########################################################################################
|
16
|
+
## SELECT AND PREPARE MANAGER
|
17
|
+
########################################################################################
|
18
|
+
|
26
19
|
def self.descendants
|
27
20
|
ObjectSpace.each_object(Class).select { |klass| klass < self }
|
28
21
|
end
|
29
22
|
|
30
|
-
def self.select_queue_manager(
|
23
|
+
def self.select_queue_manager(stack, options)
|
31
24
|
path_managers = File.join(File.dirname(__FILE__),'queue_managers')
|
32
25
|
Dir.glob(path_managers+'/*').each do |manager|
|
33
26
|
require manager
|
@@ -37,7 +30,7 @@ class QueueManager
|
|
37
30
|
else
|
38
31
|
queue_manager = select_manager(options)
|
39
32
|
end
|
40
|
-
return queue_manager.new(exec_folder, options,
|
33
|
+
return queue_manager.new(stack.exec_folder, options, stack.jobs, stack.persist_variables)
|
41
34
|
end
|
42
35
|
|
43
36
|
def self.select_manager(options)
|
@@ -59,71 +52,101 @@ class QueueManager
|
|
59
52
|
def exec
|
60
53
|
create_folder(@exec_folder)
|
61
54
|
make_environment_file if !@persist_variables.empty?
|
55
|
+
create_file('versions', @exec_folder)
|
56
|
+
write_file('versions',"autoflow\t#{Autoflow::VERSION}")
|
57
|
+
close_file('versions')
|
62
58
|
create_file('index_execution', @exec_folder)
|
63
|
-
|
59
|
+
launch_all_jobs
|
64
60
|
close_file('index_execution')
|
65
61
|
end
|
66
62
|
|
67
|
-
def
|
68
|
-
|
69
|
-
|
70
|
-
write_file('index_execution', "#{
|
71
|
-
if
|
63
|
+
def launch_all_jobs
|
64
|
+
buffered_jobs = []
|
65
|
+
sort_jobs_by_dependencies.each do |name, job|
|
66
|
+
write_file('index_execution', "#{name}\t#{job.attrib[:exec_folder]}")
|
67
|
+
if job.attrib[:done]
|
72
68
|
next
|
73
69
|
else
|
74
|
-
rm_done_dependencies(
|
70
|
+
rm_done_dependencies(job)
|
75
71
|
end
|
76
|
-
|
72
|
+
buffered_jobs = launch_job_in_folder(job, name, buffered_jobs)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def sort_jobs_by_dependencies
|
77
|
+
ar_jobs = @commands.to_a
|
78
|
+
sorted_jobs = []
|
79
|
+
jobs_without_dep = ar_jobs.select{|job| job.last.dependencies.empty?}
|
80
|
+
sorted_jobs.concat(jobs_without_dep)
|
81
|
+
while ar_jobs.length != sorted_jobs.length
|
82
|
+
ids = sorted_jobs.map{|job| job.first}
|
83
|
+
ar_jobs.each do |job|
|
84
|
+
if !sorted_jobs.include?(job)
|
85
|
+
deps = job.last.dependencies - ids
|
86
|
+
sorted_jobs << job if deps.empty?
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
return sorted_jobs
|
91
|
+
end
|
92
|
+
|
93
|
+
def rm_done_dependencies(job)
|
94
|
+
remove=[]
|
95
|
+
job.dependencies.each do |dependency|
|
96
|
+
remove << dependency if @commands[dependency].attrib[:done]
|
97
|
+
end
|
98
|
+
remove.each do |rm|
|
99
|
+
job.dependencies.delete(rm)
|
77
100
|
end
|
78
101
|
end
|
79
102
|
|
80
|
-
def
|
81
|
-
create_folder(
|
82
|
-
if
|
83
|
-
launch2queue_system(
|
84
|
-
|
85
|
-
else # Buffer
|
86
|
-
|
103
|
+
def launch_job_in_folder(job, id, buffered_jobs)
|
104
|
+
create_folder(job.attrib[:exec_folder])
|
105
|
+
if !job.attrib[:buffer] # Launch with queue_system the job and all buffered jobs
|
106
|
+
launch2queue_system(job, id, buffered_jobs)
|
107
|
+
buffered_jobs = []#Clean buffer
|
108
|
+
else # Buffer job
|
109
|
+
buffered_jobs << [id, job]
|
87
110
|
end
|
88
|
-
return
|
111
|
+
return buffered_jobs
|
89
112
|
end
|
90
113
|
|
91
114
|
|
92
|
-
def launch2queue_system(
|
115
|
+
def launch2queue_system(job, id, buffered_jobs)
|
93
116
|
# Write sh file
|
94
117
|
#--------------------------------
|
95
118
|
log_folder = File.join(@exec_folder, 'log')
|
96
|
-
sh_name =
|
97
|
-
create_file(sh_name,
|
119
|
+
sh_name = job.name+'.sh'
|
120
|
+
create_file(sh_name, job.attrib[:exec_folder])
|
98
121
|
write_file(sh_name, '#!/usr/bin/env bash')
|
99
122
|
write_file(sh_name, '##JOB_GROUP_ID='+@job_identifier)
|
100
|
-
write_header(id,
|
123
|
+
write_header(id, job, sh_name)
|
101
124
|
|
102
125
|
#Get dependencies
|
103
126
|
#------------------------------------
|
104
|
-
ar_dependencies = get_dependencies(
|
105
|
-
|
106
|
-
|
127
|
+
ar_dependencies = get_dependencies(job, id)
|
128
|
+
buffered_jobs.each do |id_buff_job, buff_job|
|
129
|
+
write_job(buff_job, sh_name)
|
107
130
|
ar_dependencies += get_dependencies(buff_node, id_buff_node)
|
108
|
-
|
131
|
+
buff_job.attrib[:exec_folder] = job.attrib[:exec_folder]
|
109
132
|
end
|
110
133
|
ar_dependencies.uniq!
|
111
134
|
|
112
135
|
#Write sh body
|
113
136
|
#--------------------------------
|
114
137
|
write_file(sh_name, 'hostname')
|
115
|
-
write_file(sh_name, "echo -e \"STARTED #{id
|
138
|
+
write_file(sh_name, "echo -e \"STARTED #{id} #{job.parameters.split.first}:\\t`date`\" >> #{log_folder}")
|
116
139
|
write_file(sh_name, "source #{File.join(@exec_folder, 'env_file')}") if !@persist_variables.empty?
|
117
|
-
|
118
|
-
write_file(sh_name, "echo -e \"FINISHED #{id
|
140
|
+
write_job(job, sh_name)
|
141
|
+
write_file(sh_name, "echo -e \"FINISHED #{id} #{job.parameters.split.first}:\\t`date`\" >> #{log_folder}")
|
119
142
|
close_file(sh_name, 0755)
|
120
143
|
|
121
144
|
#Submit node
|
122
145
|
#-----------------------------------
|
123
146
|
if !@verbose
|
124
|
-
queue_id =
|
125
|
-
|
126
|
-
asign_queue_id(
|
147
|
+
queue_id = submit_job(job, ar_dependencies)
|
148
|
+
job.queue_id = queue_id # Returns id of running tag on queue system
|
149
|
+
asign_queue_id(buffered_jobs, queue_id)
|
127
150
|
end
|
128
151
|
end
|
129
152
|
|
@@ -185,77 +208,21 @@ class QueueManager
|
|
185
208
|
return call
|
186
209
|
end
|
187
210
|
|
188
|
-
def
|
189
|
-
|
190
|
-
|
191
|
-
if @commands[dependency].done
|
192
|
-
remove << dependency
|
193
|
-
end
|
194
|
-
end
|
195
|
-
remove.each do |rm|
|
196
|
-
node.dependencies.delete(rm)
|
197
|
-
end
|
198
|
-
end
|
199
|
-
|
200
|
-
def sort_commands_by_dependencies
|
201
|
-
ar_commands = @commands.to_a
|
202
|
-
sorted_commands = []
|
203
|
-
task_without_dep = ar_commands.select{|node| node.last.dependencies.empty?}
|
204
|
-
sorted_commands.concat(task_without_dep)
|
205
|
-
while ar_commands.length != sorted_commands.length
|
206
|
-
ids = sorted_commands.map{|command| command.first}
|
207
|
-
ar_commands.each do |com|
|
208
|
-
if !sorted_commands.include?(com)
|
209
|
-
deps = com.last.dependencies - ids
|
210
|
-
sorted_commands << com if deps.empty?
|
211
|
-
end
|
212
|
-
end
|
213
|
-
end
|
214
|
-
return sorted_commands
|
215
|
-
end
|
216
|
-
|
217
|
-
def write_node(tag, sh_name)
|
218
|
-
write_file(sh_name, tag.initialization) if !tag.initialization.nil?
|
219
|
-
cmd = 'time '
|
220
|
-
if !tag.monocpu
|
221
|
-
if tag.parameters.include?('[lcpu]')
|
222
|
-
string = '[lcpu]'
|
223
|
-
used_cpu = 'workers'
|
224
|
-
elsif tag.parameters.include?('[cpu]')
|
225
|
-
string = '[cpu]'
|
226
|
-
used_cpu = @cpus.to_s
|
227
|
-
end
|
228
|
-
tag.parameters.gsub!(string, used_cpu) #Use asigned cpus
|
229
|
-
end
|
230
|
-
cmd << tag.parameters
|
231
|
-
write_file(sh_name, cmd)
|
232
|
-
end
|
233
|
-
|
234
|
-
def asign_cpu(node)
|
235
|
-
used_cpu = 1
|
236
|
-
if !node.monocpu
|
237
|
-
if @exp_cpu == 0
|
238
|
-
used_cpu = @cpus
|
239
|
-
else #asign exponential cpus
|
240
|
-
if @exp_cpu**(@count_cpu) < @cpus
|
241
|
-
@count_cpu +=1
|
242
|
-
end
|
243
|
-
used_cpu = @exp_cpu**@count_cpu
|
244
|
-
end
|
245
|
-
end
|
246
|
-
return used_cpu
|
211
|
+
def write_job(job, sh_name)
|
212
|
+
write_file(sh_name, job.initialization) if !job.initialization.nil?
|
213
|
+
write_file(sh_name, 'time ' + job.parameters)
|
247
214
|
end
|
248
215
|
|
249
|
-
def get_dependencies(
|
216
|
+
def get_dependencies(job, id = nil)
|
250
217
|
ar_dependencies = []
|
251
|
-
ar_dependencies +=
|
218
|
+
ar_dependencies += job.dependencies
|
252
219
|
ar_dependencies.delete(id) if !id.nil? #Delete autodependency
|
253
220
|
return ar_dependencies
|
254
221
|
end
|
255
222
|
|
256
|
-
def asign_queue_id(
|
257
|
-
|
258
|
-
|
223
|
+
def asign_queue_id(ar_jobs, id)
|
224
|
+
ar_jobs.each do |id_job, job|
|
225
|
+
job.queue_id=id
|
259
226
|
end
|
260
227
|
end
|
261
228
|
|
@@ -281,7 +248,7 @@ class QueueManager
|
|
281
248
|
|
282
249
|
end
|
283
250
|
|
284
|
-
def
|
251
|
+
def submit_job(job, ar_dependencies)
|
285
252
|
|
286
253
|
end
|
287
254
|
|
@@ -10,7 +10,7 @@ class BashManager < QueueManager
|
|
10
10
|
write_file('execution.sh', '#! /usr/bin/env bash')
|
11
11
|
end
|
12
12
|
|
13
|
-
def
|
13
|
+
def launch_all_jobs
|
14
14
|
super
|
15
15
|
close_file('execution.sh', 0755)
|
16
16
|
system_call("#{@path2execution_script} > #{File.join(File.dirname(@path2execution_script),'output')} & ", @exec_folder)
|
@@ -20,7 +20,7 @@ class BashManager < QueueManager
|
|
20
20
|
@queued << id # For dependencies purposes
|
21
21
|
end
|
22
22
|
|
23
|
-
def
|
23
|
+
def submit_job(job, ar_dependencies)
|
24
24
|
write_file('execution.sh','')
|
25
25
|
if !ar_dependencies.empty?
|
26
26
|
deps = ar_dependencies - @last_deps
|
@@ -31,8 +31,8 @@ class BashManager < QueueManager
|
|
31
31
|
end
|
32
32
|
@last_deps.concat(ar_dependencies)
|
33
33
|
@last_deps.uniq!
|
34
|
-
write_file('execution.sh', "cd #{
|
35
|
-
write_file('execution.sh', "./#{
|
34
|
+
write_file('execution.sh', "cd #{job.attrib[:exec_folder]}")
|
35
|
+
write_file('execution.sh', "./#{job.name}.sh &")
|
36
36
|
return nil
|
37
37
|
end
|
38
38
|
|
@@ -1,28 +1,26 @@
|
|
1
1
|
require 'queue_manager'
|
2
2
|
class SlurmManager < QueueManager
|
3
|
-
def write_header(id,
|
4
|
-
|
5
|
-
|
6
|
-
write_file(sh_name, "#SBATCH --cpus=#{used_cpu}")
|
3
|
+
def write_header(id, job, sh_name)
|
4
|
+
if !job.attrib[:ntask]
|
5
|
+
write_file(sh_name, "#SBATCH --cpus=#{job.attrib[:cpu]}")
|
7
6
|
else
|
8
|
-
write_file(sh_name, "#SBATCH --ntasks=#{
|
9
|
-
write_file(sh_name, "#SBATCH --nodes=#{
|
10
|
-
write_file(sh_name, 'srun hostname -s > workers') if
|
7
|
+
write_file(sh_name, "#SBATCH --ntasks=#{job.attrib[:cpu]}")
|
8
|
+
write_file(sh_name, "#SBATCH --nodes=#{job.attrib[:multinode]}") if job.attrib[:multinode] > 0
|
9
|
+
write_file(sh_name, 'srun hostname -s > workers') if job.attrib[:cpu_asign] == 'list'
|
11
10
|
end
|
12
|
-
|
13
|
-
write_file(sh_name,
|
14
|
-
write_file(sh_name,
|
15
|
-
write_file(sh_name, "#{constraint}")
|
11
|
+
write_file(sh_name, "#SBATCH --mem=#{job.attrib[:mem]}")
|
12
|
+
write_file(sh_name, "#SBATCH --time=#{job.attrib[:time]}")
|
13
|
+
write_file(sh_name, "#SBATCH --constraint=#{job.attrib[:node]}") if !job.attrib[:node].nil?
|
16
14
|
write_file(sh_name, '#SBATCH --error=job.%J.err')
|
17
15
|
write_file(sh_name, '#SBATCH --output=job.%J.out')
|
18
16
|
end
|
19
17
|
|
20
|
-
def
|
18
|
+
def submit_job(job, ar_dependencies)
|
21
19
|
final_dep = get_all_deps(ar_dependencies)
|
22
20
|
dependencies = nil
|
23
21
|
dependencies='--dependency=afterok:'+final_dep.join(':') if !final_dep.empty?
|
24
|
-
cmd = "sbatch #{dependencies} #{
|
25
|
-
queue_id = get_queue_system_id(system_call(cmd,
|
22
|
+
cmd = "sbatch #{dependencies} #{job.name}.sh"
|
23
|
+
queue_id = get_queue_system_id(system_call(cmd, job.attrib[:exec_folder]))
|
26
24
|
return queue_id
|
27
25
|
end
|
28
26
|
|