autoflow 0.3.5 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,21 +1,10 @@
1
1
  class QueueManager
2
2
 
3
- ########################################################################################
4
- ## SELECT AND PREPARE MANAGER
5
- ########################################################################################
6
3
  def initialize(exec_folder, options, commands, persist_variables)
7
4
  @exec_folder = exec_folder
8
5
  @commands = commands
9
6
  @persist_variables = persist_variables
10
7
  @verbose = options[:verbose]
11
- @cpus = options[:cpus]
12
- @exp_cpu = options[:exp_cpu]
13
- @count_cpu = 0
14
- @time = options[:time]
15
- @memory = options[:memory]
16
- @node_type = options[:node_type]
17
- @use_multinode = options[:use_multinode]
18
- @use_ntasks = options[:use_ntasks]
19
8
  @job_identifier = options[:identifier]
20
9
  @files = {}
21
10
  @remote = options[:remote]
@@ -23,11 +12,15 @@ class QueueManager
23
12
  @external_dependencies = options[:external_dependencies]
24
13
  end
25
14
 
15
+ ########################################################################################
16
+ ## SELECT AND PREPARE MANAGER
17
+ ########################################################################################
18
+
26
19
  def self.descendants
27
20
  ObjectSpace.each_object(Class).select { |klass| klass < self }
28
21
  end
29
22
 
30
- def self.select_queue_manager(exec_folder, options, commands, persist_variables)
23
+ def self.select_queue_manager(stack, options)
31
24
  path_managers = File.join(File.dirname(__FILE__),'queue_managers')
32
25
  Dir.glob(path_managers+'/*').each do |manager|
33
26
  require manager
@@ -37,7 +30,7 @@ class QueueManager
37
30
  else
38
31
  queue_manager = select_manager(options)
39
32
  end
40
- return queue_manager.new(exec_folder, options, commands, persist_variables)
33
+ return queue_manager.new(stack.exec_folder, options, stack.jobs, stack.persist_variables)
41
34
  end
42
35
 
43
36
  def self.select_manager(options)
@@ -59,71 +52,101 @@ class QueueManager
59
52
  def exec
60
53
  create_folder(@exec_folder)
61
54
  make_environment_file if !@persist_variables.empty?
55
+ create_file('versions', @exec_folder)
56
+ write_file('versions',"autoflow\t#{Autoflow::VERSION}")
57
+ close_file('versions')
62
58
  create_file('index_execution', @exec_folder)
63
- launch_all_tasks
59
+ launch_all_jobs
64
60
  close_file('index_execution')
65
61
  end
66
62
 
67
- def launch_all_tasks
68
- buffered_nodes = []
69
- sort_commands_by_dependencies.each do |id, node|
70
- write_file('index_execution', "#{id}\t#{node.exec_folder_program}")
71
- if node.done
63
+ def launch_all_jobs
64
+ buffered_jobs = []
65
+ sort_jobs_by_dependencies.each do |name, job|
66
+ write_file('index_execution', "#{name}\t#{job.attrib[:exec_folder]}")
67
+ if job.attrib[:done]
72
68
  next
73
69
  else
74
- rm_done_dependencies(node)
70
+ rm_done_dependencies(job)
75
71
  end
76
- buffered_nodes = launch_task_in_folder(node, id, buffered_nodes)
72
+ buffered_jobs = launch_job_in_folder(job, name, buffered_jobs)
73
+ end
74
+ end
75
+
76
+ def sort_jobs_by_dependencies
77
+ ar_jobs = @commands.to_a
78
+ sorted_jobs = []
79
+ jobs_without_dep = ar_jobs.select{|job| job.last.dependencies.empty?}
80
+ sorted_jobs.concat(jobs_without_dep)
81
+ while ar_jobs.length != sorted_jobs.length
82
+ ids = sorted_jobs.map{|job| job.first}
83
+ ar_jobs.each do |job|
84
+ if !sorted_jobs.include?(job)
85
+ deps = job.last.dependencies - ids
86
+ sorted_jobs << job if deps.empty?
87
+ end
88
+ end
89
+ end
90
+ return sorted_jobs
91
+ end
92
+
93
+ def rm_done_dependencies(job)
94
+ remove=[]
95
+ job.dependencies.each do |dependency|
96
+ remove << dependency if @commands[dependency].attrib[:done]
97
+ end
98
+ remove.each do |rm|
99
+ job.dependencies.delete(rm)
77
100
  end
78
101
  end
79
102
 
80
- def launch_task_in_folder(node, id, buffered_nodes)
81
- create_folder(node.exec_folder_program)
82
- if node.no_buffer_node # Launch with queue_system nodes and all buffered
83
- launch2queue_system(id, node, buffered_nodes)
84
- buffered_nodes = []#Clean buffer
85
- else # Buffer node
86
- buffered_nodes << [id, node]
103
+ def launch_job_in_folder(job, id, buffered_jobs)
104
+ create_folder(job.attrib[:exec_folder])
105
+ if !job.attrib[:buffer] # Launch with queue_system the job and all buffered jobs
106
+ launch2queue_system(job, id, buffered_jobs)
107
+ buffered_jobs = []#Clean buffer
108
+ else # Buffer job
109
+ buffered_jobs << [id, job]
87
110
  end
88
- return buffered_nodes
111
+ return buffered_jobs
89
112
  end
90
113
 
91
114
 
92
- def launch2queue_system(id, node, buffered_nodes)
115
+ def launch2queue_system(job, id, buffered_jobs)
93
116
  # Write sh file
94
117
  #--------------------------------
95
118
  log_folder = File.join(@exec_folder, 'log')
96
- sh_name = node.name+'.sh'
97
- create_file(sh_name, node.exec_folder_program)
119
+ sh_name = job.name+'.sh'
120
+ create_file(sh_name, job.attrib[:exec_folder])
98
121
  write_file(sh_name, '#!/usr/bin/env bash')
99
122
  write_file(sh_name, '##JOB_GROUP_ID='+@job_identifier)
100
- write_header(id, node, sh_name)
123
+ write_header(id, job, sh_name)
101
124
 
102
125
  #Get dependencies
103
126
  #------------------------------------
104
- ar_dependencies = get_dependencies(node, id)
105
- buffered_nodes.each do |id_buff_node,buff_node|
106
- write_node(buff_node, sh_name)
127
+ ar_dependencies = get_dependencies(job, id)
128
+ buffered_jobs.each do |id_buff_job, buff_job|
129
+ write_job(buff_job, sh_name)
107
130
  ar_dependencies += get_dependencies(buff_node, id_buff_node)
108
- buff_node.exec_folder_program = node.exec_folder_program
131
+ buff_job.attrib[:exec_folder] = job.attrib[:exec_folder]
109
132
  end
110
133
  ar_dependencies.uniq!
111
134
 
112
135
  #Write sh body
113
136
  #--------------------------------
114
137
  write_file(sh_name, 'hostname')
115
- write_file(sh_name, "echo -e \"STARTED #{id.gsub(')','')} #{node.name}:\\t`date`\" >> #{log_folder}")
138
+ write_file(sh_name, "echo -e \"STARTED #{id} #{job.parameters.split.first}:\\t`date`\" >> #{log_folder}")
116
139
  write_file(sh_name, "source #{File.join(@exec_folder, 'env_file')}") if !@persist_variables.empty?
117
- write_node(node, sh_name)
118
- write_file(sh_name, "echo -e \"FINISHED #{id.gsub(')','')} #{node.name}:\\t`date`\" >> #{log_folder}")
140
+ write_job(job, sh_name)
141
+ write_file(sh_name, "echo -e \"FINISHED #{id} #{job.parameters.split.first}:\\t`date`\" >> #{log_folder}")
119
142
  close_file(sh_name, 0755)
120
143
 
121
144
  #Submit node
122
145
  #-----------------------------------
123
146
  if !@verbose
124
- queue_id = submit_node(node, ar_dependencies)
125
- node.queue_id = queue_id # Returns id of running tag on queue system
126
- asign_queue_id(buffered_nodes, queue_id)
147
+ queue_id = submit_job(job, ar_dependencies)
148
+ job.queue_id = queue_id # Returns id of running tag on queue system
149
+ asign_queue_id(buffered_jobs, queue_id)
127
150
  end
128
151
  end
129
152
 
@@ -185,77 +208,21 @@ class QueueManager
185
208
  return call
186
209
  end
187
210
 
188
- def rm_done_dependencies(node)
189
- remove=[]
190
- node.dependencies.each do |dependency|
191
- if @commands[dependency].done
192
- remove << dependency
193
- end
194
- end
195
- remove.each do |rm|
196
- node.dependencies.delete(rm)
197
- end
198
- end
199
-
200
- def sort_commands_by_dependencies
201
- ar_commands = @commands.to_a
202
- sorted_commands = []
203
- task_without_dep = ar_commands.select{|node| node.last.dependencies.empty?}
204
- sorted_commands.concat(task_without_dep)
205
- while ar_commands.length != sorted_commands.length
206
- ids = sorted_commands.map{|command| command.first}
207
- ar_commands.each do |com|
208
- if !sorted_commands.include?(com)
209
- deps = com.last.dependencies - ids
210
- sorted_commands << com if deps.empty?
211
- end
212
- end
213
- end
214
- return sorted_commands
215
- end
216
-
217
- def write_node(tag, sh_name)
218
- write_file(sh_name, tag.initialization) if !tag.initialization.nil?
219
- cmd = 'time '
220
- if !tag.monocpu
221
- if tag.parameters.include?('[lcpu]')
222
- string = '[lcpu]'
223
- used_cpu = 'workers'
224
- elsif tag.parameters.include?('[cpu]')
225
- string = '[cpu]'
226
- used_cpu = @cpus.to_s
227
- end
228
- tag.parameters.gsub!(string, used_cpu) #Use asigned cpus
229
- end
230
- cmd << tag.parameters
231
- write_file(sh_name, cmd)
232
- end
233
-
234
- def asign_cpu(node)
235
- used_cpu = 1
236
- if !node.monocpu
237
- if @exp_cpu == 0
238
- used_cpu = @cpus
239
- else #asign exponential cpus
240
- if @exp_cpu**(@count_cpu) < @cpus
241
- @count_cpu +=1
242
- end
243
- used_cpu = @exp_cpu**@count_cpu
244
- end
245
- end
246
- return used_cpu
211
+ def write_job(job, sh_name)
212
+ write_file(sh_name, job.initialization) if !job.initialization.nil?
213
+ write_file(sh_name, 'time ' + job.parameters)
247
214
  end
248
215
 
249
- def get_dependencies(node, id = nil)
216
+ def get_dependencies(job, id = nil)
250
217
  ar_dependencies = []
251
- ar_dependencies += node.dependencies
218
+ ar_dependencies += job.dependencies
252
219
  ar_dependencies.delete(id) if !id.nil? #Delete autodependency
253
220
  return ar_dependencies
254
221
  end
255
222
 
256
- def asign_queue_id(ar_tags,id)
257
- ar_tags.each do |id_ar_tag, ar_tag|
258
- ar_tag.queue_id=id
223
+ def asign_queue_id(ar_jobs, id)
224
+ ar_jobs.each do |id_job, job|
225
+ job.queue_id=id
259
226
  end
260
227
  end
261
228
 
@@ -281,7 +248,7 @@ class QueueManager
281
248
 
282
249
  end
283
250
 
284
- def submit_node(node, ar_dependencies)
251
+ def submit_job(job, ar_dependencies)
285
252
 
286
253
  end
287
254
 
@@ -10,7 +10,7 @@ class BashManager < QueueManager
10
10
  write_file('execution.sh', '#! /usr/bin/env bash')
11
11
  end
12
12
 
13
- def launch_all_tasks
13
+ def launch_all_jobs
14
14
  super
15
15
  close_file('execution.sh', 0755)
16
16
  system_call("#{@path2execution_script} > #{File.join(File.dirname(@path2execution_script),'output')} & ", @exec_folder)
@@ -20,7 +20,7 @@ class BashManager < QueueManager
20
20
  @queued << id # For dependencies purposes
21
21
  end
22
22
 
23
- def submit_node(node, ar_dependencies)
23
+ def submit_job(job, ar_dependencies)
24
24
  write_file('execution.sh','')
25
25
  if !ar_dependencies.empty?
26
26
  deps = ar_dependencies - @last_deps
@@ -31,8 +31,8 @@ class BashManager < QueueManager
31
31
  end
32
32
  @last_deps.concat(ar_dependencies)
33
33
  @last_deps.uniq!
34
- write_file('execution.sh', "cd #{node.exec_folder_program}")
35
- write_file('execution.sh', "./#{node.name}.sh &")
34
+ write_file('execution.sh', "cd #{job.attrib[:exec_folder]}")
35
+ write_file('execution.sh', "./#{job.name}.sh &")
36
36
  return nil
37
37
  end
38
38
 
@@ -1,28 +1,26 @@
1
1
  require 'queue_manager'
2
2
  class SlurmManager < QueueManager
3
- def write_header(id, node, sh_name)
4
- used_cpu = asign_cpu(node)
5
- if !@use_ntasks
6
- write_file(sh_name, "#SBATCH --cpus=#{used_cpu}")
3
+ def write_header(id, job, sh_name)
4
+ if !job.attrib[:ntask]
5
+ write_file(sh_name, "#SBATCH --cpus=#{job.attrib[:cpu]}")
7
6
  else
8
- write_file(sh_name, "#SBATCH --ntasks=#{used_cpu}")
9
- write_file(sh_name, "#SBATCH --nodes=#{@use_multinode}") if @use_multinode > 0
10
- write_file(sh_name, 'srun hostname -s > workers') if node.parameters.include?('[lcpu]')
7
+ write_file(sh_name, "#SBATCH --ntasks=#{job.attrib[:cpu]}")
8
+ write_file(sh_name, "#SBATCH --nodes=#{job.attrib[:multinode]}") if job.attrib[:multinode] > 0
9
+ write_file(sh_name, 'srun hostname -s > workers') if job.attrib[:cpu_asign] == 'list'
11
10
  end
12
- constraint = '#SBATCH --constraint='+@node_type if !@node_type.nil?
13
- write_file(sh_name, "#SBATCH --mem=#{@memory}")
14
- write_file(sh_name, "#SBATCH --time=#{@time}")
15
- write_file(sh_name, "#{constraint}")
11
+ write_file(sh_name, "#SBATCH --mem=#{job.attrib[:mem]}")
12
+ write_file(sh_name, "#SBATCH --time=#{job.attrib[:time]}")
13
+ write_file(sh_name, "#SBATCH --constraint=#{job.attrib[:node]}") if !job.attrib[:node].nil?
16
14
  write_file(sh_name, '#SBATCH --error=job.%J.err')
17
15
  write_file(sh_name, '#SBATCH --output=job.%J.out')
18
16
  end
19
17
 
20
- def submit_node(node, ar_dependencies)
18
+ def submit_job(job, ar_dependencies)
21
19
  final_dep = get_all_deps(ar_dependencies)
22
20
  dependencies = nil
23
21
  dependencies='--dependency=afterok:'+final_dep.join(':') if !final_dep.empty?
24
- cmd = "sbatch #{dependencies} #{node.name}.sh"
25
- queue_id = get_queue_system_id(system_call(cmd, node.exec_folder_program))
22
+ cmd = "sbatch #{dependencies} #{job.name}.sh"
23
+ queue_id = get_queue_system_id(system_call(cmd, job.attrib[:exec_folder]))
26
24
  return queue_id
27
25
  end
28
26