autoflow 0.3.5

data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in autoflow.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Pedro Seoane
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Autoflow
2
+
3
+ Autoflow makes it easy to launch big pipelines on a queue system. It works with SLURM & PBS.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'autoflow'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install autoflow
18
+
19
+ ## Usage
20
+
21
+ Run `AutoFlow -w workflow_file -c n_cpus` to launch a workflow template on the detected queue system (an example template is sketched after this file listing).
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
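A minimal, hypothetical workflow template for the usage line above, inferred from the parser in lib/autoflow/stack.rb (tag names, program names and file paths are invented for illustration; `$` defines a template variable, each node follows the `tag){ initialization ? command }` form, `[cpu]` is replaced by the CPU limit and `trim)` marks a dependency on the node tagged `trim`):

    $input=/path/to/reads.fastq

    trim){
        ?
        trim_program [cpu] $input > trimmed.fastq
    }

    assembly){
        ?
        assembly_program -t [cpu] trim)/trimmed.fastq
    }

Such a template would be launched, for example, with `AutoFlow -w template.af -c 16 -o exec_dir`.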
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/autoflow.gemspec ADDED
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'autoflow/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "autoflow"
8
+ spec.version = Autoflow::VERSION
9
+ spec.authors = ["Pedro Seoane"]
10
+ spec.email = ["seoanezonjic@hotmail.com"]
11
+ spec.description = %q{"Autoflow makes it easy to launch big pipelines on a queue system. Only works with SLURM & PBS"}
12
+ spec.summary = %q{"This gem takes a pipeline and launches it on a queue system"}
13
+ spec.homepage = ""
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files`.split($/)
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_runtime_dependency 'net-ssh', '>= 2.8.0'
22
+ spec.add_development_dependency "bundler", "~> 1.3"
23
+ spec.add_development_dependency "rake"
24
+ end
data/bin/AutoFlow ADDED
@@ -0,0 +1,167 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ ROOT_PATH=File.dirname(__FILE__)
4
+ $: << File.expand_path(File.join(ROOT_PATH, "../lib/"))
5
+ $: << File.expand_path(File.join(ROOT_PATH, "../lib/autoflow/"))
6
+ $: << File.expand_path(File.join(ROOT_PATH, "../lib/autoflow/queue_managers"))
7
+
8
+ require 'optparse'
9
+ require 'autoflow'
10
+ require 'io/console'
11
+ require 'net/ssh'
12
+
13
+ #################################################################################################
14
+ # INPUT PARSING
15
+ #################################################################################################
16
+ options = {}
17
+ template_file = ''
18
+ optparse = OptionParser.new do |opts|
19
+ options[:batch] = FALSE
20
+ opts.on( '-b', '--batch', 'Execute the workflow as a single bash script instead of submitting to a queue system' ) do
21
+ options[:batch] = TRUE
22
+ end
23
+
24
+ options[:cpus] = 16
25
+ opts.on( '-c', '--cpus INTEGER', 'Maximum number of CPUs the whole workflow can use' ) do |cpus|
26
+ options[:cpus] = cpus.to_i
27
+ end
28
+
29
+ options[:external_dependencies] = []
30
+ opts.on( '-d', '--external_dependencies STRING', 'The workflow will start when indicated jobs finish on queue system. Format: \'id1,id2,id3..\'') do |external_dependencies|
31
+ options[:external_dependencies] = external_dependencies.split(',')
32
+ end
33
+
34
+ options[:exp_cpu] = 0
35
+ opts.on( '-e', '--exp_cpu INTEGER', 'Exponent of the CPU assignment series' ) do |exp_cpu|
36
+ options[:exp_cpu] = exp_cpu.to_i
37
+ end
38
+
39
+ options[:retry] = FALSE
40
+ opts.on( '-f', '--force', 'Execute all jobs, including the jobs commented with %' ) do
41
+ options[:retry] = TRUE
42
+ end
43
+
44
+ options[:graph] = nil
45
+ opts.on( '-g', '--graph STRING', 'Draw the template. Use t for tag names or f for folder names' ) do |graph|
46
+ options[:graph] = graph
47
+ end
48
+
49
+ options[:memory] = '4gb'
50
+ opts.on( '-m', '--memory STRING', 'Maximum memory a task can use' ) do |mem|
51
+ options[:memory] = mem
52
+ end
53
+
54
+ options[:node_type] = nil
55
+ opts.on( '-n', '--node_type STRING', 'Apply constraint attribute to tasks' ) do |node_type|
56
+ options[:node_type] = node_type
57
+ end
58
+
59
+ options[:output] = 'exec'
60
+ opts.on( '-o', '--output STRING', 'Output folder of flow' ) do |output|
61
+ options[:output] = output
62
+ end
63
+
64
+ options[:remote] = FALSE
65
+ opts.on( '-r', '--remote', 'Connect with remote machine and launch the workflow' ) do
66
+ options[:remote] = TRUE
67
+ puts 'Host to connect and launch workflow:'
68
+ host = gets.chomp
69
+ puts 'User:'
70
+ user = gets.chomp
71
+ puts 'Password (hidden)'
72
+ password = STDIN.noecho(&:gets).chomp
73
+ options[:ssh] = Net::SSH.start(host, user, :password => password, :auth_methods => ['keyboard-interactive'])
74
+ puts options[:ssh].exec!('hostname')
75
+ end
76
+
77
+ options[:time] = '20:00:00'
78
+ opts.on( '-t', '--time STRING', 'Maximum time a task may need' ) do |time|
79
+ options[:time] = time
80
+ end
81
+
82
+ options[:use_multinode] = 0
83
+ opts.on( '-u', '--use_multinode INTEGER', 'Use several nodes during execution' ) do |use_multinode|
84
+ options[:use_multinode] = use_multinode.to_i
85
+ end
86
+
87
+ options[:use_ntasks] = FALSE
88
+ opts.on( '-s', '--use_ntasks', 'Use the --ntasks sbatch flag instead of a per-task cpu flag' ) do
89
+ options[:use_ntasks] = TRUE
90
+ end
91
+
92
+ options[:verbose] = FALSE
93
+ opts.on( '-v', '--verbose', 'Show info without launching jobs' ) do
94
+ options[:verbose] = TRUE
95
+ end
96
+
97
+ options[:Variables] = nil
98
+ opts.on( '-V', '--Variables STRING', 'Variables to be replaced in the template. Format: \'$variable_name1=value1;$variable_name2=value2;...\'' ) do |variables|
99
+ options[:Variables] = variables.split(';')
100
+ end
101
+
102
+ options[:workflow] = FALSE
103
+ opts.on( '-w', '--workflow FILE', 'Input workflow file' ) do |workflow|
104
+ options[:workflow] = workflow
105
+ template_file = workflow
106
+ end
107
+
108
+ options[:identifier] = FALSE
109
+ opts.on( '-i', '--job_identifier STRING', 'Identifier tag for each launching script' ) do |identifier|
110
+ options[:identifier] = identifier
111
+ end
112
+
113
+ # Set a banner, displayed at the top of the help screen.
114
+ opts.banner = "Usage: AutoFlow -w workflow_file -c n_cpus \n\n"
115
+
116
+ # This displays the help screen
117
+ opts.on( '-h', '--help', 'Display this screen' ) do
118
+ puts opts
119
+ exit
120
+ end
121
+
122
+ end # End opts
123
+
124
+ # parse options and remove from ARGV
125
+ optparse.parse!
126
+
127
+ if !options[:workflow] || !File.exists?(options[:workflow])
128
+ puts 'Workflow file not specified or does not exist'
129
+ Process.exit(-1)
130
+ else
131
+ options[:identifier] = "#{options[:workflow]}_#{Time.new.to_i}" if !options[:identifier]
132
+ options[:workflow] = File.open(options[:workflow]).read
133
+ end
134
+
135
+ #################################################################################################
136
+ # MAIN
137
+ #################################################################################################
138
+ if options[:remote]
139
+ main_path = options[:ssh].exec!('pwd').chomp
140
+ else
141
+ main_path = Dir.pwd
142
+ end
143
+
144
+ if options[:output] == 'exec'
145
+ exec_folder = File.join(main_path,'exec')
146
+ else
147
+ exec_folder = options[:output]
148
+ exec_folder = File.join(main_path, options[:output]) if exec_folder[0] != '/' && exec_folder[0] != '~'
149
+ end
150
+
151
+ #--------------------------------------------------------------------------------
152
+ # Flow parse
153
+ #--------------------------------------------------------------------------------
154
+ stack=Stack.new(exec_folder, options)
155
+ stack.draw(template_file, options[:graph]) if !options[:graph].nil?
156
+
157
+ #--------------------------------------------------------------------------------
158
+ # Flow exec
159
+ #--------------------------------------------------------------------------------
160
+ if options[:verbose]
161
+ stack.inspect
162
+ stack.send
163
+ elsif options[:graph].nil?
164
+ stack.send
165
+ end
166
+ options[:ssh].close if options[:remote]
167
+
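A hypothetical invocation combining the options parsed above (paths, variable names and job ids are illustrative):

    AutoFlow -w pipeline.af -c 16 -m 8gb -t 10:00:00 \
        -V '$input=/data/reads.fastq;$sample=run1' \
        -d '12345,12346' -o my_exec_dir

    AutoFlow -w pipeline.af -g t    # only draw the workflow graph; nothing is launched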
data/bin/env_manager ADDED
@@ -0,0 +1,23 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ # Parse environment variables file
4
+ new_values = {}
5
+ env_path = '../env_file' # A level up
6
+ if !File.exists?(env_path)
7
+ env_path = 'env_file' # Local
8
+ if !File.exists?(env_path)
9
+ raise 'Environment file not found'
10
+ end
11
+ end
12
+ pairs = File.open(env_path).read.scan(/export ([^=]+)=([\S]+)/)
13
+ pairs.map{|pair| new_values[pair.first] = pair.last}
14
+
15
+ # Change environment variables (command line must be 'var=value;var1=value1;...varN=valueN;')
16
+ ARGV[0].scan(/([^=]+)=([^;]+);/).map{|new_pair| new_values[new_pair.first]=new_pair.last}
17
+
18
+ # Save modified environment variables
19
+ new_environment = File.open(env_path,'w')
20
+ new_values.each do |env_var, value|
21
+ new_environment.puts "export #{env_var}=#{value}"
22
+ end
23
+ new_environment.close
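env_manager rewrites a file of `export` statements with values passed on the command line; a hypothetical session run from a task folder whose parent holds the env_file (variable names are illustrative):

    $ cat ../env_file
    export THREADS=4
    export SAMPLE=run1
    $ env_manager 'THREADS=8;SAMPLE=run2;'
    $ cat ../env_file
    export THREADS=8
    export SAMPLE=run2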
data/bin/flow_time ADDED
@@ -0,0 +1,52 @@
1
+ #! /usr/bin/env ruby
2
+ require 'scbi_plot'
3
+
4
+ times = {}
5
+ task = nil
6
+ status = nil
7
+ beg_time = Time.now
8
+ end_time = Time.new(0)
9
+ File.open(ARGV[0]).read.each_line do |line|
10
+ line.chomp!
11
+ description, date = line.split("\t")
12
+ status, task, program = description.split(' ')
13
+ day, month, n_day, time, mode, year = date.split(' ')
14
+ hours, minutes, seconds = time.split(':')
15
+ task_time = Time.local(year.to_i, month, n_day.to_i, hours.to_i, minutes.to_i, seconds.to_i)
16
+ if task_time > end_time
17
+ end_time = task_time
18
+ end
19
+ if task_time < beg_time
20
+ beg_time = task_time
21
+ end
22
+ if status == 'STARTED'
23
+ times[task] = [task_time]
24
+ else
25
+ times[task] << task_time
26
+ end
27
+ end
28
+
29
+ tag_task = []
30
+ plot_times = []
31
+ total_time = (end_time - beg_time).to_i/3600.0
32
+ tag_task << 'Total_time'
33
+ plot_times << total_time
34
+ puts "Total time: #{total_time} hours"
35
+ times.each do |task, interval|
36
+ if interval.length == 2
37
+ task_time = (interval.last - interval.first).to_i/3600.0
38
+ tag_task << task
39
+ plot_times << task_time
40
+ puts "#{task}\t#{task_time} hours"
41
+ else
42
+ puts "#{task}\tStarted at #{interval.first}. Currently running"
43
+ end
44
+ end
45
+
46
+ # create Histogram
47
+ tag_task.map!{|tag| tag.gsub('e','\\e')}
48
+ tag_task.map!{|tag| tag.gsub('E','\\E')}
49
+ p=ScbiPlot::Histogram.new('time_graph.png','Flow stats (hours)')
50
+ p.add_x(tag_task)
51
+ p.add_y(plot_times)
52
+ p.do_graph
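flow_time reads the log file that the generated task scripts append to (STARTED/FINISHED lines, with the `date` stamp separated by a tab), prints the total and per-task run times in hours, and draws time_graph.png with scbi_plot. A hypothetical log and invocation (task names and dates are illustrative):

    $ cat exec/log
    STARTED trim trim_program:	Tue Jul 22 10:00:03 CEST 2014
    FINISHED trim trim_program:	Tue Jul 22 11:30:47 CEST 2014
    $ flow_time exec/log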
data/lib/autoflow/program.rb ADDED
@@ -0,0 +1,22 @@
1
+ class Program
2
+ attr_accessor :name, :exec_folder_program, :parameters, :initialization, :queue_id, :done, :dependencies, :monocpu, :no_buffer_node,:cloned_id
3
+ def initialize(name, parameters, initialization, exec_folder_program, dependencies, done, no_buffer_node)
4
+ @name=name
5
+ @parameters=parameters
6
+ @initialization=initialization
7
+ @exec_folder_program=exec_folder_program
8
+ @queue_id=nil
9
+ @done=done
10
+ @dependencies=dependencies
11
+ @no_buffer_node=no_buffer_node
12
+ @monocpu=TRUE
13
+ @cloned_id = nil
14
+ if @parameters =~ /\[cpu|lcpu\]/ #||@parameters =~ /\[lcpu\]/
15
+ @monocpu=FALSE
16
+ end
17
+ end
18
+
19
+ def inspect
20
+ string="\e[31m#{@name}\e[0m\n\t\e[33m#{@parameters.gsub("\n","\n\t")}\e[0m\t\e[34m#{@exec_folder_program}\e[0m"
21
+ end
22
+ end
data/lib/autoflow/queue_manager.rb ADDED
@@ -0,0 +1,299 @@
1
+ class QueueManager
2
+
3
+ ########################################################################################
4
+ ## SELECT AND PREPARE MANAGER
5
+ ########################################################################################
6
+ def initialize(exec_folder, options, commands, persist_variables)
7
+ @exec_folder = exec_folder
8
+ @commands = commands
9
+ @persist_variables = persist_variables
10
+ @verbose = options[:verbose]
11
+ @cpus = options[:cpus]
12
+ @exp_cpu = options[:exp_cpu]
13
+ @count_cpu = 0
14
+ @time = options[:time]
15
+ @memory = options[:memory]
16
+ @node_type = options[:node_type]
17
+ @use_multinode = options[:use_multinode]
18
+ @use_ntasks = options[:use_ntasks]
19
+ @job_identifier = options[:identifier]
20
+ @files = {}
21
+ @remote = options[:remote]
22
+ @ssh = options[:ssh]
23
+ @external_dependencies = options[:external_dependencies]
24
+ end
25
+
26
+ def self.descendants
27
+ ObjectSpace.each_object(Class).select { |klass| klass < self }
28
+ end
29
+
30
+ def self.select_queue_manager(exec_folder, options, commands, persist_variables)
31
+ path_managers = File.join(File.dirname(__FILE__),'queue_managers')
32
+ Dir.glob(path_managers+'/*').each do |manager|
33
+ require manager
34
+ end
35
+ if options[:batch]
36
+ queue_manager = BashManager
37
+ else
38
+ queue_manager = select_manager(options)
39
+ end
40
+ return queue_manager.new(exec_folder, options, commands, persist_variables)
41
+ end
42
+
43
+ def self.select_manager(options)
44
+ queue_manager = nil
45
+ priority = 0
46
+ descendants.each do |descendant|
47
+ if descendant.available?(options) && priority <= descendant.priority
48
+ queue_manager = descendant
49
+ priority = descendant.priority
50
+ end
51
+ end
52
+ return queue_manager
53
+ end
54
+
55
+ ########################################################################################
56
+ ## EXECUTING WORKFLOW WITH MANAGER
57
+ ########################################################################################
58
+
59
+ def exec
60
+ create_folder(@exec_folder)
61
+ make_environment_file if !@persist_variables.empty?
62
+ create_file('index_execution', @exec_folder)
63
+ launch_all_tasks
64
+ close_file('index_execution')
65
+ end
66
+
67
+ def launch_all_tasks
68
+ buffered_nodes = []
69
+ sort_commands_by_dependencies.each do |id, node|
70
+ write_file('index_execution', "#{id}\t#{node.exec_folder_program}")
71
+ if node.done
72
+ next
73
+ else
74
+ rm_done_dependencies(node)
75
+ end
76
+ buffered_nodes = launch_task_in_folder(node, id, buffered_nodes)
77
+ end
78
+ end
79
+
80
+ def launch_task_in_folder(node, id, buffered_nodes)
81
+ create_folder(node.exec_folder_program)
82
+ if node.no_buffer_node # Launch this node on the queue system together with any buffered nodes
83
+ launch2queue_system(id, node, buffered_nodes)
84
+ buffered_nodes = []#Clean buffer
85
+ else # Buffer node
86
+ buffered_nodes << [id, node]
87
+ end
88
+ return buffered_nodes
89
+ end
90
+
91
+
92
+ def launch2queue_system(id, node, buffered_nodes)
93
+ # Write sh file
94
+ #--------------------------------
95
+ log_folder = File.join(@exec_folder, 'log')
96
+ sh_name = node.name+'.sh'
97
+ create_file(sh_name, node.exec_folder_program)
98
+ write_file(sh_name, '#!/usr/bin/env bash')
99
+ write_file(sh_name, '##JOB_GROUP_ID='+@job_identifier)
100
+ write_header(id, node, sh_name)
101
+
102
+ #Get dependencies
103
+ #------------------------------------
104
+ ar_dependencies = get_dependencies(node, id)
105
+ buffered_nodes.each do |id_buff_node,buff_node|
106
+ write_node(buff_node, sh_name)
107
+ ar_dependencies += get_dependencies(buff_node, id_buff_node)
108
+ buff_node.exec_folder_program = node.exec_folder_program
109
+ end
110
+ ar_dependencies.uniq!
111
+
112
+ #Write sh body
113
+ #--------------------------------
114
+ write_file(sh_name, 'hostname')
115
+ write_file(sh_name, "echo -e \"STARTED #{id.gsub(')','')} #{node.name}:\\t`date`\" >> #{log_folder}")
116
+ write_file(sh_name, "source #{File.join(@exec_folder, 'env_file')}") if !@persist_variables.empty?
117
+ write_node(node, sh_name)
118
+ write_file(sh_name, "echo -e \"FINISHED #{id.gsub(')','')} #{node.name}:\\t`date`\" >> #{log_folder}")
119
+ close_file(sh_name, 0755)
120
+
121
+ #Submit node
122
+ #-----------------------------------
123
+ if !@verbose
124
+ queue_id = submit_node(node, ar_dependencies)
125
+ node.queue_id = queue_id # Returns id of running tag on queue system
126
+ asign_queue_id(buffered_nodes, queue_id)
127
+ end
128
+ end
129
+
130
+ def make_environment_file
131
+ create_file('env_file', @exec_folder)
132
+ @persist_variables.each do |var, value|
133
+ write_file('env_file', "export #{var}=#{value}")
134
+ end
135
+ close_file('env_file')
136
+ end
137
+
138
+ def create_folder(folder_name)
139
+ if @remote
140
+ @ssh.exec!("if ! [ -d #{folder_name} ]; then mkdir -p #{folder_name}; fi")
141
+ else
142
+ Dir.mkdir(folder_name) if !File.exists?(folder_name)
143
+ end
144
+ end
145
+
146
+ def create_file(file_name, path)
147
+ @files[file_name] = [path, '']
148
+ end
149
+
150
+ def write_file(file_name, content)
151
+ @files[file_name].last << content+"\n"
152
+ end
153
+
154
+ def close_file(file_name, permissions = nil) #SSH
155
+ path, content = @files.delete(file_name)
156
+ file_path = File.join(path, file_name)
157
+ if @remote
158
+ @ssh.exec!("echo '#{content}' > #{file_path}")
159
+ @ssh.exec!("chmod #{permissions} #{file_path}") if !permissions.nil?
160
+ else
161
+ local_file = File.open(file_path,'w')
162
+ local_file.chmod(permissions) if !permissions.nil?
163
+ local_file.print content
164
+ local_file.close
165
+ end
166
+ end
167
+
168
+ def system_call(cmd, path = nil)
169
+ cmd = "cd #{path}; " + cmd if !path.nil?
170
+ if @remote
171
+ call = @ssh.exec!(cmd)
172
+ else
173
+ call = %x[#{cmd}]
174
+ end
175
+ return call
176
+ end
177
+
178
+ def self.system_call(cmd, path = nil, remote = FALSE, ssh = nil)
179
+ cmd = "cd #{path}; " + cmd if !path.nil?
180
+ if remote
181
+ call = ssh.exec!(cmd)
182
+ else
183
+ call = %x[#{cmd}]
184
+ end
185
+ return call
186
+ end
187
+
188
+ def rm_done_dependencies(node)
189
+ remove=[]
190
+ node.dependencies.each do |dependency|
191
+ if @commands[dependency].done
192
+ remove << dependency
193
+ end
194
+ end
195
+ remove.each do |rm|
196
+ node.dependencies.delete(rm)
197
+ end
198
+ end
199
+
200
+ def sort_commands_by_dependencies
201
+ ar_commands = @commands.to_a
202
+ sorted_commands = []
203
+ task_without_dep = ar_commands.select{|node| node.last.dependencies.empty?}
204
+ sorted_commands.concat(task_without_dep)
205
+ while ar_commands.length != sorted_commands.length
206
+ ids = sorted_commands.map{|command| command.first}
207
+ ar_commands.each do |com|
208
+ if !sorted_commands.include?(com)
209
+ deps = com.last.dependencies - ids
210
+ sorted_commands << com if deps.empty?
211
+ end
212
+ end
213
+ end
214
+ return sorted_commands
215
+ end
216
+
217
+ def write_node(tag, sh_name)
218
+ write_file(sh_name, tag.initialization) if !tag.initialization.nil?
219
+ cmd = 'time '
220
+ if !tag.monocpu
221
+ if tag.parameters.include?('[lcpu]')
222
+ string = '[lcpu]'
223
+ used_cpu = 'workers'
224
+ elsif tag.parameters.include?('[cpu]')
225
+ string = '[cpu]'
226
+ used_cpu = @cpus.to_s
227
+ end
228
+ tag.parameters.gsub!(string, used_cpu) # Use assigned cpus
229
+ end
230
+ cmd << tag.parameters
231
+ write_file(sh_name, cmd)
232
+ end
233
+
234
+ def asign_cpu(node)
235
+ used_cpu = 1
236
+ if !node.monocpu
237
+ if @exp_cpu == 0
238
+ used_cpu = @cpus
239
+ else # assign exponential cpus
240
+ if @exp_cpu**(@count_cpu) < @cpus
241
+ @count_cpu +=1
242
+ end
243
+ used_cpu = @exp_cpu**@count_cpu
244
+ end
245
+ end
246
+ return used_cpu
247
+ end
248
+
249
+ def get_dependencies(node, id = nil)
250
+ ar_dependencies = []
251
+ ar_dependencies += node.dependencies
252
+ ar_dependencies.delete(id) if !id.nil? #Delete autodependency
253
+ return ar_dependencies
254
+ end
255
+
256
+ def asign_queue_id(ar_tags,id)
257
+ ar_tags.each do |id_ar_tag, ar_tag|
258
+ ar_tag.queue_id=id
259
+ end
260
+ end
261
+
262
+ def get_queue_system_dependencies(ar_dependencies)
263
+ queue_system_ids=[]
264
+ ar_dependencies.each do |dependency|
265
+ queue_system_ids << @commands[dependency].queue_id
266
+ end
267
+ return queue_system_ids
268
+ end
269
+
270
+ def get_all_deps(ar_dependencies)
271
+ final_dep = []
272
+ final_dep.concat(get_queue_system_dependencies(ar_dependencies)) if !ar_dependencies.empty?
273
+ final_dep.concat(@external_dependencies)
274
+ return final_dep
275
+ end
276
+
277
+ ########################################################################################
278
+ ## QUEUE DEPENDANT METHODS
279
+ ########################################################################################
280
+ def write_header(id, node, sh)
281
+
282
+ end
283
+
284
+ def submit_node(node, ar_dependencies)
285
+
286
+ end
287
+
288
+ def get_queue_system_id(shell_output)
289
+
290
+ end
291
+
292
+ def self.available?
293
+ return FALSE
294
+ end
295
+
296
+ def self.priority
297
+ return -1
298
+ end
299
+ end
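New back ends plug into QueueManager through self.available?, self.priority and the queue-dependent methods, and any file dropped into lib/autoflow/queue_managers/ is auto-required by select_queue_manager. A hypothetical PBS/Torque manager sketch, not part of this gem (the qsub flags shown are standard Torque options, and priority 50 keeps it below SlurmManager):

    require 'queue_manager'
    class PbsManager < QueueManager
      def write_header(id, node, sh_name)
        used_cpu = asign_cpu(node)
        write_file(sh_name, "#PBS -l nodes=1:ppn=#{used_cpu}")
        write_file(sh_name, "#PBS -l mem=#{@memory}")
        write_file(sh_name, "#PBS -l walltime=#{@time}")
      end

      def submit_node(node, ar_dependencies)
        final_dep = get_all_deps(ar_dependencies)
        dependencies = nil
        dependencies = '-W depend=afterok:' + final_dep.join(':') if !final_dep.empty?
        cmd = "qsub #{dependencies} #{node.name}.sh"
        return get_queue_system_id(system_call(cmd, node.exec_folder_program))
      end

      def get_queue_system_id(shell_output)
        # qsub prints an id such as 12345.server; keep the leading job number
        return shell_output.chomp.split('.').first
      end

      def self.available?(options)
        shell_output = system_call("type 'qsub'", nil, options[:remote], options[:ssh])
        return !shell_output.empty?
      end

      def self.priority
        return 50
      end
    end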
data/lib/autoflow/queue_managers/bash_manager.rb ADDED
@@ -0,0 +1,50 @@
1
+ require 'queue_manager'
2
+ class BashManager < QueueManager
3
+
4
+ def initialize(exec_folder, options, commands, persist_variables)
5
+ super
6
+ @queued = []
7
+ @last_deps = []
8
+ @path2execution_script = File.join(@exec_folder, 'execution.sh')
9
+ create_file('execution.sh', @exec_folder)
10
+ write_file('execution.sh', '#! /usr/bin/env bash')
11
+ end
12
+
13
+ def launch_all_tasks
14
+ super
15
+ close_file('execution.sh', 0755)
16
+ system_call("#{@path2execution_script} > #{File.join(File.dirname(@path2execution_script),'output')} & ", @exec_folder)
17
+ end
18
+
19
+ def write_header(id, node, sh)
20
+ @queued << id # For dependencies purposes
21
+ end
22
+
23
+ def submit_node(node, ar_dependencies)
24
+ write_file('execution.sh','')
25
+ if !ar_dependencies.empty?
26
+ deps = ar_dependencies - @last_deps
27
+ if !deps.empty?
28
+ write_file('execution.sh', 'wait')
29
+ @last_deps.concat(@queued)
30
+ end
31
+ end
32
+ @last_deps.concat(ar_dependencies)
33
+ @last_deps.uniq!
34
+ write_file('execution.sh', "cd #{node.exec_folder_program}")
35
+ write_file('execution.sh', "./#{node.name}.sh &")
36
+ return nil
37
+ end
38
+
39
+ def get_queue_system_id(shell_output)
40
+ return nil
41
+ end
42
+
43
+ def self.available?(options)
44
+ return TRUE
45
+ end
46
+
47
+ def self.priority
48
+ return 0
49
+ end
50
+ end
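With the bash back end the per-node scripts are chained from a single execution.sh, which launch_all_tasks then runs in the background. Roughly, for two nodes where the second depends on the first, the generated file would look like this (paths and names are illustrative):

    #! /usr/bin/env bash

    cd exec_dir/trim_program_0000
    ./trim_program.sh &

    wait
    cd exec_dir/assembly_program_0000
    ./assembly_program.sh &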
data/lib/autoflow/queue_managers/slurm_manager.rb ADDED
@@ -0,0 +1,47 @@
1
+ require 'queue_manager'
2
+ class SlurmManager < QueueManager
3
+ def write_header(id, node, sh_name)
4
+ used_cpu = asign_cpu(node)
5
+ if !@use_ntasks
6
+ write_file(sh_name, "#SBATCH --cpus-per-task=#{used_cpu}")
7
+ else
8
+ write_file(sh_name, "#SBATCH --ntasks=#{used_cpu}")
9
+ write_file(sh_name, "#SBATCH --nodes=#{@use_multinode}") if @use_multinode > 0
10
+ write_file(sh_name, 'srun hostname -s > workers') if node.parameters.include?('[lcpu]')
11
+ end
12
+ constraint = '#SBATCH --constraint='+@node_type if !@node_type.nil?
13
+ write_file(sh_name, "#SBATCH --mem=#{@memory}")
14
+ write_file(sh_name, "#SBATCH --time=#{@time}")
15
+ write_file(sh_name, "#{constraint}")
16
+ write_file(sh_name, '#SBATCH --error=job.%J.err')
17
+ write_file(sh_name, '#SBATCH --output=job.%J.out')
18
+ end
19
+
20
+ def submit_node(node, ar_dependencies)
21
+ final_dep = get_all_deps(ar_dependencies)
22
+ dependencies = nil
23
+ dependencies='--dependency=afterok:'+final_dep.join(':') if !final_dep.empty?
24
+ cmd = "sbatch #{dependencies} #{node.name}.sh"
25
+ queue_id = get_queue_system_id(system_call(cmd, node.exec_folder_program))
26
+ return queue_id
27
+ end
28
+
29
+ def get_queue_system_id(shell_output)
30
+ queue_id = nil
31
+ shell_output.chomp!
32
+ fields = shell_output.split(' ')
33
+ queue_id = fields[3]
34
+ return queue_id
35
+ end
36
+
37
+ def self.available?(options)
38
+ available = TRUE
39
+ shell_output = system_call("type 'sbatch'", nil, options[:remote], options[:ssh])
40
+ available = FALSE if shell_output.empty?
41
+ return available
42
+ end
43
+
44
+ def self.priority
45
+ return 100
46
+ end
47
+ end
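Combined with the body written in QueueManager#launch2queue_system, the SLURM back end produces one sbatch script per node. A hypothetical trim_program.sh generated with the -s (ntasks) flag for the template sketched earlier (paths and the job-group id are illustrative, and blank lines are omitted):

    #!/usr/bin/env bash
    ##JOB_GROUP_ID=pipeline.af_1405958400
    #SBATCH --ntasks=16
    #SBATCH --mem=4gb
    #SBATCH --time=20:00:00
    #SBATCH --error=job.%J.err
    #SBATCH --output=job.%J.out
    hostname
    echo -e "STARTED trim trim_program:\t`date`" >> exec_dir/log
    time trim_program 16 /path/to/reads.fastq > trimmed.fastq
    echo -e "FINISHED trim trim_program:\t`date`" >> exec_dir/log

Each script is submitted with sbatch, adding --dependency=afterok:<ids> when the node has pending dependencies.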
data/lib/autoflow/stack.rb ADDED
@@ -0,0 +1,272 @@
1
+ require 'program'
2
+ require 'queue_manager'
3
+
4
+
5
+ class Stack
6
+
7
+ TAG = 0
8
+ INIT = 1
9
+ COMMAND = 2
10
+ PROG_NAME = 0
11
+ PROG_PARAM = 1
12
+
13
+ ##########################################################################################
14
+ ## PARSE TEMPLATE
15
+ ##########################################################################################
16
+ def initialize(exec_folder, options)
17
+ @commands = {}
18
+ @variables = {}
19
+ @one2one_dependencies = {}
20
+ @persist_variables = {}
21
+ @dirs = []
22
+ @exec_folder = exec_folder
23
+ @file_workflow = options[:workflow]
24
+ @do_retry = options[:retry]
25
+ parse(options[:workflow], options[:Variables])
26
+ @q_mgr = load_queue_manager(options)
27
+ end
28
+
29
+ def parse(workflow, external_variables)
30
+ #Clean template
31
+ workflow.gsub!(/\#.+$/,'') #Delete comments
32
+ workflow.gsub!("\t",'') #Drop tabs
33
+ workflow.gsub!(/\n+/,"\n") #Drop empty lines
34
+
35
+ #Parse template
36
+ variables_lines = []
37
+ persist_variables_lines = []
38
+ node_lines = []
39
+
40
+ workflow.each_line do |line|
41
+ if line =~ /^\$/
42
+ variables_lines << line
43
+ elsif line =~ /^\@/
44
+ persist_variables_lines << line.gsub('@','')
45
+ else
46
+ node_lines << line
47
+ end
48
+ end
49
+ load_variables(variables_lines, @variables)
50
+ load_variables(external_variables, @variables)
51
+ load_variables(persist_variables_lines, @persist_variables)
52
+ parse_nodes(node_lines)
53
+ end
54
+
55
+ def load_variables(variables_lines, variable_type)
56
+ if !variables_lines.nil?
57
+ variables_lines.each do |line|
58
+ line.chomp!
59
+ line.gsub!(/\s/,'')
60
+ pairs = line.split(';')
61
+ pairs.each do |pair|
62
+ pair =~ /(.+)=(.+)/
63
+ variable_type[$1] = $2
64
+ end
65
+ end
66
+ end
67
+ end
68
+
69
+ def parse_nodes(execution_lines)
70
+ scan_nodes(execution_lines).each do |job_node| # Takes the info of each workflow node to create the job
71
+ list_dup_nodes_ids(job_node[TAG]).each do |dup_node_id| #if dup_node_id is nil, the node isn't cloned.
72
+ # Set node attributes
73
+ prog_parameters = job_node[COMMAND].split(' ', 2)
74
+ tag_root_name = job_node[TAG].gsub(/\[([^\]]+\])\)/, '')#We remove the clone node expression
75
+ tag_root_name, dup_node_id, folder, no_buffer_node, done = set_node_attributes(tag_root_name, dup_node_id, prog_parameters)
76
+
77
+ #Dependencies
78
+ initialization, init_dependencies = parse_parameters(job_node[INIT].dup, dup_node_id)
79
+ parameters, dependencies = parse_parameters(prog_parameters[PROG_PARAM].dup, dup_node_id)
80
+ all_dependencies = dependencies + init_dependencies
81
+ all_dependencies.uniq!
82
+
83
+ #Create node_job
84
+ command_line = prog_parameters[PROG_NAME]+' '+parameters
85
+ node_name = "#{tag_root_name}#{dup_node_id}"
86
+ node = add_program(node_name, prog_parameters[PROG_NAME], command_line, initialization, folder, all_dependencies, done, no_buffer_node)
87
+ node.cloned_id = dup_node_id
88
+ end
89
+ end
90
+ end
91
+
92
+ def set_node_attributes(tag_root_name, dup_node_id, prog_parameters)
93
+ folder = ''
94
+ no_buffer_node = TRUE
95
+ done = FALSE
96
+
97
+ if tag_root_name =~ /\!/ || (!dup_node_id.nil? && dup_node_id =~ /\!/)
98
+ folder = @exec_folder
99
+ else
100
+ folder = asign_folder(prog_parameters[PROG_NAME])
101
+ end
102
+ if tag_root_name =~ /&/ || (!dup_node_id.nil? && dup_node_id =~ /&/) # TODO: check how this behaves
103
+ no_buffer_node = FALSE
104
+ folder = @exec_folder # This path is replaced later by the path of the main task that launches all buffered tasks
105
+ end
106
+ done = TRUE if tag_root_name =~ /\%/ && !@do_retry || (!dup_node_id.nil? && dup_node_id =~ /\%/ && !@do_retry)
107
+
108
+ tag_root_name.gsub!(/&|\!|\%|\)/,'')# Delete function characters
109
+ dup_node_id.gsub!(/&|\!|\%|\)/,'') if !dup_node_id.nil?
110
+ return tag_root_name, dup_node_id, folder, no_buffer_node, done
111
+ end
112
+
113
+ def parse_parameters(command, dup_node_id)
114
+ dependencies = []
115
+ if !command.nil? # When command is the initialization, it can sometimes be undefined
116
+ command.gsub!(/\(\*\)/, dup_node_id) if !dup_node_id.nil? # Define local parameter for duplicated nodes
117
+ dependencies_direct(command, dependencies)
118
+ if !dup_node_id.nil?
119
+ command.scan(/!([\S]+)\*!/).each do |dependency| # Current node depends on a only node of a batch of duplicated nodes
120
+ dependencies_one2one(command, dependency[0], dependencies, dup_node_id)
121
+ end
122
+ end
123
+
124
+ one2more = command.scan(/!([\S]+)!([^ \n]+)/) # Current node depends on a full batch of duplicated nodes
125
+ one2more.concat(command.scan(/!([\S]+)![ \n]/))
126
+ one2more.each do |dependency|
127
+ dependencies_one2more(command, dependency, dependencies)
128
+ end
129
+
130
+ replace_variables(command)
131
+ end
132
+ #raise 'Missed dependency on: ' + command if command.include?(')')
133
+ return command, dependencies
134
+ end
135
+
136
+ def dependencies_direct(command, dependencies)
137
+ @commands.each do |stage_id, node|
138
+ # The second condition triggers when parsing a batch of tags: the first tag already replaced stage_id with its folder path, so later tags would otherwise lose this dependency
139
+ folder = node.exec_folder_program
140
+ if command.include?(stage_id) || command.include?(folder)
141
+ dependencies << stage_id
142
+ end
143
+ command.gsub!(stage_id+')', folder) #Change tag to folder path
144
+ end
145
+ end
146
+
147
+ def dependencies_one2one(command, dependency, dependencies, dup_node_id)
148
+ @commands.keys.each do |st_id|
149
+ if st_id.include?(dependency) && @commands[st_id].cloned_id == dup_node_id # if a tag id matches the expression and the clone id matches, we take it as a dependency
150
+ dependencies << st_id
151
+ command.gsub!("!#{dependency}*!", @commands[st_id].exec_folder_program)
152
+ break
153
+ end
154
+ end
155
+ end
156
+
157
+ def dependencies_one2more(command, dependency, dependencies)
158
+ batch = []
159
+ @commands.keys.each do |stage_id|
160
+ if stage_id.include?(dependency[0]) # if a tag id matches the expression, we take it as a dependency
161
+ dependencies << stage_id
162
+ batch << @commands[stage_id].exec_folder_program
163
+ end
164
+ end
165
+ string = batch.map{|tag_id| "#{tag_id}#{dependency[1]}"}.join(' ')
166
+ command.gsub!("!#{dependency[0]}!#{dependency[1]}", string)
167
+ end
168
+
169
+ def list_dup_nodes_ids(tag_node)
170
+ dup_nodes_ids = [nil]
171
+ if tag_node =~ /\[([^\]]+)/
172
+ dup_nodes_ids = []
173
+ $1.split(';').map{|interval|
174
+ if interval.include?('-')
175
+ limits = interval.split('-')
176
+ dup_nodes_ids.concat((limits.first..limits.last).to_a.map{|n| n.to_s})
177
+ else
178
+ dup_nodes_ids << interval
179
+ end
180
+ }
181
+ end
182
+ return dup_nodes_ids
183
+ end
184
+
185
+ def scan_nodes(execution_lines)
186
+ template_executions = execution_lines.join('')
187
+ executions = template_executions.scan(/(^.+\))\s{0,}\{\s{0,}([^\?]{0,})\s{0,}\?([^\}]{1,})\s{0,}\}/)
188
+ return executions
189
+ end
190
+
191
+ def add_program(stage_id, name, parameters, initialization, exec_folder_program, dependencies, done, no_buffer_node)
192
+ tag=Program.new(name, parameters, initialization, exec_folder_program, dependencies, done, no_buffer_node)
193
+ @commands[stage_id]=tag
194
+ return tag
195
+ end
196
+
197
+ def replace_variables(command)
198
+ @variables.each do |name, value|
199
+ command.gsub!(name, value)
200
+ end
201
+ end
202
+
203
+ def asign_folder(program_name)
204
+ folder=File.join(@exec_folder,"#{program_name}_#{"%04d" % 0}")
205
+ count=0
206
+ while @dirs.include?(folder)
207
+ folder=File.join(@exec_folder,"#{program_name}_#{"%04d" % count}")
208
+ count+=1
209
+ end
210
+ @dirs << folder
211
+ return folder
212
+ end
213
+
214
+ ##########################################################################################
215
+ ## LAUNCH WORKFLOW
216
+ ##########################################################################################
217
+
218
+ def load_queue_manager(options)
219
+ return QueueManager.select_queue_manager(@exec_folder, options, @commands, @persist_variables)
220
+ end
221
+
222
+ def send
223
+ @q_mgr.exec
224
+ end
225
+
226
+ ##########################################################################################
227
+ ## WORKFLOW REPRESENTATION
228
+ ##########################################################################################
229
+
230
+ def inspect
231
+ @commands.each do |id, tag|
232
+ puts "#{id} > #{tag.inspect}\t#{tag.done}\n\t\e[32m#{tag.dependencies.join("\n\t")}\e[0m"
233
+ end
234
+ end
235
+
236
+ def draw(name, name_type)
237
+ representation_type = '_structural'
238
+ representation_type = '_semantic' if name_type.include?('t')
239
+ name.gsub!(/\.\S+/,'')
240
+ file = File.open(name+representation_type+'.dot','w')
241
+ file.puts 'digraph G {', 'node[shape=box]'
242
+ all_dependencies = []
243
+ all_tag = []
244
+ @commands.each do |id, tag|
245
+ tag_name = File.basename(tag.exec_folder_program)
246
+ if name_type.include?('t')
247
+ tag_name = id
248
+ end
249
+ all_tag << tag_name
250
+ if tag.dependencies.length > 0
251
+ tag.dependencies.each do |dependencie|
252
+ dependencie_name = File.basename(@commands[dependencie].exec_folder_program)
253
+ if name_type.include?('t')
254
+ dependencie_name = dependencie
255
+ end
256
+ all_dependencies << dependencie_name
257
+ file.puts "\"#{dependencie_name}\"-> \"#{tag_name}\""
258
+ end
259
+ else
260
+ file.puts "\"#{tag_name}\"[color=mediumseagreen, style=filled]"
261
+ end
262
+ end
263
+ all_tag.keep_if{|tag| !all_dependencies.include?(tag)}
264
+ all_tag.each do |tag|
265
+ file.puts "\"#{tag}\"[color=palevioletred, style=filled]"
266
+ end
267
+ file.puts '}'
268
+ file.close
269
+ system('dot -Tpng '+name+representation_type+'.dot -o '+name+representation_type+'.png')
270
+ end
271
+
272
+ end
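The parser above also supports cloned nodes and batch dependencies; in a hypothetical fragment (tags and program names are illustrative), `align_[1-3])` expands to three nodes, `(*)` is replaced by each clone's id, `!align_*!` makes each clone of another batch depend on the matching clone, and `!align_!` collects the folders of the whole batch:

    align_[1-3]){
        ?
        align_program sample(*).fastq
    }

    stats_[1-3]){
        ?
        stats_program !align_*!/result.bam
    }

    merge){
        ?
        merge_program !align_!/result.bam
    }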
data/lib/autoflow/version.rb ADDED
@@ -0,0 +1,3 @@
1
+ module Autoflow
2
+ VERSION = "0.3.5"
3
+ end
data/lib/autoflow.rb ADDED
@@ -0,0 +1,7 @@
1
+
2
+ require "autoflow/version"
3
+ require "autoflow/stack"
4
+
5
+ module Autoflow
6
+ # Your code goes here...
7
+ end
metadata ADDED
@@ -0,0 +1,114 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: autoflow
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.5
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Pedro Seoane
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-07-22 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: net-ssh
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 2.8.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 2.8.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: bundler
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '1.3'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '1.3'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rake
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ description: ! '"Autoflow makes it easy to launch big pipelines on a queue system. Only
63
+ works with SLURM & PBS"'
64
+ email:
65
+ - seoanezonjic@hotmail.com
66
+ executables:
67
+ - AutoFlow
68
+ - env_manager
69
+ - flow_time
70
+ extensions: []
71
+ extra_rdoc_files: []
72
+ files:
73
+ - .gitignore
74
+ - Gemfile
75
+ - LICENSE.txt
76
+ - README.md
77
+ - Rakefile
78
+ - autoflow.gemspec
79
+ - bin/AutoFlow
80
+ - bin/env_manager
81
+ - bin/flow_time
82
+ - lib/autoflow.rb
83
+ - lib/autoflow/program.rb
84
+ - lib/autoflow/queue_manager.rb
85
+ - lib/autoflow/queue_managers/bash_manager.rb
86
+ - lib/autoflow/queue_managers/slurm_manager.rb
87
+ - lib/autoflow/stack.rb
88
+ - lib/autoflow/version.rb
89
+ homepage: ''
90
+ licenses:
91
+ - MIT
92
+ post_install_message:
93
+ rdoc_options: []
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ required_rubygems_version: !ruby/object:Gem::Requirement
103
+ none: false
104
+ requirements:
105
+ - - ! '>='
106
+ - !ruby/object:Gem::Version
107
+ version: '0'
108
+ requirements: []
109
+ rubyforge_project:
110
+ rubygems_version: 1.8.23
111
+ signing_key:
112
+ specification_version: 3
113
+ summary: ! '"This gem takes a pipeline and launches it on a queue system"'
114
+ test_files: []