autoflow 0.3.5

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
+ *.gem
+ *.rbc
+ .bundle
+ .config
+ .yardoc
+ Gemfile.lock
+ InstalledFiles
+ _yardoc
+ coverage
+ doc/
+ lib/bundler/man
+ pkg
+ rdoc
+ spec/reports
+ test/tmp
+ test/version_tmp
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
+ source 'https://rubygems.org'
+
+ # Specify your gem's dependencies in autoflow.gemspec
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
+ Copyright (c) 2013 TODO: Write your name
+
+ MIT License
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
+ # Autoflow
+
+ TODO: Write a gem description
+
+ ## Installation
+
+ Add this line to your application's Gemfile:
+
+     gem 'autoflow'
+
+ And then execute:
+
+     $ bundle
+
+ Or install it yourself as:
+
+     $ gem install autoflow
+
+ ## Usage
+
+ TODO: Write usage instructions here
+
+ ## Contributing
+
+ 1. Fork it
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
+ 4. Push to the branch (`git push origin my-new-feature`)
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
+ require "bundler/gem_tasks"
data/autoflow.gemspec ADDED
@@ -0,0 +1,24 @@
+ # coding: utf-8
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'autoflow/version'
+
+ Gem::Specification.new do |spec|
+   spec.name = "autoflow"
+   spec.version = Autoflow::VERSION
+   spec.authors = ["Pedro Seoane"]
+   spec.email = ["seoanezonjic@hotmail.com"]
+   spec.description = %q{"Autoflow makes easy to launch big pipelines on a queue system. Only works with SLURM & PBS"}
+   spec.summary = %q{"This gem take a pipeline and launch it on a queue system"}
+   spec.homepage = ""
+   spec.license = "MIT"
+
+   spec.files = `git ls-files`.split($/)
+   spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+   spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
+   spec.require_paths = ["lib"]
+
+   spec.add_runtime_dependency 'net-ssh', '>= 2.8.0'
+   spec.add_development_dependency "bundler", "~> 1.3"
+   spec.add_development_dependency "rake"
+ end
data/bin/AutoFlow ADDED
@@ -0,0 +1,167 @@
+ #!/usr/bin/env ruby
+
+ ROOT_PATH=File.dirname(__FILE__)
+ $: << File.expand_path(File.join(ROOT_PATH, "../lib/"))
+ $: << File.expand_path(File.join(ROOT_PATH, "../lib/autoflow/"))
+ $: << File.expand_path(File.join(ROOT_PATH, "../lib/autoflow/queue_managers"))
+
+ require 'optparse'
+ require 'autoflow'
+ require 'io/console'
+ require 'net/ssh'
+
+ #################################################################################################
+ # INPUT PARSING
+ #################################################################################################
+ options = {}
+ template_file = ''
+ optparse = OptionParser.new do |opts|
+   options[:batch] = FALSE
+   opts.on( '-b', '--batch', 'Workflow execution using batch' ) do
+     options[:batch] = TRUE
+   end
+
+   options[:cpus] = 16
+   opts.on( '-c', '--cpus INTEGER', 'Max cpus can be used in all workflow' ) do |cpus|
+     options[:cpus] = cpus.to_i
+   end
+
+   options[:external_dependencies] = []
+   opts.on( '-d', '--external_dependencies STRING', 'The workflow will start when indicated jobs finish on queue system. Format: \'id1,id2,id3..\'') do |external_dependencies|
+     options[:external_dependencies] = external_dependencies.split(',')
+   end
+
+   options[:exp_cpu] = 0
+   opts.on( '-e', '--exp_cpu INTEGER', 'Exponent of cpu assigment series' ) do |exp_cpu|
+     options[:exp_cpu] = exp_cpu.to_i
+   end
+
+   options[:retry] = FALSE
+   opts.on( '-f', '--force', 'Execute all jobs, included the jobs commented with %' ) do
+     options[:retry] = TRUE
+   end
+
+   options[:graph] = nil
+   opts.on( '-g', '--graph STRING', 'Draw the template. t for use tag like names or f for use folders names instead' ) do |graph|
+     options[:graph] = graph
+   end
+
+   options[:memory] = '4gb'
+   opts.on( '-m', '--memory STRING', 'Max memory can be used in a task' ) do |mem|
+     options[:memory] = mem
+   end
+
+   options[:node_type] = nil
+   opts.on( '-n', '--node_type STRING', 'Apply constraint attribute to tasks' ) do |node_type|
+     options[:node_type] = node_type
+   end
+
+   options[:output] = 'exec'
+   opts.on( '-o', '--output STRING', 'Output folder of flow' ) do |output|
+     options[:output] = output
+   end
+
+   options[:remote] = FALSE
+   opts.on( '-r', '--remote', 'Connect with remote machine and launch the workflow' ) do
+     options[:remote] = TRUE
+     puts 'Host to connect and launch workflow:'
+     host = gets.chomp
+     puts 'User:'
+     user = gets.chomp
+     puts 'Password (hidden)'
+     password = STDIN.noecho(&:gets).chomp
+     options[:ssh] = Net::SSH.start(host, user, :password => password, :auth_methods => ['keyboard-interactive'])
+     puts options[:ssh].exec!('hostname')
+   end
+
+   options[:time] = '20:00:00'
+   opts.on( '-t', '--time STRING', 'Max time that can be needed in a task' ) do |time|
+     options[:time] = time
+   end
+
+   options[:use_multinode] = 0
+   opts.on( '-u', '--use_multinode INTEGER', 'For use several nodes on execution' ) do |use_multinode|
+     options[:use_multinode] = use_multinode.to_i
+   end
+
+   options[:use_ntasks] = FALSE
+   opts.on( '-s', '--use_ntasks', 'Use -ntasks flag with sh' ) do
+     options[:use_ntasks] = TRUE
+   end
+
+   options[:verbose] = FALSE
+   opts.on( '-v', '--verbose', 'Show info without launch jobs' ) do
+     options[:verbose] = TRUE
+   end
+
+   options[:Variables] = nil
+   opts.on( '-V', '--Variables STRING', 'Variables to be parsed on template. Format: \'$variable_name1=value1;$variable_name2=value2;...\'' ) do |mem|
+     options[:Variables] = mem.split(';')
+   end
+
+   options[:workflow] = FALSE
+   opts.on( '-w', '--workflow FILE', 'Input workflow file' ) do |workflow|
+     options[:workflow] = workflow
+     template_file = workflow
+   end
+
+   options[:identifier] = FALSE
+   opts.on( '-i', '--job_identifier STRING', 'Identifier tag for each launching script' ) do |identifier|
+     options[:identifier] = identifier
+   end
+
+   # Set a banner, displayed at the top of the help screen.
+   opts.banner = "Usage: AutoFlow.rb -w worflow_file -c n_cpus \n\n"
+
+   # This displays the help screen
+   opts.on( '-h', '--help', 'Display this screen' ) do
+     puts opts
+     exit
+   end
+
+ end # End opts
+
+ # parse options and remove from ARGV
+ optparse.parse!
+
+ if !options[:workflow] || !File.exists?(options[:workflow])
+   puts 'Workflow file not especified or not exists'
+   Process.exit(-1)
+ else
+   options[:identifier] = "#{options[:workflow]}_#{Time.new.to_i}" if !options[:identifier]
+   options[:workflow] = File.open(options[:workflow]).read
+ end
+
+ #################################################################################################
+ # MAIN
+ #################################################################################################
+ if options[:remote]
+   main_path = options[:ssh].exec!('pwd').chomp
+ else
+   main_path = Dir.pwd
+ end
+
+ if options[:output] == 'exec'
+   exec_folder = File.join(main_path,'exec')
+ else
+   exec_folder = options[:output]
+   exec_folder = File.join(main_path, options[:output]) if exec_folder[0] != '/' && exec_folder[0] != '~'
+ end
+
+ #--------------------------------------------------------------------------------
+ # Flow parse
+ #--------------------------------------------------------------------------------
+ stack=Stack.new(exec_folder, options)
+ stack.draw(template_file, options[:graph]) if !options[:graph].nil?
+
+ #--------------------------------------------------------------------------------
+ # Flow exec
+ #--------------------------------------------------------------------------------
+ if options[:verbose]
+   stack.inspect
+   stack.send
+ elsif options[:graph].nil?
+   stack.send
+ end
+ options[:ssh].close if options[:remote]
+
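Based on the option parser above, a minimal invocation of the `AutoFlow` executable might look like the sketch below; the workflow file name and resource values are hypothetical, and every flag shown is defined in the script above.

    $ AutoFlow -w my_workflow.af -c 16 -m '8gb' -t '10:00:00' -o results
    $ AutoFlow -w my_workflow.af -g t

The second form only draws the template as a graph (with tag names) instead of launching it, since the script calls `stack.send` only when `-g` is not given or `-v` is set.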
data/bin/env_manager ADDED
@@ -0,0 +1,23 @@
+ #! /usr/bin/env ruby
+
+ # Parse enviroment variables file
+ new_values = {}
+ env_path = '../env_file' # A level up
+ if !File.exists?(env_path)
+   env_path ='env_path' # Local
+   if !File.exists?(env_path)
+     raise 'Enviroment file not found'
+   end
+ end
+ pairs = File.open(env_path).read.scan(/export ([^=]+)=([\S]+)/)
+ pairs.map{|pair| new_values[pair.first] = pair.last}
+
+ # Change enviroment variables (Commandline must be 'var=value;var1=value2...varN=valueN;')
+ ARGV[0].scan(/([^=]+)=([^;]+);/).map{|new_pair| new_values[new_pair.first]=new_pair.last}
+
+ # Save modified enviroment variables
+ new_enviroment = File.open(env_path,'w')
+ new_values.each do |env_var, value|
+   new_enviroment.puts "export #{env_var}=#{value}"
+ end
+ new_enviroment.close
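Judging from the `ARGV[0].scan(/([^=]+)=([^;]+);/)` call above, `env_manager` takes a single argument of `var=value;` pairs, each terminated by a semicolon, and rewrites the exported variables in the workflow's environment file. A hypothetical call, with invented variable names:

    $ env_manager 'BLASTDB=/db/nr;THREADS=8;'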
data/bin/flow_time ADDED
@@ -0,0 +1,52 @@
+ #! /usr/bin/env ruby
+ require 'scbi_plot'
+
+ times = {}
+ task = nil
+ status = nil
+ beg_time = Time.now
+ end_time = Time.new(0)
+ File.open(ARGV[0]).read.each_line do |line|
+   line.chomp!
+   description, date = line.split("\t")
+   status, task, program = description.split(' ')
+   day, month, n_day, time, mode, year = date.split(' ')
+   hours, minutes, seconds = time.split(':')
+   task_time = Time.local(year.to_i, month, n_day.to_i, hours.to_i, minutes.to_i, seconds.to_i)
+   if task_time > end_time
+     end_time = task_time
+   end
+   if task_time < beg_time
+     beg_time = task_time
+   end
+   if status == 'STARTED'
+     times[task] = [task_time]
+   else
+     times[task] << task_time
+   end
+ end
+
+ tag_task = []
+ plot_times = []
+ total_time = (end_time - beg_time).to_i/3600.0
+ tag_task << 'Total_time'
+ plot_times << total_time
+ puts "Total time: #{total_time} hours"
+ times.each do |task, interval|
+   if interval.length == 2
+     task_time = (interval.last - interval.first).to_i/3600.0
+     tag_task << task
+     plot_times << task_time
+     puts "#{task}\t#{task_time} hours"
+   else
+     puts "#{task}\tStarted at #{interval.first}. Currently running"
+   end
+ end
+
+ # create Histogram
+ tag_task.map!{|tag| tag.gsub('e','\\e')}
+ tag_task.map!{|tag| tag.gsub('E','\\E')}
+ p=ScbiPlot::Histogram.new('time_graph.png','Flow stats (hours)')
+ p.add_x(tag_task)
+ p.add_y(plot_times)
+ p.do_graph
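`flow_time` expects each line of its input log to hold a description and a `date`-style timestamp separated by a tab, which is the format the queue manager further below writes via `echo -e "STARTED <id> <name>:\t\`date\`"`. A hypothetical pair of log lines, with invented task id, program name, and timestamps (the separator before the date is a tab character):

    STARTED clean_reads bwa:	Tue Jul 22 10:15:30 CEST 2014
    FINISHED clean_reads bwa:	Tue Jul 22 12:02:11 CEST 2014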
data/lib/autoflow/program.rb ADDED
@@ -0,0 +1,22 @@
+ class Program
+   attr_accessor :name, :exec_folder_program, :parameters, :initialization, :queue_id, :done, :dependencies, :monocpu, :no_buffer_node,:cloned_id
+   def initialize(name, parameters, initialization, exec_folder_program, dependencies, done, no_buffer_node)
+     @name=name
+     @parameters=parameters
+     @initialization=initialization
+     @exec_folder_program=exec_folder_program
+     @queue_id=nil
+     @done=done
+     @dependencies=dependencies
+     @no_buffer_node=no_buffer_node
+     @monocpu=TRUE
+     @cloned_id = nil
+     if @parameters =~ /\[cpu|lcpu\]/ #||@parameters =~ /\[lcpu\]/
+       @monocpu=FALSE
+     end
+   end
+
+   def inspect
+     string="\e[31m#{@name}\e[0m\n\t\e[33m#{@parameters.gsub("\n","\n\t")}\e[0m\t\e[34m#{@exec_folder_program}\e[0m"
+   end
+ end
data/lib/autoflow/queue_manager.rb ADDED
@@ -0,0 +1,299 @@
+ class QueueManager
+
+   ########################################################################################
+   ## SELECT AND PREPARE MANAGER
+   ########################################################################################
+   def initialize(exec_folder, options, commands, persist_variables)
+     @exec_folder = exec_folder
+     @commands = commands
+     @persist_variables = persist_variables
+     @verbose = options[:verbose]
+     @cpus = options[:cpus]
+     @exp_cpu = options[:exp_cpu]
+     @count_cpu = 0
+     @time = options[:time]
+     @memory = options[:memory]
+     @node_type = options[:node_type]
+     @use_multinode = options[:use_multinode]
+     @use_ntasks = options[:use_ntasks]
+     @job_identifier = options[:identifier]
+     @files = {}
+     @remote = options[:remote]
+     @ssh = options[:ssh]
+     @external_dependencies = options[:external_dependencies]
+   end
+
+   def self.descendants
+     ObjectSpace.each_object(Class).select { |klass| klass < self }
+   end
+
+   def self.select_queue_manager(exec_folder, options, commands, persist_variables)
+     path_managers = File.join(File.dirname(__FILE__),'queue_managers')
+     Dir.glob(path_managers+'/*').each do |manager|
+       require manager
+     end
+     if options[:batch]
+       queue_manager = BashManager
+     else
+       queue_manager = select_manager(options)
+     end
+     return queue_manager.new(exec_folder, options, commands, persist_variables)
+   end
+
+   def self.select_manager(options)
+     queue_manager = nil
+     priority = 0
+     descendants.each do |descendant|
+       if descendant.available?(options) && priority <= descendant.priority
+         queue_manager = descendant
+         priority = descendant.priority
+       end
+     end
+     return queue_manager
+   end
+
+   ########################################################################################
+   ## EXECUTING WORKFLOW WITH MANAGER
+   ########################################################################################
+
+   def exec
+     create_folder(@exec_folder)
+     make_environment_file if !@persist_variables.empty?
+     create_file('index_execution', @exec_folder)
+     launch_all_tasks
+     close_file('index_execution')
+   end
+
+   def launch_all_tasks
+     buffered_nodes = []
+     sort_commands_by_dependencies.each do |id, node|
+       write_file('index_execution', "#{id}\t#{node.exec_folder_program}")
+       if node.done
+         next
+       else
+         rm_done_dependencies(node)
+       end
+       buffered_nodes = launch_task_in_folder(node, id, buffered_nodes)
+     end
+   end
+
+   def launch_task_in_folder(node, id, buffered_nodes)
+     create_folder(node.exec_folder_program)
+     if node.no_buffer_node # Launch with queue_system nodes and all buffered
+       launch2queue_system(id, node, buffered_nodes)
+       buffered_nodes = []#Clean buffer
+     else # Buffer node
+       buffered_nodes << [id, node]
+     end
+     return buffered_nodes
+   end
+
+
+   def launch2queue_system(id, node, buffered_nodes)
+     # Write sh file
+     #--------------------------------
+     log_folder = File.join(@exec_folder, 'log')
+     sh_name = node.name+'.sh'
+     create_file(sh_name, node.exec_folder_program)
+     write_file(sh_name, '#!/usr/bin/env bash')
+     write_file(sh_name, '##JOB_GROUP_ID='+@job_identifier)
+     write_header(id, node, sh_name)
+
+     #Get dependencies
+     #------------------------------------
+     ar_dependencies = get_dependencies(node, id)
+     buffered_nodes.each do |id_buff_node,buff_node|
+       write_node(buff_node, sh_name)
+       ar_dependencies += get_dependencies(buff_node, id_buff_node)
+       buff_node.exec_folder_program = node.exec_folder_program
+     end
+     ar_dependencies.uniq!
+
+     #Write sh body
+     #--------------------------------
+     write_file(sh_name, 'hostname')
+     write_file(sh_name, "echo -e \"STARTED #{id.gsub(')','')} #{node.name}:\\t`date`\" >> #{log_folder}")
+     write_file(sh_name, "source #{File.join(@exec_folder, 'env_file')}") if !@persist_variables.empty?
+     write_node(node, sh_name)
+     write_file(sh_name, "echo -e \"FINISHED #{id.gsub(')','')} #{node.name}:\\t`date`\" >> #{log_folder}")
+     close_file(sh_name, 0755)
+
+     #Submit node
+     #-----------------------------------
+     if !@verbose
+       queue_id = submit_node(node, ar_dependencies)
+       node.queue_id = queue_id # Returns id of running tag on queue system
+       asign_queue_id(buffered_nodes, queue_id)
+     end
+   end
+
+   def make_environment_file
+     create_file('env_file', @exec_folder)
+     @persist_variables.each do |var, value|
+       write_file('env_file', "export #{var}=#{value}")
+     end
+     close_file('env_file')
+   end
+
+   def create_folder(folder_name)
+     if @remote
+       @ssh.exec!("if ! [ -d #{folder_name} ]; then mkdir -p #{folder_name}; fi")
+     else
+       Dir.mkdir(folder_name) if !File.exists?(folder_name)
+     end
+   end
+
+   def create_file(file_name, path)
+     @files[file_name] = [path, '']
+   end
+
+   def write_file(file_name, content)
+     @files[file_name].last << content+"\n"
+   end
+
+   def close_file(file_name, permissions = nil) #SSH
+     path, content = @files.delete(file_name)
+     file_path = File.join(path, file_name)
+     if @remote
+       @ssh.exec!("echo '#{content}' > #{file_path}")
+       @ssh.exec!("chmod #{permissions} #{file_path}") if !permissions.nil?
+     else
+       local_file = File.open(file_path,'w')
+       local_file.chmod(permissions) if !permissions.nil?
+       local_file.print content
+       local_file.close
+     end
+   end
+
+   def system_call(cmd, path = nil)
+     cmd = "cd #{path}; " + cmd if !path.nil?
+     if @remote
+       call = @ssh.exec!(cmd)
+     else
+       call = %x[#{cmd}]
+     end
+     return call
+   end
+
+   def self.system_call(cmd, path = nil, remote = FALSE, ssh = nil)
+     cmd = "cd #{path}; " + cmd if !path.nil?
+     if remote
+       call = ssh.exec!(cmd)
+     else
+       call = %x[#{cmd}]
+     end
+     return call
+   end
+
+   def rm_done_dependencies(node)
+     remove=[]
+     node.dependencies.each do |dependency|
+       if @commands[dependency].done
+         remove << dependency
+       end
+     end
+     remove.each do |rm|
+       node.dependencies.delete(rm)
+     end
+   end
+
+   def sort_commands_by_dependencies
+     ar_commands = @commands.to_a
+     sorted_commands = []
+     task_without_dep = ar_commands.select{|node| node.last.dependencies.empty?}
+     sorted_commands.concat(task_without_dep)
+     while ar_commands.length != sorted_commands.length
+       ids = sorted_commands.map{|command| command.first}
+       ar_commands.each do |com|
+         if !sorted_commands.include?(com)
+           deps = com.last.dependencies - ids
+           sorted_commands << com if deps.empty?
+         end
+       end
+     end
+     return sorted_commands
+   end
+
+   def write_node(tag, sh_name)
+     write_file(sh_name, tag.initialization) if !tag.initialization.nil?
+     cmd = 'time '
+     if !tag.monocpu
+       if tag.parameters.include?('[lcpu]')
+         string = '[lcpu]'
+         used_cpu = 'workers'
+       elsif tag.parameters.include?('[cpu]')
+         string = '[cpu]'
+         used_cpu = @cpus.to_s
+       end
+       tag.parameters.gsub!(string, used_cpu) #Use asigned cpus
+     end
+     cmd << tag.parameters
+     write_file(sh_name, cmd)
+   end
+
+   def asign_cpu(node)
+     used_cpu = 1
+     if !node.monocpu
+       if @exp_cpu == 0
+         used_cpu = @cpus
+       else #asign exponential cpus
+         if @exp_cpu**(@count_cpu) < @cpus
+           @count_cpu +=1
+         end
+         used_cpu = @exp_cpu**@count_cpu
+       end
+     end
+     return used_cpu
+   end
+
+   def get_dependencies(node, id = nil)
+     ar_dependencies = []
+     ar_dependencies += node.dependencies
+     ar_dependencies.delete(id) if !id.nil? #Delete autodependency
+     return ar_dependencies
+   end
+
+   def asign_queue_id(ar_tags,id)
+     ar_tags.each do |id_ar_tag, ar_tag|
+       ar_tag.queue_id=id
+     end
+   end
+
+   def get_queue_system_dependencies(ar_dependencies)
+     queue_system_ids=[]
+     ar_dependencies.each do |dependency|
+       queue_system_ids << @commands[dependency].queue_id
+     end
+     return queue_system_ids
+   end
+
+   def get_all_deps(ar_dependencies)
+     final_dep = []
+     final_dep.concat(get_queue_system_dependencies(ar_dependencies)) if !ar_dependencies.empty?
+     final_dep.concat(@external_dependencies)
+     return final_dep
+   end
+
+   ########################################################################################
+   ## QUEUE DEPENDANT METHODS
+   ########################################################################################
+   def write_header(id, node, sh)
+
+   end
+
+   def submit_node(node, ar_dependencies)
+
+   end
+
+   def get_queue_system_id(shell_output)
+
+   end
+
+   def self.available?
+     return FALSE
+   end
+
+   def self.priority
+     return -1
+   end
+ end
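QueueManager picks a concrete backend by scanning its subclasses (`descendants`) and keeping the highest-priority one whose `available?(options)` probe succeeds, so supporting another scheduler only means overriding the "queue dependant methods" above. Only the bash and SLURM managers below ship in this version, even though the description also mentions PBS. A minimal, hypothetical sketch of how another scheduler could plug in; the `#PBS` directives and `qsub` flags are assumptions, not part of this gem:

    require 'queue_manager'

    # Hypothetical example; not shipped with autoflow 0.3.5.
    class ExamplePbsManager < QueueManager
      def write_header(id, node, sh_name)
        # Assumed PBS directive syntax, mirroring SlurmManager's #SBATCH lines.
        write_file(sh_name, "#PBS -l mem=#{@memory}")
        write_file(sh_name, "#PBS -l walltime=#{@time}")
      end

      def submit_node(node, ar_dependencies)
        final_dep = get_all_deps(ar_dependencies)
        dependencies = ''
        dependencies = "-W depend=afterok:#{final_dep.join(':')}" if !final_dep.empty?
        get_queue_system_id(system_call("qsub #{dependencies} #{node.name}.sh", node.exec_folder_program))
      end

      def get_queue_system_id(shell_output)
        shell_output.chomp # qsub prints the job id on its own line
      end

      def self.available?(options)
        # Same probe pattern SlurmManager uses for sbatch.
        !system_call("type 'qsub'", nil, options[:remote], options[:ssh]).empty?
      end

      def self.priority
        return 90
      end
    end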
data/lib/autoflow/queue_managers/bash_manager.rb ADDED
@@ -0,0 +1,50 @@
+ require 'queue_manager'
+ class BashManager < QueueManager
+
+   def initialize(exec_folder, options, commands, persist_variables)
+     super
+     @queued = []
+     @last_deps = []
+     @path2execution_script = File.join(@exec_folder, 'execution.sh')
+     create_file('execution.sh', @exec_folder)
+     write_file('execution.sh', '#! /usr/bin/env bash')
+   end
+
+   def launch_all_tasks
+     super
+     close_file('execution.sh', 0755)
+     system_call("#{@path2execution_script} > #{File.join(File.dirname(@path2execution_script),'output')} & ", @exec_folder)
+   end
+
+   def write_header(id, node, sh)
+     @queued << id # For dependencies purposes
+   end
+
+   def submit_node(node, ar_dependencies)
+     write_file('execution.sh','')
+     if !ar_dependencies.empty?
+       deps = ar_dependencies - @last_deps
+       if !deps.empty?
+         write_file('execution.sh', 'wait')
+         @last_deps.concat(@queued)
+       end
+     end
+     @last_deps.concat(ar_dependencies)
+     @last_deps.uniq!
+     write_file('execution.sh', "cd #{node.exec_folder_program}")
+     write_file('execution.sh', "./#{node.name}.sh &")
+     return nil
+   end
+
+   def get_queue_system_id(shell_output)
+     return nil
+   end
+
+   def self.available?(options)
+     return TRUE
+   end
+
+   def self.priority
+     return 0
+   end
+ end
data/lib/autoflow/queue_managers/slurm_manager.rb ADDED
@@ -0,0 +1,47 @@
+ require 'queue_manager'
+ class SlurmManager < QueueManager
+   def write_header(id, node, sh_name)
+     used_cpu = asign_cpu(node)
+     if !@use_ntasks
+       write_file(sh_name, "#SBATCH --cpus=#{used_cpu}")
+     else
+       write_file(sh_name, "#SBATCH --ntasks=#{used_cpu}")
+       write_file(sh_name, "#SBATCH --nodes=#{@use_multinode}") if @use_multinode > 0
+       write_file(sh_name, 'srun hostname -s > workers') if node.parameters.include?('[lcpu]')
+     end
+     constraint = '#SBATCH --constraint='+@node_type if !@node_type.nil?
+     write_file(sh_name, "#SBATCH --mem=#{@memory}")
+     write_file(sh_name, "#SBATCH --time=#{@time}")
+     write_file(sh_name, "#{constraint}")
+     write_file(sh_name, '#SBATCH --error=job.%J.err')
+     write_file(sh_name, '#SBATCH --output=job.%J.out')
+   end
+
+   def submit_node(node, ar_dependencies)
+     final_dep = get_all_deps(ar_dependencies)
+     dependencies = nil
+     dependencies='--dependency=afterok:'+final_dep.join(':') if !final_dep.empty?
+     cmd = "sbatch #{dependencies} #{node.name}.sh"
+     queue_id = get_queue_system_id(system_call(cmd, node.exec_folder_program))
+     return queue_id
+   end
+
+   def get_queue_system_id(shell_output)
+     queue_id = nil
+     shell_output.chomp!
+     fields = shell_output.split(' ')
+     queue_id = fields[3]
+     return queue_id
+   end
+
+   def self.available?(options)
+     available = TRUE
+     shell_output = system_call("type 'sbatch'", nil, options[:remote], options[:ssh])
+     available = FALSE if shell_output.empty?
+     return available
+   end
+
+   def self.priority
+     return 100
+   end
+ end
data/lib/autoflow/stack.rb ADDED
@@ -0,0 +1,272 @@
+ require 'program'
+ require 'queue_manager'
+
+
+ class Stack
+
+   TAG = 0
+   INIT = 1
+   COMMAND = 2
+   PROG_NAME = 0
+   PROG_PARAM = 1
+
+   ##########################################################################################
+   ## PARSE TEMPLATE
+   ##########################################################################################
+   def initialize(exec_folder, options)
+     @commands = {}
+     @variables = {}
+     @one2one_dependencies = {}
+     @persist_variables = {}
+     @dirs = []
+     @exec_folder = exec_folder
+     @file_workflow = options[:workflow]
+     @do_retry = options[:retry]
+     parse(options[:workflow], options[:Variables])
+     @q_mgr = load_queue_manager(options)
+   end
+
+   def parse(workflow, external_variables)
+     #Clean template
+     workflow.gsub!(/\#.+$/,'') #Delete comments
+     workflow.gsub!("\t",'') #Drop tabs
+     workflow.gsub!(/\n+/,"\n") #Drop empty lines
+
+     #Parse template
+     variables_lines = []
+     persist_variables_lines = []
+     node_lines = []
+
+     workflow.each_line do |line|
+       if line =~ /^\$/
+         variables_lines << line
+       elsif line =~ /^\@/
+         persist_variables_lines << line.gsub('@','')
+       else
+         node_lines << line
+       end
+     end
+     load_variables(variables_lines, @variables)
+     load_variables(external_variables, @variables)
+     load_variables(persist_variables_lines, @persist_variables)
+     parse_nodes(node_lines)
+   end
+
+   def load_variables(variables_lines, variable_type)
+     if !variables_lines.nil?
+       variables_lines.each do |line|
+         line.chomp!
+         line.gsub!(/\s/,'')
+         pairs = line.split(';')
+         pairs.each do |pair|
+           pair =~ /(.+)=(.+)/
+           variable_type[$1] = $2
+         end
+       end
+     end
+   end
+
+   def parse_nodes(execution_lines)
+     scan_nodes(execution_lines).each do |job_node| #Takes the info of each node of workflow for create the job
+       list_dup_nodes_ids(job_node[TAG]).each do |dup_node_id| #if dup_node_id is nil, the node isn't cloned.
+         # Set node attributes
+         prog_parameters = job_node[COMMAND].split(' ', 2)
+         tag_root_name = job_node[TAG].gsub(/\[([^\]]+\])\)/, '')#We remove the clone node expression
+         tag_root_name, dup_node_id, folder, no_buffer_node, done = set_node_attributes(tag_root_name, dup_node_id, prog_parameters)
+
+         #Dependencies
+         initialization, init_dependencies = parse_parameters(job_node[INIT].dup, dup_node_id)
+         parameters, dependencies = parse_parameters(prog_parameters[PROG_PARAM].dup, dup_node_id)
+         all_dependencies = dependencies + init_dependencies
+         all_dependencies.uniq!
+
+         #Create node_job
+         command_line = prog_parameters[PROG_NAME]+' '+parameters
+         node_name = "#{tag_root_name}#{dup_node_id}"
+         node = add_program(node_name, prog_parameters[PROG_NAME], command_line, initialization, folder, all_dependencies, done, no_buffer_node)
+         node.cloned_id = dup_node_id
+       end
+     end
+   end
+
+   def set_node_attributes(tag_root_name, dup_node_id, prog_parameters)
+     folder = ''
+     no_buffer_node = TRUE
+     done = FALSE
+
+     if tag_root_name =~ /\!/ || (!dup_node_id.nil? && dup_node_id =~ /\!/)
+       folder = @exec_folder
+     else
+       folder = asign_folder(prog_parameters[PROG_NAME])
+     end
+     if tag_root_name =~ /&/ || (!dup_node_id.nil? && dup_node_id =~ /&/) #TODO comprobar como va esto
+       no_buffer_node = FALSE
+       folder = @exec_folder#This path is replaced later for the path of the main task that launch all buffered tasks
+     end
+     done = TRUE if tag_root_name =~ /\%/ && !@do_retry || (!dup_node_id.nil? && dup_node_id =~ /\%/ && !@do_retry)
+
+     tag_root_name.gsub!(/&|\!|\%|\)/,'')# Delete function characters
+     dup_node_id.gsub!(/&|\!|\%|\)/,'') if !dup_node_id.nil?
+     return tag_root_name, dup_node_id, folder, no_buffer_node, done
+   end
+
+   def parse_parameters(command, dup_node_id)
+     dependencies = []
+     if !command.nil?# When command is the initialize, sometimes can be undefined
+       command.gsub!(/\(\*\)/, dup_node_id) if !dup_node_id.nil? # Define local parameter for duplicated nodes
+       dependencies_direct(command, dependencies)
+       if !dup_node_id.nil?
+         command.scan(/!([\S]+)\*!/).each do |dependency| # Current node depends on a only node of a batch of duplicated nodes
+           dependencies_one2one(command, dependency[0], dependencies, dup_node_id)
+         end
+       end
+
+       one2more = command.scan(/!([\S]+)!([^ \n]+)/) # Current node depends on a full batch of duplicated nodes
+       one2more.concat(command.scan(/!([\S]+)![ \n]/))
+       one2more.each do |dependency|
+         dependencies_one2more(command, dependency, dependencies)
+       end
+
+       replace_variables(command)
+     end
+     #raise 'Missed dependency on: ' + command if command.include?(')')
+     return command, dependencies
+   end
+
+   def dependencies_direct(command, dependencies)
+     @commands.each do |stage_id, node|
+       #Second conditional is triggered when is parsing a batch of tags. The first tag replaces stage_id by path and for second tag, this dependecy is lost
+       folder = node.exec_folder_program
+       if command.include?(stage_id) || command.include?(folder)
+         dependencies << stage_id
+       end
+       command.gsub!(stage_id+')', folder) #Change tag to folder path
+     end
+   end
+
+   def dependencies_one2one(command, dependency, dependencies, dup_node_id)
+     @commands.keys.each do |st_id|
+       if st_id.include?(dependency) && @commands[st_id].cloned_id == dup_node_id # if a tag id match with expresion, we take it like a dependecy
+         dependencies << st_id
+         command.gsub!("!#{dependency}*!", @commands[st_id].exec_folder_program)
+         break
+       end
+     end
+   end
+
+   def dependencies_one2more(command, dependency, dependencies)
+     batch = []
+     @commands.keys.each do |stage_id|
+       if stage_id.include?(dependency[0]) # if a tag id match with expresion, we take it like a dependecy
+         dependencies << stage_id
+         batch << @commands[stage_id].exec_folder_program
+       end
+     end
+     string = batch.map{|tag_id| "#{tag_id}#{dependency[1]}"}.join(' ')
+     command.gsub!("!#{dependency[0]}!#{dependency[1]}", string)
+   end
+
+   def list_dup_nodes_ids(tag_node)
+     dup_nodes_ids = [nil]
+     if tag_node =~ /\[([^\]]+)/
+       dup_nodes_ids = []
+       $1.split(';').map{|interval|
+         if interval.include?('-')
+           limits = interval.split('-')
+           dup_nodes_ids.concat((limits.first..limits.last).to_a.map{|n| n.to_s})
+         else
+           dup_nodes_ids << interval
+         end
+       }
+     end
+     return dup_nodes_ids
+   end
+
+   def scan_nodes(execution_lines)
+     template_executions = execution_lines.join('')
+     executions = template_executions.scan(/(^.+\))\s{0,}\{\s{0,}([^\?]{0,})\s{0,}\?([^\}]{1,})\s{0,}\}/)
+     return executions
+   end
+
+   def add_program(stage_id, name, parameters, initialization, exec_folder_program, dependencies, done, no_buffer_node)
+     tag=Program.new(name, parameters, initialization, exec_folder_program, dependencies, done, no_buffer_node)
+     @commands[stage_id]=tag
+     return tag
+   end
+
+   def replace_variables(command)
+     @variables.each do |name, value|
+       command.gsub!(name, value)
+     end
+   end
+
+   def asign_folder(program_name)
+     folder=File.join(@exec_folder,"#{program_name}_#{"%04d" % 0}")
+     count=0
+     while @dirs.include?(folder)
+       folder=File.join(@exec_folder,"#{program_name}_#{"%04d" % count}")
+       count+=1
+     end
+     @dirs << folder
+     return folder
+   end
+
+   ##########################################################################################
+   ## LAUNCH WORKFLOW
+   ##########################################################################################
+
+   def load_queue_manager(options)
+     return QueueManager.select_queue_manager(@exec_folder, options, @commands, @persist_variables)
+   end
+
+   def send
+     @q_mgr.exec
+   end
+
+   ##########################################################################################
+   ## WORKFLOW REPRESENTATION
+   ##########################################################################################
+
+   def inspect
+     @commands.each do |id, tag|
+       puts "#{id} > #{tag.inspect}\t#{tag.done}\n\t\e[32m#{tag.dependencies.join("\n\t")}\e[0m"
+     end
+   end
+
+   def draw(name, name_type)
+     representation_type = '_structural'
+     representation_type = '_semantic' if name_type.include?('t')
+     name.gsub!(/\.\S+/,'')
+     file = File.open(name+representation_type+'.dot','w')
+     file.puts 'digraph G {', 'node[shape=box]'
+     all_dependencies = []
+     all_tag = []
+     @commands.each do |id, tag|
+       tag_name = File.basename(tag.exec_folder_program)
+       if name_type.include?('t')
+         tag_name = id
+       end
+       all_tag << tag_name
+       if tag.dependencies.length > 0
+         tag.dependencies.each do |dependencie|
+           dependencie_name = File.basename(@commands[dependencie].exec_folder_program)
+           if name_type.include?('t')
+             dependencie_name = dependencie
+           end
+           all_dependencies << dependencie_name
+           file.puts "\"#{dependencie_name}\"-> \"#{tag_name}\""
+         end
+       else
+         file.puts "\"#{tag_name}\"[color=mediumseagreen, style=filled]"
+       end
+     end
+     all_tag.keep_if{|tag| !all_dependencies.include?(tag)}
+     all_tag.each do |tag|
+       file.puts "\"#{tag}\"[color=palevioletred, style=filled]"
+     end
+     file.puts '}'
+     file.close
+     system('dot -Tpng '+name+representation_type+'.dot -o '+name+representation_type+'.png')
+   end
+
+ end
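Piecing together `scan_nodes`, `load_variables`, and `parse_parameters` above, a template is read as `$name=value` variable lines, `@name=value` persisted-environment lines, `#` comments, and `tag){ initialization ? command }` blocks, where `[cpu]` stands for the assigned CPU count and writing an earlier `tag)` inside a command both declares a dependency and is replaced by that stage's execution folder. A small, hypothetical template inferred from that parser; the tool names and paths are invented:

    $READS=/path/to/reads.fastq

    clean_reads){
        ?
        clean_tool $READS -t [cpu] -o filtered.fastq
    }

    assembly){
        ?
        assembler_tool clean_reads)/filtered.fastq -t [cpu]
    }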
data/lib/autoflow/version.rb ADDED
@@ -0,0 +1,3 @@
+ module Autoflow
+   VERSION = "0.3.5"
+ end
data/lib/autoflow.rb ADDED
@@ -0,0 +1,7 @@
+
+ require "autoflow/version"
+ require "autoflow/stack"
+
+ module Autoflow
+   # Your code goes here...
+ end
metadata ADDED
@@ -0,0 +1,114 @@
+ --- !ruby/object:Gem::Specification
+ name: autoflow
+ version: !ruby/object:Gem::Version
+   version: 0.3.5
+   prerelease:
+ platform: ruby
+ authors:
+ - Pedro Seoane
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2014-07-22 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: net-ssh
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 2.8.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 2.8.0
+ - !ruby/object:Gem::Dependency
+   name: bundler
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '1.3'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '1.3'
+ - !ruby/object:Gem::Dependency
+   name: rake
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ description: ! '"Autoflow makes easy to launch big pipelines on a queue system. Only
+   works with SLURM & PBS"'
+ email:
+ - seoanezonjic@hotmail.com
+ executables:
+ - AutoFlow
+ - env_manager
+ - flow_time
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - LICENSE.txt
+ - README.md
+ - Rakefile
+ - autoflow.gemspec
+ - bin/AutoFlow
+ - bin/env_manager
+ - bin/flow_time
+ - lib/autoflow.rb
+ - lib/autoflow/program.rb
+ - lib/autoflow/queue_manager.rb
+ - lib/autoflow/queue_managers/bash_manager.rb
+ - lib/autoflow/queue_managers/slurm_manager.rb
+ - lib/autoflow/stack.rb
+ - lib/autoflow/version.rb
+ homepage: ''
+ licenses:
+ - MIT
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 1.8.23
+ signing_key:
+ specification_version: 3
+ summary: ! '"This gem take a pipeline and launch it on a queue system"'
+ test_files: []