bio-pipengine 0.6.0

data/VERSION ADDED
@@ -0,0 +1 @@
+ 0.6.0
data/bin/pipengine ADDED
@@ -0,0 +1,101 @@
+ #!/usr/bin/env ruby
+
+ $: << File.expand_path(File.join(File.dirname(File.dirname __FILE__),"lib"))
+ require 'bio-pipengine'
+
+ banner_text = "\nLauncher for Complex Biological Pipelines. Copyright(C) 2013 Francesco Strozzi\n\n"
+ version_text = File.read File.expand_path(File.join(File.dirname(File.dirname __FILE__),"VERSION"))
+ SUB_COMMANDS = %w(run jobs)
+
+ Bio::Pipengine.check_config
+
+ options = {}
+ cmd = ARGV.first # get the subcommand
+ opts = case cmd
+ when "run"
+   options[:run] = true
+   ARGV.shift
+   Trollop::options do
+     opt :pipeline, "YAML file with pipeline and sample details", :short => "p", :type => :string, :default => "pipeline.yml"
+     opt :samples_file, "YAML file with sample names and directory paths", :short => "f", :type => :string, :default => "samples.yml"
+     opt :samples, "List of sample names to run the pipeline on", :type => :strings, :short => "l"
+     opt :steps, "List of steps to be executed", :type => :strings, :short => "s"
+     opt :dry, "Dry run: just create the job script without submitting it to the batch system", :short => "d"
+     opt :spooler, "Destination spooler (pbs or a plain shell script)", :short => "x", :type => :string, :default => "pbs"
+     opt :tmp, "Temporary output folder", :type => :string, :short => "t"
+     opt :create_samples, "Create a samples.yml file from a sample directory (CASAVA projects only)", :short => "c", :type => :strings
+     opt :multi, "List of samples to be processed by a given step (the order matters)", :short => "m", :type => :strings
+     opt :group, "Group of samples to run the pipeline steps on (do not combine with --multi)", :short => "g", :type => :string
+     opt :name, "Analysis name", :short => "n", :type => :string
+     opt :output_dir, "Output directory (overrides the standard output directory names)", :short => "o", :type => :string
+     opt :pbs_opts, "PBS options", :type => :strings, :short => "b"
+     opt :pbs_queue, "PBS queue", :type => :string, :short => "q"
+     opt :inspect_pipeline, "Show the steps of a pipeline file", :short => "i", :type => :string
+     opt :mail_exit, "Send an email when the job terminates", :type => :string
+     opt :mail_start, "Send an email when the job starts", :type => :string
+     opt :log, "Log script activities. Default is stdin; syslog and fluentd are also available", :type => :string, :default => "stdin"
+     opt :log_adapter, "(stdin|syslog|fluentd) For fluentd, use http://destination.hostname:port/yourtag", :type => :string
+   end
+ when "jobs"
+   ARGV.shift
+   options[:jobs] = true
+   Trollop::options do
+     opt :job_id, "Search submitted jobs by job ID", :type => :strings, :short => "i"
+     opt :job_name, "Search submitted jobs by job name", :type => :strings, :short => "n"
+     opt :delete, "Delete submitted jobs ('all' to erase everything, or one or more job IDs)", :short => "d", :type => :strings
+   end
+ when "-h"
+   puts banner_text
+   puts "List of available commands:\n\trun\tSubmit pipelines to the job scheduler\n\tjobs\tShow statistics and interact with running jobs\n"
+   exit
+ else
+   global_opts = Trollop::options do
+     banner banner_text
+     version "PipEngine v#{version_text}"
+   end
+ end
+
+ options = options.merge opts
+ Trollop::die :multi, "Specifying both --group and --multi is not allowed" if options[:multi] and options[:group]
+
+ if options[:create_samples]
+   Bio::Pipengine.create_samples options[:create_samples]
+ elsif options[:jobs]
+   if options[:job_id]
+     Bio::Pipengine.show_stats(options[:job_id])
+   elsif options[:job_name]
+     warn "Not yet implemented"
+     exit
+   elsif options[:delete]
+     if options[:delete].empty?
+       warn "Provide one or more job IDs, or write 'all' to delete all your running jobs".red
+       exit
+     end
+     puts "Warning: this will delete the following running jobs: ".light_blue + "#{options[:delete].join(",")}".green
+     print "Are you sure? (y|n):"
+     answer = gets.chomp
+     if answer == "y"
+       Bio::Pipengine.delete_jobs(options[:delete])
+     else
+       puts "Aborting..."
+       exit
+     end
+   else
+     Bio::Pipengine.show_stats(["all"])
+   end
+ elsif options[:pipeline] && options[:samples_file]
+   if options[:inspect_pipeline]
+     Bio::Pipengine.inspect_steps(options[:inspect_pipeline])
+     exit
+   else
+     abort("File not found: #{options[:pipeline]}".red) unless File.exists? options[:pipeline]
+     abort("File not found: #{options[:samples_file]}".red) unless File.exists? options[:samples_file]
+     abort("Please provide at least one valid step name with the --steps parameter".red) unless options[:steps]
+     Bio::Pipengine.run(options)
+   end
+ end
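For orientation, here is a minimal sketch of the two YAML files the "run" subcommand loads. The contents are illustrative, not taken from the gem's documentation; only the keys match those read by the library code further down (pipeline["steps"], pipeline["resources"], samples_file["samples"], samples_file["resources"]["output"]).

  require 'yaml'

  # pipeline.yml sketch: a named pipeline with shared resources and one step.
  # The "mapping" step, the BWA command line and all paths are hypothetical.
  pipeline = YAML.load <<-YML
  pipeline: example
  resources:
    genome: /storage/genomes/genome.fa
  steps:
    mapping:
      desc: Align reads
      cpu: 4
      run: bwa mem -t <cpu> <genome> <sample_path> > <sample>.sam
  YML

  # samples.yml sketch: an output root plus sample names mapped to input paths.
  samples = YAML.load <<-YML
  resources:
    output: /storage/results
  samples:
    SampleA: /ngs/reads/SampleA
  YML

  pipeline["steps"].keys # => ["mapping"]
  samples["samples"]     # => {"SampleA"=>"/ngs/reads/SampleA"}
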
@@ -0,0 +1,271 @@
+ module Bio
+   module Pipengine
+
+     class Job
+
+       # A Job object holds information on a job to be submitted.
+       # multi_samples and samples_obj store information for steps that need to
+       # combine data from multiple samples.
+       attr_accessor :name, :cpus, :nodes, :mem, :resources, :command_line, :local,
+                     :multi_samples, :samples_obj, :custom_output, :custom_name,
+                     :log, :log_adapter
+
+       def initialize(name)
+         @name = generate_uuid + "-" + name
+         @shortname = name
+         @command_line = []
+         @resources = {}
+         @cpus = 1
+         @nodes = "1"
+         @log = "stdin"
+         @log_adapter = nil
+       end
+
+       def add_resources(resources)
+         self.resources.merge! resources
+       end
+
+       def output
+         self.resources["output"]
+       end
+
+       # add all the command lines for a given step
+       def add_step(step,sample)
+
+         # set the job working directory
+         working_dir = ""
+         if self.local
+           working_dir = self.local+"/"+self.name
+         else
+           working_dir = self.output
+           if step.is_multi?
+             folder = (self.custom_output) ? self.custom_output : @shortname
+             working_dir += "/#{folder}"
+           else
+             folder =
+               if self.custom_output
+                 self.custom_output
+               elsif self.custom_name
+                 self.custom_name
+               else
+                 step.name
+               end
+             working_dir += "/#{sample.name}/#{folder}"
+           end
+         end
+
+         # set the job CPU count to the highest step CPU count (in case of multiple steps)
+         self.cpus = step.cpus if step.cpus > self.cpus
+
+         # set the number of nodes for the job
+         self.nodes = (step.nodes) ? step.nodes : @nodes
+
+         # set the memory used
+         self.mem = step.mem
+
+         # create the working directory, guarded by a checkpoint file
+         # (steps whose name starts with "_" skip the checkpoint guard)
+         unless step.name.start_with? "_"
+           self.command_line << "if [ ! -f #{working_dir}/checkpoint ]"
+           self.command_line << "then"
+           self.command_line << logger(step, "start")
+           self.command_line << "\nmkdir -p #{working_dir}"
+           self.command_line << "cd #{working_dir}"
+         end
+
+         # generate the command lines for this step
+         if step.run.kind_of? Array
+           step.run.each_with_index do |cmd, i|
+             command = generate_cmd_line(cmd,sample,step)
+             # TODO verify that logger works in this case
+             # self.command_line << "#{command} || { echo \"FAILED `date`: #{step.name}:#{i}\" ; exit 1; }"
+             self.command_line << "#{command} || { #{logger(step, "FAILED #{i}" )}; exit 1; }"
+           end
+         else
+           command = generate_cmd_line(step.run,sample,step)
+           # TODO verify that logger works in this case
+           # self.command_line << "#{command} || { echo \"FAILED `date`: #{step.name} \" ; exit 1; }"
+           self.command_line << "#{command} || { #{logger(step, "FAILED" )}; exit 1; }"
+         end
+         # close the checkpoint guard opened above (again skipped for "_" steps,
+         # which would otherwise emit a stray else/fi)
+         unless step.name.start_with? "_"
+           self.command_line << logger(step, "finished")
+           self.command_line << "touch #{working_dir}/checkpoint"
+           self.command_line << "else"
+           self.command_line << logger(step, "already executed, skipping this step")
+           self.command_line << "fi"
+         end
+
+         # check if a temporary (i.e. different from 'output') directory is set
+         if self.local
+           final_output = ""
+           if step.is_multi?
+             folder = (self.custom_output) ? self.custom_output : @shortname
+             final_output = self.output+"/#{folder}"
+           else
+             folder = (self.custom_output) ? self.custom_output : step.name
+             final_output = self.output+"/#{sample.name}/#{folder}"
+           end
+           self.command_line << "mkdir -p #{final_output}"
+           self.command_line << "cp -r #{working_dir}/* #{final_output}"
+           self.command_line << "rm -fr #{working_dir}"
+         end
+
+       end
+
+       # convert the Job object into a TORQUE::Qsub object
+       def to_pbs(options)
+         TORQUE::Qsub.new(options) do |torque_job|
+           torque_job.name = self.name
+           torque_job.working_directory = self.output # where PBS scripts and stdout / stderr files will be saved
+           if options[:pbs_opts]
+             torque_job.l = options[:pbs_opts]
+           else
+             l_string = []
+             l_string << "nodes=#{self.nodes}:ppn=#{self.cpus}"
+             l_string << "mem=#{self.mem}" if self.mem
+             torque_job.l = l_string
+             if options[:mail_exit]
+               torque_job.m = "e"
+               torque_job.M = options[:mail_exit]
+             end
+             if options[:mail_start]
+               torque_job.m = "b"
+               torque_job.M = options[:mail_start]
+             end
+           end
+           torque_job.q = options[:pbs_queue] if options[:pbs_queue]
+           torque_job.script = self.command_line.join("\n")+"\n"
+         end
+       end
+
+       # write the job out as a plain shell script
+       def to_script(options)
+         File.open(self.name+'.sh','w') do |file|
+           file.puts "#!/bin/bash -l"
+           file.puts self.command_line.join("\n")
+         end
+       end
+
+       private
+
+       # create a unique ID for each job
+       def generate_uuid
+         SecureRandom.hex(5)
+       end
+
+       # call the right method to perform the placeholder substitutions in the command lines
+       def generate_cmd_line(cmd,sample,step)
+         if step.is_multi? # a multi-sample step needs a dedicated method
+           set_multi_cmd(step,self.multi_samples)
+           cmd = sub_multi(cmd,step)
+         else
+           cmd = sub_placeholders(cmd,sample,step) # normal step: perform the usual substitutions
+         end
+         return cmd
+       end
+
+       # perform substitutions on all the placeholders
+       def sub_placeholders(cmd,sample,step=nil)
+         tmp_cmd = cmd.gsub(/<sample>/,sample.name)
+         if tmp_cmd =~ /<sample_path>/
+           sample_path_glob = (tmp_cmd.scan(/<sample_path>(\S+)/).map {|e| e.first})
+           if sample_path_glob.empty?
+             tmp_cmd.gsub!(/<sample_path>/,sample.path.join("\s"))
+           else
+             sample_path_glob.each do |append|
+               tmp_cmd.gsub!(/<sample_path>#{Regexp.quote(append)}/,(sample.path.map {|s| s+append}).join("\s"))
+             end
+           end
+         end
+         # for resources and cpus
+         tmp_cmd = sub_resources_and_cpu(tmp_cmd,step)
+
+         # for placeholders like <mapping/sample>
+         tmp_cmd.scan(/<(\S+)\/sample>/).map {|e| e.first}.each do |input_folder|
+           warn "Directory #{self.output+"/"+sample.name+"/"+input_folder} not found".magenta unless Dir.exists? self.output+"/"+sample.name+"/"+input_folder
+           tmp_cmd = tmp_cmd.gsub(/<#{input_folder}\/sample>/,self.output+"/"+sample.name+"/"+input_folder+"/"+sample.name)
+         end
+
+         # for placeholders like <mapping/>
+         tmp_cmd.scan(/<(\S+)\/>/).map {|e| e.first}.each do |input_folder|
+           warn "Directory #{self.output+"/"+sample.name+"/"+input_folder} not found".magenta unless Dir.exists? self.output+"/"+sample.name+"/"+input_folder
+           tmp_cmd = tmp_cmd.gsub(/<#{input_folder}\/>/,self.output+"/"+sample.name+"/"+input_folder+"/")
+         end
+         return tmp_cmd
+       end
+
+       def sub_resources_and_cpu(cmd,step)
+         # for all resource tags like <gtf>, <index>, <genome>, <bwa> etc.
+         self.resources.each_key do |r|
+           cmd.gsub!(/<#{r}>/,self.resources[r])
+         end
+         # set the number of cpus for this command line
+         cmd.gsub!(/<cpu>/,step.cpus.to_s) unless step.nil?
+         return cmd
+       end
+
+       # create the actual multi-sample command lines to be substituted where <multi> placeholders are found
+       def set_multi_cmd(step,multi_samples)
+         if step.multi_def.kind_of? Array # in case of many multi-sample command lines
+           step.multi_cmd = []
+           step.multi_def.each do |m_def|
+             step.multi_cmd << generate_multi_cmd(m_def,multi_samples)
+           end
+         else
+           step.multi_cmd = generate_multi_cmd(step.multi_def,multi_samples)
+         end
+       end
+
+       # take the multi_cmd and perform the substitutions in the step command lines
+       def sub_multi(cmd,step)
+         cmd = sub_resources_and_cpu(cmd,step)
+         if step.multi_cmd.kind_of? Array
+           step.multi_cmd.each_with_index do |m,index|
+             cmd.gsub!(/<multi#{index+1}>/,m)
+           end
+         else
+           cmd.gsub!(/<multi>/,step.multi_cmd)
+         end
+         return cmd
+       end
+
+       # this helper handles the different multi-sample definitions (comma-separated, semicolon-separated, etc.)
+       def generate_multi_cmd(multi_def,multi_samples)
+         multi_cmd = []
+         multi_samples.each do |sample_name|
+           if sample_name.include? ","
+             multi_cmd << split_and_sub(",",multi_def,sample_name)
+           elsif sample_name.include? ";"
+             multi_cmd << split_and_sub(";",multi_def,sample_name)
+           else
+             multi_cmd << sub_placeholders(multi_def,self.samples_obj[sample_name])
+           end
+         end
+         return multi_cmd.join("\s")
+       end
+
+       # take a separator-delimited list of samples and perform the substitution with the group definitions
+       def split_and_sub(sep,multi_def,multi)
+         cmd_line = []
+         multi.split(sep).each do |sample_name|
+           cmd_line << sub_placeholders(multi_def,self.samples_obj[sample_name])
+         end
+         cmd_line.join(sep)
+       end
+
+       # log a step according to the selected adapter
+       def logger(step, message)
+         case self.log
+         when "stdin"
+           "echo \"#{step.name} #{name} #{message} `whoami` `hostname` `pwd` `date`.\""
+         when "syslog"
+           "logger -t PIPENGINE \"#{step.name} #{name} #{message} `whoami` `hostname` `pwd`\""
+         when "fluentd"
+           "curl -X POST -d 'json={\"source\":\"PIPENGINE\", \"step\":\"#{step.name}\", \"message\":\"#{message}\", \"job_id\":\"#{name}\", \"user\":\"\'\"`whoami`\"\'\", \"host\":\"\'\"`hostname`\"\'\", \"pwd\":\"\'\"`pwd`\"\'\"}' #{self.log_adapter}"
+         end
+       end # logger
+
+     end
+   end
+ end
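As a quick illustration of the gsub-based placeholder mechanics used by sub_placeholders and sub_resources_and_cpu above, here is a standalone sketch; the sample name, resource values and command line are invented:

  resources = { "genome" => "/storage/genomes/genome.fa" }
  cmd = "bwa mem -t <cpu> <genome> <sample_path> > <sample>.sam"

  cmd = cmd.gsub(/<sample>/, "SampleA")                        # sample name
  cmd = cmd.gsub(/<sample_path>/, "/ngs/reads/SampleA")        # input path(s)
  resources.each_key { |r| cmd.gsub!(/<#{r}>/, resources[r]) } # resource tags
  cmd.gsub!(/<cpu>/, "4")                                      # step CPU count

  cmd # => "bwa mem -t 4 /storage/genomes/genome.fa /ngs/reads/SampleA > SampleA.sam"
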
@@ -0,0 +1,13 @@
+ module Bio
+   module Pipengine
+     class Sample
+       # Sample holds all the information on a sample and its original input path (or multiple paths)
+       attr_accessor :path, :name
+       def initialize(name,path_string)
+         @path = path_string.split(",")
+         @name = name
+       end
+     end
+   end
+ end
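Usage is straightforward; the comma splitting in the constructor is what lets a single sample span several input directories (paths below are invented):

  sample = Bio::Pipengine::Sample.new("SampleA", "/ngs/run1/SampleA,/ngs/run2/SampleA")
  sample.name # => "SampleA"
  sample.path # => ["/ngs/run1/SampleA", "/ngs/run2/SampleA"]
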
@@ -0,0 +1,39 @@
+ module Bio
+   module Pipengine
+
+     # Step holds the information for a pipeline step.
+     # multi_def stores the multi-sample command line definitions (i.e. generic
+     # command lines with placeholders), while multi_cmd stores the actual command
+     # lines for all the samples to be combined in a multi-sample step. These are
+     # generated by combining the multi_def information with the sample group
+     # information, and are placed wherever a <multi> placeholder is found in the
+     # step command lines.
+     class Step
+       attr_accessor :name, :run, :cpus, :mem, :nodes, :multi_def, :multi_cmd, :pre
+       def initialize(name,step_instructions)
+         @name = name
+         parse_yaml(step_instructions)
+       end
+
+       def is_multi?
+         return (self.multi_def.nil?) ? false : true
+       end
+
+       def has_prerequisite?
+         return (self.pre.nil?) ? false : true
+       end
+
+       private
+
+       def parse_yaml(step_instructions)
+         self.cpus = step_instructions["cpu"].to_i
+         self.nodes = step_instructions["nodes"]
+         self.mem = step_instructions["mem"]
+         self.run = step_instructions["run"]
+         self.multi_def = step_instructions["multi"]
+         self.pre = step_instructions["pre"]
+       end
+
+     end
+
+   end
+ end
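The hash handed to Step.new mirrors one entry of the pipeline file's "steps" section. A small hedged example (step name, memory value and command are invented):

  step = Bio::Pipengine::Step.new("mapping",
    "cpu" => 4,
    "mem" => "8G",
    "run" => "bwa mem -t <cpu> <genome> <sample_path> > <sample>.sam")
  step.is_multi?         # => false, since no "multi" key was given
  step.has_prerequisite? # => false, since no "pre" key was given
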
@@ -0,0 +1,234 @@
+ module Bio
+   module Pipengine
+
+     def self.run(options)
+
+       # read the YAML files
+       pipeline = YAML.load_file options[:pipeline]
+       samples_file = YAML.load_file options[:samples_file]
+       samples_file["samples"].each do |k,v|
+         if v.kind_of? Hash
+           samples_file["samples"][k] = Hash[samples_file["samples"][k].map{ |key, value| [key.to_s, value.to_s] }]
+         else
+           samples_file["samples"][k] = v.to_s
+         end
+       end
+       # make sure everything in Samples and Resources is converted to a string
+       #samples_file["samples"] = Hash[samples_file["samples"].map{ |key, value| [key.to_s, value.to_s] }]
+       samples_file["resources"] = Hash[samples_file["resources"].map {|k,v| [k.to_s, v.to_s]}]
+
+       # pre-run checks
+       check_steps(options[:steps],pipeline)
+       check_samples(options[:samples],samples_file) if options[:samples]
+
+       # list of samples the jobs will work on
+       samples_list = nil
+       # check if a group is specified
+       if options[:group]
+         samples_list = options[:samples] ? samples_file["samples"][options[:group]].select {|k,v| options[:samples].include? k} : samples_file["samples"][options[:group]]
+         options[:multi] = samples_list.keys
+         samples_file["resources"]["output"] << "/#{options[:group]}"
+       else # if not, normalize the sample list to remove groups and get a flat list of all samples
+         full_list_samples = {}
+         samples_file["samples"].each_key do |k|
+           if samples_file["samples"][k].kind_of? Hash
+             full_list_samples.merge! samples_file["samples"][k]
+           else
+             full_list_samples[k] = samples_file["samples"][k]
+           end
+         end
+         samples_list = options[:samples] ? full_list_samples.select {|k,v| options[:samples].include? k} : full_list_samples
+       end
+
+       ########### START ###########
+
+       # create the output directory (job scripts will be saved there)
+       FileUtils.mkdir_p samples_file["resources"]["output"] unless options[:dry] #&& options[:spooler]!="pbs"
+
+       # check if the requested steps are multi-sample steps
+       run_multi = check_and_run_multi(samples_file,pipeline,samples_list,options)
+
+       unless run_multi # there are no multi-sample steps, so iterate over the samples and create one job per sample
+         samples_list.each_key do |sample_name|
+           sample = Bio::Pipengine::Sample.new(sample_name,samples_list[sample_name])
+           create_job(samples_file,pipeline,samples_list,options,sample)
+         end
+       end
+     end
+
+     # handle steps that run on multiple samples (i.e. sample group jobs)
+     def self.check_and_run_multi(samples_file,pipeline,samples_list,options)
+       step_multi = options[:steps].map {|s| Bio::Pipengine::Step.new(s,pipeline["steps"][s]).is_multi?}
+       if step_multi.include? false
+         if step_multi.uniq.size > 1
+           puts "\nAbort! You are trying to run both multi-sample and single-sample steps in the same job".red
+           exit
+         else
+           return false
+         end
+       else
+         samples_obj = {}
+         samples_list.each_key {|sample_name| samples_obj[sample_name] = Bio::Pipengine::Sample.new(sample_name,samples_list[sample_name])}
+         create_job(samples_file,pipeline,samples_list,options,samples_obj)
+         return true
+       end
+     end
+
+     def self.create_job(samples_file,pipeline,samples_list,options,sample)
+       # get the sample name (only if this is not a multi-sample job)
+       sample_name = (sample.kind_of? Hash) ? nil : sample.name+"-"
+       # set the job name
+       job_name = nil
+       if options[:name]
+         job_name = options[:name]
+       elsif options[:steps].size > 1
+         job_name = "#{sample_name}#{options[:steps].join("-")}"
+       else
+         job_name = "#{sample_name}#{options[:steps].first}"
+       end
+       # create the Job object
+       job = Bio::Pipengine::Job.new(job_name)
+       job.local = options[:tmp]
+       job.custom_output = options[:output_dir]
+       job.custom_name = (options[:name]) ? options[:name] : nil
+       job.add_resources pipeline["resources"]
+       job.add_resources samples_file["resources"]
+       # set the logging system
+       job.log = options[:log]
+       job.log_adapter = options[:log_adapter]
+       # set sample groups, either from the CLI option (if present) or by taking all available samples
+       job.multi_samples = (options[:multi]) ? options[:multi] : samples_list.keys
+       job.samples_obj = sample if sample.kind_of? Hash
+       # cycle through the steps and add the command lines to the job
+       options[:steps].each do |step_name|
+         # TODO WARNING this can add the same step multiple times if there are multiple dependencies on it
+         self.add_job(job, pipeline, step_name, sample)
+       end
+
+       if options[:dry] #&& options[:spooler] == "script"
+         job.to_script(options)
+       else
+         script = job.to_pbs(options) # convert the Job into a TORQUE::Qsub PBS-compatible object
+         job_id = script.submit(options)
+         puts "#{job_id}".green unless options[:dry]
+       end
+     end
+
+     # check that the requested samples exist
+     def self.check_samples(passed_samples,samples)
+       passed_samples.each do |sample|
+         samples_names = []
+         samples["samples"].each_key do |k|
+           if samples["samples"][k].kind_of? Hash
+             samples["samples"][k].each_key {|s| samples_names << s}
+           else
+             samples_names << k
+           end
+         end
+         unless samples_names.include? sample
+           puts "Sample \"#{sample}\" does not exist in the samples file!".red
+           exit
+         end
+       end
+     end
+
+     # check that the requested steps exist
+     def self.check_steps(passed_steps,pipeline)
+       passed_steps.each do |step|
+         unless pipeline["steps"].keys.include? step
+           puts "Step \"#{step}\" does not exist in the pipeline file!".red
+           exit
+         end
+       end
+     end
+
+     # load the pipeline file and show a list of available steps
+     def self.inspect_steps(pipeline_file)
+       pipeline = YAML.load_file pipeline_file
+       print "\nPipeline: ".blue
+       print "#{pipeline["pipeline"]}\n\n".green
+       puts "List of available steps:".light_blue
+       pipeline["steps"].each_key do |s|
+         print "\s\s#{s}:\s\s".blue
+         print "#{pipeline["steps"][s]["desc"]}\n".green
+       end
+       puts "\n"
+     end
+
+     # create the samples.yml file (CASAVA projects only!)
+     def self.create_samples(dir)
+       File.open("samples.yml","w") do |file|
+         file.write "resources:\n\soutput: #{FileUtils.pwd}\n\nsamples:\n"
+         samples = Hash.new {|hash,key| hash[key] = []}
+         dir.each do |path|
+           projects = Dir.glob(path+"/*").sort.select {|folders| folders.split("/")[-1] =~ /Project_/}
+           unless projects.empty?
+             projects.each do |project_folder|
+               Dir.glob(project_folder+"/*").sort.each {|s| samples[s.split("/")[-1]] << s}
+             end
+           else
+             Dir.glob(path+"/*").sort.each {|s| samples[s.split("/")[-1]] << s if Dir.exists? s}
+           end
+         end
+         samples.each_key do |sample|
+           file.write "\s"+sample+":\s"+samples[sample].join(",")+"\n"
+         end
+       end
+     end
+
+     # show information on running jobs
+     def self.show_stats(job_ids)
+       stats = TORQUE::Qstat.new
+       if job_ids.first == "all"
+         stats.display
+       else
+         stats.display(:job_ids => job_ids)
+       end
+     end
+
+     # delete running jobs from the scheduler
+     def self.delete_jobs(job_ids)
+       include TORQUE
+       if job_ids == ["all"]
+         Qdel.rm_all
+       else
+         job_ids.each {|job_id| Qdel.rm job_id}
+       end
+     end # delete_jobs
+
+     # check that the required configuration exists
+     def self.check_config
+       unless File.exists?("#{Dir.home}/.torque_rm.yaml")
+         ARGV.clear
+         current_user = Etc.getlogin
+         puts "\nIt seems you are running PipEngine for the first time. Please fill in the following information:"
+         print "\nHostname or IP address of the authorized server from where jobs will be submitted: ".light_blue
+         server = gets.chomp
+         print "\n"
+         print "Specify the username you will be using to connect and submit jobs [#{current_user}]: ".light_blue
+         username = gets.chomp
+         username = (username == "") ? current_user : username
+         puts "Attempting connection to the server...".green
+         path = `ssh #{username}@#{server} -t "which qsub"`.split("/qsub").first
+         unless path =~ /\/\S+\/\S+/
+           warn "Connection problems detected! Please check that you are able to connect to '#{server}' as '#{username}' via ssh.".red
+         else
+           file = File.open("#{Dir.home}/.torque_rm.yaml","w")
+           file.write({:hostname => server, :path => path, :user => username}.to_yaml)
+           file.close
+           puts "First time configuration completed!".green
+           puts "It is strongly recommended to set up a password-less SSH connection to use PipEngine.".green
+           exit
+         end
+       end
+     end # check_config
+
+     def self.add_job(job, pipeline, step_name, sample)
+       step = Bio::Pipengine::Step.new(step_name,pipeline["steps"][step_name]) # parse the step instructions
+       self.add_job(job, pipeline, step.pre, sample) if step.has_prerequisite? # recurse into prerequisite steps first
+       job.add_step(step,sample) # add the step command lines to the job
+     end # add_job
+
+   end
+ end
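Note that self.add_job recurses on step.pre before adding the step itself, so prerequisite command lines are queued first. A hedged sketch with invented step names and commands:

  pipeline = {
    "steps" => {
      "mapping" => { "cpu" => 4, "run" => "bwa mem -t <cpu> <genome> <sample_path> > <sample>.sam" },
      "calling" => { "cpu" => 2, "pre" => "mapping", "run" => "samtools mpileup <mapping/sample>.sam" }
    }
  }
  # add_job(job, pipeline, "calling", sample) first adds the "mapping" command
  # lines, then the "calling" ones, because "calling" declares pre: mapping.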