bio-pipengine 0.6.0

data/VERSION ADDED
@@ -0,0 +1 @@
+ 0.6.0
data/bin/pipengine ADDED
@@ -0,0 +1,101 @@
+ #!/usr/bin/env ruby
+
+ $: << File.expand_path(File.join(File.dirname(File.dirname(__FILE__)),"lib"))
+ require 'bio-pipengine'
+
+ banner_text = "\nLauncher for Complex Biological Pipelines. Copyright (C) 2013 Francesco Strozzi\n\n"
+ version_text = File.read File.expand_path(File.join(File.dirname(File.dirname(__FILE__)),"VERSION"))
+ SUB_COMMANDS = %w(run jobs)
+
+
+ Bio::Pipengine.check_config
+
+ options = {}
+ cmd = ARGV.first # get the sub-command
+ opts = case cmd
+ when "run"
+   options[:run] = true
+   ARGV.shift
+   Trollop::options do
+     opt :pipeline, "YAML file with pipeline and sample details", :short => "p", :type => :string, :default => "pipeline.yml"
+     opt :samples_file, "YAML file with sample names and directory paths", :short => "f", :type => :string, :default => "samples.yml"
+     opt :samples, "List of sample names to run the pipeline on", :type => :strings, :short => "l"
+     opt :steps, "List of steps to be executed", :type => :strings, :short => "s"
+     opt :dry, "Dry run. Just create the job script without submitting it to the batch system", :short => "d"
+     opt :spooler, "Destination spooler: PBS or a plain shell script", :short => "x", :type => :string, :default => "pbs"
+     opt :tmp, "Temporary output folder", :type => :string, :short => "t"
+     opt :create_samples, "Create a samples.yml file from a sample directory (only for CASAVA projects)", :short => "c", :type => :strings
+     opt :multi, "List of samples to be processed by a given step (the order matters)", :short => "m", :type => :strings
+     opt :group, "Group of samples to run the pipeline steps on (do not combine with --multi)", :short => "g", :type => :string
+     opt :name, "Analysis name", :short => "n", :type => :string
+     opt :output_dir, "Output directory (overrides the standard output directory names)", :short => "o", :type => :string
+     opt :pbs_opts, "PBS options", :type => :strings, :short => "b"
+     opt :pbs_queue, "PBS queue", :type => :string, :short => "q"
+     opt :inspect_pipeline, "Show the steps of a pipeline file", :short => "i", :type => :string
+     opt :mail_exit, "Send an email when the job terminates", :type => :string
+     opt :mail_start, "Send an email when the job starts", :type => :string
+     opt :log, "Log script activities; default is stdin. Other options are syslog and fluentd", :type => :string, :default => "stdin"
+     opt :log_adapter, "(stdin|syslog|fluentd) In case of fluentd, use http://destination.hostname:port/yourtag", :type => :string
+   end
+ when "jobs"
+   ARGV.shift
+   options[:jobs] = true
+   Trollop::options do
+     opt :job_id, "Search submitted jobs by job ID", :type => :strings, :short => "i"
+     opt :job_name, "Search submitted jobs by job name", :type => :strings, :short => "n"
+     opt :delete, "Delete submitted jobs ('all' to erase everything, or one or more job IDs)", :short => "d", :type => :strings
+   end
+ when "-h"
+   puts banner_text
+   puts "List of available commands:\n\trun\tSubmit pipelines to the job scheduler\n\tjobs\tShow statistics and interact with running jobs\n"
+   exit
+ else
+   global_opts = Trollop::options do
+     banner banner_text
+     version "PipEngine v#{version_text}"
+   end
+ end
+
+
+
+ options = options.merge opts
+ Trollop::die :multi, "Specifying both --group and --multi is not allowed" if options[:multi] and options[:group]
+
+ if options[:create_samples]
+   Bio::Pipengine.create_samples options[:create_samples]
+ elsif options[:jobs]
+   if options[:job_id]
+     Bio::Pipengine.show_stats(options[:job_id])
+   elsif options[:job_name]
+     warn "Not yet implemented"
+     exit
+   elsif options[:delete]
+     if options[:delete].empty?
+       warn "Provide one or more job IDs, or write 'all' to delete all your running jobs".red
+       exit
+     end
+     puts "Warning: this will delete the following running jobs: ".light_blue + "#{options[:delete].join(",")}".green
+     print "Are you sure? (y|n): "
+     answer = gets.chomp
+     if answer == "y"
+       Bio::Pipengine.delete_jobs(options[:delete])
+     else
+       puts "Aborting..."
+       exit
+     end
+   else
+     Bio::Pipengine.show_stats(["all"])
+   end
+ elsif options[:pipeline] && options[:samples_file]
+   if options[:inspect_pipeline]
+     Bio::Pipengine.inspect_steps(options[:inspect_pipeline])
+     exit
+   else
+     abort("File not found: #{options[:pipeline]}".red) unless File.exists? options[:pipeline]
+     abort("File not found: #{options[:samples_file]}".red) unless File.exists? options[:samples_file]
+     abort("Please provide at least one valid step name with the --steps parameter".red) unless options[:steps]
+     Bio::Pipengine.run(options)
+   end
+ end
+
+
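For reference, a hypothetical invocation of the launcher above, assuming a pipeline.yml that defines a step named mapping and a samples.yml listing sampleA and sampleB (all file, step and sample names here are illustrative, not part of the package):

    pipengine run -p pipeline.yml -f samples.yml -s mapping -l sampleA sampleB
    pipengine jobs -i 101 102    # query two submitted jobs by ID
    pipengine jobs -d all        # delete all running jobs (asks for confirmation)

The first command builds one PBS job per sample; adding -d would only write the job scripts without submitting them to the batch system.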
data/lib/bio/pipengine/job.rb ADDED
@@ -0,0 +1,271 @@
+ module Bio
+   module Pipengine
+
+     class Job
+
+       # a Job object holds information on a job to be submitted
+       # multi_samples and samples_obj store information for steps that need to combine
+       # data from multiple samples
+       attr_accessor :name, :cpus, :nodes, :mem, :resources, :command_line, :local,
+                     :multi_samples, :samples_obj, :custom_output, :custom_name,
+                     :log, :log_adapter
+       def initialize(name)
+         @name = generate_uuid + "-" + name
+         @shortname = name
+         @command_line = []
+         @resources = {}
+         @cpus = 1
+         @nodes = "1"
+         @log = "stdin"
+         @log_adapter = nil
+       end
+
+       def add_resources(resources)
+         self.resources.merge! resources
+       end
+
+       def output
+         self.resources["output"]
+       end
+
+       # add all the command lines for a given step
+       def add_step(step,sample)
+
+         # set the job working directory
+         working_dir = ""
+         if self.local
+           working_dir = self.local+"/"+self.name
+         else
+           working_dir = self.output
+
+           if step.is_multi?
+             folder = (self.custom_output) ? self.custom_output : @shortname
+             working_dir += "/#{folder}"
+           else
+             folder =
+               if self.custom_output
+                 self.custom_output
+               elsif self.custom_name
+                 self.custom_name
+               else
+                 step.name
+               end
+             working_dir += "/#{sample.name}/#{folder}"
+           end
+
+         end
+
+         # set the job CPU count to the highest step CPU count (in case of multiple steps)
+         self.cpus = step.cpus if step.cpus > self.cpus
+
+         # set the number of nodes for the job
+         self.nodes = (step.nodes) ? step.nodes : @nodes
+
+         # set the memory used
+         self.mem = step.mem
+
+         # create the job working directory, guarded by a checkpoint file
+         unless step.name.start_with? "_"
+           self.command_line << "if [ ! -f #{working_dir}/checkpoint ]"
+           self.command_line << "then"
+           self.command_line << logger(step, "start")
+           self.command_line << "\nmkdir -p #{working_dir}"
+           self.command_line << "cd #{working_dir}"
+         end
+
+         # generate the command lines for this step
+         if step.run.kind_of? Array
+           step.run.each_with_index do |cmd, i|
+             command = generate_cmd_line(cmd,sample,step)
+             # TODO verify that logger works in this case
+             # self.command_line << "#{command} || { echo \"FAILED `date`: #{step.name}:#{i}\" ; exit 1; }"
+             self.command_line << "#{command} || { #{logger(step, "FAILED #{i}" )}; exit 1; }"
+           end
+         else
+           command = generate_cmd_line(step.run,sample,step)
+           # TODO verify that logger works in this case
+           # self.command_line << "#{command} || { echo \"FAILED `date`: #{step.name} \" ; exit 1; }"
+           self.command_line << "#{command} || { #{logger(step, "FAILED" )}; exit 1; }"
+         end
+         self.command_line << logger(step, "finished")
+         self.command_line << "touch #{working_dir}/checkpoint"
+         self.command_line << "else"
+         self.command_line << logger(step, "already executed, skipping this step")
+         self.command_line << "fi"
+
+         # check if a temporary (i.e. different from 'output') directory is set
+         if self.local
+           final_output = ""
+
+           if step.is_multi?
+             folder = (self.custom_output) ? self.custom_output : @shortname
+             final_output = self.output+"/#{folder}"
+           else
+             folder = (self.custom_output) ? self.custom_output : step.name
+             final_output = self.output+"/#{sample.name}/#{folder}"
+           end
+
+           self.command_line << "mkdir -p #{final_output}"
+           self.command_line << "cp -r #{working_dir}/* #{final_output}"
+           self.command_line << "rm -fr #{working_dir}"
+         end
+
+       end
+
+       # convert the Job object into a TORQUE::Qsub object
+       def to_pbs(options)
+         TORQUE::Qsub.new(options) do |torque_job|
+           torque_job.name = self.name
+           torque_job.working_directory = self.output # where PBS scripts and stdout / stderr files will be saved
+           if options[:pbs_opts]
+             torque_job.l = options[:pbs_opts]
+           else
+             l_string = []
+             l_string << "nodes=#{self.nodes}:ppn=#{self.cpus}"
+             l_string << "mem=#{self.mem}" if self.mem
+             torque_job.l = l_string
+             if options[:mail_exit]
+               torque_job.m = "e"
+               torque_job.M = options[:mail_exit]
+             end
+             if options[:mail_start]
+               torque_job.m = "b"
+               torque_job.M = options[:mail_start]
+             end
+           end
+           torque_job.q = options[:pbs_queue] if options[:pbs_queue]
+           torque_job.script = self.command_line.join("\n")+"\n"
+         end
+       end
+
+       # write the job out as a plain shell script
+       def to_script(options)
+         File.open(self.name+'.sh','w') do |file|
+           file.puts "#!/usr/bin/env bash -l"
+           file.puts self.command_line.join("\n")
+         end
+       end
+
+       private
+
+       # create a unique ID for each job
+       def generate_uuid
+         SecureRandom.hex(5)
+       end
+
+       # call the right substitution methods for the command lines
+       def generate_cmd_line(cmd,sample,step)
+         if step.is_multi? # a multi-sample step is handled by a dedicated method
+           set_multi_cmd(step,self.multi_samples)
+           cmd = sub_multi(cmd,step)
+         else
+           cmd = sub_placeholders(cmd,sample,step) # normal step, perform the usual substitutions
+         end
+         return cmd
+       end
+
+       # perform substitutions on all the placeholders
+       def sub_placeholders(cmd,sample,step=nil)
+         tmp_cmd = cmd.gsub(/<sample>/,sample.name)
+         if tmp_cmd =~ /<sample_path>/
+           sample_path_glob = (tmp_cmd.scan(/<sample_path>(\S+)/).map {|e| e.first})
+           if sample_path_glob.empty?
+             tmp_cmd.gsub!(/<sample_path>/,sample.path.join("\s"))
+           else
+             sample_path_glob.each do |append|
+               tmp_cmd.gsub!(/<sample_path>#{Regexp.quote(append)}/,(sample.path.map {|s| s+append}).join("\s"))
+             end
+           end
+         end
+         # for resources and cpus
+         tmp_cmd = sub_resources_and_cpu(tmp_cmd,step)
+
+         # for placeholders like <mapping/sample>
+         tmp_cmd.scan(/<(\S+)\/sample>/).map {|e| e.first}.each do |input_folder|
+           warn "Directory #{self.output+"/"+sample.name+"/"+input_folder} not found".magenta unless Dir.exists? self.output+"/"+sample.name+"/"+input_folder
+           tmp_cmd = tmp_cmd.gsub(/<#{input_folder}\/sample>/,self.output+"/"+sample.name+"/"+input_folder+"/"+sample.name)
+         end
+
+         # for placeholders like <mapping/>
+         tmp_cmd.scan(/<(\S+)\/>/).map {|e| e.first}.each do |input_folder|
+           warn "Directory #{self.output+"/"+sample.name+"/"+input_folder} not found".magenta unless Dir.exists? self.output+"/"+sample.name+"/"+input_folder
+           tmp_cmd = tmp_cmd.gsub(/<#{input_folder}\/>/,self.output+"/"+sample.name+"/"+input_folder+"/")
+         end
+         return tmp_cmd
+       end
+
+       def sub_resources_and_cpu(cmd,step)
+         # for all resource tags like <gtf> <index> <genome> <bwa> etc.
+         self.resources.each_key do |r|
+           cmd.gsub!(/<#{r}>/,self.resources[r])
+         end
+         # set the number of cpus for this command line
+         cmd.gsub!(/<cpu>/,step.cpus.to_s) unless step.nil?
+         return cmd
+       end
+
+
+       # create the actual multi-sample command lines to be substituted where <multi> placeholders are found
+       def set_multi_cmd(step,multi_samples)
+         if step.multi_def.kind_of? Array # in case of several multi-sample command lines
+           step.multi_cmd = []
+           step.multi_def.each do |m_def|
+             step.multi_cmd << generate_multi_cmd(m_def,multi_samples)
+           end
+         else
+           step.multi_cmd = generate_multi_cmd(step.multi_def,multi_samples)
+         end
+       end
+
+       # take the multi_cmd and perform the substitutions in the step command lines
+       def sub_multi(cmd,step)
+         cmd = sub_resources_and_cpu(cmd,step)
+         if step.multi_cmd.kind_of? Array
+           step.multi_cmd.each_with_index do |m,index|
+             cmd.gsub!(/<multi#{index+1}>/,m)
+           end
+         else
+           cmd.gsub!(/<multi>/,step.multi_cmd)
+         end
+         return cmd
+       end
+
+       # handle the different multi-sample definitions (comma-separated lists, semicolon-separated lists etc.)
+       def generate_multi_cmd(multi_def,multi_samples)
+         multi_cmd = []
+         multi_samples.each do |sample_name|
+           if sample_name.include? ","
+             multi_cmd << split_and_sub(",",multi_def,sample_name)
+           elsif sample_name.include? ";"
+             multi_cmd << split_and_sub(";",multi_def,sample_name)
+           else
+             multi_cmd << sub_placeholders(multi_def,self.samples_obj[sample_name])
+           end
+         end
+         return multi_cmd.join("\s")
+       end
+
+       # take a separator-delimited list of samples and perform the substitutions with the group definitions
+       def split_and_sub(sep,multi_def,multi)
+         cmd_line = []
+         multi.split(sep).each do |sample_name|
+           cmd_line << sub_placeholders(multi_def,self.samples_obj[sample_name])
+         end
+         cmd_line.join(sep)
+       end
+
+       # log a step according to the selected adapter
+       def logger(step, message)
+         case self.log
+         when "stdin"
+           "echo \"#{step.name} #{name} #{message} `whoami` `hostname` `pwd` `date`.\""
+         when "syslog"
+           "logger -t PIPENGINE \"#{step.name} #{name} #{message} `whoami` `hostname` `pwd`\""
+         when "fluentd"
+           "curl -X POST -d 'json={\"source\":\"PIPENGINE\", \"step\":\"#{step.name}\", \"message\":\"#{message}\", \"job_id\":\"#{name}\", \"user\":\"\'\"`whoami`\"\'\", \"host\":\"\'\"`hostname`\"\'\", \"pwd\":\"\'\"`pwd`\"\'\"}' #{self.log_adapter}"
+         end
+       end #logger
+
+     end
+   end
+ end
+
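To make the placeholder mechanics above concrete, here is a standalone sketch that reproduces the effect of sub_placeholders and sub_resources_and_cpu on a single command line. It is a simplification of the real methods, and the resource names, paths and the bwa command are hypothetical:

    # illustrative only: mimics Job#sub_placeholders / Job#sub_resources_and_cpu
    resources = { "genome" => "/db/hg19.fa" }                        # from the resources sections
    sample    = { :name => "sampleA", :path => ["/data/run1/sampleA"] }
    cpus      = 8                                                    # from the step's cpu key
    cmd = "bwa mem -t <cpu> <genome> <sample_path>/*.fastq > <sample>.sam"
    cmd = cmd.gsub("<sample>", sample[:name])
    # <sample_path> followed by a suffix: the suffix is appended to every sample path
    cmd = cmd.gsub(/<sample_path>(\S+)/) { sample[:path].map { |p| p + $1 }.join(" ") }
    resources.each { |tag, value| cmd = cmd.gsub("<#{tag}>", value) }
    cmd = cmd.gsub("<cpu>", cpus.to_s)
    puts cmd
    # => bwa mem -t 8 /db/hg19.fa /data/run1/sampleA/*.fastq > sampleA.sam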
data/lib/bio/pipengine/sample.rb ADDED
@@ -0,0 +1,13 @@
+ module Bio
+   module Pipengine
+     class Sample
+       # Sample holds all the information on a sample and its original input path (or multiple, comma-separated paths)
+       attr_accessor :path, :name
+       def initialize(name,path_string)
+         @path = path_string.split(",")
+         @name = name
+       end
+     end
+   end
+ end
+
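Since the path string is split on commas, a sample defined with multiple input directories ends up with an array of paths; a quick example (paths hypothetical):

    sample = Bio::Pipengine::Sample.new("sampleA", "/data/run1/sampleA,/data/run2/sampleA")
    sample.name  # => "sampleA"
    sample.path  # => ["/data/run1/sampleA", "/data/run2/sampleA"]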
data/lib/bio/pipengine/step.rb ADDED
@@ -0,0 +1,39 @@
+ module Bio
+   module Pipengine
+
+     # Step holds the information for a pipeline step
+     # multi_def stores the multi-sample definitions (i.e. generic command lines with placeholders)
+     # multi_cmd stores the actual command lines for all the samples to be combined in a multi-sample step;
+     # these are generated by combining multi_def with the sample group information, and are placed
+     # where the <multi> placeholder is found in the step command lines.
+     class Step
+       attr_accessor :name, :run, :cpus, :mem, :nodes, :multi_def, :multi_cmd, :pre
+       def initialize(name,step_instructions)
+         @name = name
+         parse_yaml(step_instructions)
+       end
+
+       def is_multi?
+         return (self.multi_def.nil?) ? false : true
+       end
+
+       def has_prerequisite?
+         return (self.pre.nil?) ? false : true
+       end
+
+       private
+
+       def parse_yaml(step_instructions)
+         self.cpus = step_instructions["cpu"].to_i
+         self.nodes = step_instructions["nodes"]
+         self.mem = step_instructions["mem"]
+         self.run = step_instructions["run"]
+         self.multi_def = step_instructions["multi"]
+         self.pre = step_instructions["pre"]
+       end
+
+     end
+
+   end
+ end
+
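Given the keys read by parse_yaml above (cpu, nodes, mem, run, multi, pre), the desc field displayed by inspect_steps, and the pipeline-level resources merged into each job, a pipeline.yml would look roughly like this sketch (the pipeline, step and tool names are illustrative):

    pipeline: resequencing

    resources:
      genome: /db/hg19.fa

    steps:
      mapping:
        desc: align reads against the reference
        cpu: 8
        mem: 16G
        run: bwa mem -t <cpu> <genome> <sample_path> > <sample>.sam
      merge:
        desc: merge the per-sample alignments
        pre: mapping
        multi: <mapping/sample>.sam
        run: samtools merge merged.bam <multi>

A run value may also be a list of commands, and multi may be a list of definitions (substituted as <multi1>, <multi2>, ...), since Job#add_step and Job#set_multi_cmd both accept arrays.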
data/lib/bio/pipengine.rb ADDED
@@ -0,0 +1,234 @@
+ module Bio
+   module Pipengine
+
+     def self.run(options)
+
+       # read the YAML files
+       pipeline = YAML.load_file options[:pipeline]
+       samples_file = YAML.load_file options[:samples_file]
+       samples_file["samples"].each do |k,v|
+         if v.kind_of? Hash
+           samples_file["samples"][k] = Hash[samples_file["samples"][k].map{ |key, value| [key.to_s, value.to_s] }]
+         else
+           samples_file["samples"][k] = v.to_s
+         end
+       end
+       # make sure everything in Samples and Resources is converted to a string
+       #samples_file["samples"] = Hash[samples_file["samples"].map{ |key, value| [key.to_s, value.to_s] }]
+       samples_file["resources"] = Hash[samples_file["resources"].map {|k,v| [k.to_s, v.to_s]}]
+
+       # pre-run checks
+       check_steps(options[:steps],pipeline)
+       check_samples(options[:samples],samples_file) if options[:samples]
+
+       # list of samples the jobs will work on
+       samples_list = nil
+       # check if a group is specified
+       if options[:group]
+         samples_list = options[:samples] ? samples_file["samples"][options[:group]].select {|k,v| options[:samples].include? k} : samples_file["samples"][options[:group]]
+         options[:multi] = samples_list.keys
+         samples_file["resources"]["output"] << "/#{options[:group]}"
+       else # if not, normalize the sample list to remove groups and get a flat list of all samples
+         full_list_samples = {}
+         samples_file["samples"].each_key do |k|
+           if samples_file["samples"][k].kind_of? Hash
+             full_list_samples.merge! samples_file["samples"][k]
+           else
+             full_list_samples[k] = samples_file["samples"][k]
+           end
+         end
+         samples_list = options[:samples] ? full_list_samples.select {|k,v| options[:samples].include? k} : full_list_samples
+       end
+
+       ########### START ###########
+
+       # create the output directory (job scripts will be saved there)
+       FileUtils.mkdir_p samples_file["resources"]["output"] unless options[:dry] #&& options[:spooler]!="pbs"
+
+       # check if the requested steps are multi-sample steps
+       run_multi = check_and_run_multi(samples_file,pipeline,samples_list,options)
+
+       unless run_multi # there are no multi-sample steps, so iterate on samples and create one job per sample
+         samples_list.each_key do |sample_name|
+           sample = Bio::Pipengine::Sample.new(sample_name,samples_list[sample_name])
+           create_job(samples_file,pipeline,samples_list,options,sample)
+         end
+       end
+     end
+
+     # handle steps that run on multiple samples (i.e. sample group jobs)
+     def self.check_and_run_multi(samples_file,pipeline,samples_list,options)
+       step_multi = options[:steps].map {|s| Bio::Pipengine::Step.new(s,pipeline["steps"][s]).is_multi?}
+
+       if step_multi.include? false
+         if step_multi.uniq.size > 1
+           puts "\nAbort! You are trying to run both multi-sample and single-sample steps in the same job".red
+           exit
+         else
+           return false
+         end
+       else
+         samples_obj = {}
+         samples_list.each_key {|sample_name| samples_obj[sample_name] = Bio::Pipengine::Sample.new(sample_name,samples_list[sample_name])}
+         create_job(samples_file,pipeline,samples_list,options,samples_obj)
+         return true
+       end
+     end
+
+     def self.create_job(samples_file,pipeline,samples_list,options,sample)
+       # get the sample name (only if this is not a multi-sample job)
+       sample_name = (sample.kind_of? Hash) ? nil : sample.name+"-"
+       # set the job name
+       job_name = nil
+       if options[:name]
+         job_name = options[:name]
+       elsif options[:steps].size > 1
+         job_name = "#{sample_name}#{options[:steps].join("-")}"
+       else
+         job_name = "#{sample_name}#{options[:steps].first}"
+       end
+       # create the Job object
+       job = Bio::Pipengine::Job.new(job_name)
+       job.local = options[:tmp]
+       job.custom_output = options[:output_dir]
+       job.custom_name = (options[:name]) ? options[:name] : nil
+       job.add_resources pipeline["resources"]
+       job.add_resources samples_file["resources"]
+       # set the logging system
+       job.log = options[:log]
+       job.log_adapter = options[:log_adapter]
+       # set the sample groups, either from the CLI option (if present) or by taking all available samples
+       job.multi_samples = (options[:multi]) ? options[:multi] : samples_list.keys
+       job.samples_obj = sample if sample.kind_of? Hash
+       # cycle through the steps and add their command lines to the job
+       options[:steps].each do |step_name|
+         # TODO WARNING: this can add the same step multiple times if there are multiple dependencies
+         self.add_job(job, pipeline, step_name, sample)
+       end
+
+       if options[:dry] #&& options[:spooler] == "script"
+         job.to_script(options)
+       else
+         script = job.to_pbs(options) # convert the Job into a TORQUE::Qsub PBS-compatible object
+         job_id = script.submit(options)
+         puts "#{job_id}".green unless options[:dry]
+       end
+     end
+
+     # check that the requested samples exist
+     def self.check_samples(passed_samples,samples)
+       passed_samples.each do |sample|
+         samples_names = []
+         samples["samples"].each_key do |k|
+           if samples["samples"][k].kind_of? Hash
+             samples["samples"][k].each_key {|s| samples_names << s}
+           else
+             samples_names << k
+           end
+         end
+         unless samples_names.include? sample
+           puts "Sample \"#{sample}\" does not exist in the samples file!".red
+           exit
+         end
+       end
+     end
+
+     # check that the requested steps exist
+     def self.check_steps(passed_steps,pipeline)
+       passed_steps.each do |step|
+         unless pipeline["steps"].keys.include? step
+           puts "Step \"#{step}\" does not exist in the pipeline file!".red
+           exit
+         end
+       end
+     end
+
+     # load the pipeline file and show a list of available steps
+     def self.inspect_steps(pipeline_file)
+       pipeline = YAML.load_file pipeline_file
+       print "\nPipeline: ".blue
+       print "#{pipeline["pipeline"]}\n\n".green
+       puts "List of available steps:".light_blue
+       pipeline["steps"].each_key do |s|
+         print "\s\s#{s}:\s\s".blue
+         print "#{pipeline["steps"][s]["desc"]}\n".green
+       end
+       puts "\n"
+     end
+
+     # create the samples.yml file (CASAVA projects only!)
+     def self.create_samples(dir)
+       File.open("samples.yml","w") do |file|
+         file.write "resources:\n\soutput: #{FileUtils.pwd}\n\nsamples:\n"
+         samples = Hash.new {|hash,key| hash[key] = []}
+         dir.each do |path|
+           projects = Dir.glob(path+"/*").sort.select {|folders| folders.split("/")[-1] =~/Project_/}
+           unless projects.empty?
+             projects.each do |project_folder|
+               Dir.glob(project_folder+"/*").sort.each {|s| samples[s.split("/")[-1]] << s}
+             end
+           else
+             Dir.glob(path+"/*").sort.each {|s| samples[s.split("/")[-1]] << s if Dir.exists? s}
+           end
+         end
+         samples.each_key do |sample|
+           file.write "\s"+sample+":\s"+samples[sample].join(",")+"\n"
+         end
+       end
+     end
+
+     # show information on running jobs
+     def self.show_stats(job_ids)
+       stats = TORQUE::Qstat.new
+       if job_ids.first == "all"
+         stats.display
+       else
+         stats.display(:job_ids => job_ids)
+       end
+     end
+
+     # delete running jobs from the scheduler
+     def self.delete_jobs(job_ids)
+       include TORQUE
+       if job_ids == ["all"]
+         Qdel.rm_all
+       else
+         job_ids.each {|job_id| Qdel.rm job_id}
+       end
+     end #delete_jobs
+
+     # check that the required configuration exists
+     def self.check_config
+       unless File.exists?("#{Dir.home}/.torque_rm.yaml")
+         ARGV.clear
+         current_user = Etc.getlogin
+         puts "\nIt seems you are running PipEngine for the first time. Please fill in the following information:"
+         print "\nHostname or IP address of the authorized server from which jobs will be submitted: ".light_blue
+         server = gets.chomp
+         print "\n"
+         print "Specify the username you will be using to connect and submit jobs [#{current_user}]: ".light_blue
+         username = gets.chomp
+         username = (username == "") ? current_user : username
+         puts "Attempting connection to the server...".green
+         path = `ssh #{username}@#{server} -t "which qsub"`.split("/qsub").first
+         unless path =~ /\/\S+\/\S+/
+           warn "Connection problems detected! Please check that you are able to connect to '#{server}' as '#{username}' via ssh.".red
+         else
+           file = File.open("#{Dir.home}/.torque_rm.yaml","w")
+           file.write({:hostname => server, :path => path, :user => username}.to_yaml)
+           file.close
+           puts "First time configuration completed!".green
+           puts "It is strongly recommended to set up a password-less SSH connection to use PipEngine.".green
+           exit
+         end
+       end
+     end #check_config
+
+     def self.add_job(job, pipeline, step_name, sample)
+       step = Bio::Pipengine::Step.new(step_name,pipeline["steps"][step_name]) # parse the step instructions
+       self.add_job(job, pipeline, step.pre, sample) if step.has_prerequisite?
+       job.add_step(step,sample) # add the step command lines to the job
+     end #add_job
+
+   end
+ end
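Finally, run above expects samples.yml to carry a resources section with at least an output path, plus a samples section whose entries are either sample: path pairs or named groups of such pairs (the nested-Hash case handled when normalizing the sample list). A hypothetical sketch:

    resources:
      output: /projects/run1

    samples:
      sampleA: /data/run1/sampleA
      controls:
        sampleB: /data/run1/sampleB
        sampleC: /data/run1/sampleC,/data/run2/sampleC

Running with --group controls restricts the job to sampleB and sampleC and appends /controls to the output path, as done in the options[:group] branch of run.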