mutations_caller_pipeline_aws 0.0.15 → 0.0.16

@@ -4,6 +4,13 @@ require 'optparse'
  require 'rubygems'
  require 'yaml'

+ ###
+ # NOT FUNTIONAL ANYMORE
+ ###
+
+
+
+
  usage =<<EOF
  _________________________________________________________________________________________________

@@ -11,6 +18,7 @@ ________________________________________________________________________________
  -b sorted_bam_file
  -c config.yml -v raw_vcf_file
  [-a account || -p project]
+ -s SampleSheet.csv

  _________________________________________________________________________________________________

@@ -44,7 +52,8 @@ options = { :bam_file_sorted => nil,
  :vcf => nil,
  :account => "",
  :project => "",
- :debug => 1
+ :debug => 1,
+ :samplesheet => nil
  }

  optparse = OptionParser.new do |opts|
@@ -74,6 +83,10 @@ optparse = OptionParser.new do |opts|
  options[:debug] = 5 if i
  end

+ opts.on("-s", "--sampleSheet DIR", :REQUIRED, String, "SampleSheet.csv") do |i|
+ options[:samplesheet] = i
+ end
+
  opts.on_tail("-h", "--help", "Show this message") do
  puts opts
  exit
@@ -82,7 +95,8 @@ end

  begin
  optparse.parse!
- mandatory = [:bam_file_sorted, :index_prefix, :annotation_file, :bwa, :samtools, :gatk, :vcf, :index_vcf, :index_fa]
+ mandatory = [:samplesheet, :bam_file_sorted, :index_prefix, :annotation_file,
+ :bwa, :samtools, :gatk, :vcf, :index_vcf, :index_fa]
  missing = mandatory.select{ |param| options[param].nil? }
  if !missing.empty?
  puts "\nMissing options given or missing in config_file: \n\t#{missing.join(",\n\t")}"
@@ -112,7 +126,7 @@ options[:account] = options[:project] if options[:account].empty?



- # Indexing
+ # Indexing + Mark Duplicates
  SamtoolsIndexing.call(bam_file,
  job_prefix,
  options[:account],
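Note: the mandatory-key check added in this script follows the usual pattern of merging config.yml into the options hash and then rejecting nil entries. A minimal standalone sketch of that pattern, with made-up keys rather than the gem's full list:

    require 'yaml'

    # Defaults as in the options hash above; nil marks "must be supplied".
    options = { :bam_file_sorted => "sorted.bam", :samplesheet => nil, :gatk => nil }

    # Merge values read from config.yml (stubbed here as a plain hash).
    config = { :gatk => "path/to/GenomeAnalysisTK.jar" }
    options.merge!(config)

    mandatory = [:bam_file_sorted, :samplesheet, :gatk]
    missing = mandatory.select { |param| options[param].nil? }
    unless missing.empty?
      puts "\nMissing options given or missing in config_file: \n\t#{missing.join(",\n\t")}"
      exit 1
    end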
@@ -3,6 +3,14 @@ require 'mutations_caller_pipeline_aws'
  require 'optparse'
  require 'rubygems'
  require 'yaml'
+ require 'csv'
+ =begin
+ * Name: Mutations Caller Pipeline (AWS)
+ * Pipeline combining bwa with GATK2
+ * Author: Katharina Hayer
+ * Date: 8/8/2012
+ * License: GNU General Public License (GPL-2.0)
+ =end

  usage =<<EOF
  _________________________________________________________________________________________________
@@ -12,6 +20,8 @@ ________________________________________________________________________________
  [-w wildtype_r1.fq -x wildtype_r2.fq]
  -c config.yml -v raw_vcf_file
  [-a account || -p project]
+ -s SampleSheet.csv
+ -f dbsnp_file
  _________________________________________________________________________________________________

  #{$0} ...
@@ -20,12 +30,12 @@ ________________________________________________________________________________
  Also you should have the indices for bwa and GATK prepared.
  NOTE: Only paired end reads are supported!

- +++ C L U S T E R V E R S I O N - 0.0.12 +++
+ +++ C L U S T E R V E R S I O N - 0.0.15 +++

  config.yml should look like this:
  # config.yml
- index_prefix: "path/to/prefix"
- annotation_file: "path/to/annotation_file"
+ bwa_prefix: "path/to/prefix"
+ picard_tools: "path/to/picard_tools/"
  bwa: "path/to/bwa"
  samtools: "path/to/samtools"
  gatk: "path/to/GenomeAnalysisTK.jar"
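The config.yml block above now lists the bwa_prefix and picard_tools keys; the script reads this file with YAML and folds it into the options hash before the mandatory check. A minimal sketch of that load step (the merge style is an assumption, not a copy of the gem's code):

    require 'yaml'

    options = {}                              # normally pre-filled by OptionParser
    config  = YAML.load_file("config.yml")    # keys as shown in the usage text above
    config.each { |key, value| options[key.to_sym] = value }

    # e.g. options[:bwa_prefix], options[:picard_tools], options[:gatk], ...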
@@ -49,7 +59,8 @@ options = { :mutant_r1 => nil,
  :project => "",
  :debug => 1,
  :cluster => false,
- :coverage => false
+ :coverage => false,
+ :samplesheet => nil
  }

  optparse = OptionParser.new do |opts|
@@ -91,7 +102,7 @@ optparse = OptionParser.new do |opts|
  options[:debug] = 5 if i
  end

- opts.on("-p","--cluster ", "On compute cluster (none AWS)" ) do |i|
+ opts.on("-k","--cluster ", "On compute cluster (none AWS)" ) do |i|
  options[:cluster] = true if i
  end

@@ -99,6 +110,19 @@ optparse = OptionParser.new do |opts|
  options[:coverage] = true if i
  end

+ opts.on("-s", "--sampleSheet DIR", :REQUIRED, String, "SampleSheet.csv") do |i|
+ options[:samplesheet] = i
+ end
+
+ opts.on("-f", "--dbsnp_file DIR", :REQUIRED, String, "dbsnp.vcf") do |i|
+ options[:dbsnp_file] = i
+ end
+
+ opts.on("-b", "--sample_name Name", :REQUIRED, String, "Sample name / SampleID") do |i|
+ options[:sample_name] = i
+ end
+
+
  opts.on_tail("-h", "--help", "Show this message") do
  puts opts
  exit
@@ -107,7 +131,8 @@ end

  begin
  optparse.parse!
- mandatory = [:mutant_r1, :mutant_r2, :index_prefix, :annotation_file, :bwa, :samtools, :gatk, :vcf, :index_vcf, :index_fa]
+ mandatory = [:dbsnp_file,:picard_tools, :samplesheet, :mutant_r1, :mutant_r2, :bwa_prefix,
+ :bwa, :samtools, :gatk, :vcf, :index_vcf, :sample_name, :index_fa]
  missing = mandatory.select{ |param| options[param].nil? }
  if !missing.empty?
  puts "\nMissing options given or missing in config_file: \n\t#{missing.join(",\n\t")}"
@@ -120,41 +145,117 @@ rescue OptionParser::InvalidOption, OptionParser::MissingArgument
  exit
  end

+ # Methods
+ def execute(cmd)
+ puts cmd
+ status = system(cmd)
+ raise cmd unless status
+ end
+
+ def run(options)
+ # tmp files for output
+ job_number = options[:job_number]
+ bam_file = "aligned_#{job_number}.bam"
+ bam_file_sorted = "sorted_#{job_number}.bam"
+ bam_file_sorted_dublicates = options[:step_one_bam_file]
+ dublicate_metrcis = "GATK_files/dublicate.metrics"
+ job_prefix = "#{job_number}"
+ log_file = "log/#{options[:sample_name]}.log"
+
+ cmd = PicardCaller.convert(options[:sam_file], bam_file, options[:picard_tools],
+ log_file, job_prefix, options[:account])
+
+ if options[:debug] == 1
+ execute(cmd)
+ else
+ puts cmd
+ end
+
+ cmd = PicardCaller.rg_and_sorting(bam_file, bam_file_sorted, options[:picard_tools],
+ options[:library], options[:index], options[:sample_name],
+ log_file, options[:id], job_prefix, options[:account])
+
+ if options[:debug] == 1
+ execute(cmd)
+ else
+ puts cmd
+ end
+
+ cmd = PicardCaller.mark_dublicates(bam_file_sorted, bam_file_sorted_dublicates,
+ dublicate_metrcis, options[:picard_tools], log_file, job_prefix,
+ options[:account])
+
+ if options[:debug] == 1
+ execute(cmd)
+ else
+ puts cmd
+ end
+
+ cmd = PicardCaller.build_index(bam_file_sorted_dublicates,
+ options[:picard_tools], log_file, job_prefix, options[:account])
+
+ if options[:debug] == 1
+ execute(cmd)
+ else
+ puts cmd
+ end
+ end
+
+
  # pipeline starts here

+ # get information from sample sheet
+ CSV.foreach(options[:samplesheet],{:headers => :first_row}) do |row|
+ if sample_name = row["SampleID"]
+ options[:index] = row["Index"]
+ lane = row["Lane"]
+ sample_project = row["SampleProject"]
+ options[:id] = "#{sample_project}_#{lane}_#{sample_name}"
+ options[:library] = row["FCID"]
+ end
+ end
+
+ # Create Dir to not overcluster output folder
+ Dir.mkdir("GATK_files") unless File.exists?("GATK_files")
+ Dir.mkdir("log") unless File.exists?("log")
+
  # tmp files for output
  random = (rand*1000000).floor.to_s
- bam_file = "mutant_#{random}"
+ sample_name = options[:sample_name]
+ options[:sam_file] = "#{sample_name}_#{random}.sam"
+ options[:step_one_bam_file] = "#{sample_name}_#{random}.bam"
+ bam_file = "mutant_#{random}.bam"
  job_prefix = "#{random}"
- log_file = "#{random}.log"
- target_intervals = "#{random}_target.intervals"
+ options[:job_number] = job_prefix
+ log_file = "log/#{sample_name}.log"
+ target_intervals = "GATK_files/target.intervals"
  realigned_bam = "#{random}_realigned.bam"
- recal_file = "#{random}_recal.csv"
- recal_bam = "#{random}_recal.bam"
+ recal_file = "GATK_files/recal.grp"
+ recal_bam = "#{sample_name}.bam"
  sai_file_fwd = "#{random}_fwd.sai"
  sai_file_rev = "#{random}_rev.sai"

  options[:account] = options[:project] if options[:account].empty?
- options[:gatk] = "java -Xmx4g -jar #{options[:gatk]}" if options[:cluster]
+ #options[:gatk] = "java -Xmx8g -jar #{options[:gatk]}"

  # BWA : ALN
  BwaCaller.call_aln(options[:mutant_r1],
- options[:index_prefix],
+ options[:bwa_prefix],
  sai_file_fwd,
  log_file,
  options[:bwa],
  job_prefix,
  options[:account],
- options[:debug])
+ options[:debug],"fwd")

  BwaCaller.call_aln(options[:mutant_r2],
- options[:index_prefix],
+ options[:bwa_prefix],
  sai_file_rev,
  log_file,
  options[:bwa],
  job_prefix,
  options[:account],
- options[:debug])
+ options[:debug],"rev")


  # BWA : First step mapping reads to reference
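The CSV.foreach block above assumes an Illumina-style SampleSheet.csv whose header row includes FCID, Lane, SampleID, Index and SampleProject; those fields end up as the Picard read-group values. A self-contained sketch of that lookup, with made-up sample data:

    require 'csv'

    # SampleSheet.csv is expected to look roughly like:
    #   FCID,Lane,SampleID,Index,SampleProject
    #   D0F8EACXX,1,Sample_01,ATCACG,Project_X
    options = {}
    CSV.foreach("SampleSheet.csv", :headers => :first_row) do |row|
      next unless (sample_name = row["SampleID"])
      options[:index]   = row["Index"]    # becomes the read-group PU
      options[:library] = row["FCID"]     # becomes the read-group LB
      options[:id]      = "#{row["SampleProject"]}_#{row["Lane"]}_#{sample_name}"
    end
    # options => {:index=>"ATCACG", :library=>"D0F8EACXX", :id=>"Project_X_1_Sample_01"}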
@@ -162,8 +263,8 @@ BwaCaller.call_paired_end(options[:mutant_r1],
  options[:mutant_r2],
  sai_file_fwd,
  sai_file_rev,
- bam_file,
- options[:index_prefix],
+ options[:sam_file],
+ options[:bwa_prefix],
  log_file,
  options[:bwa],
  options[:samtools],
@@ -172,27 +273,30 @@ BwaCaller.call_paired_end(options[:mutant_r1],
  options[:debug])


- # Indexing
- bam_file = bam_file + ".bam"
- SamtoolsIndexing.call(bam_file,
- job_prefix,
- options[:account],
- options[:debug],
- log_file)
+ # Indexing + Marking Duplicates
+ run(options)
+ #bam_file = bam_file + ".bam"
+ #SamtoolsIndexing.call(bam_file,
+ # job_prefix,
+ # options[:account],
+ # options[:debug],
+ # log_file)

  # Realigne
  GatkCaller.prepare_realigne(log_file,
  options[:gatk],
- bam_file,
+ options[:step_one_bam_file],
  options[:index_fa],
  target_intervals,
  job_prefix,
  options[:account],
+ options[:dbsnp_file],
  options[:debug])

+
  GatkCaller.realigne(log_file,
  options[:gatk],
- bam_file,
+ options[:step_one_bam_file],
  options[:index_fa],
  target_intervals,
  realigned_bam,
@@ -204,11 +308,11 @@ GatkCaller.realigne(log_file,
  GatkCaller.recalibrate_bam( log_file,
  options[:gatk],
  options[:index_fa],
- options[:index_vcf],
  realigned_bam,
  recal_file,
  job_prefix,
  options[:account],
+ options[:dbsnp_file],
  options[:debug] )

  GatkCaller.table_calibration(log_file,
@@ -225,11 +329,11 @@ GatkCaller.table_calibration(log_file,
  GatkCaller.call(log_file,
  options[:gatk],
  options[:index_fa],
- options[:index_vcf],
  recal_bam,
  options[:vcf],
  job_prefix,
  options[:account],
+ options[:dbsnp_file],
  options[:debug])


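Nothing in this script waits on the qsub jobs it submits; ordering comes entirely from the -N job names and the matching -hold_jid arguments inside the caller classes. Roughly, the per-sample chain built by this version is as follows (names shown without the _#{job_prefix} suffix; this summary is a reading of the diff, not code from the gem):

    # Per-sample qsub job chain; each step holds on the previous job name.
    PIPELINE_JOB_ORDER = [
      "bwa_aln",             # BwaCaller.call_aln (fwd and rev)
      "bwa",                 # BwaCaller.call_paired_end  -> SAM
      "convert",             # PicardCaller.convert       SAM -> BAM
      "sort",                # PicardCaller.rg_and_sorting (read groups + coordinate sort)
      "duplicates",          # PicardCaller.mark_dublicates
      "index",               # PicardCaller.build_index
      "prep_realignment",    # GatkCaller.prepare_realigne (RealignerTargetCreator)
      "realignment",         # GatkCaller.realigne (IndelRealigner)
      "recalibration_table", # GatkCaller.recalibrate_bam (BaseRecalibrator)
      "recalibration",       # GatkCaller.table_calibration (PrintReads -BQSR)
      "genotyper"            # GatkCaller.call (UnifiedGenotyper)
    ]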
@@ -1,6 +1,7 @@
  require 'mutations_caller_pipeline_aws/bwa_caller'
  require 'mutations_caller_pipeline_aws/gatk_caller'
  require 'mutations_caller_pipeline_aws/samtools_indexing'
+ require 'mutations_caller_pipeline_aws/picard_caller'

  class MutationsCallerPipelineAws
  def self.hi
@@ -1,18 +1,17 @@
  class BwaCaller

- def self.call_paired_end(r1, r2, sai1, sai2, out_file, index, log_file, bwa, samtools, job_prefix,account, debug)
+ def self.call_paired_end(r1, r2, sai1, sai2, sam_file, index, log_file, bwa, samtools, job_prefix,account, debug)
  dummy = "\\\\\\"
- cmd = "qsub -o #{log_file} -hold_jid bwa_aln_#{job_prefix} -V -cwd -b y -N bwa_#{job_prefix} -l h_vmem=6G -pe make 3 #{account}\
- #{bwa} sampe -r '@RG#{dummy}tID:foo#{dummy}tSM:bar#{dummy}tPL:Illumina' #{index} \
- #{sai1} #{sai2} #{r1} #{r2} \
- \\| #{samtools} view -Su - \\| #{samtools} sort - #{out_file}"
+ cmd = "qsub -o #{log_file} -e #{log_file}_bwa_sampe_errors -hold_jid bwa_aln_#{job_prefix} -V -cwd -b y -N bwa_#{job_prefix} -l h_vmem=6G #{account}\
+ #{bwa} sampe #{index} \
+ #{sai1} #{sai2} #{r1} #{r2} -f #{sam_file}"
  puts cmd
  system('bash','-c', cmd) if debug == 1
  end

- def self.call_aln(read, index, out_file, log_file, bwa, job_prefix, account,debug)
- cmd = "qsub -o #{log_file} -V -cwd -b y -N bwa_aln_#{job_prefix} -l h_vmem=4G #{account} \
- #{bwa} aln -f #{out_file} #{index} #{read} "
+ def self.call_aln(read, index, out_file, log_file, bwa, job_prefix, account,debug,direction)
+ cmd = "qsub -pe DJ 8 -o #{log_file} -e #{log_file}_bwa_aln_errors_#{direction} -V -cwd -b y -N bwa_aln_#{job_prefix} -l h_vmem=4G #{account} \
+ #{bwa} aln -t 8 -f #{out_file} #{index} #{read} "
  puts cmd
  system(cmd) if debug == 1
  end
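With this change call_aln takes a direction argument (used only to name the per-mate -e error log) and runs bwa aln with -t 8, while call_paired_end now writes a SAM file via bwa sampe -f instead of piping through samtools; the @RG tag formerly passed to sampe is added later by PicardCaller.rg_and_sorting. A hedged usage sketch with placeholder paths (debug 0 only prints the qsub commands):

    require 'mutations_caller_pipeline_aws/bwa_caller'

    bwa     = "/usr/local/bin/bwa"     # placeholder path to the bwa binary
    index   = "genome/bwa_prefix"      # placeholder bwa index prefix
    account = ""                       # SGE account/project flags, if any

    BwaCaller.call_aln("mutant_r1.fq", index, "fwd.sai", "run.log", bwa, "42", account, 0, "fwd")
    BwaCaller.call_aln("mutant_r2.fq", index, "rev.sai", "run.log", bwa, "42", account, 0, "rev")
    BwaCaller.call_paired_end("mutant_r1.fq", "mutant_r2.fq", "fwd.sai", "rev.sai",
                              "mutant_42.sam", index, "run.log", bwa, "samtools", "42", account, 0)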
@@ -1,13 +1,11 @@
  class GatkCaller
  # INDEX is normal genom.fa
  # Genotyper
- def self.call(log_dir, gatk, index_fa, dbSNP, read_bam, read_vcf, job_prefix, account, debug)
- cmd = "echo 'starting GATK for mutant at ' `date` >> #{log_dir}
- qsub -o #{log_dir} -V -cwd -b y -N genotyper_#{job_prefix} -l h_vmem=4.5G -hold_jid recalibration_#{job_prefix} #{account}\
- #{gatk} -l INFO -R #{index_fa} -T UnifiedGenotyper \
- -I #{read_bam} \
- -D #{dbSNP}
- -o #{read_vcf} \
+ def self.call(log_dir, gatk, index_fa, read_bam, read_vcf, job_prefix, account,dbsnp_file, debug)
+ cmd = "qsub -pe DJ 4 -o #{log_dir} -e #{log_dir}_genotyper_errors -V -cwd -b y -N genotyper_#{job_prefix} -l h_vmem=6G -hold_jid recalibration_#{job_prefix} #{account}\
+ java -Xmx6g -jar #{gatk} -l INFO -R #{index_fa} -T UnifiedGenotyper \
+ -I #{read_bam} --dbsnp #{dbsnp_file} \
+ -o #{read_vcf} -nt 4 --max_alternate_alleles 8 \
  --genotype_likelihoods_model BOTH"
  puts cmd
  system(cmd) if debug == 1
@@ -15,49 +13,46 @@ class GatkCaller

  # INDEX is normal genom.fa
  # Coverage Summary
+ # parallel not possible yet (1.6-13-g91f02df)
  def self.coverage(log_dir, gatk, index_fa, read_bam, outfile_prefix, job_prefix, account, debug)
- cmd = "echo 'starting coverage GATK for mutant at ' `date` >> #{log_dir}
- qsub -o #{log_dir} -V -cwd -b y -N coverage_#{job_prefix} -l h_vmem=4.5G -hold_jid recalibration_#{job_prefix} #{account}\
- #{gatk} -R #{index_fa} -T DepthOfCoverage \
- -I #{read_bam} \
- -o #{outfile_prefix} "
+ cmd = "qsub -o #{log_dir} -e #{log_dir}_coverage_errors -V -cwd -b y -N coverage_#{job_prefix} -l h_vmem=7G -hold_jid recalibration_#{job_prefix} #{account}\
+ java -Xmx6g -jar #{gatk} -R #{index_fa} -T DepthOfCoverage \
+ -I #{read_bam} --omitDepthOutputAtEachBase \
+ -o #{outfile_prefix} --omitIntervalStatistics --omitLocusTable"
  puts cmd
  system(cmd) if debug == 1
  end

  # Making recalibration table
- def self.recalibrate_bam(log_dir ,gatk, index_fa, index_vcf, read_bam, recal_file, job_prefix, account, debug )
- cmd = "echo 'starting recalibration table ' `date` >> #{log_dir}
- qsub -o #{log_dir} -V -cwd -b y -N recalibration_table_#{job_prefix} -l h_vmem=4.5G -hold_jid realignment_#{job_prefix} #{account} \
- #{gatk} -knownSites #{index_vcf} -I #{read_bam} \
- -R #{index_fa} -T CountCovariates \
- -cov ReadGroupCovariate -cov QualityScoreCovariate -cov DinucCovariate \
- -cov CycleCovariate \
- -recalFile #{recal_file}"
+ def self.recalibrate_bam(log_dir ,gatk, index_fa, read_bam, recal_file, job_prefix, account, dbsnp_file, debug )
+ cmd = "qsub -o #{log_dir} -e #{log_dir}_recalibrate_errors -V -cwd -b y -N recalibration_table_#{job_prefix} -l h_vmem=7G -hold_jid realignment_#{job_prefix} #{account} \
+ java -Xmx6g -jar #{gatk} -knownSites #{dbsnp_file} -I #{read_bam} \
+ -R #{index_fa} -T BaseRecalibrator \
+ -o #{recal_file}"
  puts cmd
  system(cmd) if debug == 1
  end

  # Using recalibration table
+ # parallel not possible yet (1.6-13-g91f02df)
  def self.table_calibration(log_dir, gatk, index_fa, read_bam, recal_bam, recal_file, job_prefix, account, debug)
- cmd = "echo 'recalibrating bam_file at ' `date` >> #{log_dir}
- qsub -V -o #{log_dir} -cwd -b y -N recalibration_#{job_prefix} -l h_vmem=4.5G -hold_jid recalibration_table_#{job_prefix} #{account} \
- #{gatk} \
+ cmd = "qsub -V -o #{log_dir} -e #{log_dir}_prep_recal_errors -cwd -b y -N recalibration_#{job_prefix} -l h_vmem=7G -hold_jid recalibration_table_#{job_prefix} #{account} \
+ java -Xmx6g -jar #{gatk} \
  -R #{index_fa} \
  -I #{read_bam} \
- -T TableRecalibration \
+ -T PrintReads \
  -o #{recal_bam} \
- -recalFile #{recal_file}"
+ -BQSR #{recal_file}"
  puts cmd
  system(cmd) if debug == 1
  end

  # Preparation realignement
- def self.prepare_realigne(log_dir, gatk, read_bam, index_fa, target_intervals, job_prefix, account, debug)
- cmd = "echo 'preparing realignement at ' `date` >> #{log_dir}
- qsub -o #{log_dir} -V -cwd -b y -N prep_realignment_#{job_prefix} -l h_vmem=4.5G -hold_jid indexing_#{job_prefix} #{account}\
- #{gatk} \
- -I #{read_bam} \
+
+ def self.prepare_realigne(log_dir, gatk, read_bam, index_fa, target_intervals, job_prefix, account, dbsnp_file, debug)
+ cmd = "qsub -pe DJ 4 -o #{log_dir} -e #{log_dir}_prep_realign_errors -V -cwd -b y -N prep_realignment_#{job_prefix} -l h_vmem=6G -hold_jid index_#{job_prefix} #{account}\
+ java -Xmx6g -jar #{gatk} -nt 4 \
+ -I #{read_bam} --known #{dbsnp_file} \
  -R #{index_fa} \
  -T RealignerTargetCreator \
  -o #{target_intervals}"
@@ -66,10 +61,10 @@ class GatkCaller
  end

  # Realignment
+ # parallel not possible yet (1.6-13-g91f02df)
  def self.realigne(log_dir, gatk, read_bam, index_fa, target_intervals, realigned_bam, job_prefix, account, debug)
- cmd = "echo 'preparing realignement at ' `date` >> #{log_dir}
- qsub -o #{log_dir} -V -cwd -b y -N realignment_#{job_prefix} -l h_vmem=4.5G -hold_jid prep_realignment_#{job_prefix} #{account} \
- #{gatk} \
+ cmd = "qsub -o #{log_dir} -e #{log_dir}_realign_errors -V -cwd -b y -N realignment_#{job_prefix} -l h_vmem=7G -hold_jid prep_realignment_#{job_prefix} #{account} \
+ java -Xmx6g -jar #{gatk} \
  -I #{read_bam} \
  -R #{index_fa} \
  -T IndelRealigner \
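These hunks track the GATK 2 command-line changes: the dbSNP file now goes to --dbsnp/--known/-knownSites, and the old CountCovariates / TableRecalibration pair becomes BaseRecalibrator followed by PrintReads -BQSR. A hedged usage sketch of the new recalibration pair, with placeholder paths and debug 0 so the qsub commands are only printed:

    require 'mutations_caller_pipeline_aws/gatk_caller'

    gatk  = "GenomeAnalysisTK.jar"     # placeholder path to the GATK jar
    ref   = "genome.fa"                # reference FASTA (index_fa)
    dbsnp = "dbsnp_135.vcf"            # placeholder dbSNP file
    log   = "log/sample.log"

    # 1) Build the recalibration table (BaseRecalibrator).
    GatkCaller.recalibrate_bam(log, gatk, ref, "realigned.bam", "GATK_files/recal.grp",
                               "42", "", dbsnp, 0)
    # 2) Write the recalibrated BAM (PrintReads -BQSR).
    GatkCaller.table_calibration(log, gatk, ref, "realigned.bam", "sample.bam",
                                 "GATK_files/recal.grp", "42", "", 0)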
@@ -0,0 +1,28 @@
+ class PicardCaller
+ #converter = "java -jar ~/Downloads/picard-tools-1.56/picard-tools-1.56/SamFormatConverter.jar I=WT_aligned_sorted_rg.bam O=tmp.sam VALIDATION_STRINGENCY=LENIENT"
+ def self.convert(sam_file, bam_file, picard_tools, log_file, job_prefix, account)
+ cmd = "qsub -o #{log_file} -e #{log_file}_conversion_errors -V -cwd -b y -hold_jid bwa_#{job_prefix} -N convert_#{job_prefix} -l h_vmem=7G #{account} \
+ java -Xmx6g -jar #{picard_tools}/SamFormatConverter.jar I=#{sam_file} O=#{bam_file} VALIDATION_STRINGENCY=LENIENT "
+ end
+
+ #rg_and_sorting = "java -jar -Xmx3g ~/Downloads/picard-tools-1.56/picard-tools-1.56/AddOrReplaceReadGroups.jar I=WT_aligned.bam O=WT_aligned_sorted_rg.bam SO=coordinate ID=15 LB=nina_library PL=Illumina PU=ATCATC SM=My_test VALIDATION_STRINGENCY=LENIENT"
+ def self.rg_and_sorting(bam_file, bam_file_sorted, picard_tools, library, index, sample_name, log_file, id, job_prefix, account)
+ cmd = "qsub -o #{log_file} -e #{log_file}_rg_sorting_errors -V -cwd -b y -hold_jid convert_#{job_prefix} -N sort_#{job_prefix} -l h_vmem=7G #{account} \
+ java -Xmx6g -jar #{picard_tools}/AddOrReplaceReadGroups.jar I=#{bam_file} O=#{bam_file_sorted} SO=coordinate ID=#{id} \
+ LB=#{library} PL=Illumina PU=#{index} SM=#{sample_name} VALIDATION_STRINGENCY=LENIENT MAX_RECORDS_IN_RAM=1500000"
+ end
+
+
+ #mark_dublicates = "java -jar ~/Downloads/picard-tools-1.56/picard-tools-1.56/MarkDuplicates.jar I=WT_aligned_sorted_rg.bam O=marked_dublicates.bam M=dublicate.metrcis AS=true VALIDATION_STRINGENCY=LENIENT"
+ def self.mark_dublicates(bam_file_sorted, bam_file_sorted_dublicates, duplicate_metrcis, picard_tools, log_file, job_prefix, account)
+ cmd = "qsub -o #{log_file} -e #{log_file}_duplicates_errors -V -cwd -b y -hold_jid sort_#{job_prefix} -N duplicates_#{job_prefix} -l h_vmem=7G #{account} \
+ java -Xmx3g -jar #{picard_tools}/MarkDuplicates.jar I=#{bam_file_sorted} O=#{bam_file_sorted_dublicates} M=#{duplicate_metrcis} \
+ AS=true VALIDATION_STRINGENCY=LENIENT"
+ end
+
+ #build_index = "java -jar ~/Downloads/picard-tools-1.56/picard-tools-1.56/BuildBamIndex.jar I=marked_dublicates.bam VALIDATION_STRINGENCY=LENIENT"
+ def self.build_index(bam_file_sorted_dublicates, picard_tools, log_file, job_prefix, account)
+ cmd = "qsub -o #{log_file} -e #{log_file}_index_errors -V -cwd -b y -hold_jid dublicates_#{job_prefix} -N index_#{job_prefix} -l h_vmem=7G #{account} \
+ java -Xmx3g -jar #{picard_tools}/BuildBamIndex.jar I=#{bam_file_sorted_dublicates} VALIDATION_STRINGENCY=LENIENT"
+ end
+ end
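PicardCaller only assembles qsub command strings (each method's last expression is the cmd string); printing or submitting them is left to the run(options) helper in the driver script. A minimal usage sketch with placeholder paths follows. One detail worth double-checking: build_index holds on dublicates_#{job_prefix} while mark_dublicates names its job duplicates_#{job_prefix}, so that particular dependency may not take effect.

    require 'mutations_caller_pipeline_aws/picard_caller'

    picard = "path/to/picard-tools"    # placeholder directory containing the Picard jars
    log    = "log/sample.log"
    prefix = "42"                      # job_prefix shared by the whole chain
    acct   = ""

    cmds = [
      PicardCaller.convert("sample_42.sam", "aligned_42.bam", picard, log, prefix, acct),
      PicardCaller.rg_and_sorting("aligned_42.bam", "sorted_42.bam", picard,
                                  "D0F8EACXX", "ATCACG", "sample", log, "Project_1_sample", prefix, acct),
      PicardCaller.mark_dublicates("sorted_42.bam", "sample_42.bam",
                                   "GATK_files/dublicate.metrics", picard, log, prefix, acct),
      PicardCaller.build_index("sample_42.bam", picard, log, prefix, acct)
    ]
    cmds.each { |cmd| puts cmd }       # or system(cmd) to actually submit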
metadata CHANGED
@@ -1,35 +1,25 @@
- --- !ruby/object:Gem::Specification
+ --- !ruby/object:Gem::Specification
  name: mutations_caller_pipeline_aws
- version: !ruby/object:Gem::Version
- hash: 1
+ version: !ruby/object:Gem::Version
+ version: 0.0.16
  prerelease:
- segments:
- - 0
- - 0
- - 15
- version: 0.0.15
  platform: ruby
- authors:
+ authors:
  - Kaharina Hayer
  autorequire:
  bindir: bin
  cert_chain: []
-
- date: 2012-01-20 00:00:00 -05:00
- default_executable:
+ date: 2012-10-08 00:00:00.000000000 Z
  dependencies: []
-
  description: Using BWA to align and GATK to call the bases
- email:
+ email:
  - katharinaehayer@gmail.com
- executables:
+ executables:
  - mutations_caller_pipeline_aws
  - gatk_pipe_only_aws
  extensions: []
-
  extra_rdoc_files: []
-
- files:
+ files:
  - bin/gatk_pipe_only_aws
  - bin/mutations_caller_pipeline_aws
  - lib/mutations_caller_pipeline_aws.rb
@@ -37,40 +27,30 @@ files:
  - lib/mutations_caller_pipeline_aws/bwa_caller.rb
  - lib/mutations_caller_pipeline_aws/gatk_caller.rb
  - lib/mutations_caller_pipeline_aws/location_file.rb
+ - lib/mutations_caller_pipeline_aws/picard_caller.rb
  - lib/mutations_caller_pipeline_aws/samtools_indexing.rb
- has_rdoc: true
  homepage: https://github.com/khayer/mutations_caller_pipeline_aws
  licenses: []
-
  post_install_message:
  rdoc_options: []
-
- require_paths:
+ require_paths:
  - lib
- required_ruby_version: !ruby/object:Gem::Requirement
+ required_ruby_version: !ruby/object:Gem::Requirement
  none: false
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- hash: 3
- segments:
- - 0
- version: "0"
- required_rubygems_version: !ruby/object:Gem::Requirement
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
  none: false
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- hash: 3
- segments:
- - 0
- version: "0"
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: '0'
  requirements: []
-
  rubyforge_project: mutations_caller_pipeline_aws
- rubygems_version: 1.6.2
+ rubygems_version: 1.8.23
  signing_key:
  specification_version: 3
  summary: Call Mutations for files.fq
  test_files: []
-