mutations_caller_pipeline_aws 0.0.10 → 0.0.11
Sign up to get free protection for your applications and to get access to all the features.
data/bin/gatk_pipe_only_aws
CHANGED
@@ -107,6 +107,7 @@ realigned_bam = "#{random}_realigned.bam"
|
|
107
107
|
recal_file = "#{random}_recal.csv"
|
108
108
|
recal_bam = "#{random}_recal.bam"
|
109
109
|
|
110
|
+
|
110
111
|
options[:account] = options[:project] if options[:account].empty?
|
111
112
|
|
112
113
|
|
@@ -114,7 +115,8 @@ options[:account] = options[:project] if options[:account].empty?
|
|
114
115
|
SamtoolsIndexing.call(bam_file,
|
115
116
|
job_prefix,
|
116
117
|
options[:account],
|
117
|
-
options[:debug]
|
118
|
+
options[:debug],
|
119
|
+
log_file)
|
118
120
|
|
119
121
|
# Realign
|
120
122
|
GatkCaller.prepare_realigne(log_file,
|
@@ -121,12 +121,36 @@ target_intervals = "#{random}_target.intervals"
|
|
121
121
|
realigned_bam = "#{random}_realigned.bam"
|
122
122
|
recal_file = "#{random}_recal.csv"
|
123
123
|
recal_bam = "#{random}_recal.bam"
|
124
|
+
sai_file_fwd = "#{random}_fwd.sai"
|
125
|
+
sai_file_rev = "#{random}_rev.sai"
|
124
126
|
|
125
127
|
options[:account] = options[:project] if options[:account].empty?
|
126
128
|
|
129
|
+
# BWA : ALN
|
130
|
+
BwaCaller.call_aln(options[:mutant_r1],
|
131
|
+
options[:index_prefix],
|
132
|
+
sai_file_fwd,
|
133
|
+
log_file,
|
134
|
+
options[:bwa],
|
135
|
+
job_prefix,
|
136
|
+
options[:account],
|
137
|
+
options[:debug])
|
138
|
+
|
139
|
+
BwaCaller.call_aln(options[:mutant_r2],
|
140
|
+
options[:index_prefix],
|
141
|
+
sai_file_rev,
|
142
|
+
log_file,
|
143
|
+
options[:bwa],
|
144
|
+
job_prefix,
|
145
|
+
options[:account],
|
146
|
+
options[:debug])
|
147
|
+
|
148
|
+
|
127
149
|
# BWA : First step mapping reads to reference
|
128
150
|
BwaCaller.call_paired_end(options[:mutant_r1],
|
129
151
|
options[:mutant_r2],
|
152
|
+
sai_file_fwd,
|
153
|
+
sai_file_rev,
|
130
154
|
bam_file,
|
131
155
|
options[:index_prefix],
|
132
156
|
log_file,
|
@@ -142,7 +166,8 @@ bam_file = bam_file + ".bam"
|
|
142
166
|
SamtoolsIndexing.call(bam_file,
|
143
167
|
job_prefix,
|
144
168
|
options[:account],
|
145
|
-
options[:debug]
|
169
|
+
options[:debug],
|
170
|
+
log_file)
|
146
171
|
|
147
172
|
# Realign
|
148
173
|
GatkCaller.prepare_realigne(log_file,
|
@@ -1,18 +1,18 @@
|
|
1
1
|
class BwaCaller
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
2
|
+
|
3
|
+
def self.call_paired_end(r1, r2, sai1, sai2, out_file, index, log_file, bwa, samtools, job_prefix,account, debug)
|
4
|
+
cmd = "qsub -o #{log_file} -hold_jid bwa_aln_#{job_prefix} -V -cwd -b y -N bwa_#{job_prefix} -l h_vmem=9G -pe make 3 #{account}\
|
5
|
+
#{bwa} sampe -r '@RG\tID:foo\tSM:bar\tPL:Illumina' #{index} \
|
6
|
+
#{sai1} #{sai2} #{r1} #{r2} \
|
7
|
+
\\| #{samtools} view -Su - \\| #{samtools} sort - #{out_file}"
|
6
8
|
puts cmd
|
7
|
-
system('bash','-c',cmd
|
9
|
+
system('bash','-c', cmd) if debug == 1
|
8
10
|
end
|
9
11
|
|
10
|
-
def self.
|
11
|
-
cmd = "qsub -V -cwd -b y -N
|
12
|
-
|
13
|
-
<(#{bwa} aln #{index} #{r1} 2>>#{log_file}) <(#{bwa} aln #{index} #{r2} 2>>#{log_file} ) \
|
14
|
-
#{r1} #{r2} 2>>#{log_file} | #{samtools} view -Su - 2>>#{log_file} | #{samtools} sort - #{out_file} 2>>#{log_file}"
|
12
|
+
def self.call_aln(read, index, out_file, log_file, bwa, job_prefix, account,debug)
|
13
|
+
cmd = "qsub -o #{log_file} -V -cwd -b y -N bwa_aln_#{job_prefix} -l h_vmem=4G #{account} \
|
14
|
+
#{bwa} aln -f #{out_file} #{index} #{read} "
|
15
15
|
puts cmd
|
16
|
-
system(
|
16
|
+
system(cmd) if debug == 1
|
17
17
|
end
|
18
18
|
end
|
@@ -3,12 +3,11 @@ class GatkCaller
|
|
3
3
|
# Genotyper
|
4
4
|
def self.call(log_dir, gatk, index_fa, read_bam, read_vcf, job_prefix, account, debug)
|
5
5
|
cmd = "echo 'starting GATK for mutant at ' `date` >> #{log_dir}
|
6
|
-
qsub -V -cwd -b y -N genotyper_#{job_prefix} -l h_vmem=3G -hold_jid recalibration_#{job_prefix} #{account}\
|
6
|
+
qsub -o #{log_dir} -V -cwd -b y -N genotyper_#{job_prefix} -l h_vmem=3G -hold_jid recalibration_#{job_prefix} #{account}\
|
7
7
|
#{gatk} -l INFO -R #{index_fa} -T UnifiedGenotyper \
|
8
8
|
-I #{read_bam} \
|
9
9
|
-o #{read_vcf} \
|
10
|
-
--genotype_likelihoods_model BOTH
|
11
|
-
>> #{log_dir} 2>&1 || exit 1"
|
10
|
+
--genotype_likelihoods_model BOTH || exit 1"
|
12
11
|
puts cmd
|
13
12
|
system(cmd) if debug == 1
|
14
13
|
end
|
@@ -16,12 +15,12 @@ class GatkCaller
|
|
16
15
|
# Making recalibration table
|
17
16
|
def self.recalibrate_bam(log_dir ,gatk, index_fa, index_vcf, read_bam, recal_file, job_prefix, account, debug )
|
18
17
|
cmd = "echo 'starting recalibration table ' `date` >> #{log_dir}
|
19
|
-
qsub -V -cwd -b y -N recalibration_table_#{job_prefix} -l h_vmem=4G -hold_jid realignment_#{job_prefix} #{account} \
|
18
|
+
qsub -o #{log_dir} -V -cwd -b y -N recalibration_table_#{job_prefix} -l h_vmem=4G -hold_jid realignment_#{job_prefix} #{account} \
|
20
19
|
#{gatk} -knownSites #{index_vcf} -I #{read_bam} \
|
21
20
|
-R #{index_fa} -T CountCovariates \
|
22
21
|
-cov ReadGroupCovariate -cov QualityScoreCovariate -cov DinucCovariate \
|
23
22
|
-cov CycleCovariate \
|
24
|
-
-recalFile #{recal_file}
|
23
|
+
-recalFile #{recal_file} || exit 1 "
|
25
24
|
puts cmd
|
26
25
|
system(cmd) if debug == 1
|
27
26
|
end
|
@@ -29,13 +28,13 @@ class GatkCaller
|
|
29
28
|
# Using recalibration table
|
30
29
|
def self.table_calibration(log_dir, gatk, index_fa, read_bam, recal_bam, recal_file, job_prefix, account, debug)
|
31
30
|
cmd = "echo 'recalibrating bam_file at ' `date` >> #{log_dir}
|
32
|
-
qsub -V -cwd -b y -N recalibration_#{job_prefix} -l h_vmem=4G -hold_jid recalibration_table_#{job_prefix} #{account} \
|
31
|
+
qsub -V -o #{log_dir} -cwd -b y -N recalibration_#{job_prefix} -l h_vmem=4G -hold_jid recalibration_table_#{job_prefix} #{account} \
|
33
32
|
#{gatk} \
|
34
33
|
-R #{index_fa} \
|
35
34
|
-I #{read_bam} \
|
36
35
|
-T TableRecalibration \
|
37
36
|
-o #{recal_bam} \
|
38
|
-
-recalFile #{recal_file}
|
37
|
+
-recalFile #{recal_file} || exit 1"
|
39
38
|
puts cmd
|
40
39
|
system(cmd) if debug == 1
|
41
40
|
end
|
@@ -43,12 +42,12 @@ class GatkCaller
|
|
43
42
|
# Preparation for realignment
|
44
43
|
def self.prepare_realigne(log_dir, gatk, read_bam, index_fa, target_intervals, job_prefix, account, debug)
|
45
44
|
cmd = "echo 'preparing realignement at ' `date` >> #{log_dir}
|
46
|
-
qsub -V -cwd -b y -N prep_realignment_#{job_prefix} -l h_vmem=4G -hold_jid indexing_#{job_prefix} #{account}\
|
45
|
+
qsub -o #{log_dir} -V -cwd -b y -N prep_realignment_#{job_prefix} -l h_vmem=4G -hold_jid indexing_#{job_prefix} #{account}\
|
47
46
|
#{gatk} \
|
48
47
|
-I #{read_bam} \
|
49
48
|
-R #{index_fa} \
|
50
49
|
-T RealignerTargetCreator \
|
51
|
-
-o #{target_intervals}"
|
50
|
+
-o #{target_intervals} || exit 1 "
|
52
51
|
puts cmd
|
53
52
|
system(cmd) if debug == 1
|
54
53
|
end
|
@@ -56,13 +55,13 @@ class GatkCaller
|
|
56
55
|
# Realignment
|
57
56
|
def self.realigne(log_dir, gatk, read_bam, index_fa, target_intervals, realigned_bam, job_prefix, account, debug)
|
58
57
|
cmd = "echo 'preparing realignement at ' `date` >> #{log_dir}
|
59
|
-
qsub -V -cwd -b y -N realignment_#{job_prefix} -l h_vmem=4G -hold_jid prep_realignment_#{job_prefix} #{account} \
|
58
|
+
qsub -o #{log_dir} -V -cwd -b y -N realignment_#{job_prefix} -l h_vmem=4G -hold_jid prep_realignment_#{job_prefix} #{account} \
|
60
59
|
#{gatk} \
|
61
60
|
-I #{read_bam} \
|
62
61
|
-R #{index_fa} \
|
63
62
|
-T IndelRealigner \
|
64
63
|
-targetIntervals #{target_intervals} \
|
65
|
-
-o #{realigned_bam}
|
64
|
+
-o #{realigned_bam} || exit 1"
|
66
65
|
puts cmd
|
67
66
|
system(cmd) if debug == 1
|
68
67
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
class SamtoolsIndexing
|
2
|
-
def self.call(bam_file, job_prefix, account, debug)
|
3
|
-
cmd = "qsub -V -cwd -b y -N indexing_#{job_prefix} -l h_vmem=3G -hold_jid bwa_#{job_prefix} #{account} \
|
2
|
+
def self.call(bam_file, job_prefix, account, debug, log_file)
|
3
|
+
cmd = "qsub -o #{log_file} -V -cwd -b y -N indexing_#{job_prefix} -l h_vmem=3G -hold_jid bwa_#{job_prefix} #{account} \
|
4
4
|
samtools index #{bam_file}"
|
5
5
|
puts cmd
|
6
6
|
system(cmd) if debug == 1
|