mutations_caller_pipeline_aws 0.0.10 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/gatk_pipe_only_aws
CHANGED
@@ -107,6 +107,7 @@ realigned_bam = "#{random}_realigned.bam"
|
|
107
107
|
recal_file = "#{random}_recal.csv"
|
108
108
|
recal_bam = "#{random}_recal.bam"
|
109
109
|
|
110
|
+
|
110
111
|
options[:account] = options[:project] if options[:account].empty?
|
111
112
|
|
112
113
|
|
@@ -114,7 +115,8 @@ options[:account] = options[:project] if options[:account].empty?
|
|
114
115
|
SamtoolsIndexing.call(bam_file,
|
115
116
|
job_prefix,
|
116
117
|
options[:account],
|
117
|
-
options[:debug]
|
118
|
+
options[:debug],
|
119
|
+
log_file)
|
118
120
|
|
119
121
|
# Realigne
|
120
122
|
GatkCaller.prepare_realigne(log_file,
|
@@ -121,12 +121,36 @@ target_intervals = "#{random}_target.intervals"
|
|
121
121
|
realigned_bam = "#{random}_realigned.bam"
|
122
122
|
recal_file = "#{random}_recal.csv"
|
123
123
|
recal_bam = "#{random}_recal.bam"
|
124
|
+
sai_file_fwd = "#{random}_fwd.sai"
|
125
|
+
sai_file_rev = "#{random}_rev.sai"
|
124
126
|
|
125
127
|
options[:account] = options[:project] if options[:account].empty?
|
126
128
|
|
129
|
+
# BWA : ALN
|
130
|
+
BwaCaller.call_aln(options[:mutant_r1],
|
131
|
+
options[:index_prefix],
|
132
|
+
sai_file_fwd,
|
133
|
+
log_file,
|
134
|
+
options[:bwa],
|
135
|
+
job_prefix,
|
136
|
+
options[:account],
|
137
|
+
options[:debug])
|
138
|
+
|
139
|
+
BwaCaller.call_aln(options[:mutant_r2],
|
140
|
+
options[:index_prefix],
|
141
|
+
sai_file_rev,
|
142
|
+
log_file,
|
143
|
+
options[:bwa],
|
144
|
+
job_prefix,
|
145
|
+
options[:account],
|
146
|
+
options[:debug])
|
147
|
+
|
148
|
+
|
127
149
|
# BWA : First step mapping reads to reference
|
128
150
|
BwaCaller.call_paired_end(options[:mutant_r1],
|
129
151
|
options[:mutant_r2],
|
152
|
+
sai_file_fwd,
|
153
|
+
sai_file_rev,
|
130
154
|
bam_file,
|
131
155
|
options[:index_prefix],
|
132
156
|
log_file,
|
@@ -142,7 +166,8 @@ bam_file = bam_file + ".bam"
|
|
142
166
|
SamtoolsIndexing.call(bam_file,
|
143
167
|
job_prefix,
|
144
168
|
options[:account],
|
145
|
-
options[:debug]
|
169
|
+
options[:debug],
|
170
|
+
log_file)
|
146
171
|
|
147
172
|
# Realigne
|
148
173
|
GatkCaller.prepare_realigne(log_file,
|
@@ -1,18 +1,18 @@
|
|
1
1
|
class BwaCaller
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
2
|
+
|
3
|
+
def self.call_paired_end(r1, r2, sai1, sai2, out_file, index, log_file, bwa, samtools, job_prefix,account, debug)
|
4
|
+
cmd = "qsub -o #{log_file} -hold_jid bwa_aln_#{job_prefix} -V -cwd -b y -N bwa_#{job_prefix} -l h_vmem=9G -pe make 3 #{account}\
|
5
|
+
#{bwa} sampe -r '@RG\tID:foo\tSM:bar\tPL:Illumina' #{index} \
|
6
|
+
#{sai1} #{sai2} #{r1} #{r2} \
|
7
|
+
\\| #{samtools} view -Su - \\| #{samtools} sort - #{out_file}"
|
6
8
|
puts cmd
|
7
|
-
system('bash','-c',cmd
|
9
|
+
system('bash','-c', cmd) if debug == 1
|
8
10
|
end
|
9
11
|
|
10
|
-
def self.
|
11
|
-
cmd = "qsub -V -cwd -b y -N
|
12
|
-
|
13
|
-
<(#{bwa} aln #{index} #{r1} 2>>#{log_file}) <(#{bwa} aln #{index} #{r2} 2>>#{log_file} ) \
|
14
|
-
#{r1} #{r2} 2>>#{log_file} | #{samtools} view -Su - 2>>#{log_file} | #{samtools} sort - #{out_file} 2>>#{log_file}"
|
12
|
+
def self.call_aln(read, index, out_file, log_file, bwa, job_prefix, account,debug)
|
13
|
+
cmd = "qsub -o #{log_file} -V -cwd -b y -N bwa_aln_#{job_prefix} -l h_vmem=4G #{account} \
|
14
|
+
#{bwa} aln -f #{out_file} #{index} #{read} "
|
15
15
|
puts cmd
|
16
|
-
system(
|
16
|
+
system(cmd) if debug == 1
|
17
17
|
end
|
18
18
|
end
|
@@ -3,12 +3,11 @@ class GatkCaller
|
|
3
3
|
# Genotyper
|
4
4
|
def self.call(log_dir, gatk, index_fa, read_bam, read_vcf, job_prefix, account, debug)
|
5
5
|
cmd = "echo 'starting GATK for mutant at ' `date` >> #{log_dir}
|
6
|
-
qsub -V -cwd -b y -N genotyper_#{job_prefix} -l h_vmem=3G -hold_jid recalibration_#{job_prefix} #{account}\
|
6
|
+
qsub -o #{log_dir} -V -cwd -b y -N genotyper_#{job_prefix} -l h_vmem=3G -hold_jid recalibration_#{job_prefix} #{account}\
|
7
7
|
#{gatk} -l INFO -R #{index_fa} -T UnifiedGenotyper \
|
8
8
|
-I #{read_bam} \
|
9
9
|
-o #{read_vcf} \
|
10
|
-
--genotype_likelihoods_model BOTH
|
11
|
-
>> #{log_dir} 2>&1 || exit 1"
|
10
|
+
--genotype_likelihoods_model BOTH || exit 1"
|
12
11
|
puts cmd
|
13
12
|
system(cmd) if debug == 1
|
14
13
|
end
|
@@ -16,12 +15,12 @@ class GatkCaller
|
|
16
15
|
# Making recalibration table
|
17
16
|
def self.recalibrate_bam(log_dir ,gatk, index_fa, index_vcf, read_bam, recal_file, job_prefix, account, debug )
|
18
17
|
cmd = "echo 'starting recalibration table ' `date` >> #{log_dir}
|
19
|
-
qsub -V -cwd -b y -N recalibration_table_#{job_prefix} -l h_vmem=4G -hold_jid realignment_#{job_prefix} #{account} \
|
18
|
+
qsub -o #{log_dir} -V -cwd -b y -N recalibration_table_#{job_prefix} -l h_vmem=4G -hold_jid realignment_#{job_prefix} #{account} \
|
20
19
|
#{gatk} -knownSites #{index_vcf} -I #{read_bam} \
|
21
20
|
-R #{index_fa} -T CountCovariates \
|
22
21
|
-cov ReadGroupCovariate -cov QualityScoreCovariate -cov DinucCovariate \
|
23
22
|
-cov CycleCovariate \
|
24
|
-
-recalFile #{recal_file}
|
23
|
+
-recalFile #{recal_file} || exit 1 "
|
25
24
|
puts cmd
|
26
25
|
system(cmd) if debug == 1
|
27
26
|
end
|
@@ -29,13 +28,13 @@ class GatkCaller
|
|
29
28
|
# Using recalibration table
|
30
29
|
def self.table_calibration(log_dir, gatk, index_fa, read_bam, recal_bam, recal_file, job_prefix, account, debug)
|
31
30
|
cmd = "echo 'recalibrating bam_file at ' `date` >> #{log_dir}
|
32
|
-
qsub -V -cwd -b y -N recalibration_#{job_prefix} -l h_vmem=4G -hold_jid recalibration_table_#{job_prefix} #{account} \
|
31
|
+
qsub -V -o #{log_dir} -cwd -b y -N recalibration_#{job_prefix} -l h_vmem=4G -hold_jid recalibration_table_#{job_prefix} #{account} \
|
33
32
|
#{gatk} \
|
34
33
|
-R #{index_fa} \
|
35
34
|
-I #{read_bam} \
|
36
35
|
-T TableRecalibration \
|
37
36
|
-o #{recal_bam} \
|
38
|
-
-recalFile #{recal_file}
|
37
|
+
-recalFile #{recal_file} || exit 1"
|
39
38
|
puts cmd
|
40
39
|
system(cmd) if debug == 1
|
41
40
|
end
|
@@ -43,12 +42,12 @@ class GatkCaller
|
|
43
42
|
# Preparation realignement
|
44
43
|
def self.prepare_realigne(log_dir, gatk, read_bam, index_fa, target_intervals, job_prefix, account, debug)
|
45
44
|
cmd = "echo 'preparing realignement at ' `date` >> #{log_dir}
|
46
|
-
qsub -V -cwd -b y -N prep_realignment_#{job_prefix} -l h_vmem=4G -hold_jid indexing_#{job_prefix} #{account}\
|
45
|
+
qsub -o #{log_dir} -V -cwd -b y -N prep_realignment_#{job_prefix} -l h_vmem=4G -hold_jid indexing_#{job_prefix} #{account}\
|
47
46
|
#{gatk} \
|
48
47
|
-I #{read_bam} \
|
49
48
|
-R #{index_fa} \
|
50
49
|
-T RealignerTargetCreator \
|
51
|
-
-o #{target_intervals}"
|
50
|
+
-o #{target_intervals} || exit 1 "
|
52
51
|
puts cmd
|
53
52
|
system(cmd) if debug == 1
|
54
53
|
end
|
@@ -56,13 +55,13 @@ class GatkCaller
|
|
56
55
|
# Realignment
|
57
56
|
def self.realigne(log_dir, gatk, read_bam, index_fa, target_intervals, realigned_bam, job_prefix, account, debug)
|
58
57
|
cmd = "echo 'preparing realignement at ' `date` >> #{log_dir}
|
59
|
-
qsub -V -cwd -b y -N realignment_#{job_prefix} -l h_vmem=4G -hold_jid prep_realignment_#{job_prefix} #{account} \
|
58
|
+
qsub -o #{log_dir} -V -cwd -b y -N realignment_#{job_prefix} -l h_vmem=4G -hold_jid prep_realignment_#{job_prefix} #{account} \
|
60
59
|
#{gatk} \
|
61
60
|
-I #{read_bam} \
|
62
61
|
-R #{index_fa} \
|
63
62
|
-T IndelRealigner \
|
64
63
|
-targetIntervals #{target_intervals} \
|
65
|
-
-o #{realigned_bam}
|
64
|
+
-o #{realigned_bam} || exit 1"
|
66
65
|
puts cmd
|
67
66
|
system(cmd) if debug == 1
|
68
67
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
class SamtoolsIndexing
|
2
|
-
def self.call(bam_file, job_prefix, account, debug)
|
3
|
-
cmd = "qsub -V -cwd -b y -N indexing_#{job_prefix} -l h_vmem=3G -hold_jid bwa_#{job_prefix} #{account} \
|
2
|
+
def self.call(bam_file, job_prefix, account, debug, log_file)
|
3
|
+
cmd = "qsub -o #{log_file} -V -cwd -b y -N indexing_#{job_prefix} -l h_vmem=3G -hold_jid bwa_#{job_prefix} #{account} \
|
4
4
|
samtools index #{bam_file}"
|
5
5
|
puts cmd
|
6
6
|
system(cmd) if debug == 1
|