mutations_caller_pipeline_aws 0.0.10 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -107,6 +107,7 @@ realigned_bam = "#{random}_realigned.bam"
107
107
  recal_file = "#{random}_recal.csv"
108
108
  recal_bam = "#{random}_recal.bam"
109
109
 
110
+
110
111
  options[:account] = options[:project] if options[:account].empty?
111
112
 
112
113
 
@@ -114,7 +115,8 @@ options[:account] = options[:project] if options[:account].empty?
114
115
  SamtoolsIndexing.call(bam_file,
115
116
  job_prefix,
116
117
  options[:account],
117
- options[:debug])
118
+ options[:debug],
119
+ log_file)
118
120
 
119
121
  # Realigne
120
122
  GatkCaller.prepare_realigne(log_file,
@@ -121,12 +121,36 @@ target_intervals = "#{random}_target.intervals"
121
121
  realigned_bam = "#{random}_realigned.bam"
122
122
  recal_file = "#{random}_recal.csv"
123
123
  recal_bam = "#{random}_recal.bam"
124
+ sai_file_fwd = "#{random}_fwd.sai"
125
+ sai_file_rev = "#{random}_rev.sai"
124
126
 
125
127
  options[:account] = options[:project] if options[:account].empty?
126
128
 
129
+ # BWA : ALN
130
+ BwaCaller.call_aln(options[:mutant_r1],
131
+ options[:index_prefix],
132
+ sai_file_fwd,
133
+ log_file,
134
+ options[:bwa],
135
+ job_prefix,
136
+ options[:account],
137
+ options[:debug])
138
+
139
+ BwaCaller.call_aln(options[:mutant_r2],
140
+ options[:index_prefix],
141
+ sai_file_rev,
142
+ log_file,
143
+ options[:bwa],
144
+ job_prefix,
145
+ options[:account],
146
+ options[:debug])
147
+
148
+
127
149
  # BWA : First step mapping reads to reference
128
150
  BwaCaller.call_paired_end(options[:mutant_r1],
129
151
  options[:mutant_r2],
152
+ sai_file_fwd,
153
+ sai_file_rev,
130
154
  bam_file,
131
155
  options[:index_prefix],
132
156
  log_file,
@@ -142,7 +166,8 @@ bam_file = bam_file + ".bam"
142
166
  SamtoolsIndexing.call(bam_file,
143
167
  job_prefix,
144
168
  options[:account],
145
- options[:debug])
169
+ options[:debug],
170
+ log_file)
146
171
 
147
172
  # Realigne
148
173
  GatkCaller.prepare_realigne(log_file,
@@ -1,18 +1,18 @@
1
1
  class BwaCaller
2
- def self.call_single_end(r1,out_file,index, log_file, bwa, samtools)
3
- cmd = "#{bwa} samse -r '@RG\tID:foo\tSM:bar\tPL:Illumina' #{index} \
4
- <(#{bwa} aln #{index} #{r1} 2>>#{log_file}) \
5
- #{r1} 2>>#{log_file} | #{samtools} view -Su - 2>>#{log_file} | #{samtools} sort - #{out_file} 2>>#{log_file}"
2
+
3
+ def self.call_paired_end(r1, r2, sai1, sai2, out_file, index, log_file, bwa, samtools, job_prefix,account, debug)
4
+ cmd = "qsub -o #{log_file} -hold_jid bwa_aln_#{job_prefix} -V -cwd -b y -N bwa_#{job_prefix} -l h_vmem=9G -pe make 3 #{account}\
5
+ #{bwa} sampe -r '@RG\tID:foo\tSM:bar\tPL:Illumina' #{index} \
6
+ #{sai1} #{sai2} #{r1} #{r2} \
7
+ \\| #{samtools} view -Su - \\| #{samtools} sort - #{out_file}"
6
8
  puts cmd
7
- system('bash','-c',cmd )
9
+ system('bash','-c', cmd) if debug == 1
8
10
  end
9
11
 
10
- def self.call_paired_end(r1, r2, out_file, index, log_file, bwa, samtools, job_prefix,account, debug)
11
- cmd = "qsub -V -cwd -b y -N bwa_#{job_prefix} -l h_vmem=9G -pe make 4 #{account}\
12
- #{bwa} sampe -r '@RG\tID:foo\tSM:bar\tPL:Illumina' #{index} \
13
- <(#{bwa} aln #{index} #{r1} 2>>#{log_file}) <(#{bwa} aln #{index} #{r2} 2>>#{log_file} ) \
14
- #{r1} #{r2} 2>>#{log_file} | #{samtools} view -Su - 2>>#{log_file} | #{samtools} sort - #{out_file} 2>>#{log_file}"
12
+ def self.call_aln(read, index, out_file, log_file, bwa, job_prefix, account,debug)
13
+ cmd = "qsub -o #{log_file} -V -cwd -b y -N bwa_aln_#{job_prefix} -l h_vmem=4G #{account} \
14
+ #{bwa} aln -f #{out_file} #{index} #{read} "
15
15
  puts cmd
16
- system('bash','-c', cmd) if debug == 1
16
+ system(cmd) if debug == 1
17
17
  end
18
18
  end
@@ -3,12 +3,11 @@ class GatkCaller
3
3
  # Genotyper
4
4
  def self.call(log_dir, gatk, index_fa, read_bam, read_vcf, job_prefix, account, debug)
5
5
  cmd = "echo 'starting GATK for mutant at ' `date` >> #{log_dir}
6
- qsub -V -cwd -b y -N genotyper_#{job_prefix} -l h_vmem=3G -hold_jid recalibration_#{job_prefix} #{account}\
6
+ qsub -o #{log_dir} -V -cwd -b y -N genotyper_#{job_prefix} -l h_vmem=3G -hold_jid recalibration_#{job_prefix} #{account}\
7
7
  #{gatk} -l INFO -R #{index_fa} -T UnifiedGenotyper \
8
8
  -I #{read_bam} \
9
9
  -o #{read_vcf} \
10
- --genotype_likelihoods_model BOTH \
11
- >> #{log_dir} 2>&1 || exit 1"
10
+ --genotype_likelihoods_model BOTH || exit 1"
12
11
  puts cmd
13
12
  system(cmd) if debug == 1
14
13
  end
@@ -16,12 +15,12 @@ class GatkCaller
16
15
  # Making recalibration table
17
16
  def self.recalibrate_bam(log_dir ,gatk, index_fa, index_vcf, read_bam, recal_file, job_prefix, account, debug )
18
17
  cmd = "echo 'starting recalibration table ' `date` >> #{log_dir}
19
- qsub -V -cwd -b y -N recalibration_table_#{job_prefix} -l h_vmem=4G -hold_jid realignment_#{job_prefix} #{account} \
18
+ qsub -o #{log_dir} -V -cwd -b y -N recalibration_table_#{job_prefix} -l h_vmem=4G -hold_jid realignment_#{job_prefix} #{account} \
20
19
  #{gatk} -knownSites #{index_vcf} -I #{read_bam} \
21
20
  -R #{index_fa} -T CountCovariates \
22
21
  -cov ReadGroupCovariate -cov QualityScoreCovariate -cov DinucCovariate \
23
22
  -cov CycleCovariate \
24
- -recalFile #{recal_file} >> #{log_dir} 2>&1 || exit 1 "
23
+ -recalFile #{recal_file} || exit 1 "
25
24
  puts cmd
26
25
  system(cmd) if debug == 1
27
26
  end
@@ -29,13 +28,13 @@ class GatkCaller
29
28
  # Using recalibration table
30
29
  def self.table_calibration(log_dir, gatk, index_fa, read_bam, recal_bam, recal_file, job_prefix, account, debug)
31
30
  cmd = "echo 'recalibrating bam_file at ' `date` >> #{log_dir}
32
- qsub -V -cwd -b y -N recalibration_#{job_prefix} -l h_vmem=4G -hold_jid recalibration_table_#{job_prefix} #{account} \
31
+ qsub -V -o #{log_dir} -cwd -b y -N recalibration_#{job_prefix} -l h_vmem=4G -hold_jid recalibration_table_#{job_prefix} #{account} \
33
32
  #{gatk} \
34
33
  -R #{index_fa} \
35
34
  -I #{read_bam} \
36
35
  -T TableRecalibration \
37
36
  -o #{recal_bam} \
38
- -recalFile #{recal_file} >> #{log_dir} 2>&1 || exit 1"
37
+ -recalFile #{recal_file} || exit 1"
39
38
  puts cmd
40
39
  system(cmd) if debug == 1
41
40
  end
@@ -43,12 +42,12 @@ class GatkCaller
43
42
  # Preparation realignement
44
43
  def self.prepare_realigne(log_dir, gatk, read_bam, index_fa, target_intervals, job_prefix, account, debug)
45
44
  cmd = "echo 'preparing realignement at ' `date` >> #{log_dir}
46
- qsub -V -cwd -b y -N prep_realignment_#{job_prefix} -l h_vmem=4G -hold_jid indexing_#{job_prefix} #{account}\
45
+ qsub -o #{log_dir} -V -cwd -b y -N prep_realignment_#{job_prefix} -l h_vmem=4G -hold_jid indexing_#{job_prefix} #{account}\
47
46
  #{gatk} \
48
47
  -I #{read_bam} \
49
48
  -R #{index_fa} \
50
49
  -T RealignerTargetCreator \
51
- -o #{target_intervals}"
50
+ -o #{target_intervals} || exit 1 "
52
51
  puts cmd
53
52
  system(cmd) if debug == 1
54
53
  end
@@ -56,13 +55,13 @@ class GatkCaller
56
55
  # Realignment
57
56
  def self.realigne(log_dir, gatk, read_bam, index_fa, target_intervals, realigned_bam, job_prefix, account, debug)
58
57
  cmd = "echo 'preparing realignement at ' `date` >> #{log_dir}
59
- qsub -V -cwd -b y -N realignment_#{job_prefix} -l h_vmem=4G -hold_jid prep_realignment_#{job_prefix} #{account} \
58
+ qsub -o #{log_dir} -V -cwd -b y -N realignment_#{job_prefix} -l h_vmem=4G -hold_jid prep_realignment_#{job_prefix} #{account} \
60
59
  #{gatk} \
61
60
  -I #{read_bam} \
62
61
  -R #{index_fa} \
63
62
  -T IndelRealigner \
64
63
  -targetIntervals #{target_intervals} \
65
- -o #{realigned_bam} >> #{log_dir} 2>&1 || exit 1"
64
+ -o #{realigned_bam} || exit 1"
66
65
  puts cmd
67
66
  system(cmd) if debug == 1
68
67
  end
@@ -1,6 +1,6 @@
1
1
  class SamtoolsIndexing
2
- def self.call(bam_file, job_prefix, account, debug)
3
- cmd = "qsub -V -cwd -b y -N indexing_#{job_prefix} -l h_vmem=3G -hold_jid bwa_#{job_prefix} #{account} \
2
+ def self.call(bam_file, job_prefix, account, debug, log_file)
3
+ cmd = "qsub -o #{log_file} -V -cwd -b y -N indexing_#{job_prefix} -l h_vmem=3G -hold_jid bwa_#{job_prefix} #{account} \
4
4
  samtools index #{bam_file}"
5
5
  puts cmd
6
6
  system(cmd) if debug == 1
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mutations_caller_pipeline_aws
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.10
4
+ version: 0.0.11
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: