mutations_caller_pipeline_aws 0.0.10 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
    
        data/bin/gatk_pipe_only_aws
    CHANGED
    
    | @@ -107,6 +107,7 @@ realigned_bam = "#{random}_realigned.bam" | |
| 107 107 | 
             
            recal_file = "#{random}_recal.csv"
         | 
| 108 108 | 
             
            recal_bam = "#{random}_recal.bam"
         | 
| 109 109 |  | 
| 110 | 
            +
             | 
| 110 111 | 
             
            options[:account] = options[:project] if options[:account].empty?
         | 
| 111 112 |  | 
| 112 113 |  | 
| @@ -114,7 +115,8 @@ options[:account] = options[:project] if options[:account].empty? | |
| 114 115 | 
             
            SamtoolsIndexing.call(bam_file,
         | 
| 115 116 | 
             
                                  job_prefix,
         | 
| 116 117 | 
             
                                  options[:account],
         | 
| 117 | 
            -
                                  options[:debug] | 
| 118 | 
            +
                                  options[:debug],
         | 
| 119 | 
            +
                                  log_file)
         | 
| 118 120 |  | 
| 119 121 | 
             
            # Realigne
         | 
| 120 122 | 
             
            GatkCaller.prepare_realigne(log_file,
         | 
| @@ -121,12 +121,36 @@ target_intervals = "#{random}_target.intervals" | |
| 121 121 | 
             
            realigned_bam = "#{random}_realigned.bam"
         | 
| 122 122 | 
             
            recal_file = "#{random}_recal.csv"
         | 
| 123 123 | 
             
            recal_bam = "#{random}_recal.bam"
         | 
| 124 | 
            +
            sai_file_fwd = "#{random}_fwd.sai"
         | 
| 125 | 
            +
            sai_file_rev = "#{random}_rev.sai"
         | 
| 124 126 |  | 
| 125 127 | 
             
            options[:account] = options[:project] if options[:account].empty?
         | 
| 126 128 |  | 
| 129 | 
            +
            # BWA : ALN
         | 
| 130 | 
            +
            BwaCaller.call_aln(options[:mutant_r1],
         | 
| 131 | 
            +
                               options[:index_prefix],
         | 
| 132 | 
            +
                               sai_file_fwd,
         | 
| 133 | 
            +
                               log_file,
         | 
| 134 | 
            +
                               options[:bwa],
         | 
| 135 | 
            +
                               job_prefix,
         | 
| 136 | 
            +
                               options[:account],
         | 
| 137 | 
            +
                               options[:debug])
         | 
| 138 | 
            +
             | 
| 139 | 
            +
            BwaCaller.call_aln(options[:mutant_r2],
         | 
| 140 | 
            +
                               options[:index_prefix],
         | 
| 141 | 
            +
                               sai_file_rev,
         | 
| 142 | 
            +
                               log_file,
         | 
| 143 | 
            +
                               options[:bwa],
         | 
| 144 | 
            +
                               job_prefix,
         | 
| 145 | 
            +
                               options[:account],
         | 
| 146 | 
            +
                               options[:debug])
         | 
| 147 | 
            +
             | 
| 148 | 
            +
             | 
| 127 149 | 
             
            # BWA : First step mapping reads to reference
         | 
| 128 150 | 
             
            BwaCaller.call_paired_end(options[:mutant_r1],
         | 
| 129 151 | 
             
                                      options[:mutant_r2],
         | 
| 152 | 
            +
                                      sai_file_fwd,
         | 
| 153 | 
            +
                                      sai_file_rev,
         | 
| 130 154 | 
             
                                      bam_file,
         | 
| 131 155 | 
             
                                      options[:index_prefix],
         | 
| 132 156 | 
             
                                      log_file,
         | 
| @@ -142,7 +166,8 @@ bam_file = bam_file + ".bam" | |
| 142 166 | 
             
            SamtoolsIndexing.call(bam_file,
         | 
| 143 167 | 
             
                                  job_prefix,
         | 
| 144 168 | 
             
                                  options[:account],
         | 
| 145 | 
            -
                                  options[:debug] | 
| 169 | 
            +
                                  options[:debug],
         | 
| 170 | 
            +
                                  log_file)
         | 
| 146 171 |  | 
| 147 172 | 
             
            # Realigne
         | 
| 148 173 | 
             
            GatkCaller.prepare_realigne(log_file,
         | 
| @@ -1,18 +1,18 @@ | |
| 1 1 | 
             
            class BwaCaller
         | 
| 2 | 
            -
             | 
| 3 | 
            -
             | 
| 4 | 
            -
             | 
| 5 | 
            -
             | 
| 2 | 
            +
             | 
| 3 | 
            +
              def self.call_paired_end(r1, r2, sai1, sai2,  out_file, index, log_file, bwa, samtools, job_prefix,account, debug)
         | 
| 4 | 
            +
                cmd = "qsub -o #{log_file} -hold_jid bwa_aln_#{job_prefix} -V -cwd -b y -N bwa_#{job_prefix} -l h_vmem=9G -pe make 3  #{account}\
         | 
| 5 | 
            +
                       #{bwa} sampe -r '@RG\tID:foo\tSM:bar\tPL:Illumina' #{index} \
         | 
| 6 | 
            +
                      #{sai1} #{sai2}  #{r1} #{r2} \
         | 
| 7 | 
            +
                      \\| #{samtools} view -Su - \\| #{samtools} sort - #{out_file}"
         | 
| 6 8 | 
             
                puts cmd
         | 
| 7 | 
            -
                system('bash','-c',cmd  | 
| 9 | 
            +
                system('bash','-c', cmd) if debug == 1
         | 
| 8 10 | 
             
              end
         | 
| 9 11 |  | 
| 10 | 
            -
              def self. | 
| 11 | 
            -
                cmd = "qsub -V -cwd -b y -N  | 
| 12 | 
            -
             | 
| 13 | 
            -
                    <(#{bwa} aln #{index} #{r1} 2>>#{log_file}) <(#{bwa} aln #{index} #{r2} 2>>#{log_file} ) \
         | 
| 14 | 
            -
                    #{r1} #{r2} 2>>#{log_file} | #{samtools} view -Su - 2>>#{log_file} | #{samtools} sort - #{out_file} 2>>#{log_file}"
         | 
| 12 | 
            +
              def self.call_aln(read, index, out_file, log_file, bwa, job_prefix, account,debug)
         | 
| 13 | 
            +
                cmd = "qsub -o #{log_file} -V -cwd -b y -N bwa_aln_#{job_prefix} -l h_vmem=4G #{account} \
         | 
| 14 | 
            +
                       #{bwa} aln -f #{out_file} #{index} #{read} "
         | 
| 15 15 | 
             
                puts cmd
         | 
| 16 | 
            -
                system( | 
| 16 | 
            +
                system(cmd) if debug == 1
         | 
| 17 17 | 
             
              end
         | 
| 18 18 | 
             
            end
         | 
| @@ -3,12 +3,11 @@ class GatkCaller | |
| 3 3 | 
             
              # Genotyper
         | 
| 4 4 | 
             
              def self.call(log_dir, gatk, index_fa, read_bam, read_vcf, job_prefix, account, debug)
         | 
| 5 5 | 
             
                cmd = "echo 'starting GATK for mutant at ' `date` >> #{log_dir}
         | 
| 6 | 
            -
                  qsub -V -cwd -b y -N genotyper_#{job_prefix} -l h_vmem=3G -hold_jid recalibration_#{job_prefix} #{account}\
         | 
| 6 | 
            +
                  qsub -o #{log_dir} -V -cwd -b y -N genotyper_#{job_prefix} -l h_vmem=3G -hold_jid recalibration_#{job_prefix} #{account}\
         | 
| 7 7 | 
             
                  #{gatk} -l INFO -R #{index_fa} -T UnifiedGenotyper \
         | 
| 8 8 | 
             
                  -I #{read_bam} \
         | 
| 9 9 | 
             
                  -o #{read_vcf} \
         | 
| 10 | 
            -
                  --genotype_likelihoods_model BOTH  | 
| 11 | 
            -
                  >> #{log_dir} 2>&1 || exit 1"
         | 
| 10 | 
            +
                  --genotype_likelihoods_model BOTH || exit 1"
         | 
| 12 11 | 
             
                puts cmd
         | 
| 13 12 | 
             
                system(cmd) if debug == 1
         | 
| 14 13 | 
             
              end
         | 
| @@ -16,12 +15,12 @@ class GatkCaller | |
| 16 15 | 
             
              # Making recalibration table
         | 
| 17 16 | 
             
              def self.recalibrate_bam(log_dir ,gatk, index_fa, index_vcf, read_bam, recal_file, job_prefix, account, debug )
         | 
| 18 17 | 
             
                cmd = "echo 'starting recalibration table ' `date` >> #{log_dir}
         | 
| 19 | 
            -
                  qsub -V -cwd -b y -N recalibration_table_#{job_prefix} -l h_vmem=4G  -hold_jid realignment_#{job_prefix} #{account} \
         | 
| 18 | 
            +
                  qsub -o #{log_dir} -V -cwd -b y -N recalibration_table_#{job_prefix} -l h_vmem=4G  -hold_jid realignment_#{job_prefix} #{account} \
         | 
| 20 19 | 
             
                  #{gatk} -knownSites #{index_vcf} -I #{read_bam} \
         | 
| 21 20 | 
             
                  -R #{index_fa} -T CountCovariates \
         | 
| 22 21 | 
             
                  -cov ReadGroupCovariate -cov QualityScoreCovariate -cov DinucCovariate \
         | 
| 23 22 | 
             
                  -cov CycleCovariate \
         | 
| 24 | 
            -
                  -recalFile #{recal_file} | 
| 23 | 
            +
                  -recalFile #{recal_file}  || exit 1 "
         | 
| 25 24 | 
             
                puts cmd
         | 
| 26 25 | 
             
                system(cmd) if debug == 1
         | 
| 27 26 | 
             
              end
         | 
| @@ -29,13 +28,13 @@ class GatkCaller | |
| 29 28 | 
             
              # Using recalibration table
         | 
| 30 29 | 
             
              def self.table_calibration(log_dir, gatk, index_fa, read_bam, recal_bam, recal_file, job_prefix, account, debug)
         | 
| 31 30 | 
             
                cmd = "echo 'recalibrating bam_file at ' `date` >> #{log_dir}
         | 
| 32 | 
            -
                  qsub -V -cwd -b y -N recalibration_#{job_prefix} -l h_vmem=4G -hold_jid recalibration_table_#{job_prefix} #{account} \
         | 
| 31 | 
            +
                  qsub -V -o #{log_dir} -cwd -b y -N recalibration_#{job_prefix} -l h_vmem=4G -hold_jid recalibration_table_#{job_prefix} #{account} \
         | 
| 33 32 | 
             
                  #{gatk} \
         | 
| 34 33 | 
             
                  -R #{index_fa} \
         | 
| 35 34 | 
             
                  -I #{read_bam} \
         | 
| 36 35 | 
             
                  -T TableRecalibration \
         | 
| 37 36 | 
             
                  -o #{recal_bam} \
         | 
| 38 | 
            -
                  -recalFile #{recal_file}  | 
| 37 | 
            +
                  -recalFile #{recal_file} || exit 1"
         | 
| 39 38 | 
             
                puts cmd
         | 
| 40 39 | 
             
                system(cmd) if debug == 1
         | 
| 41 40 | 
             
              end
         | 
| @@ -43,12 +42,12 @@ class GatkCaller | |
| 43 42 | 
             
              # Preparation realignement
         | 
| 44 43 | 
             
              def self.prepare_realigne(log_dir, gatk, read_bam, index_fa, target_intervals, job_prefix, account, debug)
         | 
| 45 44 | 
             
                cmd = "echo 'preparing realignement at ' `date` >> #{log_dir}
         | 
| 46 | 
            -
                  qsub -V -cwd -b y -N prep_realignment_#{job_prefix} -l h_vmem=4G -hold_jid indexing_#{job_prefix} #{account}\
         | 
| 45 | 
            +
                  qsub -o #{log_dir} -V -cwd -b y -N prep_realignment_#{job_prefix} -l h_vmem=4G -hold_jid indexing_#{job_prefix} #{account}\
         | 
| 47 46 | 
             
                  #{gatk} \
         | 
| 48 47 | 
             
                  -I #{read_bam} \
         | 
| 49 48 | 
             
                  -R #{index_fa} \
         | 
| 50 49 | 
             
                  -T RealignerTargetCreator \
         | 
| 51 | 
            -
                  -o #{target_intervals}"
         | 
| 50 | 
            +
                  -o #{target_intervals} || exit 1 "
         | 
| 52 51 | 
             
                puts cmd
         | 
| 53 52 | 
             
                system(cmd) if debug == 1
         | 
| 54 53 | 
             
              end
         | 
| @@ -56,13 +55,13 @@ class GatkCaller | |
| 56 55 | 
             
              # Realignment
         | 
| 57 56 | 
             
              def self.realigne(log_dir, gatk, read_bam, index_fa, target_intervals, realigned_bam, job_prefix, account, debug)
         | 
| 58 57 | 
             
                cmd = "echo 'preparing realignement at ' `date` >> #{log_dir}
         | 
| 59 | 
            -
                  qsub -V -cwd -b y -N realignment_#{job_prefix} -l h_vmem=4G -hold_jid prep_realignment_#{job_prefix} #{account} \
         | 
| 58 | 
            +
                  qsub -o #{log_dir} -V -cwd -b y -N realignment_#{job_prefix} -l h_vmem=4G -hold_jid prep_realignment_#{job_prefix} #{account} \
         | 
| 60 59 | 
             
                  #{gatk} \
         | 
| 61 60 | 
             
                  -I #{read_bam} \
         | 
| 62 61 | 
             
                  -R #{index_fa} \
         | 
| 63 62 | 
             
                  -T IndelRealigner \
         | 
| 64 63 | 
             
                  -targetIntervals #{target_intervals} \
         | 
| 65 | 
            -
                  -o #{realigned_bam} | 
| 64 | 
            +
                  -o #{realigned_bam}  || exit 1"
         | 
| 66 65 | 
             
                puts cmd
         | 
| 67 66 | 
             
                system(cmd) if debug == 1
         | 
| 68 67 | 
             
              end
         | 
| @@ -1,6 +1,6 @@ | |
| 1 1 | 
             
            class SamtoolsIndexing
         | 
| 2 | 
            -
              def self.call(bam_file, job_prefix, account, debug)
         | 
| 3 | 
            -
                cmd = "qsub -V -cwd -b y -N indexing_#{job_prefix} -l h_vmem=3G -hold_jid bwa_#{job_prefix} #{account} \
         | 
| 2 | 
            +
              def self.call(bam_file, job_prefix, account, debug, log_file)
         | 
| 3 | 
            +
                cmd = "qsub -o #{log_file} -V -cwd -b y -N indexing_#{job_prefix} -l h_vmem=3G -hold_jid bwa_#{job_prefix} #{account} \
         | 
| 4 4 | 
             
                  samtools index #{bam_file}"
         | 
| 5 5 | 
             
                puts cmd
         | 
| 6 6 | 
             
                system(cmd) if debug == 1
         |