mutations_caller_pipeline 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,142 @@
1
1
#!/usr/bin/env ruby
# Command-line driver of the mutations caller pipeline.
#
# Parses the read files and the YAML config, then runs the steps in order:
# BWA mapping, samtools indexing, GATK realignment, GATK recalibration and
# GATK genotyping. Every step shells out to an external tool; the script stops
# as soon as one of them reports a failure.

require 'mutations_caller_pipeline'
require 'optparse'
require 'rubygems'
# The stdlib feature is named 'yaml'; requiring 'YAML' raises LoadError on
# case-sensitive file systems.
require 'yaml'

usage =<<EOF
_________________________________________________________________________________________________

  #{$0}
        -m mutant_r1.fq -n mutant_r2.fq
        [-w wildtype_r1.fq -x wildtype_r2.fq]
        -c config.yml -v raw_vcf_file
_________________________________________________________________________________________________

  #{$0} ...
  ... is a tool to find mutations between the reference gene and a given test
  population. To run this tool you must have bwa, samtools and GATK installed.
  Also you should have the indices for bwa and GATK prepared.
  NOTE: Only paired end reads are supported!

  config.yml should look like this:
  # config.yml
  index_prefix: "path/to/prefix"
  index_fa: "path/to/genome.fa"
  index_vcf: "path/to/known_sites.vcf"
  annotation_file: "path/to/annotation_file"
  bwa: "path/to/bwa"
  samtools: "path/to/samtools"
  gatk: "path/to/GenomeAnalysisTK.jar"
_________________________________________________________________________________________________

EOF

# Every key starts out as nil so that the mandatory check below can tell which
# values were supplied neither on the command line nor in the config file.
options = { :mutant_r1        => nil,
            :mutant_r2        => nil,
            :wildtype_r1      => nil,
            :wildtype_r2      => nil,
            :index_prefix     => nil,
            :index_fa         => nil,
            :index_vcf        => nil,
            :annotation_file  => nil,
            :samtools         => nil,
            :gatk             => nil,
            :bwa              => nil,
            :vcf              => nil
          }

optparse = OptionParser.new do |opts|
  opts.banner = usage

  opts.on("-m", "--fwd_read_mutant DIR", :REQUIRED, String, "Path to fwd read of mutant") do |i|
    options[:mutant_r1] = i
  end

  opts.on("-n", "--rev_read_mutant DIR", :REQUIRED, String, "Path to rev read of mutant") do |i|
    options[:mutant_r2] = i
  end

  opts.on("-w", "--fwd_read_wildtype DIR", String, "Path to fwd read of wildtype, not mandatory") do |i|
    options[:wildtype_r1] = i if i
  end

  opts.on("-x", "--rev_read_wildtype DIR", String, "Path to rev read of wildtype, not mandatory") do |i|
    options[:wildtype_r2] = i if i
  end

  opts.on("-c", "--config DIR", String, "Set config file") do |path|
    # load_file opens and closes the file itself; an empty config file yields
    # a falsy value, which is treated as "no settings".
    config = YAML.load_file(path) || {}
    options.merge!(Hash[config.map { |k, v| [k.to_sym, v] }])
  end

  opts.on("-v","--vcf [PATH]", "Output of pipeline") do |i|
    options[:vcf] = i
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end

begin
  optparse.parse!
  mandatory = [:mutant_r1, :mutant_r2, :index_prefix, :annotation_file, :bwa, :samtools, :gatk, :vcf, :index_vcf, :index_fa]
  missing = mandatory.select { |param| options[param].nil? }
  unless missing.empty?
    puts "\nMissing options given or missing in config_file: \n\t#{missing.join(",\n\t")}"
    puts optparse
    exit 1 # a usage error must not look like success to the calling shell
  end
rescue OptionParser::ParseError => e
  # ParseError is the common superclass of InvalidOption, MissingArgument,
  # InvalidArgument and the other option-parsing errors.
  puts e.to_s
  puts optparse
  exit 1
end

# pipeline starts here

# tmp files for output
random            = (rand*1000000).floor.to_s
mutant_prefix     = "mutant_#{random}"
log_file          = "#{random}.log"
target_intervals  = "#{random}_target.intervals"
realigned_bam     = "#{random}_realigned.bam"
recal_file        = "#{random}_recal.csv"
recal_bam         = "#{random}_recal.bam"

# Stops the run when an external step did not succeed; each later step
# consumes the output file of an earlier one.
check = lambda do |step, succeeded|
  abort("#{step} failed; see #{log_file} for details.") unless succeeded
end

# BWA : First step mapping reads to reference
# The output argument is a prefix; the resulting file is addressed below as
# "<prefix>.bam".
check.call("BWA mapping of mutant reads",
           BwaCaller.call_paired_end(options[:mutant_r1],
                                     options[:mutant_r2],
                                     mutant_prefix,
                                     options[:index_prefix],
                                     log_file,
                                     options[:bwa],
                                     options[:samtools]))
bam_file = "#{mutant_prefix}.bam"

bam_file_wildtype = nil
if options[:wildtype_r1] && options[:wildtype_r2]
  # Pass a bare prefix exactly like the mutant run; passing "….bam" here made
  # the indexing step below look for a file name the sort step never wrote.
  wildtype_prefix = "wildtype_#{random}"
  check.call("BWA mapping of wildtype reads",
             BwaCaller.call_paired_end(options[:wildtype_r1],
                                       options[:wildtype_r2],
                                       wildtype_prefix,
                                       options[:index_prefix],
                                       log_file,
                                       options[:bwa],
                                       options[:samtools]))
  bam_file_wildtype = "#{wildtype_prefix}.bam"
end

# Indexing
check.call("Indexing of #{bam_file}", SamtoolsIndexing.call(bam_file))
if bam_file_wildtype
  check.call("Indexing of #{bam_file_wildtype}", SamtoolsIndexing.call(bam_file_wildtype))
end

# Realigne
check.call("GATK RealignerTargetCreator",
           GatkCaller.prepare_realigne(log_file, options[:gatk], bam_file, options[:index_fa], target_intervals))
check.call("GATK IndelRealigner",
           GatkCaller.realigne(log_file, options[:gatk], bam_file, options[:index_fa], target_intervals, realigned_bam))

# Recalibration
check.call("GATK CountCovariates",
           GatkCaller.recalibrate_bam(log_file, options[:gatk], options[:index_fa], options[:index_vcf], realigned_bam, recal_file))
check.call("GATK TableRecalibration",
           GatkCaller.table_calibration(log_file, options[:gatk], options[:index_fa], realigned_bam, recal_bam, recal_file))

# GATK: finding mutations
check.call("GATK UnifiedGenotyper",
           GatkCaller.call(log_file, options[:gatk], options[:index_fa], recal_bam, options[:vcf]))
@@ -1,10 +1,11 @@
1
+ require 'mutations_caller_pipeline/bwa_caller'
2
+ require 'mutations_caller_pipeline/gatk_caller'
3
+ require 'mutations_caller_pipeline/samtools_indexing'
4
+
1
5
# Top-level class of the gem; currently it only exposes a greeting.
class MutationsCallerPipeline
  class << self
    # Returns the fixed greeting string.
    def hi
      'Hello World!'
    end
  end
end
6
10
 
7
- require "mutations_caller_pipeline/bwa_caller"
8
- require "mutations_caller_pipeline/gatk_caller"
9
- require "mutations_caller_pipeline/samtools_indexing"
10
- require "mutations_caller_pipeline/location_file"
11
+
@@ -0,0 +1,17 @@
1
# Maps reads with bwa and pipes the alignment through samtools view and
# samtools sort, all inside a single bash pipeline.
#
# Every argument is interpolated into the shell command unescaped, so paths
# must not contain whitespace or shell metacharacters.
class BwaCaller
  # Read group header handed to bwa via -r. The "\t" escapes are expanded by
  # Ruby, so the shell receives real TAB characters.
  READ_GROUP = "@RG\tID:foo\tSM:bar\tPL:Illumina"

  # Maps single-end reads.
  #
  # r1       - path of the read file
  # out_file - output argument given to "samtools sort" (presumably a prefix
  #            to which samtools appends ".bam" - confirm for the samtools
  #            version in use)
  # index    - bwa index prefix
  # log_file - file that stderr of every stage is appended to
  # bwa      - bwa executable
  # samtools - samtools executable
  #
  # Returns the result of Kernel#system: true when every pipeline stage exited
  # with status 0, false otherwise, nil when bash could not be started.
  def self.call_single_end(r1, out_file, index, log_file, bwa, samtools)
    cmd = "#{bwa} samse -r '#{READ_GROUP}' #{index} " \
          "<(#{bwa} aln #{index} #{r1} 2>>#{log_file}) " \
          "#{r1} 2>>#{log_file} | #{sort_stage(samtools, out_file, log_file)}"
    run(cmd)
  end

  # Maps paired-end reads; r1 and r2 are the two read files, the remaining
  # arguments and the return value are as for call_single_end.
  def self.call_paired_end(r1, r2, out_file, index, log_file, bwa, samtools)
    # The former "|| exit 1" inside the first process substitution was
    # dropped: the exit status of a <( ) subshell is never seen by the
    # pipeline, and the second substitution never had it.
    cmd = "#{bwa} sampe -r '#{READ_GROUP}' #{index} " \
          "<(#{bwa} aln #{index} #{r1} 2>>#{log_file}) " \
          "<(#{bwa} aln #{index} #{r2} 2>>#{log_file}) " \
          "#{r1} #{r2} 2>>#{log_file} | #{sort_stage(samtools, out_file, log_file)}"
    run(cmd)
  end

  # Builds the shared tail of both pipelines: SAM on stdin -> sorted BAM.
  def self.sort_stage(samtools, out_file, log_file)
    "#{samtools} view -Su - 2>>#{log_file} | #{samtools} sort - #{out_file} 2>>#{log_file}"
  end
  private_class_method :sort_stage

  # Prints the command and runs it in bash (needed for process substitution).
  # pipefail makes the pipeline fail when ANY stage fails; without it only the
  # status of the final "samtools sort" was reported and a crashing bwa went
  # unnoticed.
  def self.run(cmd)
    puts cmd
    system('bash', '-o', 'pipefail', '-c', cmd)
  end
  private_class_method :run
end
@@ -0,0 +1,57 @@
1
# Thin wrappers around the GATK walkers used by the pipeline. Each method
# builds one shell command, prints it and runs it with Kernel#system.
#
# Shared parameters:
#   log_dir  - despite its name this is used as a file: stdout and stderr of
#              the command are appended to it with ">>"
#   gatk     - path handed to "java -jar"
#   index_fa - reference passed to GATK as -R
#
# All arguments are interpolated into the shell command unescaped. Every
# method returns the result of Kernel#system (true on exit status 0, false
# otherwise).
class GatkCaller
  # INDEX is normal genom.fa
  # Runs UnifiedGenotyper on read_bam and writes the calls to read_vcf.
  def self.call(log_dir, gatk, index_fa, read_bam, read_vcf)
    # The unescaped line break after the echo separates two shell commands:
    # first a timestamp line is appended to the log, then GATK is started.
    cmd = "echo 'starting GATK for mutant at ' `date` >> #{log_dir}
      java -Xmx4g -jar #{gatk} -l INFO -R #{index_fa} -T UnifiedGenotyper \
      -I #{read_bam} \
      -o #{read_vcf} \
      --genotype_likelihoods_model BOTH \
      >> #{log_dir} 2>&1 || exit 1"
    run(cmd)
  end

  # Runs CountCovariates on read_bam with index_vcf as -knownSites and writes
  # the covariate table to recal_file.
  def self.recalibrate_bam(log_dir ,gatk, index_fa, index_vcf, read_bam, recal_file )
    cmd = "echo 'starting GATK for mutant at ' `date` >> #{log_dir}
      java -Xmx4g -jar #{gatk} -knownSites #{index_vcf} -I #{read_bam} \
      -R #{index_fa} -T CountCovariates \
      -cov ReadGroupCovariate -cov QualityScoreCovariate -cov DinucCovariate \
      -cov CycleCovariate \
      -recalFile #{recal_file} >> #{log_dir} 2>&1 || exit 1 "
    run(cmd)
  end

  # Runs TableRecalibration: applies recal_file to read_bam and writes
  # recal_bam.
  def self.table_calibration(log_dir, gatk, index_fa, read_bam, recal_bam, recal_file)
    cmd = "java -Xmx4g -jar #{gatk} \
      -R #{index_fa} \
      -I #{read_bam} \
      -T TableRecalibration \
      -o #{recal_bam} \
      -recalFile #{recal_file} >> #{log_dir} 2>&1 || exit 1"
    run(cmd)
  end

  # Runs RealignerTargetCreator on read_bam and writes target_intervals.
  def self.prepare_realigne(log_dir, gatk, read_bam, index_fa, target_intervals)
    # Output is now appended to the log like in every other step; before,
    # log_dir was accepted but ignored and GATK wrote to the terminal.
    cmd = "java -Xmx2g -jar #{gatk} \
      -I #{read_bam} \
      -R #{index_fa} \
      -T RealignerTargetCreator \
      -o #{target_intervals} >> #{log_dir} 2>&1 || exit 1"
    run(cmd)
  end

  # Runs IndelRealigner on read_bam using target_intervals and writes
  # realigned_bam.
  def self.realigne(log_dir, gatk, read_bam, index_fa, target_intervals, realigned_bam)
    cmd = "java -Xmx4g -jar #{gatk} \
      -I #{read_bam} \
      -R #{index_fa} \
      -T IndelRealigner \
      -targetIntervals #{target_intervals} \
      -o #{realigned_bam} >> #{log_dir} 2>&1 || exit 1"
    run(cmd)
  end

  # Echoes the command for the user, then executes it through the shell.
  def self.run(cmd)
    puts cmd
    system(cmd)
  end
  private_class_method :run
end
@@ -0,0 +1,21 @@
1
# Extracts the contig names from the header of a VCF file.
class LocationFile
  # Matches a "##contig=<ID=name" header entry; group 1 is the name
  # (word characters only).
  CONTIG_ID = /##contig=<ID=+(\w+)/

  # Reads the leading lines of vcf_file that contain a '#', collects the ID of
  # every "##contig=<ID=..." entry among them and writes the IDs to
  # location_file_output, one per line (no trailing newline).
  #
  # vcf_file             - path of the VCF file to read
  # location_file_output - path of the file to (over)write
  #
  # Returns nil. Raises the usual Errno errors when a file cannot be opened.
  def self.create(vcf_file, location_file_output)
    # The second parameter used to be spelled "loction_file_output" while the
    # body referred to "location_file_output", so every call raised NameError.
    locus = []

    # each_line simply ends at end-of-file; the former readline loop raised
    # EOFError for files that consist of header lines only. The block form
    # also closes the handle when an error occurs.
    File.open(vcf_file) do |locations|
      locations.each_line do |line|
        break unless line.include?('#') # first line without '#' ends the header
        contig = line[CONTIG_ID, 1]
        locus << contig if contig
      end
    end

    File.open(location_file_output, 'w') { |locus_file| locus_file.write(locus.join("\n")) }
    nil
  end
end
@@ -0,0 +1,7 @@
1
# Creates the index of a BAM file with "samtools index".
class SamtoolsIndexing
  # bam_file - path of the BAM file to index
  # samtools - samtools executable to run; defaults to the one found on PATH,
  #            which is what the one-argument form always used. Callers that
  #            have a configured samtools path can pass it here.
  #
  # Both values are interpolated into the command unescaped.
  #
  # Returns the result of Kernel#system: true on exit status 0, false on any
  # other status, nil when the command could not be executed.
  def self.call(bam_file, samtools = 'samtools')
    cmd = "#{samtools} index #{bam_file}"
    puts cmd
    system(cmd)
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mutations_caller_pipeline
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-01-20 00:00:00.000000000Z
12
+ date: 2012-01-20 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: Using BWA to align and GATK to call the bases
15
15
  email:
@@ -19,9 +19,12 @@ executables:
19
19
  extensions: []
20
20
  extra_rdoc_files: []
21
21
  files:
22
- - lib/mutations_caller_pipeline.rb
23
- - test/test_mutations_caller_pipeline.rb
24
22
  - bin/mutations_caller_pipeline
23
+ - lib/mutations_caller_pipeline.rb
24
+ - lib/mutations_caller_pipeline/bwa_caller.rb
25
+ - lib/mutations_caller_pipeline/gatk_caller.rb
26
+ - lib/mutations_caller_pipeline/location_file.rb
27
+ - lib/mutations_caller_pipeline/samtools_indexing.rb
25
28
  homepage: https://github.com/khayer/mutations_caller_pipeline
26
29
  licenses: []
27
30
  post_install_message:
@@ -42,9 +45,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
42
45
  version: '0'
43
46
  requirements: []
44
47
  rubyforge_project: mutations_caller_pipeline
45
- rubygems_version: 1.8.15
48
+ rubygems_version: 1.8.10
46
49
  signing_key:
47
50
  specification_version: 3
48
51
  summary: Call Mutations for files.fq
49
- test_files:
50
- - test/test_mutations_caller_pipeline.rb
52
+ test_files: []
53
+ has_rdoc:
@@ -1,24 +0,0 @@
1
- require 'test/unit'
2
- require 'mutations_caller_pipeline'
3
-
4
- class MutationsCallerPipelineTest < Test::Unit::TestCase
5
- def test_hi
6
- assert_equal "Hello World!", MutationsCallerPipeline.hi
7
- end
8
-
9
- def test_bwa_caller
10
-
11
- end
12
-
13
- def test_samtools_indexing
14
-
15
- end
16
-
17
- def test_gatk_caller
18
-
19
- end
20
-
21
- def test_create_location_file
22
-
23
- end
24
- end