mutations_caller_pipeline 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,142 @@
1
1
  #!/usr/bin/env ruby
2
-
3
2
require 'mutations_caller_pipeline'
require 'optparse'
require 'rubygems'
# Lower-case on purpose: `require 'YAML'` only works on case-insensitive
# filesystems and raises LoadError everywhere else.
require 'yaml'

# Usage banner printed by --help and after every option error.
usage =<<EOF
_________________________________________________________________________________________________

#{$0}
-m mutant_r1.fq -n mutant_r2.fq
[-w wildtype_r1.fq -x wildtype_r2.fq]
-c config.yml -v raw_vcf_file
_________________________________________________________________________________________________

#{$0} ...
... is a tool to find mutations between the reference gene and a given test
population. To run this tool you must have bwa, samtools and GATK installed.
Also you should have the indices for bwa and GATK prepared.
NOTE: Only paired end reads are supported!

config.yml should look like this:
# config.yml
index_prefix: "path/to/prefix"
index_fa: "path/to/genome.fa"
index_vcf: "path/to/known_sites.vcf"
annotation_file: "path/to/annotation_file"
bwa: "path/to/bwa"
samtools: "path/to/samtools"
gatk: "path/to/GenomeAnalysisTK.jar"
_________________________________________________________________________________________________

EOF

# Every setting the pipeline knows about. Values are filled from the command
# line and/or from the YAML file given with -c; nil means "not supplied".
options = {
  :mutant_r1       => nil,
  :mutant_r2       => nil,
  :wildtype_r1     => nil,
  :wildtype_r2     => nil,
  :index_prefix    => nil,
  :index_fa        => nil,
  :index_vcf       => nil,
  :annotation_file => nil,
  :samtools        => nil,
  :gatk            => nil,
  :bwa             => nil,
  :vcf             => nil
}

optparse = OptionParser.new do |opts|
  opts.banner = usage

  opts.on("-m", "--fwd_read_mutant DIR", :REQUIRED, String, "Path to fwd read of mutant") do |i|
    options[:mutant_r1] = i
  end

  opts.on("-n", "--rev_read_mutant DIR", :REQUIRED, String, "Path to rev read of mutant") do |i|
    options[:mutant_r2] = i
  end

  opts.on("-w", "--fwd_read_wildtype DIR", String, "Path to fwd read of wildtype, not mandatory") do |i|
    options[:wildtype_r1] = i if i
  end

  opts.on("-x", "--rev_read_wildtype DIR", String, "Path to rev read of wildtype, not mandatory") do |i|
    options[:wildtype_r2] = i if i
  end

  opts.on("-c", "--config DIR", String, "Set config file") do |path|
    # load_file opens and closes the file itself, so no handle is leaked and
    # the path never goes through Kernel#open. The string keys of the YAML
    # mapping are converted to the symbol keys used by `options`.
    config = YAML.load_file(path)
    options.merge!(Hash[config.map { |k, v| [k.to_sym, v] }])
  end

  opts.on("-v","--vcf [PATH]", "Output of pipeline") do |i|
    options[:vcf] = i
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end

# Parse ARGV and make sure every mandatory setting was supplied either on the
# command line or through the config file. Any problem prints the reason plus
# the usage text and terminates with a non-zero status so that calling
# scripts can detect the failure.
begin
  optparse.parse!
  mandatory = [:mutant_r1, :mutant_r2, :index_prefix, :annotation_file, :bwa, :samtools, :gatk, :vcf, :index_vcf, :index_fa]
  missing = mandatory.select { |param| options[param].nil? }
  unless missing.empty?
    puts "\nMissing options given or missing in config_file: \n\t#{missing.join(",\n\t")}"
    puts optparse
    exit 1
  end
rescue OptionParser::ParseError => e
  # ParseError is the common superclass of InvalidOption, MissingArgument,
  # InvalidArgument, ... so every malformed command line ends up here.
  puts e.to_s
  puts optparse
  exit 1
end
94
+
95
# pipeline starts here

# Runs one pipeline step (the given block) and aborts the whole run with a
# non-zero exit status when the step reports failure. Every step below hands
# back the result of Kernel#system, i.e. true only if the external command
# succeeded; later steps consume the files written by earlier ones, so there
# is no point in continuing after a failure.
def run_step(description, log_file)
  return if yield
  abort "#{description} failed, aborting (see #{log_file} and the output above)"
end

# tmp files for output
# All intermediate files of one run share the same random tag.
random           = rand(1_000_000).to_s
mutant_prefix    = "mutant_#{random}"
log_file         = "#{random}.log"
target_intervals = "#{random}_target.intervals"
realigned_bam    = "#{random}_realigned.bam"
recal_file       = "#{random}_recal.csv"
recal_bam        = "#{random}_recal.bam"

# BWA : First step mapping reads to reference
# The output argument is a file name *prefix*: ".bam" is only appended
# afterwards for the indexing step.
run_step("BWA mapping of mutant reads", log_file) do
  BwaCaller.call_paired_end(options[:mutant_r1],
                            options[:mutant_r2],
                            mutant_prefix,
                            options[:index_prefix],
                            log_file,
                            options[:bwa],
                            options[:samtools])
end

# The wildtype reads are optional; they are mapped only when both mates were
# given. The wildtype branch follows the same "prefix first, .bam afterwards"
# convention as the mutant branch so that mapping and indexing agree on the
# file name.
bam_file_wildtype = nil
if options[:wildtype_r1] && options[:wildtype_r2]
  wildtype_prefix = "wildtype_#{random}"
  run_step("BWA mapping of wildtype reads", log_file) do
    BwaCaller.call_paired_end(options[:wildtype_r1],
                              options[:wildtype_r2],
                              wildtype_prefix,
                              options[:index_prefix],
                              log_file,
                              options[:bwa],
                              options[:samtools])
  end
  bam_file_wildtype = "#{wildtype_prefix}.bam"
end

# Indexing
bam_file = "#{mutant_prefix}.bam"
run_step("Indexing #{bam_file}", log_file) { SamtoolsIndexing.call(bam_file) }
if bam_file_wildtype
  run_step("Indexing #{bam_file_wildtype}", log_file) { SamtoolsIndexing.call(bam_file_wildtype) }
end

# Realign
run_step("GATK realigner target creation", log_file) do
  GatkCaller.prepare_realigne(log_file, options[:gatk], bam_file, options[:index_fa], target_intervals)
end
run_step("GATK indel realignment", log_file) do
  GatkCaller.realigne(log_file, options[:gatk], bam_file, options[:index_fa], target_intervals, realigned_bam)
end

# Recalibration
run_step("GATK covariate counting", log_file) do
  GatkCaller.recalibrate_bam(log_file, options[:gatk], options[:index_fa], options[:index_vcf], realigned_bam, recal_file)
end
run_step("GATK table recalibration", log_file) do
  GatkCaller.table_calibration(log_file, options[:gatk], options[:index_fa], realigned_bam, recal_bam, recal_file)
end

# GATK: finding mutations
run_step("GATK mutation calling", log_file) do
  GatkCaller.call(log_file, options[:gatk], options[:index_fa], recal_bam, options[:vcf])
end
@@ -1,10 +1,11 @@
1
+ require 'mutations_caller_pipeline/bwa_caller'
2
+ require 'mutations_caller_pipeline/gatk_caller'
3
+ require 'mutations_caller_pipeline/samtools_indexing'
4
+
1
5
# Top-level class of the gem. It only offers a greeting helper; the actual
# pipeline steps live in the caller classes required alongside it.
class MutationsCallerPipeline
  class << self
    # Returns the fixed greeting string.
    def hi
      'Hello World!'
    end
  end
end
6
10
 
7
- require "mutations_caller_pipeline/bwa_caller"
8
- require "mutations_caller_pipeline/gatk_caller"
9
- require "mutations_caller_pipeline/samtools_indexing"
10
- require "mutations_caller_pipeline/location_file"
11
+
@@ -0,0 +1,17 @@
1
# Builds and runs the bwa -> samtools shell pipelines that map reads and
# write a sorted alignment.
#
# Both entry points print the command, run it through bash (the command uses
# `<( ... )` process substitution) and return the result of Kernel#system:
# true on success, false when a pipeline stage failed, nil when bash could
# not be started.
class BwaCaller
  # Maps single-end reads.
  #
  # r1       - path of the reads file
  # out_file - output name handed to `samtools sort`
  # index    - bwa index prefix
  # log_file - file that collects stderr of every stage (appended)
  # bwa      - bwa executable
  # samtools - samtools executable
  def self.call_single_end(r1,out_file,index, log_file, bwa, samtools)
    cmd = "#{bwa} samse -r '@RG\tID:foo\tSM:bar\tPL:Illumina' #{index} \
      <(#{bwa} aln #{index} #{r1} 2>>#{log_file}) \
      #{r1} 2>>#{log_file} | #{samtools} view -Su - 2>>#{log_file} | #{samtools} sort - #{out_file} 2>>#{log_file}"
    run_pipeline(cmd)
  end

  # Maps paired-end reads; r1 and r2 are the two mate files, the remaining
  # parameters are the same as for call_single_end.
  def self.call_paired_end(r1, r2, out_file, index, log_file, bwa, samtools)
    cmd = "#{bwa} sampe -r '@RG\tID:foo\tSM:bar\tPL:Illumina' #{index} \
      <(#{bwa} aln #{index} #{r1} 2>>#{log_file} || exit 1) <(#{bwa} aln #{index} #{r2} 2>>#{log_file} ) \
      #{r1} #{r2} 2>>#{log_file} | #{samtools} view -Su - 2>>#{log_file} | #{samtools} sort - #{out_file} 2>>#{log_file}"
    run_pipeline(cmd)
  end

  # Prints cmd and executes it with bash.
  #
  # `-o pipefail` makes the exit status of the pipeline non-zero as soon as
  # ANY stage fails. Without it only the last stage (samtools sort) decides
  # the status, so a crashing bwa or `samtools view` would go unnoticed.
  # Commands inside `<( ... )` are not pipeline stages and are therefore not
  # covered by pipefail.
  def self.run_pipeline(cmd)
    puts cmd
    system('bash', '-o', 'pipefail', '-c', cmd)
  end
  private_class_method :run_pipeline
end
@@ -0,0 +1,57 @@
1
# Shell wrappers around the GATK jar.
#
# Every method builds one command line, prints it, runs it through the shell
# and returns the result of Kernel#system (true on success, false on a
# non-zero exit status, nil if the command could not be executed).
#
# NOTE: the first parameter is called log_dir for historical reasons but is
# used as the path of a log FILE to which stdout and stderr are appended.
class GatkCaller
  # INDEX is normal genom.fa
  #
  # Runs the UnifiedGenotyper on read_bam and writes the calls to read_vcf.
  def self.call(log_dir, gatk, index_fa, read_bam, read_vcf)
    cmd = "echo 'starting GATK for mutant at ' `date` >> #{log_dir}
      java -Xmx4g -jar #{gatk} -l INFO -R #{index_fa} -T UnifiedGenotyper \
      -I #{read_bam} \
      -o #{read_vcf} \
      --genotype_likelihoods_model BOTH \
      >> #{log_dir} 2>&1 || exit 1"
    run(cmd)
  end

  # Runs CountCovariates on read_bam with index_vcf as the known sites and
  # writes the covariate table to recal_file.
  def self.recalibrate_bam(log_dir ,gatk, index_fa, index_vcf, read_bam, recal_file )
    cmd = "echo 'starting GATK for mutant at ' `date` >> #{log_dir}
      java -Xmx4g -jar #{gatk} -knownSites #{index_vcf} -I #{read_bam} \
      -R #{index_fa} -T CountCovariates \
      -cov ReadGroupCovariate -cov QualityScoreCovariate -cov DinucCovariate \
      -cov CycleCovariate \
      -recalFile #{recal_file} >> #{log_dir} 2>&1 || exit 1 "
    run(cmd)
  end

  # Runs TableRecalibration: applies recal_file to read_bam and writes the
  # result to recal_bam.
  def self.table_calibration(log_dir, gatk, index_fa, read_bam, recal_bam, recal_file)
    cmd = "java -Xmx4g -jar #{gatk} \
      -R #{index_fa} \
      -I #{read_bam} \
      -T TableRecalibration \
      -o #{recal_bam} \
      -recalFile #{recal_file} >> #{log_dir} 2>&1 || exit 1"
    run(cmd)
  end

  # Runs RealignerTargetCreator on read_bam and writes the intervals to
  # target_intervals. Output is appended to the log like in every other
  # step (previously log_dir was accepted but silently ignored here).
  def self.prepare_realigne(log_dir, gatk, read_bam, index_fa, target_intervals)
    cmd = "java -Xmx2g -jar #{gatk} \
      -I #{read_bam} \
      -R #{index_fa} \
      -T RealignerTargetCreator \
      -o #{target_intervals} >> #{log_dir} 2>&1 || exit 1"
    run(cmd)
  end

  # Runs IndelRealigner on read_bam using target_intervals and writes the
  # realigned alignment to realigned_bam.
  def self.realigne(log_dir, gatk, read_bam, index_fa, target_intervals, realigned_bam)
    cmd = "java -Xmx4g -jar #{gatk} \
      -I #{read_bam} \
      -R #{index_fa} \
      -T IndelRealigner \
      -targetIntervals #{target_intervals} \
      -o #{realigned_bam} >> #{log_dir} 2>&1 || exit 1"
    run(cmd)
  end

  # Echoes the command so the user can follow the pipeline, then runs it.
  def self.run(cmd)
    puts cmd
    system(cmd)
  end
  private_class_method :run

end
@@ -0,0 +1,21 @@
1
# Extracts the contig names declared in the header of a VCF file.
class LocationFile
  # Reads the leading header lines of vcf_file (every line up to the first
  # one that contains no '#'), collects the ID of each `##contig=<ID=...`
  # entry and writes the IDs, one per line and without a trailing newline,
  # to location_file_output.
  #
  # vcf_file             - path of the VCF file to read
  # location_file_output - path of the file to (over)write
  #
  # Returns nil. Raises the usual Errno::* errors if a file cannot be opened.
  def self.create(vcf_file, location_file_output)
    locus = []

    # Block form closes the file even when an error occurs; each_line simply
    # stops at end of file, so header-only or empty files are fine.
    File.open(vcf_file) do |locations|
      locations.each_line do |line|
        break unless line.include?('#')

        location = line.scan(/##contig=<ID=+\w+/)
        # "##contig=<ID=chr1" -> the part after the last '=' is the contig ID
        locus << location[0].split('=')[-1] unless location.empty?
      end
    end

    File.open(location_file_output, 'w') do |locus_file|
      locus_file.write(locus.join("\n"))
    end
    nil
  end
end
@@ -0,0 +1,7 @@
1
# Shell wrapper around `samtools index`.
class SamtoolsIndexing
  # Prints and runs "<samtools> index <bam_file>".
  #
  # bam_file - path of the BAM file to index
  # samtools - samtools executable to use; defaults to "samtools" looked up
  #            on the PATH, so existing one-argument callers keep working
  #            while callers with a configured samtools path can pass it.
  #
  # Returns the result of Kernel#system: true on success, false on a
  # non-zero exit status, nil if the command could not be executed.
  def self.call(bam_file, samtools = 'samtools')
    cmd = "#{samtools} index #{bam_file}"
    puts cmd
    system(cmd)
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mutations_caller_pipeline
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-01-20 00:00:00.000000000Z
12
+ date: 2012-01-20 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: Using BWA to align and GATK to call the bases
15
15
  email:
@@ -19,9 +19,12 @@ executables:
19
19
  extensions: []
20
20
  extra_rdoc_files: []
21
21
  files:
22
- - lib/mutations_caller_pipeline.rb
23
- - test/test_mutations_caller_pipeline.rb
24
22
  - bin/mutations_caller_pipeline
23
+ - lib/mutations_caller_pipeline.rb
24
+ - lib/mutations_caller_pipeline/bwa_caller.rb
25
+ - lib/mutations_caller_pipeline/gatk_caller.rb
26
+ - lib/mutations_caller_pipeline/location_file.rb
27
+ - lib/mutations_caller_pipeline/samtools_indexing.rb
25
28
  homepage: https://github.com/khayer/mutations_caller_pipeline
26
29
  licenses: []
27
30
  post_install_message:
@@ -42,9 +45,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
42
45
  version: '0'
43
46
  requirements: []
44
47
  rubyforge_project: mutations_caller_pipeline
45
- rubygems_version: 1.8.15
48
+ rubygems_version: 1.8.10
46
49
  signing_key:
47
50
  specification_version: 3
48
51
  summary: Call Mutations for files.fq
49
- test_files:
50
- - test/test_mutations_caller_pipeline.rb
52
+ test_files: []
53
+ has_rdoc:
@@ -1,24 +0,0 @@
1
- require 'test/unit'
2
- require 'mutations_caller_pipeline'
3
-
4
- class MutationsCallerPipelineTest < Test::Unit::TestCase
5
- def test_hi
6
- assert_equal "Hello World!", MutationsCallerPipeline.hi
7
- end
8
-
9
- def test_bwa_caller
10
-
11
- end
12
-
13
- def test_samtools_indexing
14
-
15
- end
16
-
17
- def test_gatk_caller
18
-
19
- end
20
-
21
- def test_create_location_file
22
-
23
- end
24
- end