mutations_caller_pipeline 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/mutations_caller_pipeline +140 -2
- data/lib/mutations_caller_pipeline.rb +5 -4
- data/lib/mutations_caller_pipeline/bwa_caller.rb +17 -0
- data/lib/mutations_caller_pipeline/gatk_caller.rb +57 -0
- data/lib/mutations_caller_pipeline/location_file.rb +21 -0
- data/lib/mutations_caller_pipeline/samtools_indexing.rb +7 -0
- metadata +10 -7
- data/test/test_mutations_caller_pipeline.rb +0 -24
@@ -1,4 +1,142 @@
|
|
1
1
|
#!/usr/bin/env ruby
# Command-line driver of the mutations caller pipeline.
#
# Parses the command-line options and the YAML config file, then runs, in
# order: BWA paired-end alignment (mutant and, optionally, wildtype), BAM
# indexing, GATK realignment, GATK recalibration and GATK variant calling.
# Intermediate files are written to the current directory under names that
# share one random numeric tag.
require 'rubygems' # loaded first so the gem below can be found on rubies that do not preload rubygems
require 'optparse'
require 'yaml'     # lower case: `require 'YAML'` fails on case-sensitive filesystems
require 'mutations_caller_pipeline'

usage = <<EOF
_________________________________________________________________________________________________

#{$0}
-m mutant_r1.fq -n mutant_r2.fq
[-w wildtype_r1.fq -x wildtype_r2.fq]
-c config.yml -v raw_vcf_file
_________________________________________________________________________________________________

#{$0} ...
... is a tool to find mutations between the reference gene and a given test
population. To run this tool you must have bwa, samtools and GATK installed.
Also you should have the indices for bwa and GATK prepared.
NOTE: Only paired end reads are supported!

config.yml should look like this:
# config.yml
index_prefix: "path/to/prefix"
index_fa: "path/to/reference.fa"
index_vcf: "path/to/known_sites.vcf"
annotation_file: "path/to/annotation_file"
bwa: "path/to/bwa"
samtools: "path/to/samtools"
gatk: "path/to/GenomeAnalysisTK.jar"
_________________________________________________________________________________________________

EOF

# Every recognised setting; values come from the command line or the config file.
options = {
  :mutant_r1       => nil,
  :mutant_r2       => nil,
  :wildtype_r1     => nil,
  :wildtype_r2     => nil,
  :index_prefix    => nil,
  :index_fa        => nil,
  :index_vcf       => nil,
  :annotation_file => nil,
  :samtools        => nil,
  :gatk            => nil,
  :bwa             => nil,
  :vcf             => nil
}

optparse = OptionParser.new do |opts|
  opts.banner = usage

  opts.on("-m", "--fwd_read_mutant DIR", :REQUIRED, String, "Path to fwd read of mutant") do |i|
    options[:mutant_r1] = i
  end

  opts.on("-n", "--rev_read_mutant DIR", :REQUIRED, String, "Path to rev read of mutant") do |i|
    options[:mutant_r2] = i
  end

  opts.on("-w", "--fwd_read_wildtype DIR", String, "Path to fwd read of wildtype, not mandatory") do |i|
    options[:wildtype_r1] = i if i
  end

  opts.on("-x", "--rev_read_wildtype DIR", String, "Path to rev read of wildtype, not mandatory") do |i|
    options[:wildtype_r2] = i if i
  end

  opts.on("-c", "--config DIR", String, "Set config file") do |path|
    # YAML.load_file reads and closes the file itself. Kernel#open would treat
    # a path starting with "|" as a command and left the handle open.
    # An empty config file parses to a non-hash value, hence the `|| {}`.
    config = YAML.load_file(path) || {}
    config.each { |key, value| options[key.to_sym] = value }
  end

  opts.on("-v","--vcf [PATH]", "Output of pipeline") do |i|
    options[:vcf] = i
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end

begin
  optparse.parse!
rescue OptionParser::ParseError => e
  # ParseError is the common superclass of InvalidOption and MissingArgument.
  puts e.message
  puts optparse
  exit 1
end

mandatory = [:mutant_r1, :mutant_r2, :index_prefix, :annotation_file, :bwa,
             :samtools, :gatk, :vcf, :index_vcf, :index_fa]
missing = mandatory.select { |param| options[param].nil? }
unless missing.empty?
  puts "\nMissing options given or missing in config_file: \n\t#{missing.join(",\n\t")}"
  puts optparse
  exit 1 # a usage error must not look like success to the calling shell
end

# Runs one pipeline step (the block) and aborts the whole run when the step
# returns false or nil, so later steps never work on missing input.
def run_step(description, log_file)
  return if yield
  abort "#{description} failed, see #{log_file} for details"
end

# pipeline starts here

# tmp files for output
random           = rand(1_000_000).to_s
bam_prefix       = "mutant_#{random}"
log_file         = "#{random}.log"
target_intervals = "#{random}_target.intervals"
realigned_bam    = "#{random}_realigned.bam"
recal_file       = "#{random}_recal.csv"
recal_bam        = "#{random}_recal.bam"

# BWA : First step mapping reads to reference.
# The output argument is used as a prefix: the script expects the sorted
# alignment to end up in "<prefix>.bam".
run_step("BWA alignment of mutant reads", log_file) do
  BwaCaller.call_paired_end(options[:mutant_r1],
                            options[:mutant_r2],
                            bam_prefix,
                            options[:index_prefix],
                            log_file,
                            options[:bwa],
                            options[:samtools])
end
bam_file = "#{bam_prefix}.bam"

bam_file_wildtype = nil
if options[:wildtype_r1] && options[:wildtype_r2]
  # Same prefix convention as for the mutant; passing a name that already
  # ends in ".bam" made indexing look for a file that was never written.
  wildtype_prefix = "wildtype_#{random}"
  run_step("BWA alignment of wildtype reads", log_file) do
    BwaCaller.call_paired_end(options[:wildtype_r1],
                              options[:wildtype_r2],
                              wildtype_prefix,
                              options[:index_prefix],
                              log_file,
                              options[:bwa],
                              options[:samtools])
  end
  bam_file_wildtype = "#{wildtype_prefix}.bam"
end

# Indexing
SamtoolsIndexing.call(bam_file)
SamtoolsIndexing.call(bam_file_wildtype) if bam_file_wildtype

# Realign
run_step("GATK RealignerTargetCreator", log_file) do
  GatkCaller.prepare_realigne(log_file, options[:gatk], bam_file, options[:index_fa], target_intervals)
end
run_step("GATK IndelRealigner", log_file) do
  GatkCaller.realigne(log_file, options[:gatk], bam_file, options[:index_fa], target_intervals, realigned_bam)
end

# Recalibration
run_step("GATK CountCovariates", log_file) do
  GatkCaller.recalibrate_bam(log_file, options[:gatk], options[:index_fa], options[:index_vcf], realigned_bam, recal_file)
end
run_step("GATK TableRecalibration", log_file) do
  GatkCaller.table_calibration(log_file, options[:gatk], options[:index_fa], realigned_bam, recal_bam, recal_file)
end

# GATK: finding mutations
run_step("GATK UnifiedGenotyper", log_file) do
  GatkCaller.call(log_file, options[:gatk], options[:index_fa], recal_bam, options[:vcf])
end
|
@@ -1,10 +1,11 @@
|
|
1
|
+
require 'mutations_caller_pipeline/bwa_caller'
|
2
|
+
require 'mutations_caller_pipeline/gatk_caller'
|
3
|
+
require 'mutations_caller_pipeline/samtools_indexing'
|
4
|
+
|
1
5
|
# Top-level class of the gem; currently only exposes a greeting.
class MutationsCallerPipeline
  class << self
    # Returns the fixed greeting string.
    def hi
      'Hello World!'
    end
  end
end
|
6
10
|
|
7
|
-
|
8
|
-
require "mutations_caller_pipeline/gatk_caller"
|
9
|
-
require "mutations_caller_pipeline/samtools_indexing"
|
10
|
-
require "mutations_caller_pipeline/location_file"
|
11
|
+
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Builds and runs the bash pipeline `bwa samse|sampe | samtools view | samtools sort`.
#
# Both public methods print the command they run and return the result of
# Kernel#system: true when the pipeline exits with status 0, false when it
# exits non-zero, nil when bash could not be started. stderr of every stage
# is appended to +log_file+.
class BwaCaller
  # Read-group header handed to bwa via -r (contains literal tab characters).
  READ_GROUP = "@RG\tID:foo\tSM:bar\tPL:Illumina".freeze

  # Aligns single-end reads.
  #
  # r1       - reads file passed to `bwa aln` and `bwa samse`
  # out_file - output argument given to `samtools sort`
  # index    - bwa index argument
  # log_file - file that receives stderr of all stages
  # bwa      - bwa executable
  # samtools - samtools executable
  def self.call_single_end(r1,out_file,index, log_file, bwa, samtools)
    align = "#{bwa} samse -r '#{READ_GROUP}' #{index} " \
            "<(#{aln(bwa, index, r1, log_file)}) " \
            "#{r1} 2>>#{log_file}"
    run(align, out_file, log_file, samtools)
  end

  # Aligns paired-end reads; +r1+ and +r2+ are the two read files, the
  # remaining parameters are the same as for call_single_end.
  def self.call_paired_end(r1, r2, out_file, index, log_file, bwa, samtools)
    align = "#{bwa} sampe -r '#{READ_GROUP}' #{index} " \
            "<(#{aln(bwa, index, r1, log_file)}) <(#{aln(bwa, index, r2, log_file)}) " \
            "#{r1} #{r2} 2>>#{log_file}"
    run(align, out_file, log_file, samtools)
  end

  # Command that computes the alignment of one reads file (used inside a
  # bash process substitution).
  def self.aln(bwa, index, reads, log_file)
    "#{bwa} aln #{index} #{reads} 2>>#{log_file}"
  end

  # Appends the samtools view/sort stages to +align_cmd+ and runs the
  # pipeline with bash (process substitution is a bash feature).
  def self.run(align_cmd, out_file, log_file, samtools)
    # pipefail: without it the exit status is that of `samtools sort` alone,
    # so a failing bwa stage would still be reported as success.
    cmd = "set -o pipefail; #{align_cmd} | " \
          "#{samtools} view -Su - 2>>#{log_file} | " \
          "#{samtools} sort - #{out_file} 2>>#{log_file}"
    puts cmd
    system('bash', '-c', cmd)
  end

  private_class_method :aln, :run
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# Builds and runs the GATK command lines used by the pipeline.
#
# Every public method prints the shell command it runs and returns the result
# of Kernel#system (true on exit status 0, false on a non-zero status, nil if
# the command could not be started). The first parameter, +log_dir+, is used
# as the path of a log FILE: stdout and stderr of java are appended to it.
class GatkCaller
  # Runs the UnifiedGenotyper walker.
  #
  # log_dir  - log file to append to
  # gatk     - path given to `java -jar`
  # index_fa - reference passed as -R (INDEX is normal genom.fa)
  # read_bam - input passed as -I
  # read_vcf - output passed as -o
  def self.call(log_dir, gatk, index_fa, read_bam, read_vcf)
    java = "java -Xmx4g -jar #{gatk} -l INFO -R #{index_fa} -T UnifiedGenotyper " \
           "-I #{read_bam} -o #{read_vcf} --genotype_likelihoods_model BOTH"
    run(announced(log_dir, logged(log_dir, java)))
  end

  # Runs the CountCovariates walker; +index_vcf+ is passed as -knownSites and
  # +recal_file+ as -recalFile.
  def self.recalibrate_bam(log_dir ,gatk, index_fa, index_vcf, read_bam, recal_file )
    java = "java -Xmx4g -jar #{gatk} -knownSites #{index_vcf} -I #{read_bam} " \
           "-R #{index_fa} -T CountCovariates " \
           "-cov ReadGroupCovariate -cov QualityScoreCovariate -cov DinucCovariate " \
           "-cov CycleCovariate -recalFile #{recal_file}"
    run(announced(log_dir, logged(log_dir, java)))
  end

  # Runs the TableRecalibration walker on +read_bam+, writing +recal_bam+ and
  # passing +recal_file+ as -recalFile.
  def self.table_calibration(log_dir, gatk, index_fa, read_bam, recal_bam, recal_file)
    java = "java -Xmx4g -jar #{gatk} -R #{index_fa} -I #{read_bam} " \
           "-T TableRecalibration -o #{recal_bam} -recalFile #{recal_file}"
    run(logged(log_dir, java))
  end

  # Runs the RealignerTargetCreator walker, writing +target_intervals+.
  # Output is now appended to +log_dir+ like in every other step; previously
  # the parameter was accepted but ignored.
  def self.prepare_realigne(log_dir, gatk, read_bam, index_fa, target_intervals)
    java = "java -Xmx2g -jar #{gatk} -I #{read_bam} -R #{index_fa} " \
           "-T RealignerTargetCreator -o #{target_intervals}"
    run(logged(log_dir, java))
  end

  # Runs the IndelRealigner walker with +target_intervals+, writing
  # +realigned_bam+.
  def self.realigne(log_dir, gatk, read_bam, index_fa, target_intervals, realigned_bam)
    java = "java -Xmx4g -jar #{gatk} -I #{read_bam} -R #{index_fa} " \
           "-T IndelRealigner -targetIntervals #{target_intervals} -o #{realigned_bam}"
    run(logged(log_dir, java))
  end

  # Appends the log redirection and the failure exit to a java command.
  def self.logged(log_dir, java_cmd)
    "#{java_cmd} >> #{log_dir} 2>&1 || exit 1"
  end

  # Prefixes a command with a line that writes a start message and the
  # output of `date` to the log.
  def self.announced(log_dir, cmd)
    "echo 'starting GATK for mutant at ' `date` >> #{log_dir}\n#{cmd}"
  end

  # Prints the command, then executes it through the shell.
  def self.run(cmd)
    puts cmd
    system(cmd)
  end

  private_class_method :logged, :announced, :run
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# Extracts the contig IDs declared in the header of a VCF file.
class LocationFile
  # Matches a "##contig=<ID=name" header entry; group 1 is the ID (word
  # characters only, so the ID is cut at the first other character).
  CONTIG_ID = /##contig=<ID=+(\w+)/

  # Reads the leading lines of +vcf_file+ that contain a '#', collects the ID
  # of every "##contig=<ID=...>" entry found in them and writes the IDs, one
  # per line and without a trailing newline, to +location_file_output+.
  #
  # Reading stops at the first line without a '#' or at end of file, so a
  # header-only or empty input yields the IDs found so far instead of raising
  # EOFError. Both files are closed even when an error occurs.
  #
  # vcf_file             - path of the VCF file to read
  # location_file_output - path of the file to write (overwritten)
  #
  # Returns nil.
  def self.create(vcf_file, location_file_output)
    locus = []
    File.open(vcf_file) do |locations|
      locations.each_line do |line|
        break unless line.include?('#')
        contig = line[CONTIG_ID, 1]
        locus << contig if contig
      end
    end

    File.open(location_file_output, 'w') { |locus_file| locus_file.write(locus.join("\n")) }
    nil
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mutations_caller_pipeline
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-01-20 00:00:00.
|
12
|
+
date: 2012-01-20 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: Using BWA to align and GATK to call the bases
|
15
15
|
email:
|
@@ -19,9 +19,12 @@ executables:
|
|
19
19
|
extensions: []
|
20
20
|
extra_rdoc_files: []
|
21
21
|
files:
|
22
|
-
- lib/mutations_caller_pipeline.rb
|
23
|
-
- test/test_mutations_caller_pipeline.rb
|
24
22
|
- bin/mutations_caller_pipeline
|
23
|
+
- lib/mutations_caller_pipeline.rb
|
24
|
+
- lib/mutations_caller_pipeline/bwa_caller.rb
|
25
|
+
- lib/mutations_caller_pipeline/gatk_caller.rb
|
26
|
+
- lib/mutations_caller_pipeline/location_file.rb
|
27
|
+
- lib/mutations_caller_pipeline/samtools_indexing.rb
|
25
28
|
homepage: https://github.com/khayer/mutations_caller_pipeline
|
26
29
|
licenses: []
|
27
30
|
post_install_message:
|
@@ -42,9 +45,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
42
45
|
version: '0'
|
43
46
|
requirements: []
|
44
47
|
rubyforge_project: mutations_caller_pipeline
|
45
|
-
rubygems_version: 1.8.
|
48
|
+
rubygems_version: 1.8.10
|
46
49
|
signing_key:
|
47
50
|
specification_version: 3
|
48
51
|
summary: Call Mutations for files.fq
|
49
|
-
test_files:
|
50
|
-
|
52
|
+
test_files: []
|
53
|
+
has_rdoc:
|
@@ -1,24 +0,0 @@
|
|
1
|
-
require 'test/unit'
|
2
|
-
require 'mutations_caller_pipeline'
|
3
|
-
|
4
|
-
# Unit tests of the gem. Only the greeting is asserted; the remaining test
# methods are empty placeholders.
class MutationsCallerPipelineTest < Test::Unit::TestCase
  # The greeting must be exactly "Hello World!".
  def test_hi
    greeting = MutationsCallerPipeline.hi
    assert_equal("Hello World!", greeting)
  end

  # Placeholder, no assertions.
  def test_bwa_caller; end

  # Placeholder, no assertions.
  def test_samtools_indexing; end

  # Placeholder, no assertions.
  def test_gatk_caller; end

  # Placeholder, no assertions.
  def test_create_location_file; end
end
|