bio-polyploid-tools 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +67 -0
- data/README +21 -0
- data/Rakefile +61 -0
- data/VERSION +1 -0
- data/bin/bfr.rb +133 -0
- data/bin/count_variations.rb +36 -0
- data/bin/filter_blat_by_target_coverage.rb +15 -0
- data/bin/find_best_blat_hit.rb +32 -0
- data/bin/hexaploid_primers.rb +168 -0
- data/bin/homokaryot_primers.rb +155 -0
- data/bin/map_markers_to_contigs.rb +66 -0
- data/bin/markers_in_region.rb +42 -0
- data/bin/polymarker.rb +219 -0
- data/bin/snps_between_bams.rb +106 -0
- data/bio-polyploid-tools.gemspec +139 -0
- data/conf/defaults.rb +1 -0
- data/conf/primer3_config/dangle.dh +128 -0
- data/conf/primer3_config/dangle.ds +128 -0
- data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
- data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
- data/conf/primer3_config/interpretations/loops_i.dh +34 -0
- data/conf/primer3_config/interpretations/loops_i.ds +31 -0
- data/conf/primer3_config/interpretations/stack_i.dh +257 -0
- data/conf/primer3_config/interpretations/stack_i.ds +256 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
- data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
- data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
- data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
- data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
- data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
- data/conf/primer3_config/loops.dh +30 -0
- data/conf/primer3_config/loops.ds +30 -0
- data/conf/primer3_config/stack.dh +256 -0
- data/conf/primer3_config/stack.ds +256 -0
- data/conf/primer3_config/stackmm.dh +256 -0
- data/conf/primer3_config/stackmm.ds +256 -0
- data/conf/primer3_config/tetraloop.dh +77 -0
- data/conf/primer3_config/tetraloop.ds +77 -0
- data/conf/primer3_config/triloop.dh +16 -0
- data/conf/primer3_config/triloop.ds +16 -0
- data/conf/primer3_config/tstack.dh +256 -0
- data/conf/primer3_config/tstack2.dh +256 -0
- data/conf/primer3_config/tstack2.ds +256 -0
- data/conf/primer3_config/tstack_tm_inf.ds +256 -0
- data/lib/bio/BFRTools.rb +698 -0
- data/lib/bio/BIOExtensions.rb +186 -0
- data/lib/bio/PolyploidTools/ChromosomeArm.rb +52 -0
- data/lib/bio/PolyploidTools/ExonContainer.rb +194 -0
- data/lib/bio/PolyploidTools/Marker.rb +175 -0
- data/lib/bio/PolyploidTools/PrimerRegion.rb +22 -0
- data/lib/bio/PolyploidTools/SNP.rb +681 -0
- data/lib/bio/PolyploidTools/SNPSequence.rb +56 -0
- data/lib/bio/SAMToolsExtensions.rb +284 -0
- data/lib/bio/db/exonerate.rb +272 -0
- data/lib/bio/db/fastadb.rb +164 -0
- data/lib/bio/db/primer3.rb +673 -0
- data/lib/bioruby-polyploid-tools.rb +25 -0
- data/test/data/BS00068396_51.fa +2 -0
- data/test/data/BS00068396_51_contigs.aln +1412 -0
- data/test/data/BS00068396_51_contigs.dnd +7 -0
- data/test/data/BS00068396_51_contigs.fa +8 -0
- data/test/data/BS00068396_51_exonerate.tab +6 -0
- data/test/data/BS00068396_51_genes.txt +14 -0
- data/test/data/LIB1716.bam +0 -0
- data/test/data/LIB1716.bam.bai +0 -0
- data/test/data/LIB1719.bam +0 -0
- data/test/data/LIB1719.bam.bai +0 -0
- data/test/data/LIB1721.bam +0 -0
- data/test/data/LIB1721.bam.bai +0 -0
- data/test/data/LIB1722.bam +0 -0
- data/test/data/LIB1722.bam.bai +0 -0
- data/test/data/S22380157.fa +16 -0
- data/test/data/S22380157.fa.fai +1 -0
- data/test/data/Test3Aspecific.csv +1 -0
- data/test/data/Test3Aspecific_contigs.fa +6 -0
- data/test/data/patological_cases5D.csv +1 -0
- data/test/data/short_primer_design_test.csv +10 -0
- data/test/data/test_primer3_error.csv +4 -0
- data/test/data/test_primer3_error_contigs.fa +10 -0
- data/test/test_bfr.rb +51 -0
- data/test/test_exon_container.rb +17 -0
- data/test/test_exonearate.rb +53 -0
- data/test/test_snp_parsing.rb +40 -0
- metadata +201 -0
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
|
|
2
|
+
require_relative "SNP"
|
|
3
|
+
module Bio::PolyploidTools
|
|
4
|
+
# Error type used by SNPSequence.parse to report an input line that does not
# have the expected number of comma-separated fields.
class SNPSequenceException < RuntimeError; end
|
|
6
|
+
|
|
7
|
+
# A SNP described by its flanking sequence, with both alleles written inline
# between square brackets.
#
# Format (comma separated; the chromosome field is optional):
#   snp name,chromosome from contig,microarray sequence
#   BS00068396_51,2AS,CGAAGCGATCCTACTACATTGCGTTCCTTTCCCACTCCCAGGTCCCCCTA[T/C]ATGCAGGATCTTGATTAGTCGTGTGAACAACTGAAATTTGAGCGCCACAA
class SNPSequence < SNP

  attr_accessor :sequence_original

  # Builds a SNPSequence from one line in the format described above.
  #
  # reg_str - String with two (name, sequence) or three (name, chromosome,
  #           sequence) comma-separated fields. A trailing newline is ignored;
  #           the caller's string is not modified.
  #
  # Returns the populated SNPSequence.
  # Raises SNPSequenceException if the line does not have two or three fields.
  def self.parse(reg_str)
    line = reg_str.chomp
    fields = line.split(",")
    snp = SNPSequence.new

    case fields.size
    when 3
      snp.gene, snp.chromosome, snp.sequence_original = fields
    when 2
      snp.gene, snp.sequence_original = fields
    else
      # `raise`, not `throw`: throw is for catch/throw control flow and would
      # surface as an UncaughtThrowError instead of this exception.
      raise SNPSequenceException, "Need two or three fields to parse, and got #{fields.size} in #{line}"
    end

    # The chromosome is absent in the two-field form.
    snp.chromosome.strip! if snp.chromosome
    snp.parse_sequence_snp
    snp.exon_list = Hash.new()
    snp
  end

  # Intentionally empty.
  # NOTE(review): presumably overrides SNP#parse_snp so the positional parsing
  # of the parent class is skipped - confirm against SNP.
  def parse_snp

  end

  # Extracts the SNP from sequence_original, which is expected to look like
  # "<prefix>[<original>/<snp>]<suffix>" with both alleles in ACGT.
  #
  # On a match it sets @position (1-based index of the SNP within the
  # template), @original, @snp and @template_sequence (prefix + ambiguity code
  # for the two alleles + suffix). When the sequence does not match, nothing
  # is set and nil is returned.
  def parse_sequence_snp
    match_data = /(?<pre>\w*)\[(?<org>[ACGT])\/(?<snp>[ACGT])\](?<pos>\w*)/.match(sequence_original.strip)
    return unless match_data

    @position = match_data[:pre].size + 1
    @original = match_data[:org]
    @snp = match_data[:snp]
    amb_base = Bio::NucleicAcid.to_IUAPC("#{@original}#{@snp}")

    @template_sequence = "#{match_data[:pre]}#{amb_base}#{match_data[:pos]}"
  end

end
|
|
56
|
+
end
|
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
require 'rubygems'
|
|
2
|
+
require 'pathname'
|
|
3
|
+
#require_relative 'db/fasta.rb'
|
|
4
|
+
require 'bio'
|
|
5
|
+
|
|
6
|
+
require_relative 'db/fastadb.rb'
|
|
7
|
+
|
|
8
|
+
#require "set"
|
|
9
|
+
#require 'systemu'
|
|
10
|
+
#require 'json'
|
|
11
|
+
|
|
12
|
+
=begin
|
|
13
|
+
|
|
14
|
+
Extends the methods to be able to calculate the BFR and a consensus from the pileup
|
|
15
|
+
|
|
16
|
+
=end
|
|
17
|
+
|
|
18
|
+
# Extends Bio::DB::Pileup with per-position base counts, base ratios and an
# IUPAC consensus call. Relies on non_refs, ref_base, ref_count and coverage,
# which are expected to be provided by the original Pileup class.
class Bio::DB::Pileup

  # Returns a hash with the count of each base at this position: the non
  # reference counts plus the reference count stored under the upper-cased
  # reference base symbol. The hash is computed once and memoized.
  def bases
    return @bases if @bases
    @bases = self.non_refs
    @bases[self.ref_base.upcase.to_sym] = self.ref_count
    @bases
  end

  # Returns the sum of all the base counts.
  # Goes through #bases (not @bases) so it also works when the counts have not
  # been computed yet.
  def base_coverage
    bases.values.inject(0) { |total, count| total + count }
  end

  # Returns a hash with the fraction of base_coverage contributed by each
  # base. Memoized.
  def base_ratios
    return @base_ratios if @base_ratios
    total = base_coverage.to_f # loop invariant, computed once
    @base_ratios = Hash.new
    bases.each do |base, count|
      @base_ratios[base] = count.to_f / total
    end
    @base_ratios
  end

  # Returns the IUPAC ambiguity code covering every base whose share of the
  # coverage is greater than the given ratio. When no base passes the
  # threshold the lower-cased reference base is returned.
  #
  # minumum_ratio_for_iup_consensus - minimum count/coverage ratio a base must
  #                                   exceed to be part of the consensus.
  #
  # The result is memoized on the first call, so the ratio passed to later
  # calls on the same object is ignored.
  def consensus_iuap(minumum_ratio_for_iup_consensus)
    return @consensus_iuap unless @consensus_iuap.nil?

    @consensus_iuap = self.ref_base.downcase
    passing = String.new
    bases.each do |base, count|
      # to_f keeps this a real ratio even if coverage is an Integer.
      passing << base[0].to_s if count.to_f / self.coverage > minumum_ratio_for_iup_consensus
    end
    @consensus_iuap = Bio::NucleicAcid.to_IUAPC(passing) if passing.length > 0
    @consensus_iuap
  end
end
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# Helpers to translate between sets of bases and IUPAC ambiguity codes.
# IUPAC_CODES is not defined in this file; it is expected to be a lookup table
# defined elsewhere for Bio::NucleicAcid.
class Bio::NucleicAcid

  # Returns the upper-cased IUPAC code for a set of bases.
  #
  # bases - anything whose to_s yields the bases (e.g. "TC"); case, order and
  #         duplicates are ignored.
  #
  # When the combination is not in IUPAC_CODES a warning naming the offending
  # input is written to standard error and "N" is returned.
  def self.to_IUAPC(bases)
    key = bases.to_s.downcase.chars.sort.uniq.join
    code = IUPAC_CODES[key]
    if code.nil?
      # Report the input: the failed lookup result is always nil.
      $stderr.puts "Invalid base! #{bases}"
      code = 'n' #This is a patch... as one of the scripts failed here.
    end
    code.upcase
  end

  # Returns true when base is one of the characters IUPAC_CODES lists for
  # code, and false otherwise (including when code is not in the table).
  # Both arguments are compared in lower case.
  def self.is_valid(code, base)
    expansion = IUPAC_CODES[code.downcase]
    return false if expansion.nil?
    expansion.chars.include?(base.downcase)
  end

end
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
#class Bio::DB::Sam::SAMException < RuntimeError
|
|
93
|
+
|
|
94
|
+
#end
|
|
95
|
+
|
|
96
|
+
# Extends Bio::DB::Sam with a per-region pileup cache and with statistics
# (coverage, base counts, base ratios and IUPAC consensus) derived from it.
# Relies on mpileup, fetch_reference, fetch_with_function and SAMException,
# which are expected to come from the original Sam class.
class Bio::DB::Sam

  attr_writer :minumum_ratio_for_iup_consensus
  attr_reader :cached_regions

  # Minimum ratio a base needs to exceed to be included in the consensus
  # computed by calculate_stats_from_pile. Defaults to 0.20 per instance
  # (a class-body assignment would only set a class-level variable that
  # instances never see).
  def minumum_ratio_for_iup_consensus
    @minumum_ratio_for_iup_consensus ||= 0.20
  end

  #Same as mpilup, but it caches the pileup, so if you want several operations on the same set of regions
  #the pile for different operations, it won't execute the mpilup command several times
  #Whenever you finish using a region, call mpileup_clear_cache to free the cache
  #The argument Region is required, as it will be the key for the underlying hash.
  #We asume that the options are constant. If they are not, the cache mechanism may not be consistent.
  #
  # opts - options for mpileup; :r or :region is mandatory. The hash passed by
  #        the caller is not modified.
  #
  # Yields every pileup entry of the region, either while it is being loaded
  # or from the cache. The block is optional, so the method can also be used
  # just to fill the cache.
  #
  # Raises SAMException when no region is given.
  #
  #TODO: It may be good to load partially the pileup
  def mpileup_cached(opts={})
    raise SAMException.new(), "A region must be provided" unless opts[:r] or opts[:region]
    @cached_regions ||= Hash.new

    region = opts[:r] ? opts[:r] : opts[:region]
    key = region.to_s
    # -A is always switched on; both region keys are normalised to the string
    # that is also used as the cache key.
    pile_opts = opts.merge(:r => key, :region => key, :A => true)

    cached = @cached_regions[key]
    if cached
      cached.pileup.each do |pile|
        yield pile if block_given?
      end
    else
      cached = Bio::DB::Fasta::Region.parse_region(key)
      piles = Array.new
      cached.pileup = piles
      @cached_regions[key] = cached
      mpileup(pile_opts) do |pile|
        piles << pile
        yield pile if block_given?
      end
    end
  end

  #Clears the pileup cache. If a region is passed as argument, just the specified region is removed
  #If no region is passed, the hash is emptied
  def mpileup_clear_cache(region = nil)
    return unless @cached_regions
    if region
      @cached_regions.delete(region.to_s)
    else
      @cached_regions.clear
    end
  end

  #Gets the coverage of a region from a pileup.
  def average_coverage_from_pileup(opts={})
    cached_region_stats(opts).average_coverage
  end

  # Gets an array with the coverage of each position of opts[:region].
  def coverages_from_pileup(opts={})
    cached_region_stats(opts).coverages
  end

  # Gets the consensus of opts[:region], as stored by calculate_stats_from_pile.
  def consensus_with_ambiguities(opts={})
    cached_region_stats(opts).consensus
  end

  # Runs (or replays from the cache) the pileup of opts[:region] and stores
  # on the cached region: coverages, base_ratios, bases, average_coverage and
  # the consensus sequence (reverse complemented for :reverse regions).
  #
  # opts - :region  region (Bio::DB::Fasta::Region or its string form);
  #                 replaced in opts by the parsed Region.
  #        :min_cov positions need a coverage greater than this value to
  #                 contribute to the statistics (default 20).
  #
  # Returns the cached Bio::DB::Fasta::Region carrying the statistics.
  def calculate_stats_from_pile(opts={})
    min_cov = opts[:min_cov] ? opts[:min_cov] : 20

    opts[:region] = Bio::DB::Fasta::Region.parse_region(opts[:region].to_s) unless opts[:region].class == Bio::DB::Fasta::Region
    region = opts[:region]

    reference = self.fetch_reference(region.entry, region.start, region.end).downcase
    # BASE_COUNT_ZERO is not defined in this file; it is expected to be
    # defined elsewhere. Every slot starts out sharing that single object.
    base_ratios = Array.new(region.size, BASE_COUNT_ZERO)
    bases = Array.new(region.size, BASE_COUNT_ZERO)
    coverages = Array.new(region.size, 0)
    total_cov = 0
    min_ratio = minumum_ratio_for_iup_consensus

    mpileup_cached(:region => region.to_s) do |pile|
      if pile.coverage > min_cov
        offset = pile.pos - region.start
        base_ratios[offset] = pile.base_ratios
        # NOTE(review): the consensus is written one position before the slot
        # used by the other three arrays (and wraps to the last base when
        # pile.pos == region.start). Kept as found; confirm against the
        # coordinates returned by fetch_reference.
        reference[offset - 1] = pile.consensus_iuap(min_ratio).upcase
        coverages[offset] = pile.coverage.to_i
        bases[offset] = pile.bases
      end
      total_cov += pile.coverage
    end

    cached = @cached_regions[region.to_s]
    cached.coverages = coverages
    cached.base_ratios = base_ratios
    cached.consensus = Bio::Sequence.new(reference)
    cached.consensus.na
    if cached.orientation == :reverse
      cached.consensus.reverse_complement!()
    end
    cached.average_coverage = total_cov.to_f / cached.size.to_f
    cached.bases = bases
    cached
  end

  #BASE_COUNT_ZERO = {:A => 0, :C => 0, :G => 0, :T => 0}

  #Gets an array with the proportions of the bases in the region.
  def base_ratios_in_region(opts={})
    cached_region_stats(opts).base_ratios
  end

  #Gets an array with the bases count in the region.
  def bases_in_region(opts={})
    cached_region_stats(opts).bases
  end

  # Prints the reads aligned to opts[:region] to standard output, four lines
  # per read (@name, sequence, +name, qualities).
  #
  # opts - :region region (Bio::DB::Fasta::Region or its string form);
  #                replaced in opts by the parsed Region.
  #
  # TODO: opts[:fastq] and opts[:fastq_file] are accepted by callers but not
  # used yet; the output always goes to $stdout.
  def extract_reads(opts={})
    opts[:region] = Bio::DB::Fasta::Region.parse_region(opts[:region].to_s) unless opts[:region].class == Bio::DB::Fasta::Region
    region = opts[:region]

    out = $stdout

    print_fastq = Proc.new do |alignment|
      out.puts "@#{alignment.qname}"
      out.puts "#{alignment.seq}"
      out.puts "+#{alignment.qname}"
      out.puts "#{alignment.qual}"
    end

    # NOTE(review): assumes fetch_with_function takes (chromosome, start, end,
    # callback) - confirm against the Sam class being extended.
    fetch_with_function(region.entry, region.start, region.end, print_fastq)
  end

  # Returns the cached region for opts[:region] with its statistics
  # calculated, running calculate_stats_from_pile when the region is not
  # cached or was cached without statistics.
  def cached_region_stats(opts)
    opts[:region] = opts[:region].to_s if opts[:region].class == Bio::DB::Fasta::Region
    cached = @cached_regions && @cached_regions[opts[:region].to_s]
    return cached if cached && cached.coverages
    calculate_stats_from_pile(opts)
  end
  private :cached_region_stats

end
|
|
267
|
+
|
|
268
|
+
# Extends Bio::DB::Fasta::Region with the slots used to keep the pileup of the
# region and the statistics calculated from it.
class Bio::DB::Fasta::Region
  attr_accessor :pileup, :average_coverage, :snps, :reference, :base_ratios, :consensus, :coverages, :bases

  # Returns an array with the ratio of the given base at each position of the
  # region, taken from base_ratios. The array is built once per base and
  # memoized.
  #
  # base - key used in the per-position ratio hashes.
  #
  #TODO: Debug, as it hasnt been tested in the actual code.
  def base_ratios_for_base(base)
    @all_ratios ||= Hash.new
    @all_ratios[base] ||= @base_ratios.map { |ratios| ratios[base] }
  end

end
|
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
# RYO %S\t%pi\t%ql\t%tl\t%g\t%V\n
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
module Bio::DB::Exonerate
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
#TODO: Make a proper object with generic parser
|
|
8
|
+
# Runs exonerate for a query against a target and parses its output.
#
# opts - :query and :target are interpolated as-is into the command line.
#        :model (default 'affine:local'), :bestn (default 20) and :ryo
#        (default: the RESULT: line understood by Alignment.parse_custom)
#        can be overridden. :percentage (default 50) is merged into the
#        options but is not part of the command line.
#
# When a block is given each parsed alignment is yielded and nil is returned;
# otherwise an array with the parsed alignments is returned. Lines that
# Alignment.parse_custom does not recognise are skipped.
#
# Raises ExonerateException when exonerate exits with a non zero status.
def self.align(opts={})
  defaults = {
    :model => 'affine:local',
    :ryo => "RESULT:\\t%S\\t%pi\\t%ql\\t%tl\\t%g\\t%V\\n",
    :bestn => 20,
    :percentage => 50
  }
  settings = defaults.merge(opts)
  subject = settings[:target]
  probe = settings[:query]

  command = "exonerate --verbose 0 --showalignment no --bestn #{settings[:bestn]} --showvulgar no --model #{settings[:model]} --ryo '#{settings[:ryo]}' #{probe} #{subject}"
  exit_state, output, errors = systemu command

  unless exit_state.exitstatus == 0
    raise ExonerateException.new(), "Error running exonerate. Command line was '#{command}'\nExonerate STDERR was:\n#{errors}"
  end

  # nil when the caller consumes the alignments through the block.
  collected = block_given? ? nil : Array.new
  output.each_line do |line|
    parsed = Alignment.parse_custom(line)
    next unless parsed
    if collected
      collected << parsed
    else
      yield parsed
    end
  end
  collected
end
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Error raised when exonerate cannot be run or when its output describes
# strands or coordinates that are not consistent.
class ExonerateException < RuntimeError; end
|
|
45
|
+
|
|
46
|
+
# One exonerate alignment, parsed from a line produced with the ryo format
# "RESULT:\t%S\t%pi\t%ql\t%tl\t%g\t%V\n".
class Alignment
  attr_accessor :query_id, :query_start, :query_end, :query_strand
  attr_accessor :target_id, :target_start, :target_end, :target_strand, :score
  attr_accessor :vulgar_block, :pi, :ql, :tl, :g
  attr_accessor :line

  #This one day may grow to work with complex ryo....
  #
  # line - tab separated String: "RESULT:", sugar, pi, ql, tl, g, vulgar.
  #
  # Returns the parsed alignment, or nil when the first field is not "RESULT:".
  def self.parse_custom(line)
    fields = line.split(/\t/)
    return nil unless fields[0] == "RESULT:"

    al = new
    al.parse_sugar(fields[1])
    al.pi = fields[2].to_f
    al.ql = fields[3].to_i
    al.tl = fields[4].to_i
    al.g = fields[5]
    al.parse_vulgar(fields[6])
    al.line = line
    al
  end

  # Returns the pi field of the alignment.
  def identity
    @pi
  end

  # Parses the whitespace separated sugar fields: query id, start, end and
  # strand, target id, start, end and strand, and score. Coordinates become
  # integers, the score a float and the strands :forward or :reverse.
  #
  # Returns self.
  # Raises ExonerateException for a strand other than "+" or "-", or when the
  # order of start and end contradicts the strand.
  def parse_sugar(sugar_str)
    @query_id, @query_start, @query_end, @query_strand, @target_id, @target_start, @target_end, @target_strand, @score = sugar_str.split(/\s+/)

    @query_start = @query_start.to_i
    @query_end = @query_end.to_i
    @target_start = @target_start.to_i
    @target_end = @target_end.to_i
    @score = @score.to_f

    @target_strand = strand_to_symbol(@target_strand, "target", sugar_str)
    @query_strand = strand_to_symbol(@query_strand, "query", sugar_str)

    raise ExonerateException.new(), "Inconsistent orientation (forward, query)" if @query_strand == :forward and @query_start > @query_end
    raise ExonerateException.new(), "Inconsistent orientation (reverse, query)" if @query_strand == :reverse and @query_start < @query_end
    raise ExonerateException.new(), "Inconsistent orientation (forward, target)" if @target_strand == :forward and @target_start > @target_end
    raise ExonerateException.new(), "Inconsistent orientation (reverse, target)" if @target_strand == :reverse and @target_start < @target_end

    self
  end

  #The vulgar has to be parsed AFTER the sugar, otherwise it is impossible to determine the orientations
  #
  # vulgar_str - whitespace separated triplets: label, query length, target
  #              length.
  #
  # Every triplet becomes a Vulgar that starts where the previous one ended;
  # coordinates decrease along a strand that is :reverse. Returns self.
  def parse_vulgar(vulgar_str)
    target_current = @target_start
    query_current = @query_start
    target_multiply = @target_strand == :reverse ? -1 : 1
    query_multiply = @query_strand == :reverse ? -1 : 1

    @vulgar_block = Array.new
    vulgar_str.split(/\s/).each_slice(3) do |label, query_length, target_length|
      vulgar = Vulgar.new(label.to_sym, query_length.to_i, target_length.to_i, target_current, target_multiply, query_current, query_multiply, self)
      query_current = vulgar.query_end
      target_current = vulgar.target_end
      @vulgar_block << vulgar
    end
    self
  end

  #This assumes that the gene is the query and the chromosome is the target
  #
  # Returns the first vulgar block whose query interval contains position, or
  # nil when there is none. The interval bounds are ordered first, because on
  # a reverse query strand query_start is greater than query_end.
  def exon_on_gene_position(position)
    @vulgar_block.find do |vulgar|
      low, high = [vulgar.query_start, vulgar.query_end].minmax
      position.between?(low, high)
    end
  end

  # Unimplemented stub: always returns nil.
  # TODO: presumably meant to translate a query position into a target
  # position through exon_on_gene_position - confirm the intended contract
  # before implementing.
  def tarpostion_from_query_position(position)
    nil
  end

  # Returns a String with one line per vulgar block (its to_s form).
  def print_features
    @vulgar_block.inject(String.new) do |out, vulgar|
      out << vulgar.to_s << "\n"
    end
  end

  # Converts the "+"/"-" strand of the sugar line into :forward/:reverse.
  # kind ("query" or "target") is only used in the error message.
  def strand_to_symbol(strand, kind, sugar_str)
    case strand
    when "+" then :forward
    when "-" then :reverse
    else
      raise ExonerateException.new(), "Ivalid #{kind} orientation #{strand} for line:\n#{sugar_str}"
    end
  end
  private :strand_to_symbol
end
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# One block (label, query length, target length) of the vulgar string of an
# exonerate alignment, together with the query and target intervals it covers.
class Vulgar
  attr_reader :label, :query_length, :target_length, :query_start, :query_end, :target_start, :target_end, :record, :snp_in_gap

  # label           - Symbol with the vulgar label (e.g. :M, :G).
  # ql, tl          - length of the block on the query and on the target.
  # target_start    - target coordinate where the block starts.
  # target_multiply - 1, or -1 when the target coordinates decrease.
  # query_start     - query coordinate where the block starts.
  # query_multiply  - 1, or -1 when the query coordinates decrease.
  # record          - the alignment owning the block; must answer query_id,
  #                   target_id, query_strand and target_strand.
  def initialize(label, ql, tl, target_start, target_multiply, query_start, query_multiply, record)
    @label = label
    @query_length = ql
    @target_length = tl
    @query_start = query_start
    @query_end = query_start + (query_multiply * ql)
    @target_start = target_start
    @target_end = target_start + (target_multiply * tl)
    @record = record
    @snp_in_gap = false
  end

  # Tab separated: label, query length, target length, query start, query
  # end, target start, target end.
  def to_s
    [@label, @query_length, @target_length, @query_start, @query_end, @target_start, @target_end].join("\t")
  end

  def query_id
    record.query_id
  end

  def target_id
    record.target_id
  end

  # Returns a region on the target centred on the position that corresponds
  # to the given query position, extended flanking_size bases to each side and
  # oriented like the target strand. Returns nil when the position falls in a
  # gap of the target (snp_in_gap is set).
  #
  # The bounds of position are validated by target_position_from_query, which
  # accepts the query interval in either order; a second, forward-only check
  # here used to reject every valid position on a reverse query strand.
  #
  # Raises ExonerateException when position is outside the block.
  def target_flanking_region_from_position(position, flanking_size)
    target_snp_pos = target_position_from_query(position)
    return nil if snp_in_gap

    reg = Bio::DB::Fasta::Region.new()
    reg.entry = target_id
    reg.orientation = record.target_strand
    reg.start = target_snp_pos - flanking_size
    reg.end = target_snp_pos + flanking_size
    reg
  end

  # Translates a query position inside this block into a target position.
  #
  # When the block is a gap with no length on the target (label :G and
  # target_length 0) snp_in_gap is set and target_start is returned.
  #
  # Raises ExonerateException when position is outside the query interval,
  # when a strand is neither :forward nor :reverse, or when the result falls
  # outside the target interval.
  def target_position_from_query(position)
    raise ExonerateException.new(), "Position: #{position} not in range (#{query_start}-#{query_end}) #{self.to_s} " unless position.between?(query_start, query_end) or position.between?(query_end, query_start)

    offset = case record.query_strand
             when :forward then position - query_start
             when :reverse then query_start - position
             else
               raise ExonerateException.new(), "The strand is not forward or reverse (#{record.query_strand}) ! #{self.inspect}"
             end

    ret = case record.target_strand
          when :forward then target_start + offset
          when :reverse then target_start - offset + 1
          else
            raise ExonerateException.new(), "The strand is not forward or reverse! #{self.inspect}"
          end

    #THis is in case the position is on a gap.
    if @target_length == 0 and label == :G
      @snp_in_gap = true
      ret = target_start
    end

    raise ExonerateException.new(), "Return position #{ret} outside block (#{target_start}-#{target_end}, #{self.inspect})" unless ret.between?(target_start, target_end) or ret.between?(target_end, target_start)
    ret
  end

  # Returns the region of the query covered by the block, oriented like the
  # query strand.
  # Raises ExonerateException when the strand is not :forward or :reverse.
  def query_region
    build_region(query_id, record.query_strand, @query_start, @query_end, "query")
  end

  # Returns the region of the target covered by the block, oriented like the
  # target strand.
  # Raises ExonerateException when the strand is not :forward or :reverse.
  def target_region
    build_region(target_id, record.target_strand, @target_start, @target_end, "target")
  end

  # Builds the region for one side of the block. The smaller coordinate is
  # shifted by one for the region start (NOTE(review): presumably converting
  # exonerate's in-between coordinates to 1-based inclusive ones - confirm).
  # kind ("query" or "target") is only used in the error message, which
  # reports the strand taken from the record.
  def build_region(entry, strand, block_start, block_end, kind)
    reg = Bio::DB::Fasta::Region.new()
    reg.entry = entry
    reg.orientation = strand
    case strand
    when :forward
      reg.start = block_start + 1
      reg.end = block_end
    when :reverse
      reg.start = block_end + 1
      reg.end = block_start
    else
      raise ExonerateException.new(), "Ivalid #{kind} orientation #{strand}"
    end
    reg
  end
  private :build_region

end
|
|
271
|
+
|
|
272
|
+
end
|