bio-polyploid-tools 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +67 -0
- data/README +21 -0
- data/Rakefile +61 -0
- data/VERSION +1 -0
- data/bin/bfr.rb +133 -0
- data/bin/count_variations.rb +36 -0
- data/bin/filter_blat_by_target_coverage.rb +15 -0
- data/bin/find_best_blat_hit.rb +32 -0
- data/bin/hexaploid_primers.rb +168 -0
- data/bin/homokaryot_primers.rb +155 -0
- data/bin/map_markers_to_contigs.rb +66 -0
- data/bin/markers_in_region.rb +42 -0
- data/bin/polymarker.rb +219 -0
- data/bin/snps_between_bams.rb +106 -0
- data/bio-polyploid-tools.gemspec +139 -0
- data/conf/defaults.rb +1 -0
- data/conf/primer3_config/dangle.dh +128 -0
- data/conf/primer3_config/dangle.ds +128 -0
- data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
- data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
- data/conf/primer3_config/interpretations/loops_i.dh +34 -0
- data/conf/primer3_config/interpretations/loops_i.ds +31 -0
- data/conf/primer3_config/interpretations/stack_i.dh +257 -0
- data/conf/primer3_config/interpretations/stack_i.ds +256 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
- data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
- data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
- data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
- data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
- data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
- data/conf/primer3_config/loops.dh +30 -0
- data/conf/primer3_config/loops.ds +30 -0
- data/conf/primer3_config/stack.dh +256 -0
- data/conf/primer3_config/stack.ds +256 -0
- data/conf/primer3_config/stackmm.dh +256 -0
- data/conf/primer3_config/stackmm.ds +256 -0
- data/conf/primer3_config/tetraloop.dh +77 -0
- data/conf/primer3_config/tetraloop.ds +77 -0
- data/conf/primer3_config/triloop.dh +16 -0
- data/conf/primer3_config/triloop.ds +16 -0
- data/conf/primer3_config/tstack.dh +256 -0
- data/conf/primer3_config/tstack2.dh +256 -0
- data/conf/primer3_config/tstack2.ds +256 -0
- data/conf/primer3_config/tstack_tm_inf.ds +256 -0
- data/lib/bio/BFRTools.rb +698 -0
- data/lib/bio/BIOExtensions.rb +186 -0
- data/lib/bio/PolyploidTools/ChromosomeArm.rb +52 -0
- data/lib/bio/PolyploidTools/ExonContainer.rb +194 -0
- data/lib/bio/PolyploidTools/Marker.rb +175 -0
- data/lib/bio/PolyploidTools/PrimerRegion.rb +22 -0
- data/lib/bio/PolyploidTools/SNP.rb +681 -0
- data/lib/bio/PolyploidTools/SNPSequence.rb +56 -0
- data/lib/bio/SAMToolsExtensions.rb +284 -0
- data/lib/bio/db/exonerate.rb +272 -0
- data/lib/bio/db/fastadb.rb +164 -0
- data/lib/bio/db/primer3.rb +673 -0
- data/lib/bioruby-polyploid-tools.rb +25 -0
- data/test/data/BS00068396_51.fa +2 -0
- data/test/data/BS00068396_51_contigs.aln +1412 -0
- data/test/data/BS00068396_51_contigs.dnd +7 -0
- data/test/data/BS00068396_51_contigs.fa +8 -0
- data/test/data/BS00068396_51_exonerate.tab +6 -0
- data/test/data/BS00068396_51_genes.txt +14 -0
- data/test/data/LIB1716.bam +0 -0
- data/test/data/LIB1716.bam.bai +0 -0
- data/test/data/LIB1719.bam +0 -0
- data/test/data/LIB1719.bam.bai +0 -0
- data/test/data/LIB1721.bam +0 -0
- data/test/data/LIB1721.bam.bai +0 -0
- data/test/data/LIB1722.bam +0 -0
- data/test/data/LIB1722.bam.bai +0 -0
- data/test/data/S22380157.fa +16 -0
- data/test/data/S22380157.fa.fai +1 -0
- data/test/data/Test3Aspecific.csv +1 -0
- data/test/data/Test3Aspecific_contigs.fa +6 -0
- data/test/data/patological_cases5D.csv +1 -0
- data/test/data/short_primer_design_test.csv +10 -0
- data/test/data/test_primer3_error.csv +4 -0
- data/test/data/test_primer3_error_contigs.fa +10 -0
- data/test/test_bfr.rb +51 -0
- data/test/test_exon_container.rb +17 -0
- data/test/test_exonearate.rb +53 -0
- data/test/test_snp_parsing.rb +40 -0
- metadata +201 -0
@@ -0,0 +1,56 @@
|
|
1
|
+
|
2
|
+
require_relative "SNP"
|
3
|
+
module Bio::PolyploidTools
|
4
|
+
# Error type used by SNPSequence.parse to reject a record that does not
# have the expected number of fields.
class SNPSequenceException < RuntimeError; end
|
6
|
+
|
7
|
+
class SNPSequence < SNP
|
8
|
+
|
9
|
+
attr_accessor :sequence_original
|
10
|
+
#Format:
#snp name,chromosome from contig,microarray sequence
#BS00068396_51,2AS,CGAAGCGATCCTACTACATTGCGTTCCTTTCCCACTCCCAGGTCCCCCTA[T/C]ATGCAGGATCTTGATTAGTCGTGTGAACAACTGAAATTTGAGCGCCACAA
#
# Builds a SNPSequence from one comma-separated record. The chromosome
# column is optional, so records with two fields (name,sequence) are
# accepted as well; in that case the chromosome is left unset.
#
# reg_str:: the record. A trailing newline is ignored; the caller's string
#           is not modified.
# Returns the populated SNPSequence (parse_sequence_snp has already been run
# and exon_list is an empty Hash).
# Raises SNPSequenceException when the record has neither two nor three fields.
def self.parse(reg_str)
  record = reg_str.chomp
  fields = record.split(",")
  snp = SNPSequence.new

  case fields.size
  when 3
    snp.gene, snp.chromosome, snp.sequence_original = fields
  when 2
    snp.gene, snp.sequence_original = fields
  else
    # raise (not throw): throw is for catch/throw control flow and would
    # surface as an UncaughtThrowError instead of this exception.
    raise SNPSequenceException, "Need two or three fields to parse, and got #{fields.size} in #{record}"
  end

  # Two-field records carry no chromosome, so there may be nothing to strip.
  snp.chromosome.strip! if snp.chromosome
  snp.parse_sequence_snp
  snp.exon_list = Hash.new()
  snp
end
|
34
|
+
|
35
|
+
# Deliberately does nothing and returns nil.
# NOTE(review): presumably overrides SNP#parse_snp because this class reads
# its SNP from the inline "[A/B]" marker instead (see parse_sequence_snp) -
# confirm against SNP.rb.
def parse_snp; end
|
38
|
+
|
39
|
+
# Decodes the inline SNP marker found in +sequence_original+.
#
# The (stripped) sequence is expected to look like PREFIX[X/Y]SUFFIX, with X
# and Y taken from ACGT. When the marker is found this sets:
#   @position          - 1-based position of the SNP (prefix length + 1)
#   @original, @snp    - the alleles X and Y
#   @template_sequence - prefix + ambiguity code for X/Y + suffix
# and returns the template sequence. When there is no marker nothing is set
# and nil is returned.
def parse_sequence_snp
  marker = /(?<pre>\w*)\[(?<org>[ACGT])\/(?<snp>[ACGT])\](?<pos>\w*)/.match(sequence_original.strip)
  return unless marker

  @position = marker[:pre].size + 1
  @original = marker[:org]
  @snp      = marker[:snp]
  amb_base  = Bio::NucleicAcid.to_IUAPC("#{@original}#{@snp}")

  @template_sequence = "#{marker[:pre]}#{amb_base}#{marker[:pos]}"
end
|
52
|
+
|
53
|
+
|
54
|
+
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,284 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'pathname'
|
3
|
+
#require_relative 'db/fasta.rb'
|
4
|
+
require 'bio'
|
5
|
+
|
6
|
+
require_relative 'db/fastadb.rb'
|
7
|
+
|
8
|
+
#require "set"
|
9
|
+
#require 'systemu'
|
10
|
+
#require 'json'
|
11
|
+
|
12
|
+
=begin
|
13
|
+
|
14
|
+
Extends the methods to be able to calculate the BFR and a consensus from the pileup
|
15
|
+
|
16
|
+
=end
|
17
|
+
|
18
|
+
class Bio::DB::Pileup
|
19
|
+
|
20
|
+
#attr_accessor :minumum_ratio_for_iup_consensus
|
21
|
+
#@minumum_ratio_for_iup_consensus = 0.20
|
22
|
+
|
23
|
+
# Returns a hash with the count of bases at this pileup position.
#
# The counts start from +non_refs+; the entry for the upper-cased reference
# base (as a Symbol) is then set to +ref_count+. The result is memoized in
# @bases.
def bases
  return @bases if @bases

  # Work on a copy: storing the reference count in the very Hash handed back
  # by non_refs would change that Hash for every other user of non_refs.
  @bases = non_refs.dup
  @bases[ref_base.upcase.to_sym] = ref_count
  @bases
end
|
32
|
+
|
33
|
+
# Total number of counted bases: the sum of all the values in +bases+.
# Goes through the +bases+ reader (not @bases) so it also works when the
# counts have not been computed yet.
def base_coverage
  bases.values.inject(0) { |total, count| total + count }
end
|
40
|
+
|
41
|
+
# Fraction of the counted bases that each base represents, as a Hash
# base => Float (count / base_coverage). Memoized in @base_ratios.
def base_ratios
  return @base_ratios if @base_ratios

  counts = bases                 # fetched first; the coverage is derived from these counts
  total  = base_coverage.to_f    # loop invariant, computed once
  ratios = {}
  counts.each { |base, count| ratios[base] = count.to_f / total }
  @base_ratios = ratios
end
|
50
|
+
|
51
|
+
# Returns the consensus of the pileup as an ambiguity code: every base whose
# share of the coverage (count / coverage) is above
# +minumum_ratio_for_iup_consensus+ is collected and the set is converted
# with Bio::NucleicAcid.to_IUAPC. When no base passes the threshold the
# lower-cased reference base is returned instead.
#
# The result is memoized in @consensus_iuap, so the threshold of the first
# call wins; later calls return the cached value whatever ratio they pass.
def consensus_iuap(minumum_ratio_for_iup_consensus)
  return @consensus_iuap unless @consensus_iuap.nil?

  # Float division: with an Integer coverage, count / coverage would truncate
  # to 0 for every base that is not the only one observed.
  total = coverage.to_f
  frequent = String.new
  bases.each do |base, count|
    frequent << base[0].to_s if count.to_f / total > minumum_ratio_for_iup_consensus
  end

  @consensus_iuap = frequent.empty? ? ref_base.downcase : Bio::NucleicAcid.to_IUAPC(frequent)
end
|
67
|
+
end
|
68
|
+
|
69
|
+
|
70
|
+
|
71
|
+
class Bio::NucleicAcid
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
# Converts a set of bases into its ambiguity code, upper-cased.
#
# bases:: anything whose to_s lists the bases (e.g. "TC"); case, order and
#         repetitions do not matter.
# The lookup key for IUPAC_CODES is the sorted, de-duplicated, lower-cased
# list of characters. When the key is unknown a diagnostic naming the
# offending input is written to $stderr and 'N' is returned.
def self.to_IUAPC(bases)
  key  = bases.to_s.downcase.chars.sort.uniq.join
  code = IUPAC_CODES[key]
  if code.nil?
    # Report the input (the failed lookup result is always nil) and keep the
    # diagnostic off stdout.
    $stderr.puts "Invalid base! #{bases}"
    code = 'n' #This is a patch... as one of the scripts failed here.
  end
  code.upcase
end
|
84
|
+
|
85
|
+
# Tells whether +base+ is one of the characters IUPAC_CODES lists for +code+
# (both are compared lower-cased). Returns false when +code+ has no entry in
# IUPAC_CODES.
def self.is_valid(code, base)
  expansion = IUPAC_CODES[code.downcase]
  return false if expansion.nil?

  expansion.chars.include?(base.downcase)
end
|
88
|
+
|
89
|
+
end
|
90
|
+
|
91
|
+
|
92
|
+
#class Bio::DB::Sam::SAMException < RuntimeError
|
93
|
+
|
94
|
+
#end
|
95
|
+
|
96
|
+
class Bio::DB::Sam
|
97
|
+
|
98
|
+
|
99
|
+
attr_accessor :minumum_ratio_for_iup_consensus
|
100
|
+
attr_reader :cached_regions
|
101
|
+
#attr_accessor :pileup_cache
|
102
|
+
@minumum_ratio_for_iup_consensus = 0.20
|
103
|
+
|
104
|
+
|
105
|
+
#Same as mpileup, but it caches the piles of each region, so several operations
#over the same region won't execute the mpileup command several times.
#Whenever you finish using a region, call mpileup_clear_cache to free the cache.
#A region is required (opts[:r] or opts[:region]); its string form is the key of the underlying hash.
#We assume that the options are constant. If they are not, the cache mechanism may not be consistent.
#
#Note that opts is modified: :r and :region are set to the region string and :A to true.
#Every pile of the region is yielded to the block, either while it is being
#loaded (first call) or from the cache (later calls).
#Raises SAMException when no region is given.
#
#TODO: It may be good to load partially the pileup
def mpileup_cached(opts={})
  raise SAMException, "A region must be provided" unless opts[:r] or opts[:region]
  @pileup_cache ||= {}
  @cached_regions ||= {}

  key = (opts[:r] || opts[:region]).to_s
  opts[:r] = key.dup
  opts[:region] = key.dup
  opts[:A] = true

  cached = @cached_regions[key]
  if cached
    # The piles hang from the cached Region, not from the hash itself.
    cached.pileup.each { |pile| yield pile }
  else
    cached = Bio::DB::Fasta::Region.parse_region(key)
    piles = []
    cached.pileup = piles
    @cached_regions[key] = cached
    mpileup(opts) do |pile|
      piles << pile
      yield pile
    end
  end
end
|
140
|
+
|
141
|
+
#Clears the pileup cache. If a region is passed as argument, just the specified region is removed
#If no region is passed, the hash is emptied
#Does nothing when nothing has been cached yet.
def mpileup_clear_cache(region = nil)
  return unless @cached_regions

  if region
    # Remove the entry (rather than storing nil) so the key is released too.
    @cached_regions.delete(region.to_s)
  else
    @cached_regions.clear
  end
end
|
151
|
+
|
152
|
+
#Gets the average coverage of a region from a pileup.
#The statistics of the region (opts[:region]) are calculated with
#calculate_stats_from_pile the first time and read from @cached_regions afterwards.
#The cache is keyed by the string form of the region, so opts[:region] is
#always normalised with to_s before the lookup.
def average_coverage_from_pileup(opts={})
  key = opts[:region].to_s
  opts[:region] = key
  calculate_stats_from_pile(opts) unless @cached_regions && @cached_regions[key]
  @cached_regions[key].average_coverage
end
|
159
|
+
|
160
|
+
#
|
161
|
+
#Gets the array with the coverage of each position of the region (opts[:region]).
#The statistics are calculated with calculate_stats_from_pile the first time
#and read from @cached_regions afterwards. The cache is keyed by the string
#form of the region, so opts[:region] is always normalised with to_s.
def coverages_from_pileup(opts={})
  key = opts[:region].to_s
  opts[:region] = key
  calculate_stats_from_pile(opts) unless @cached_regions && @cached_regions[key]
  @cached_regions[key].coverages
end
|
167
|
+
|
168
|
+
#Gets the consensus stored for the region (opts[:region]).
#The statistics are calculated with calculate_stats_from_pile the first time
#and read from @cached_regions afterwards. The cache is keyed by the string
#form of the region, so opts[:region] is always normalised with to_s.
def consensus_with_ambiguities(opts={})
  key = opts[:region].to_s
  opts[:region] = key
  calculate_stats_from_pile(opts) unless @cached_regions && @cached_regions[key]
  @cached_regions[key].consensus
end
|
175
|
+
|
176
|
+
#Walks the pileup of a region and stores its statistics in the cached Region.
#
#Options:
# :region  - a Bio::DB::Fasta::Region, or anything whose to_s parse_region understands.
#            opts[:region] is replaced by the parsed Region.
# :min_cov - a position is only recorded when its coverage is greater than this (default 20).
# (:case is accepted by callers but is not used here.)
#
#For every pile above the threshold the base ratios, coverage and base counts
#are stored by position and the reference base is replaced by the upper-cased
#ambiguity consensus; the other positions keep BASE_COUNT_ZERO / 0 and the
#lower-cased reference. The total coverage adds up every pile, whatever its
#coverage.
#
#Returns the cached Region with coverages, base_ratios, consensus,
#average_coverage and bases filled in.
def calculate_stats_from_pile(opts={})
  min_cov = opts[:min_cov] || 20

  opts[:region] = Bio::DB::Fasta::Region.parse_region(opts[:region].to_s) unless opts[:region].class == Bio::DB::Fasta::Region
  region = opts[:region]

  reference = fetch_reference(region.entry, region.start, region.end).downcase
  # Every slot starts out pointing at the same BASE_COUNT_ZERO object; slots
  # are only ever replaced, never modified in place.
  base_ratios = Array.new(region.size, BASE_COUNT_ZERO)
  bases       = Array.new(region.size, BASE_COUNT_ZERO)
  coverages   = Array.new(region.size, 0)
  total_cov   = 0

  mpileup_cached(:region => "#{region.to_s}") do |pile|
    offset = pile.pos - region.start
    if pile.coverage > min_cov
      base_ratios[offset] = pile.base_ratios
      # NOTE(review): the reference string is indexed one position before the
      # arrays (offset - 1 vs offset), so a pile at region.start writes to
      # index -1, i.e. the last character. Kept as is - confirm the coordinate
      # convention of fetch_reference before changing it.
      reference[offset - 1] = pile.consensus_iuap(0.20).upcase
      coverages[offset] = pile.coverage.to_i
      bases[offset] = pile.bases
    end
    total_cov += pile.coverage
  end

  # mpileup_cached registered the Region object for this key; the statistics
  # are attached to that cached instance.
  cached = @cached_regions[region.to_s]
  cached.coverages = coverages
  cached.base_ratios = base_ratios
  cached.consensus = Bio::Sequence.new(reference)
  # NOTE(review): the return value is discarded; presumably called to mark
  # the sequence as nucleic acid in place - confirm against Bio::Sequence#na.
  cached.consensus.na
  cached.consensus.reverse_complement!() if cached.orientation == :reverse
  cached.average_coverage = total_cov.to_f / cached.size.to_f
  cached.bases = bases
  cached
end
|
224
|
+
|
225
|
+
|
226
|
+
|
227
|
+
#BASE_COUNT_ZERO = {:A => 0, :C => 0, :G => 0, :T => 0}
|
228
|
+
|
229
|
+
#Gets an array with the proportions of the bases in the region (opts[:region]),
#one entry per position, as stored by calculate_stats_from_pile.
#The statistics are calculated the first time and read from @cached_regions
#afterwards. The cache is keyed by the string form of the region, so
#opts[:region] is always normalised with to_s.
def base_ratios_in_region(opts={})
  key = opts[:region].to_s
  opts[:region] = key
  calculate_stats_from_pile(opts) unless @cached_regions && @cached_regions[key]
  @cached_regions[key].base_ratios
end
|
236
|
+
|
237
|
+
#Gets an array with the bases count in the region (opts[:region]),
#one entry per position, as stored by calculate_stats_from_pile.
#The statistics are calculated the first time and read from @cached_regions
#afterwards. The cache is keyed by the string form of the region, so
#opts[:region] is always normalised with to_s.
def bases_in_region(opts={})
  key = opts[:region].to_s
  opts[:region] = key
  calculate_stats_from_pile(opts) unless @cached_regions && @cached_regions[key]
  @cached_regions[key].bases
end
|
244
|
+
|
245
|
+
|
246
|
+
|
247
|
+
#Prints the reads aligned to a region as FASTQ records (four lines per
#alignment: "@name", sequence, "+name", qualities) on $stdout.
#
#Options:
# :region - a Bio::DB::Fasta::Region, or anything whose to_s parse_region understands.
#           opts[:region] is replaced by the parsed Region.
# (:fastq and :fastq_file are accepted by callers but are not used yet; the
#  output always goes to $stdout.)
#
#The alignments are visited with fetch_with_function over the entry, start
#and end of the region.
#NOTE(review): assumes fetch_with_function takes (entry, start, end, callback)
#- confirm against the Bio::DB::Sam version in use.
def extract_reads(opts={})
  opts[:region] = Bio::DB::Fasta::Region.parse_region(opts[:region].to_s) unless opts[:region].class == Bio::DB::Fasta::Region
  region = opts[:region]
  out = $stdout

  print_fastq = Proc.new do |alignment|
    out.puts "@#{alignment.qname}"
    out.puts "#{alignment.seq}"
    out.puts "+#{alignment.qname}"
    out.puts "#{alignment.qual}"
  end

  fetch_with_function(region.entry, region.start, region.end, print_fastq)
end
|
265
|
+
|
266
|
+
end
|
267
|
+
|
268
|
+
class Bio::DB::Fasta::Region
|
269
|
+
attr_accessor :pileup, :average_coverage, :snps, :reference, :base_ratios, :consensus, :coverages, :bases
|
270
|
+
|
271
|
+
#TODO: Debug, as it hasn't been tested in the actual code.
#Returns an array with the ratio of +base+ at every position of the region,
#taken from @base_ratios (one hash of ratios per position). The array is
#built once per base and memoized in @all_ratios.
def base_ratios_for_base(base)
  @all_ratios ||= {}
  @all_ratios[base] ||= @base_ratios.map { |ratios| ratios[base] }
end
|
283
|
+
|
284
|
+
end
|
@@ -0,0 +1,272 @@
|
|
1
|
+
# RYO %S\t%pi\t%ql\t%tl\t%g\t%V\n
|
2
|
+
|
3
|
+
|
4
|
+
module Bio::DB::Exonerate
|
5
|
+
|
6
|
+
|
7
|
+
#TODO: Make a proper object with generic parser
#Runs exonerate and parses its output.
#
#Options (defaults in brackets):
# :query, :target - the two inputs handed to exonerate; they are shell-escaped,
#                   so paths with spaces or shell metacharacters are safe.
# :model ['affine:local'], :bestn [20] - passed to --model and --bestn.
# :ryo   - output format; the default starts every hit with "RESULT:" so that
#          Alignment.parse_custom can recognise it.
# :percentage [50] - accepted but not used to build the command line.
#
#With a block every parsed alignment is yielded and nil is returned; without
#a block an Array with the alignments is returned. Lines that
#Alignment.parse_custom does not recognise are skipped.
#Raises ExonerateException when exonerate exits with a non-zero status.
def self.align(opts={})
  require 'shellwords'

  opts = {
    :model => 'affine:local',
    :ryo => "RESULT:\\t%S\\t%pi\\t%ql\\t%tl\\t%g\\t%V\\n",
    :bestn => 20,
    :percentage => 50
  }.merge(opts)

  target = Shellwords.escape(opts[:target].to_s)
  query  = Shellwords.escape(opts[:query].to_s)

  cmdline = "exonerate --verbose 0 --showalignment no --bestn #{opts[:bestn]} --showvulgar no --model #{opts[:model]} --ryo '#{opts[:ryo]}' #{query} #{target}"
  status, stdout, stderr = systemu cmdline
  unless status.exitstatus == 0
    raise ExonerateException, "Error running exonerate. Command line was '#{cmdline}'\nExonerate STDERR was:\n#{stderr}"
  end

  alns = block_given? ? nil : []
  stdout.each_line do |line|
    aln = Alignment.parse_custom(line)
    next unless aln

    if block_given?
      yield aln
    else
      alns << aln
    end
  end
  alns
end
|
41
|
+
|
42
|
+
|
43
|
+
# Error type of this module: used when exonerate exits with an error and when
# its output is inconsistent (unknown strand, position out of range, ...).
class ExonerateException < RuntimeError; end
|
45
|
+
|
46
|
+
# One exonerate alignment, built from a line of the custom --ryo output:
# the "RESULT:" tag followed by the tab-separated sugar, %pi, %ql, %tl, %g
# and vulgar fields.
class Alignment
  attr_accessor :query_id, :query_start, :query_end, :query_strand
  attr_accessor :target_id, :target_start, :target_end, :target_strand, :score
  attr_accessor :vulgar_block, :pi, :ql, :tl, :g
  attr_accessor :line

  #This one day may grow to work with complex ryo....
  # Parses one output line. Returns the Alignment, or nil when the line does
  # not start with the "RESULT:" tag.
  def self.parse_custom(line)
    fields = line.split(/\t/)
    return nil unless fields[0] == "RESULT:"

    al = new
    al.parse_sugar(fields[1])
    al.pi = fields[2].to_f
    al.ql = fields[3].to_i
    al.tl = fields[4].to_i
    al.g = fields[5]
    al.parse_vulgar(fields[6])
    al.line = line
    al
  end

  # The value of the %pi field.
  def identity
    @pi
  end

  # Fills ids, coordinates, strands and score from the whitespace-separated
  # sugar string. Coordinates become Integers, the score a Float and the
  # strands :forward / :reverse.
  # Raises ExonerateException on an unknown strand sign or when the order of
  # start/end contradicts the strand. Returns self.
  def parse_sugar(sugar_str)
    @query_id, @query_start, @query_end, @query_strand,
      @target_id, @target_start, @target_end, @target_strand, @score = sugar_str.split(/\s+/)

    @query_start = @query_start.to_i
    @query_end = @query_end.to_i
    @target_start = @target_start.to_i
    @target_end = @target_end.to_i
    @score = @score.to_f

    @target_strand = strand_from_sign(@target_strand, "target", sugar_str)
    @query_strand = strand_from_sign(@query_strand, "query", sugar_str)

    raise ExonerateException, "Inconsistent orientation (forward, query)" if @query_strand == :forward && @query_start > @query_end
    raise ExonerateException, "Inconsistent orientation (reverse, query)" if @query_strand == :reverse && @query_start < @query_end
    raise ExonerateException, "Inconsistent orientation (forward, target)" if @target_strand == :forward && @target_start > @target_end
    raise ExonerateException, "Inconsistent orientation (reverse, target)" if @target_strand == :reverse && @target_start < @target_end

    self
  end

  #The vulgar has to be parsed AFTER the sugar, otherwise it is impossible to determine the orientations
  # Splits the vulgar string in (label, query length, target length) triplets
  # and turns each one into a Vulgar block. Blocks are chained: each one
  # starts where the previous one ended, moving backwards on a reverse strand.
  # Returns self.
  def parse_vulgar(vulgar_str)
    target_current = @target_start
    query_current = @query_start
    target_multiply = @target_strand == :reverse ? -1 : 1
    query_multiply = @query_strand == :reverse ? -1 : 1

    @vulgar_block = []
    vulgar_str.split(/\s/).each_slice(3) do |label, query_length, target_length|
      vulgar = Vulgar.new(label.to_sym, query_length.to_i, target_length.to_i,
                          target_current, target_multiply, query_current, query_multiply, self)
      query_current = vulgar.query_end
      target_current = vulgar.target_end
      @vulgar_block << vulgar
    end
    self
  end

  #This assumes that the gene is the query and the chromosome is the target
  # Returns the first vulgar block whose query range contains +position+, or
  # nil. The range is taken in either order, because query_start is greater
  # than query_end for blocks of a reverse-strand query.
  def exon_on_gene_position(position)
    @vulgar_block.find do |vulgar|
      low, high = [vulgar.query_start, vulgar.query_end].minmax
      position.between?(low, high)
    end
  end

  # Translates a query position into a target position through the block that
  # contains it (see exon_on_gene_position). Returns nil when no block
  # contains the position.
  def tarpostion_from_query_position(position)
    block = exon_on_gene_position(position)
    block && block.target_position_from_query(position)
  end

  # Returns a String with one line (to_s) per vulgar block.
  def print_features
    @vulgar_block.map { |vulgar| "#{vulgar}\n" }.join
  end

  private

  # Maps the "+" / "-" sign of the sugar line to :forward / :reverse.
  def strand_from_sign(sign, what, sugar_str)
    case sign
    when "+" then :forward
    when "-" then :reverse
    else raise ExonerateException, "Ivalid #{what} orientation #{sign} for line:\n#{sugar_str}"
    end
  end
end
|
163
|
+
|
164
|
+
|
165
|
+
# One block of a vulgar string: a label plus the stretch of query and target
# it covers. The end coordinates are derived from the start, the length and
# the direction multiplier (+1, or -1 when walking a reverse strand).
class Vulgar
  attr_reader :label, :query_length, :target_length, :query_start, :query_end, :target_start, :target_end, :record, :snp_in_gap

  # label::  block label as a Symbol (e.g. :M, :G)
  # ql, tl:: length of the block on the query and on the target
  # target_start, query_start::       where the block starts
  # target_multiply, query_multiply:: 1 or -1, the direction of each strand
  # record:: the Alignment that owns the block
  def initialize(label, ql, tl, target_start, target_multiply, query_start, query_multiply, record)
    @label = label
    @query_length = ql
    @target_length = tl
    @query_start = query_start
    @query_end = query_start + (query_multiply * ql)
    @target_start = target_start
    @target_end = target_start + (target_multiply * tl)
    @record = record
    @snp_in_gap = false
  end

  # Tab-separated: label, query length, target length, query start/end,
  # target start/end.
  def to_s
    [@label, @query_length, @target_length, @query_start, @query_end, @target_start, @target_end].join("\t")
  end

  def query_id
    record.query_id
  end

  def target_id
    record.target_id
  end

  # Region of the target centred on the target position that corresponds to
  # the query +position+, extended +flanking_size+ to each side and oriented
  # like the target strand.
  # Returns nil when the position falls on a gap (snp_in_gap).
  # Raises ExonerateException when +position+ is outside the block; that
  # check is done by target_position_from_query and accepts both strands.
  def target_flanking_region_from_position(position, flanking_size)
    target_snp_pos = target_position_from_query(position)
    return nil if snp_in_gap

    reg = Bio::DB::Fasta::Region.new
    reg.entry = target_id
    reg.orientation = record.target_strand
    reg.start = target_snp_pos - flanking_size
    reg.end = target_snp_pos + flanking_size
    reg
  end

  # Target position that corresponds to the query +position+ inside this block.
  # On a gap block (label :G with target length 0) snp_in_gap is set and the
  # start of the block is returned.
  # Raises ExonerateException when +position+ is outside the block, when a
  # strand is neither :forward nor :reverse, or when the result falls outside
  # the target range of the block.
  def target_position_from_query(position)
    raise ExonerateException, "Position: #{position} not in range (#{query_start}-#{query_end}) #{self.to_s} " unless within?(position, query_start, query_end)

    offset =
      case record.query_strand
      when :forward then position - query_start
      when :reverse then query_start - position
      else raise ExonerateException, "The strand is not forward or reverse (#{record.query_strand}) ! #{self.inspect}"
      end

    ret =
      case record.target_strand
      when :forward then target_start + offset
      when :reverse then target_start - offset + 1
      else raise ExonerateException, "The strand is not forward or reverse! #{self.inspect}"
      end

    #THis is in case the position is on a gap.
    if @target_length == 0 and label == :G
      @snp_in_gap = true
      ret = target_start
    end

    raise ExonerateException, "Return position #{ret} outside block (#{target_start}-#{target_end}, #{self.inspect})" unless within?(ret, target_start, target_end)
    ret
  end

  # Region covered by the block on the query, with start <= end whatever the
  # strand; the lower coordinate is shifted by one.
  def query_region
    oriented_region(query_id, record.query_strand, @query_start, @query_end, "query")
  end

  # Region covered by the block on the target, with start <= end whatever the
  # strand; the lower coordinate is shifted by one.
  def target_region
    oriented_region(target_id, record.target_strand, @target_start, @target_end, "target")
  end

  private

  # True when +value+ lies between +a+ and +b+, given in either order.
  def within?(value, a, b)
    value.between?(a, b) || value.between?(b, a)
  end

  # Builds the Region for query_region / target_region. +first+ and +last+
  # are the start and end of the block in alignment order.
  def oriented_region(entry, strand, first, last, what)
    reg = Bio::DB::Fasta::Region.new
    reg.entry = entry
    reg.orientation = strand
    case strand
    when :forward
      reg.start = first + 1
      reg.end = last
    when :reverse
      reg.start = last + 1
      reg.end = first
    else
      raise ExonerateException, "Ivalid #{what} orientation #{strand}"
    end
    reg
  end
end
|
271
|
+
|
272
|
+
end
|