bio-polyploid-tools 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +16 -0
  3. data/Gemfile.lock +67 -0
  4. data/README +21 -0
  5. data/Rakefile +61 -0
  6. data/VERSION +1 -0
  7. data/bin/bfr.rb +133 -0
  8. data/bin/count_variations.rb +36 -0
  9. data/bin/filter_blat_by_target_coverage.rb +15 -0
  10. data/bin/find_best_blat_hit.rb +32 -0
  11. data/bin/hexaploid_primers.rb +168 -0
  12. data/bin/homokaryot_primers.rb +155 -0
  13. data/bin/map_markers_to_contigs.rb +66 -0
  14. data/bin/markers_in_region.rb +42 -0
  15. data/bin/polymarker.rb +219 -0
  16. data/bin/snps_between_bams.rb +106 -0
  17. data/bio-polyploid-tools.gemspec +139 -0
  18. data/conf/defaults.rb +1 -0
  19. data/conf/primer3_config/dangle.dh +128 -0
  20. data/conf/primer3_config/dangle.ds +128 -0
  21. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  22. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  23. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  24. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  25. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  26. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  27. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  28. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  29. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  30. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  31. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  32. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  33. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  34. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  35. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  36. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  37. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  38. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  39. data/conf/primer3_config/loops.dh +30 -0
  40. data/conf/primer3_config/loops.ds +30 -0
  41. data/conf/primer3_config/stack.dh +256 -0
  42. data/conf/primer3_config/stack.ds +256 -0
  43. data/conf/primer3_config/stackmm.dh +256 -0
  44. data/conf/primer3_config/stackmm.ds +256 -0
  45. data/conf/primer3_config/tetraloop.dh +77 -0
  46. data/conf/primer3_config/tetraloop.ds +77 -0
  47. data/conf/primer3_config/triloop.dh +16 -0
  48. data/conf/primer3_config/triloop.ds +16 -0
  49. data/conf/primer3_config/tstack.dh +256 -0
  50. data/conf/primer3_config/tstack2.dh +256 -0
  51. data/conf/primer3_config/tstack2.ds +256 -0
  52. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  53. data/lib/bio/BFRTools.rb +698 -0
  54. data/lib/bio/BIOExtensions.rb +186 -0
  55. data/lib/bio/PolyploidTools/ChromosomeArm.rb +52 -0
  56. data/lib/bio/PolyploidTools/ExonContainer.rb +194 -0
  57. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  58. data/lib/bio/PolyploidTools/PrimerRegion.rb +22 -0
  59. data/lib/bio/PolyploidTools/SNP.rb +681 -0
  60. data/lib/bio/PolyploidTools/SNPSequence.rb +56 -0
  61. data/lib/bio/SAMToolsExtensions.rb +284 -0
  62. data/lib/bio/db/exonerate.rb +272 -0
  63. data/lib/bio/db/fastadb.rb +164 -0
  64. data/lib/bio/db/primer3.rb +673 -0
  65. data/lib/bioruby-polyploid-tools.rb +25 -0
  66. data/test/data/BS00068396_51.fa +2 -0
  67. data/test/data/BS00068396_51_contigs.aln +1412 -0
  68. data/test/data/BS00068396_51_contigs.dnd +7 -0
  69. data/test/data/BS00068396_51_contigs.fa +8 -0
  70. data/test/data/BS00068396_51_exonerate.tab +6 -0
  71. data/test/data/BS00068396_51_genes.txt +14 -0
  72. data/test/data/LIB1716.bam +0 -0
  73. data/test/data/LIB1716.bam.bai +0 -0
  74. data/test/data/LIB1719.bam +0 -0
  75. data/test/data/LIB1719.bam.bai +0 -0
  76. data/test/data/LIB1721.bam +0 -0
  77. data/test/data/LIB1721.bam.bai +0 -0
  78. data/test/data/LIB1722.bam +0 -0
  79. data/test/data/LIB1722.bam.bai +0 -0
  80. data/test/data/S22380157.fa +16 -0
  81. data/test/data/S22380157.fa.fai +1 -0
  82. data/test/data/Test3Aspecific.csv +1 -0
  83. data/test/data/Test3Aspecific_contigs.fa +6 -0
  84. data/test/data/patological_cases5D.csv +1 -0
  85. data/test/data/short_primer_design_test.csv +10 -0
  86. data/test/data/test_primer3_error.csv +4 -0
  87. data/test/data/test_primer3_error_contigs.fa +10 -0
  88. data/test/test_bfr.rb +51 -0
  89. data/test/test_exon_container.rb +17 -0
  90. data/test/test_exonearate.rb +53 -0
  91. data/test/test_snp_parsing.rb +40 -0
  92. metadata +201 -0
@@ -0,0 +1,56 @@
1
+
2
+ require_relative "SNP"
3
module Bio::PolyploidTools
  # Raised when a SNP description line cannot be parsed.
  class SNPSequenceException < RuntimeError
  end

  # A SNP described by its flanking sequence, with the two alleles written
  # inline as "[A/B]".
  class SNPSequence < SNP

    attr_accessor :sequence_original

    # Builds a SNPSequence from one comma-separated line.
    #
    # Format:
    #   snp name,chromosome from contig,microarray sequence
    #   BS00068396_51,2AS,CGAAGCGATCCTACTACATTGCGTTCCTTTCCCACTCCCAGGTCCCCCTA[T/C]ATGCAGGATCTTGATTAGTCGTGTGAACAACTGAAATTTGAGCGCCACAA
    # The chromosome field may be left out ("snp name,sequence").
    #
    # reg_str:: the line to parse. It is not modified.
    # Returns the populated SNPSequence.
    # Raises SNPSequenceException unless the line has two or three fields.
    def self.parse(reg_str)
      line = reg_str.chomp
      fields = line.split(",")
      snp = SNPSequence.new

      case fields.size
      when 3
        snp.gene, snp.chromosome, snp.sequence_original = fields
      when 2
        snp.gene, snp.sequence_original = fields
      else
        # raise (not throw): throw is for catch/throw control flow and would
        # not surface as a SNPSequenceException to the caller.
        raise SNPSequenceException, "Need two or three fields to parse, and got #{fields.size} in #{line}"
      end

      # The chromosome is only present in the three-field form.
      snp.chromosome.strip! if snp.chromosome
      snp.parse_sequence_snp
      snp.exon_list = Hash.new
      snp
    end

    # Intentionally empty.
    # NOTE(review): presumably overrides a parse_snp defined in SNP so that
    # the position/allele parsing done there is skipped - confirm.
    def parse_snp
    end

    # Reads sequence_original ("PREFIX[A/B]SUFFIX") and sets:
    # @position:: 1-based position of the SNP (prefix length + 1)
    # @original:: the first allele
    # @snp:: the second allele
    # @template_sequence:: prefix + ambiguity code for both alleles + suffix
    # Does nothing (returns nil) when the sequence does not contain a SNP in
    # that notation.
    def parse_sequence_snp
      match_data = /(?<pre>\w*)\[(?<org>[ACGT])\/(?<snp>[ACGT])\](?<pos>\w*)/.match(sequence_original.strip)
      return unless match_data

      @position = match_data[:pre].size + 1
      @original = match_data[:org]
      @snp = match_data[:snp]
      amb_base = Bio::NucleicAcid.to_IUAPC("#{@original}#{@snp}")

      @template_sequence = "#{match_data[:pre]}#{amb_base}#{match_data[:pos]}"
    end

  end
end
@@ -0,0 +1,284 @@
1
+ require 'rubygems'
2
+ require 'pathname'
3
+ #require_relative 'db/fasta.rb'
4
+ require 'bio'
5
+
6
+ require_relative 'db/fastadb.rb'
7
+
8
+ #require "set"
9
+ #require 'systemu'
10
+ #require 'json'
11
+
12
+ =begin
13
+
14
+ Extends the methods to be able to calculate the BFR and a consensus from the pileup
15
+
16
+ =end
17
+
18
# Extends Bio::DB::Pileup with per-base counts, per-base ratios and an
# ambiguity-code consensus for a single pileup position.
class Bio::DB::Pileup

  #attr_accessor :minumum_ratio_for_iup_consensus
  #@minumum_ratio_for_iup_consensus = 0.20

  # Returns a hash with the count of bases: the non-reference counts plus the
  # reference base (upper-cased symbol) mapped to ref_count. Memoized.
  def bases
    return @bases if @bases
    @bases = self.non_refs
    @bases[self.ref_base.upcase.to_sym] = self.ref_count
    @bases
  end

  # Sum of all the counts returned by #bases.
  # Goes through #bases (not @bases) so it also works when #bases has not
  # been called yet.
  def base_coverage
    bases.values.inject(0) { |total, count| total + count }
  end

  # Returns a hash base => count / base_coverage. Memoized.
  def base_ratios
    return @base_ratios if @base_ratios
    total = base_coverage.to_f # computed once instead of once per base
    @base_ratios = Hash.new
    bases.each do |base, count|
      @base_ratios[base] = count.to_f / total
    end
    @base_ratios
  end

  # Returns the ambiguity code (via Bio::NucleicAcid.to_IUAPC) for every base
  # whose share of the coverage is above minumum_ratio_for_iup_consensus.
  # When no base passes the threshold, the lower-cased reference base is
  # returned instead.
  # The result is memoized, so the threshold of the first call wins.
  def consensus_iuap(minumum_ratio_for_iup_consensus)
    if @consensus_iuap.nil?
      @consensus_iuap = self.ref_base.downcase
      tmp = String.new
      bases.each do |k, v|
        # to_f guards against integer division if coverage is an Integer.
        tmp << k[0].to_s if v.to_f / self.coverage > minumum_ratio_for_iup_consensus
      end
      @consensus_iuap = Bio::NucleicAcid.to_IUAPC(tmp) if tmp.length > 0
    end
    @consensus_iuap
  end
end
68
+
69
+
70
+
71
# Helpers on Bio::NucleicAcid to translate between sets of bases and
# ambiguity codes through the IUPAC_CODES table.
# NOTE(review): IUPAC_CODES is not defined in this file; to_IUAPC looks it up
# by sorted bases and is_valid by code - confirm the table supports both.
class Bio::NucleicAcid

  # Returns the upper-cased ambiguity code for a set of bases.
  # bases:: anything whose to_s is a string of bases (case, order and
  #         duplicates are ignored).
  # Unknown combinations are reported on STDERR and yield 'N'.
  def self.to_IUAPC(bases)
    key = bases.to_s.downcase.chars.sort.uniq.join
    base = IUPAC_CODES[key]
    if base.nil?
      # Report the offending input (the lookup result is always nil here).
      $stderr.puts "Invalid base! #{bases}"
      base = 'n' #This is a patch... as one of the scripts failed here.
    end
    base.upcase
  end

  # True when base is one of the bases listed for code in IUPAC_CODES.
  # Returns false for a code that is not in the table.
  def self.is_valid(code, base)
    expansion = IUPAC_CODES[code.downcase]
    return false if expansion.nil?
    expansion.chars.include? base.downcase
  end

end
90
+
91
+
92
+ #class Bio::DB::Sam::SAMException < RuntimeError
93
+
94
+ #end
95
+
96
+ class Bio::DB::Sam
97
+
98
+
99
+ attr_accessor :minumum_ratio_for_iup_consensus
100
+ attr_reader :cached_regions
101
+ #attr_accessor :pileup_cache
102
+ @minumum_ratio_for_iup_consensus = 0.20
103
+
104
+
105
+ #Same as mpileup, but it caches the pileup, so if you run several operations on the same set of regions
106
+ #the mpileup command is executed only once and the cached pileup is reused
107
+ #Whenever you finish using a region, call mpileup_clear_cache to free the cache
108
+ #The argument Region is required, as it will be the key for the underlying hash.
109
+ #We assume that the options are constant. If they are not, the cache mechanism may not be consistent.
110
+ #
111
+ #TODO: It may be good to load partially the pileup
112
# Runs mpileup for a region and yields every pile, remembering the piles so
# that later calls for the same region replay them without running mpileup
# again.
#
# opts:: mpileup options; :r or :region is required and is used (as a
#        string) as the cache key. :r, :region and :A are overwritten in
#        opts.
# Raises SAMException when no region is given.
def mpileup_cached(opts={})
  raise SAMException, "A region must be provided" unless opts[:r] or opts[:region]
  @pileup_cache ||= Hash.new
  @cached_regions ||= Hash.new

  region = opts[:r] ? opts[:r] : opts[:region]
  key = region.to_s
  opts[:r] = key
  opts[:region] = key
  opts[:A] = true

  cached = @cached_regions[key]
  if cached
    # Cache hit: the piles live on the cached Region object.
    cached.pileup.each do |pile|
      yield pile
    end
  else
    cached = Bio::DB::Fasta::Region.parse_region(key)
    piles = Array.new
    cached.pileup = piles
    @cached_regions[key] = cached
    mpileup(opts) do |pile|
      piles << pile
      yield pile
    end
  end
end
140
+
141
+ #Clears the pileup cache. If a region is passed as argument, just the specified region is removed
142
+ #If no region is passed, the hash is emptied
143
# Clears the pileup cache.
# region:: when given, only that region (looked up by its to_s) is removed;
#          when omitted or nil, every cached region is dropped.
# Does nothing if nothing has been cached yet.
def mpileup_clear_cache(region = nil)
  return unless @cached_regions
  if region
    @cached_regions.delete(region.to_s)
  else
    @cached_regions.clear
  end
end
151
+
152
+ #Gets the coverage of a region from a pileup.
153
# Average coverage of opts[:region], taken from the cached region.
# The statistics are calculated first when the region is not cached yet.
def average_coverage_from_pileup(opts={})
  if opts[:region].class == Bio::DB::Fasta::Region
    opts[:region] = opts[:region].to_s
  end
  key = opts[:region]
  not_cached = @cached_regions.nil? || @cached_regions[key].nil?
  calculate_stats_from_pile(opts) if not_cached
  @cached_regions[key].average_coverage
end
159
+
160
+ #
161
# Per-position coverages of opts[:region], taken from the cached region.
# The statistics are calculated first when the region is not cached yet.
def coverages_from_pileup(opts={})
  if opts[:region].class == Bio::DB::Fasta::Region
    opts[:region] = opts[:region].to_s
  end
  key = opts[:region]
  not_cached = @cached_regions.nil? || @cached_regions[key].nil?
  calculate_stats_from_pile(opts) if not_cached
  @cached_regions[key].coverages
end
167
+
168
# Consensus sequence of opts[:region], taken from the cached region.
# The statistics are calculated first when the region is not cached yet.
def consensus_with_ambiguities(opts={})
  if opts[:region].class == Bio::DB::Fasta::Region
    opts[:region] = opts[:region].to_s
  end
  key = opts[:region]
  not_cached = @cached_regions.nil? || @cached_regions[key].nil?
  calculate_stats_from_pile(opts) if not_cached
  @cached_regions[key].consensus
end
175
+
176
# Runs the cached pileup over opts[:region] and stores per-position
# statistics on the cached Bio::DB::Fasta::Region: coverages, base_ratios,
# bases, consensus and average_coverage.
#
# opts[:region]:: a Region or anything whose to_s parse_region accepts; it is
#                 replaced in opts by the Region object.
# opts[:min_cov]:: a position is only recorded when its coverage is above
#                  this value (default 20). Other positions keep
#                  BASE_COUNT_ZERO / 0 and the lower-cased reference base.
# Returns the cached Region.
def calculate_stats_from_pile(opts={})
  min_cov = opts[:min_cov] ? opts[:min_cov] : 20

  opts[:region] = Bio::DB::Fasta::Region.parse_region(opts[:region].to_s) unless opts[:region].class == Bio::DB::Fasta::Region
  region = opts[:region]

  reference = fetch_reference(region.entry, region.start, region.end).downcase
  base_ratios = Array.new(region.size, BASE_COUNT_ZERO)
  bases = Array.new(region.size, BASE_COUNT_ZERO)
  coverages = Array.new(region.size, 0)
  total_cov = 0

  mpileup_cached(:region => "#{region.to_s}") do |pile|
    if pile.coverage > min_cov
      offset = pile.pos - region.start
      base_ratios[offset] = pile.base_ratios
      # NOTE(review): the consensus is written one position to the left of
      # the other arrays (offset - 1, which wraps to the last base when
      # offset is 0). Kept as in the original - confirm the intended indexing.
      reference[offset - 1] = pile.consensus_iuap(0.20).upcase
      coverages[offset] = pile.coverage.to_i
      bases[offset] = pile.bases
    end
    # Every pile counts towards the average, covered enough or not.
    total_cov += pile.coverage
  end

  # From here on work with the Region object held by the cache.
  region = @cached_regions[region.to_s]
  region.coverages = coverages
  region.base_ratios = base_ratios
  region.consensus = Bio::Sequence.new(reference)
  region.consensus.na
  if region.orientation == :reverse
    region.consensus.reverse_complement!()
  end
  region.average_coverage = total_cov.to_f / region.size.to_f
  region.bases = bases
  region
end
224
+
225
+
226
+
227
+ #BASE_COUNT_ZERO = {:A => 0, :C => 0, :G => 0, :T => 0}
228
+
229
+ #Gets an array with the proportions of the bases in the region. Positions whose coverage does not exceed the minimum keep the BASE_COUNT_ZERO placeholder.
230
# Per-position base ratios of opts[:region], taken from the cached region.
# The statistics are calculated first when the region is not cached yet.
def base_ratios_in_region(opts={})
  if opts[:region].class == Bio::DB::Fasta::Region
    opts[:region] = opts[:region].to_s
  end
  key = opts[:region]
  not_cached = @cached_regions.nil? || @cached_regions[key].nil?
  calculate_stats_from_pile(opts) if not_cached
  @cached_regions[key].base_ratios
end
236
+
237
+ #Gets an array with the base counts in the region. Positions whose coverage does not exceed the minimum keep the BASE_COUNT_ZERO placeholder.
238
# Per-position base counts of opts[:region], taken from the cached region.
# The statistics are calculated first when the region is not cached yet.
def bases_in_region(opts={})
  if opts[:region].class == Bio::DB::Fasta::Region
    opts[:region] = opts[:region].to_s
  end
  key = opts[:region]
  not_cached = @cached_regions.nil? || @cached_regions[key].nil?
  calculate_stats_from_pile(opts) if not_cached
  @cached_regions[key].bases
end
244
+
245
+
246
+
247
# Prints the reads aligned to opts[:region] in FASTQ format on $stdout.
#
# opts[:region]:: a Region or anything whose to_s parse_region accepts; it is
#                 replaced in opts by the Region object.
# opts[:fastq] and opts[:fastq_file] are accepted but currently ignored; the
# output always goes to $stdout.
# NOTE(review): assumes fetch_with_function(entry, start, end, proc) calls the
# proc with an object responding to qname, seq and qual - confirm.
def extract_reads(opts={})
  opts[:region] = Bio::DB::Fasta::Region.parse_region(opts[:region].to_s) unless opts[:region].class == Bio::DB::Fasta::Region
  region = opts[:region]

  out = $stdout

  print_fastq = Proc.new do |alignment|
    out.puts "@#{alignment.qname}"
    out.puts "#{alignment.seq}"
    out.puts "+#{alignment.qname}"
    out.puts "#{alignment.qual}"
  end

  # Query the requested region (the original passed undefined variables here).
  fetch_with_function(region.entry, region.start, region.end, print_fastq)
end
265
+
266
+ end
267
+
268
# Extends Bio::DB::Fasta::Region with slots for the pileup and the statistics
# calculated from it.
class Bio::DB::Fasta::Region
  attr_accessor :pileup, :average_coverage, :snps, :reference, :base_ratios, :consensus, :coverages, :bases

  #TODO: Debug, as it hasnt been tested in the actual code.
  # Returns an array with the ratio of base at every position of the region,
  # read from @base_ratios. The array is memoized per base.
  def base_ratios_for_base(base)
    @all_ratios ||= Hash.new
    # Uses the region's own size (the original referred to an undefined
    # `region` variable).
    @all_ratios[base] ||= (0...size).map { |i| @base_ratios[i][base] }
  end

end
@@ -0,0 +1,272 @@
1
+ # RYO %S\t%pi\t%ql\t%tl\t%g\t%V\n
2
+
3
+
4
+ module Bio::DB::Exonerate
5
+
6
+
7
+ #TODO: Make a proper object with generic parser
8
# Runs exonerate for opts[:query] against opts[:target] and parses every
# "RESULT:" line of its output with Alignment.parse_custom.
#
# Defaults: :model 'affine:local', :bestn 20, :percentage 50 and a tab
# separated :ryo format; any of them can be overridden through opts.
# With a block, each alignment is yielded and nil is returned; without one,
# an array with the alignments is returned.
# Raises ExonerateException when exonerate exits with a non-zero status.
def self.align(opts={})
  defaults = {
    :model => 'affine:local',
    :ryo => "RESULT:\\t%S\\t%pi\\t%ql\\t%tl\\t%g\\t%V\\n",
    :bestn => 20,
    :percentage => 50
  }
  opts = defaults.merge(opts)

  target = opts[:target]
  query = opts[:query]

  cmdline = "exonerate --verbose 0 --showalignment no --bestn #{opts[:bestn]} --showvulgar no --model #{opts[:model]} --ryo '#{opts[:ryo]}' #{query} #{target}"
  status, stdout, stderr = systemu cmdline

  unless status.exitstatus == 0
    raise ExonerateException.new(), "Error running exonerate. Command line was '#{cmdline}'\nExonerate STDERR was:\n#{stderr}"
  end

  if block_given?
    stdout.each_line do |line|
      parsed = Alignment.parse_custom(line)
      yield parsed if parsed
    end
    nil
  else
    collected = Array.new
    stdout.each_line do |line|
      parsed = Alignment.parse_custom(line)
      collected << parsed if parsed
    end
    collected
  end
end
41
+
42
+
43
# Raised when exonerate fails or its output cannot be interpreted.
class ExonerateException < RuntimeError
end

# One exonerate alignment, built from a line in the custom ryo format
# "RESULT:\t%S\t%pi\t%ql\t%tl\t%g\t%V".
class Alignment
  attr_accessor :query_id, :query_start, :query_end, :query_strand
  attr_accessor :target_id, :target_start, :target_end, :target_strand, :score
  attr_accessor :vulgar_block, :pi, :ql, :tl, :g
  attr_accessor :line

  #This one day may grow to work with complex ryo....
  # Parses one tab separated output line.
  # Returns the Alignment, or nil when the line does not start with "RESULT:".
  def self.parse_custom(line)
    fields = line.split(/\t/)
    return nil unless fields[0] == "RESULT:"

    al = new
    al.parse_sugar(fields[1])
    al.pi = fields[2].to_f
    al.ql = fields[3].to_i
    al.tl = fields[4].to_i
    al.g = fields[5]
    al.parse_vulgar(fields[6])
    al.line = line
    al
  end

  # The value of the %pi field of the line.
  def identity
    @pi
  end

  # Parses the sugar field: query id/start/end/strand, target
  # id/start/end/strand and score, separated by whitespace.
  # Strands are stored as :forward or :reverse.
  # Raises ExonerateException for an unknown strand, or when the order of
  # start and end contradicts the strand.
  def parse_sugar(sugar_str)
    @query_id, @query_start, @query_end, @query_strand, @target_id, @target_start, @target_end, @target_strand, @score = sugar_str.split(/\s+/)

    @query_start = @query_start.to_i
    @query_end = @query_end.to_i
    @target_start = @target_start.to_i
    @target_end = @target_end.to_i
    @score = @score.to_f

    @target_strand = strand_to_symbol(@target_strand, "target", sugar_str)
    @query_strand = strand_to_symbol(@query_strand, "query", sugar_str)

    raise ExonerateException.new(), "Inconsistent orientation (forward, query)" if @query_strand == :forward and @query_start > @query_end
    raise ExonerateException.new(), "Inconsistent orientation (reverse, query)" if @query_strand == :reverse and @query_start < @query_end
    raise ExonerateException.new(), "Inconsistent orientation (forward, target)" if @target_strand == :forward and @target_start > @target_end
    raise ExonerateException.new(), "Inconsistent orientation (reverse, target)" if @target_strand == :reverse and @target_start < @target_end

    self
  end

  #The vulgar has to be parsed AFTER the sugar, otherwise it is impossible to determine the orientations
  # Parses the vulgar field (triples of label, query length, target length)
  # into a chain of Vulgar blocks; each block starts where the previous one
  # ended and advances backwards on a reverse strand.
  def parse_vulgar(vulgar_str)
    tarcurrent = @target_start
    query_current = @query_start
    target_multiply = @target_strand == :reverse ? -1 : 1
    query_multiply = @query_strand == :reverse ? -1 : 1

    @vulgar_block = Array.new
    # String#split without arguments ignores leading/trailing and repeated
    # whitespace, so no empty token can shift the triples.
    vulgar_str.split.each_slice(3) do |block|
      vulgar = Vulgar.new(block[0].to_sym, block[1].to_i, block[2].to_i, tarcurrent, target_multiply, query_current, query_multiply, self)
      query_current = vulgar.query_end
      tarcurrent = vulgar.target_end
      vulgar_block << vulgar
    end
    self
  end

  #This assumes that the gene is the query and the chromosome is the target
  # Returns the first vulgar block whose query interval contains position, or
  # nil. The interval is checked in both orders, because on a reverse strand
  # query_start is larger than query_end.
  def exon_on_gene_position(position)
    @vulgar_block.find do |vulgar|
      position.between?(vulgar.query_start, vulgar.query_end) ||
        position.between?(vulgar.query_end, vulgar.query_start)
    end
  end

  # NOTE(review): unfinished in the original - it looked up the block for
  # position but never used it. Still always returns nil.
  def tarpostion_from_query_position(position)
    nil
  end

  # Returns a string with one line (Vulgar#to_s) per vulgar block.
  def print_features
    @vulgar_block.map { |vulgar| "#{vulgar}\n" }.join
  end

  private

  # Maps "+" / "-" to :forward / :reverse.
  # Raises ExonerateException for anything else.
  def strand_to_symbol(sign, kind, sugar_str)
    case sign
    when "+" then :forward
    when "-" then :reverse
    else
      raise ExonerateException.new(), "Ivalid #{kind} orientation #{sign} for line:\n#{sugar_str}"
    end
  end
end
163
+
164
+
165
# One block of a vulgar string: a label plus the query and target intervals
# it covers. The intervals are chained by Alignment#parse_vulgar; on a
# reverse strand the end is smaller than the start.
class Vulgar
  attr_reader :label, :query_length, :target_length, :query_start, :query_end, :target_start, :target_end, :record, :snp_in_gap

  # label:: block label as a symbol
  # ql, tl:: length of the block on the query and on the target
  # target_start, query_start:: where the block starts
  # target_multiply, query_multiply:: 1 or -1, the direction in which the
  #                                   block advances on each sequence
  # record:: the alignment the block belongs to
  def initialize(label, ql, tl, target_start, target_multiply, query_start, query_multiply, record)
    @label = label
    @query_length = ql
    @target_length = tl
    @query_start = query_start
    @query_end = query_start + (query_multiply * query_length)
    @target_start = target_start
    @target_end = target_start + (target_multiply * target_length)
    @record = record
    @snp_in_gap = false
  end

  # Tab separated: label, query length, target length, query start/end,
  # target start/end.
  def to_s
    [@label, @query_length, @target_length, @query_start, @query_end, @target_start, @target_end].join("\t")
  end

  def query_id
    record.query_id
  end

  def target_id
    record.target_id
  end

  # Returns a Region on the target spanning flanking_size positions on each
  # side of the target position matching the query position.
  # Returns nil when the position falls in a gap (see
  # target_position_from_query).
  # Raises ExonerateException when position is outside this block; that check
  # is done by target_position_from_query and works for both strands.
  def target_flanking_region_from_position(position, flanking_size)
    target_snp_pos = target_position_from_query(position)
    return nil if snp_in_gap

    reg = Bio::DB::Fasta::Region.new()
    reg.entry = target_id
    reg.orientation = record.target_strand
    reg.start = target_snp_pos - flanking_size
    reg.end = target_snp_pos + flanking_size
    reg
  end

  # Translates a query position inside this block into a target position.
  # For a :G block with target length 0 the block start is returned and
  # snp_in_gap is set (it stays set afterwards).
  # Raises ExonerateException when position is outside the block, when a
  # strand is neither :forward nor :reverse, or when the result falls
  # outside the target interval.
  # NOTE(review): on a reverse target the result is target_start - offset + 1,
  # which is outside the interval for offset 0 - confirm the coordinate
  # convention.
  def target_position_from_query(position)
    raise ExonerateException.new(), "Position: #{position} not in range (#{query_start}-#{query_end}) #{self.to_s} " unless position.between?(query_start, query_end) or position.between?(query_end, query_start)

    offset = case record.query_strand
             when :forward then position - query_start
             when :reverse then query_start - position
             else
               raise ExonerateException.new(), "The strand is not forward or reverse (#{record.query_strand}) ! #{self.inspect}"
             end

    ret = case record.target_strand
          when :forward then target_start + offset
          when :reverse then target_start - offset + 1
          else
            raise ExonerateException.new(), "The strand is not forward or reverse! #{self.inspect}"
          end

    #THis is in case the position is on a gap.
    if @target_length == 0 and label == :G
      @snp_in_gap = true
      ret = target_start
    end
    raise ExonerateException.new(), "Return position #{ret} outside block (#{target_start}-#{target_end}, #{self.inspect})" unless ret.between?(target_start, target_end) or ret.between?(target_end, target_start)
    ret
  end

  # Region covered by this block on the query, with start <= end whatever
  # the strand (the smaller coordinate is shifted by +1).
  # Raises ExonerateException when the strand is neither :forward nor
  # :reverse.
  def query_region
    reg = Bio::DB::Fasta::Region.new()
    reg.entry = query_id
    reg.orientation = record.query_strand
    if record.query_strand == :forward
      reg.start = @query_start + 1
      reg.end = @query_end
    elsif record.query_strand == :reverse
      reg.start = @query_end + 1
      reg.end = @query_start
    else
      # The strand lives on the record; this class has no @query_strand.
      raise ExonerateException.new(), "Ivalid query orientation #{record.query_strand}"
    end
    reg
  end

  # Region covered by this block on the target, with start <= end whatever
  # the strand (the smaller coordinate is shifted by +1).
  # Raises ExonerateException when the strand is neither :forward nor
  # :reverse.
  def target_region
    reg = Bio::DB::Fasta::Region.new()
    reg.entry = target_id
    reg.orientation = record.target_strand
    if record.target_strand == :forward
      reg.start = @target_start + 1
      reg.end = @target_end
    elsif record.target_strand == :reverse
      reg.start = @target_end + 1
      reg.end = @target_start
    else
      # The strand lives on the record; this class has no @target_strand.
      raise ExonerateException.new(), "Ivalid target orientation #{record.target_strand}"
    end
    reg
  end

end
271
+
272
+ end