bio-polyploid-tools 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +16 -0
  3. data/Gemfile.lock +67 -0
  4. data/README +21 -0
  5. data/Rakefile +61 -0
  6. data/VERSION +1 -0
  7. data/bin/bfr.rb +133 -0
  8. data/bin/count_variations.rb +36 -0
  9. data/bin/filter_blat_by_target_coverage.rb +15 -0
  10. data/bin/find_best_blat_hit.rb +32 -0
  11. data/bin/hexaploid_primers.rb +168 -0
  12. data/bin/homokaryot_primers.rb +155 -0
  13. data/bin/map_markers_to_contigs.rb +66 -0
  14. data/bin/markers_in_region.rb +42 -0
  15. data/bin/polymarker.rb +219 -0
  16. data/bin/snps_between_bams.rb +106 -0
  17. data/bio-polyploid-tools.gemspec +139 -0
  18. data/conf/defaults.rb +1 -0
  19. data/conf/primer3_config/dangle.dh +128 -0
  20. data/conf/primer3_config/dangle.ds +128 -0
  21. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  22. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  23. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  24. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  25. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  26. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  27. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  28. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  29. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  30. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  31. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  32. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  33. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  34. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  35. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  36. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  37. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  38. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  39. data/conf/primer3_config/loops.dh +30 -0
  40. data/conf/primer3_config/loops.ds +30 -0
  41. data/conf/primer3_config/stack.dh +256 -0
  42. data/conf/primer3_config/stack.ds +256 -0
  43. data/conf/primer3_config/stackmm.dh +256 -0
  44. data/conf/primer3_config/stackmm.ds +256 -0
  45. data/conf/primer3_config/tetraloop.dh +77 -0
  46. data/conf/primer3_config/tetraloop.ds +77 -0
  47. data/conf/primer3_config/triloop.dh +16 -0
  48. data/conf/primer3_config/triloop.ds +16 -0
  49. data/conf/primer3_config/tstack.dh +256 -0
  50. data/conf/primer3_config/tstack2.dh +256 -0
  51. data/conf/primer3_config/tstack2.ds +256 -0
  52. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  53. data/lib/bio/BFRTools.rb +698 -0
  54. data/lib/bio/BIOExtensions.rb +186 -0
  55. data/lib/bio/PolyploidTools/ChromosomeArm.rb +52 -0
  56. data/lib/bio/PolyploidTools/ExonContainer.rb +194 -0
  57. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  58. data/lib/bio/PolyploidTools/PrimerRegion.rb +22 -0
  59. data/lib/bio/PolyploidTools/SNP.rb +681 -0
  60. data/lib/bio/PolyploidTools/SNPSequence.rb +56 -0
  61. data/lib/bio/SAMToolsExtensions.rb +284 -0
  62. data/lib/bio/db/exonerate.rb +272 -0
  63. data/lib/bio/db/fastadb.rb +164 -0
  64. data/lib/bio/db/primer3.rb +673 -0
  65. data/lib/bioruby-polyploid-tools.rb +25 -0
  66. data/test/data/BS00068396_51.fa +2 -0
  67. data/test/data/BS00068396_51_contigs.aln +1412 -0
  68. data/test/data/BS00068396_51_contigs.dnd +7 -0
  69. data/test/data/BS00068396_51_contigs.fa +8 -0
  70. data/test/data/BS00068396_51_exonerate.tab +6 -0
  71. data/test/data/BS00068396_51_genes.txt +14 -0
  72. data/test/data/LIB1716.bam +0 -0
  73. data/test/data/LIB1716.bam.bai +0 -0
  74. data/test/data/LIB1719.bam +0 -0
  75. data/test/data/LIB1719.bam.bai +0 -0
  76. data/test/data/LIB1721.bam +0 -0
  77. data/test/data/LIB1721.bam.bai +0 -0
  78. data/test/data/LIB1722.bam +0 -0
  79. data/test/data/LIB1722.bam.bai +0 -0
  80. data/test/data/S22380157.fa +16 -0
  81. data/test/data/S22380157.fa.fai +1 -0
  82. data/test/data/Test3Aspecific.csv +1 -0
  83. data/test/data/Test3Aspecific_contigs.fa +6 -0
  84. data/test/data/patological_cases5D.csv +1 -0
  85. data/test/data/short_primer_design_test.csv +10 -0
  86. data/test/data/test_primer3_error.csv +4 -0
  87. data/test/data/test_primer3_error_contigs.fa +10 -0
  88. data/test/test_bfr.rb +51 -0
  89. data/test/test_exon_container.rb +17 -0
  90. data/test/test_exonearate.rb +53 -0
  91. data/test/test_snp_parsing.rb +40 -0
  92. metadata +201 -0
@@ -0,0 +1,56 @@
1
+
2
+ require_relative "SNP"
3
+ module Bio::PolyploidTools
4
# Error raised when a SNP sequence record cannot be parsed.
class SNPSequenceException < RuntimeError; end
6
+
7
+ class SNPSequence < SNP
8
+
9
+ attr_accessor :sequence_original
10
+ #Format:
11
+ #snp name,chromosome from contig,microarray sequence
12
+ #BS00068396_51,2AS,CGAAGCGATCCTACTACATTGCGTTCCTTTCCCACTCCCAGGTCCCCCTA[T/C]ATGCAGGATCTTGATTAGTCGTGTGAACAACTGAAATTTGAGCGCCACAA
13
# Builds a SNPSequence from one comma-separated record.
#
# Accepted layouts:
#   name,chromosome,sequence   (three fields)
#   name,sequence              (two fields; chromosome is left unset)
#
# The sequence field carries the SNP inline, e.g. "...CCTA[T/C]ATGC...",
# and is decoded by SNPSequence#parse_sequence_snp.
#
# reg_str:: the record; a trailing newline is ignored. The string itself is
#           not modified, so frozen strings are accepted.
# Returns the populated SNPSequence (exon_list is set to an empty Hash).
# Raises SNPSequenceException when the record does not have 2 or 3 fields.
def self.parse(reg_str)
  record = reg_str.chomp
  fields = record.split(",")
  snp = SNPSequence.new

  case fields.size
  when 3
    snp.gene, snp.chromosome, snp.sequence_original = fields
  when 2
    snp.gene, snp.sequence_original = fields
  else
    # `raise`, not `throw`: throw would surface as an UncaughtThrowError.
    raise SNPSequenceException.new "Need two or three fields to parse, and got #{fields.size} in #{record}"
  end

  # Two-field records carry no chromosome, so there is nothing to strip.
  snp.chromosome.strip! if snp.chromosome
  snp.parse_sequence_snp
  snp.exon_list = Hash.new()
  snp
end
34
+
35
# Deliberately does nothing and returns nil.
# NOTE(review): presumably overrides a parse_snp inherited from SNP, since
# this class fills its fields through parse_sequence_snp - confirm in SNP.rb.
def parse_snp; end
38
+
39
# Decodes sequence_original, which holds the SNP inline as
# "<upstream>[<original>/<alternative>]<downstream>".
#
# On a match it sets:
#   @position          1-based index of the SNP (upstream length + 1)
#   @original, @snp    the two alleles
#   @template_sequence upstream + ambiguity code of both alleles + downstream
# and returns the template sequence. When the pattern is not found nothing
# is set and nil is returned.
def parse_sequence_snp
  found = /(?<pre>\w*)\[(?<org>[ACGT])\/(?<snp>[ACGT])\](?<pos>\w*)/.match(sequence_original.strip)
  return nil unless found

  upstream = found[:pre]
  downstream = found[:pos]

  @position = upstream.size + 1
  @original = found[:org]
  @snp = found[:snp]

  ambiguity = Bio::NucleicAcid.to_IUAPC("#{@original}#{@snp}")
  @template_sequence = "#{upstream}#{ambiguity}#{downstream}"
end
52
+
53
+
54
+
55
+ end
56
+ end
@@ -0,0 +1,284 @@
1
+ require 'rubygems'
2
+ require 'pathname'
3
+ #require_relative 'db/fasta.rb'
4
+ require 'bio'
5
+
6
+ require_relative 'db/fastadb.rb'
7
+
8
+ #require "set"
9
+ #require 'systemu'
10
+ #require 'json'
11
+
12
+ =begin
13
+
14
+ Extends the methods to be able to calculate the BFR and a consensus from the pileup
15
+
16
+ =end
17
+
18
+ class Bio::DB::Pileup
19
+
20
+ #attr_accessor :minumum_ratio_for_iup_consensus
21
+ #@minumum_ratio_for_iup_consensus = 0.20
22
+
23
+ #Returns a hash with the count of bases
24
+
25
# Per-base counts for this pileup position.
#
# Takes the hash returned by non_refs and stores ref_count in it under the
# upper-cased reference base symbol. The result is memoized in @bases; note
# that it is the very hash non_refs returned, modified in place.
def bases
  unless @bases
    @bases = non_refs
    @bases[ref_base.upcase.to_sym] = ref_count
  end
  @bases
end
32
+
33
# Sum of all per-base counts reported by #bases.
#
# Goes through the #bases accessor rather than @bases so that it also works
# when #bases has not been called yet (the instance variable is nil until
# then).
def base_coverage
  bases.values.inject(0) { |total, count| total + count }
end
40
+
41
# Fraction of base_coverage contributed by each base, as a Hash keyed like
# the one returned by #bases. Values are Floats. Memoized in @base_ratios.
def base_ratios
  return @base_ratios if @base_ratios

  counts = self.bases
  @base_ratios = counts.each_with_object(Hash.new) do |(base, count), ratios|
    ratios[base] = count.to_f / self.base_coverage.to_f
  end
end
50
+
51
+ # returns the consensus (most frequent) base from the pileup, if there are equally represented bases returns a string of all equally represented bases in alphabetical order
52
# Consensus call for this pileup position as an IUPAC code.
#
# Every base whose count divided by the position's coverage is strictly
# greater than +minumum_ratio_for_iup_consensus+ contributes to the code
# returned by Bio::NucleicAcid.to_IUAPC. When no base qualifies, the
# lower-cased reference base is returned instead.
#
# The division is forced to floating point so an Integer coverage cannot
# truncate every ratio to zero.
#
# The result is memoized in @consensus_iuap: the threshold of the FIRST call
# wins and later calls with a different threshold return the cached value.
def consensus_iuap(minumum_ratio_for_iup_consensus)
  return @consensus_iuap unless @consensus_iuap.nil?

  depth = coverage.to_f
  frequent = String.new
  bases.each do |base, count|
    frequent << base[0].to_s if count / depth > minumum_ratio_for_iup_consensus
  end

  @consensus_iuap = frequent.empty? ? ref_base.downcase : Bio::NucleicAcid.to_IUAPC(frequent)
end
67
+ end
68
+
69
+
70
+
71
+ class Bio::NucleicAcid
72
+
73
+
74
+
75
# Translates a set of bases into a single upper-case code via IUPAC_CODES.
#
# bases:: anything responding to to_s; case, order and duplicates are
#         ignored (the lookup key is the sorted, de-duplicated, lower-cased
#         characters).
# Returns the upper-cased table entry. When the key is not in the table a
# diagnostic naming the offending input is printed and "N" is returned, so
# callers never receive nil.
def self.to_IUAPC(bases)
  key = bases.to_s.downcase.chars.sort.uniq.join
  code = IUPAC_CODES[key]
  if code.nil?
    # Report the input that failed, not the (always nil) lookup result.
    p "Invalid base! #{bases}"
    code = 'n' #This is a patch... as one of the scripts failed here.
  end
  code.upcase
end
84
+
85
# Tells whether +base+ is one of the characters IUPAC_CODES lists for +code+.
# Both arguments are compared case-insensitively.
# Returns false (instead of failing on nil) when +code+ is not in the table.
def self.is_valid(code, base)
  expansion = IUPAC_CODES[code.downcase]
  return false if expansion.nil?

  expansion.chars.include?(base.downcase)
end
88
+
89
+ end
90
+
91
+
92
+ #class Bio::DB::Sam::SAMException < RuntimeError
93
+
94
+ #end
95
+
96
+ class Bio::DB::Sam
97
+
98
+
99
+ attr_accessor :minumum_ratio_for_iup_consensus
100
+ attr_reader :cached_regions
101
+ #attr_accessor :pileup_cache
102
+ @minumum_ratio_for_iup_consensus = 0.20
103
+
104
+
105
+ #Same as mpileup, but it caches the pileup, so that several operations on the same set of regions
106
+ #can reuse it instead of executing the mpileup command several times.
107
+ #Whenever you finish using a region, call mpileup_clear_cache to free the cache
108
+ #The argument Region is required, as it will be the key for the underlying hash.
109
+ #We assume that the options are constant. If they are not, the cache mechanism may not be consistent.
110
+ #
111
+ #TODO: It may be good to load partially the pileup
112
# Runs mpileup for a region and remembers every pileup line, so later calls
# for the same region replay the stored lines instead of running mpileup
# again.
#
# opts:: options handed to mpileup. A region is mandatory, under :r or
#        :region (:r wins). The hash is modified: :r and :region are both
#        set to the region's string form and :A is set to true.
# Yields each pileup line when a block is given.
# Raises SAMException when no region is supplied.
#
# The cache key is region.to_s and the entries live in @cached_regions as
# Bio::DB::Fasta::Region objects with their #pileup filled in.
# @pileup_cache is initialised here but not otherwise used by this method.
def mpileup_cached(opts = {})
  raise SAMException.new(), "A region must be provided" unless opts[:r] or opts[:region]
  @pileup_cache = Hash.new unless @pileup_cache
  @cached_regions = Hash.new unless @cached_regions

  region = opts[:r] ? opts[:r] : opts[:region]
  key = region.to_s
  opts[:r] = "#{key}"
  opts[:region] = "#{key}"
  opts[:A] = true

  cached = @cached_regions[key]
  if cached
    # Replay from the cached Region (the pileup lives on the entry, not on
    # the hash itself).
    cached.pileup.each do |pile|
      yield pile if block_given?
    end
  else
    cached = Bio::DB::Fasta::Region.parse_region(key)
    @cached_regions[key] = cached
    piles = Array.new
    cached.pileup = piles
    mpileup(opts) do |pile|
      piles << pile
      yield pile if block_given?
    end
  end
end
140
+
141
+ #Clears the pileup cache. If a region is passed as argument, just the specified region is removed
142
+ #If no region is passed, the hash is emptied
143
# Drops cached pileups.
#
# region:: optional; when given, only the entry stored under region.to_s is
#          removed. When omitted (or nil) the whole cache is emptied.
# Does nothing if no region has been cached yet.
#
# The entry is deleted rather than overwritten with nil, so the hash exposed
# through cached_regions never contains nil placeholders.
def mpileup_clear_cache(region = nil)
  return unless @cached_regions

  if region
    @cached_regions.delete(region.to_s)
    nil
  else
    @cached_regions.clear
  end
end
151
+
152
+ #Gets the coverage of a region from a pileup.
153
# Average coverage of a region, taken from the cached region statistics.
#
# opts[:region] may be a Bio::DB::Fasta::Region or its string form. A Region
# is replaced inside +opts+ by its string form, which is also the cache key.
# When the region has no cached entry, calculate_stats_from_pile(opts) is
# run first.
def average_coverage_from_pileup(opts = {})
  opts[:region] = opts[:region].to_s if opts[:region].instance_of?(Bio::DB::Fasta::Region)
  key = opts[:region]
  not_cached = @cached_regions.nil? || @cached_regions[key].nil?
  calculate_stats_from_pile(opts) if not_cached
  @cached_regions[key].average_coverage
end
159
+
160
+ #
161
# Per-position coverages of a region, taken from the cached region
# statistics.
#
# opts[:region] may be a Bio::DB::Fasta::Region or its string form. A Region
# is replaced inside +opts+ by its string form, which is also the cache key.
# When the region has no cached entry, calculate_stats_from_pile(opts) is
# run first.
def coverages_from_pileup(opts = {})
  opts[:region] = opts[:region].to_s if opts[:region].instance_of?(Bio::DB::Fasta::Region)
  key = opts[:region]
  not_cached = @cached_regions.nil? || @cached_regions[key].nil?
  calculate_stats_from_pile(opts) if not_cached
  @cached_regions[key].coverages
end
167
+
168
# Consensus sequence of a region, taken from the cached region statistics.
#
# opts[:region] may be a Bio::DB::Fasta::Region or its string form. A Region
# is replaced inside +opts+ by its string form, which is also the cache key.
# When the region has no cached entry, calculate_stats_from_pile(opts) is
# run first.
def consensus_with_ambiguities(opts = {})
  opts[:region] = opts[:region].to_s if opts[:region].instance_of?(Bio::DB::Fasta::Region)
  key = opts[:region]
  not_cached = @cached_regions.nil? || @cached_regions[key].nil?
  calculate_stats_from_pile(opts) if not_cached
  @cached_regions[key].consensus
end
175
+
176
# Walks the (cached) pileup of a region and stores per-position statistics
# on the cached Bio::DB::Fasta::Region.
#
# opts[:region]::  Region or its string form; replaced in +opts+ by the
#                  parsed Region.
# opts[:min_cov]:: positions are only recorded when their coverage is
#                  strictly greater than this value (default 20).
#
# For every recorded position the base ratios, base counts and integer
# coverage are stored, and the matching character of the lower-cased
# reference is overwritten with the upper-cased consensus_iuap(0.20) call.
# Positions that are not recorded keep BASE_COUNT_ZERO / 0 and the
# lower-case reference base. The average coverage sums the coverage of every
# pileup line, recorded or not, and divides by the region size.
#
# Returns the cached Region with coverages, base_ratios, consensus (reverse
# complemented for :reverse regions), average_coverage and bases set.
#
# NOTE(review): BASE_COUNT_ZERO is only present as a commented-out line in
# this file - it is presumably defined elsewhere; confirm.
# NOTE(review): the reference is indexed with one position less than the
# other arrays (offset - 1 versus offset). Whether that is intended depends
# on how Region#start and pile.pos are numbered - confirm.
def calculate_stats_from_pile(opts = {})
  min_cov = opts[:min_cov] || 20

  unless opts[:region].class == Bio::DB::Fasta::Region
    opts[:region] = Bio::DB::Fasta::Region.parse_region(opts[:region].to_s)
  end
  region = opts[:region]

  reference = fetch_reference(region.entry, region.start, region.end).downcase
  base_ratios = Array.new(region.size, BASE_COUNT_ZERO)
  bases = Array.new(region.size, BASE_COUNT_ZERO)
  coverages = Array.new(region.size, 0)
  total_cov = 0

  mpileup_cached(:region => "#{region.to_s}") do |pile|
    if pile.coverage > min_cov
      offset = pile.pos - region.start
      base_ratios[offset] = pile.base_ratios
      reference[offset - 1] = pile.consensus_iuap(0.20).upcase
      coverages[offset] = pile.coverage.to_i
      bases[offset] = pile.bases
    end
    total_cov += pile.coverage
  end

  cached = @cached_regions[region.to_s]
  cached.coverages = coverages
  cached.base_ratios = base_ratios
  cached.consensus = Bio::Sequence.new(reference)
  cached.consensus.na
  cached.consensus.reverse_complement!() if cached.orientation == :reverse
  cached.average_coverage = total_cov.to_f / cached.size.to_f
  cached.bases = bases
  cached
end
224
+
225
+
226
+
227
+ #BASE_COUNT_ZERO = {:A => 0, :C => 0, :G => 0, :T => 0}
228
+
229
+ #Gets an array with the proportions of the bases in the region. Positions without enough coverage hold BASE_COUNT_ZERO.
230
# Per-position base ratios of a region, taken from the cached region
# statistics.
#
# opts[:region] may be a Bio::DB::Fasta::Region or its string form. A Region
# is replaced inside +opts+ by its string form, which is also the cache key.
# When the region has no cached entry, calculate_stats_from_pile(opts) is
# run first.
def base_ratios_in_region(opts = {})
  opts[:region] = opts[:region].to_s if opts[:region].instance_of?(Bio::DB::Fasta::Region)
  key = opts[:region]
  not_cached = @cached_regions.nil? || @cached_regions[key].nil?
  calculate_stats_from_pile(opts) if not_cached
  @cached_regions[key].base_ratios
end
236
+
237
+ #Gets an array with the base counts in the region. Positions without enough coverage hold BASE_COUNT_ZERO.
238
# Per-position base counts of a region, taken from the cached region
# statistics.
#
# opts[:region] may be a Bio::DB::Fasta::Region or its string form. A Region
# is replaced inside +opts+ by its string form, which is also the cache key.
# When the region has no cached entry, calculate_stats_from_pile(opts) is
# run first.
def bases_in_region(opts = {})
  opts[:region] = opts[:region].to_s if opts[:region].instance_of?(Bio::DB::Fasta::Region)
  key = opts[:region]
  not_cached = @cached_regions.nil? || @cached_regions[key].nil?
  calculate_stats_from_pile(opts) if not_cached
  @cached_regions[key].bases
end
244
+
245
+
246
+
247
# Prints every alignment overlapping a region to standard output in FASTQ
# form (four lines per read: "@name", sequence, "+name", qualities).
#
# opts[:region]:: Bio::DB::Fasta::Region or its string form; replaced in
#                 +opts+ by the parsed Region.
#
# The alignments are obtained through fetch_with_function using the region's
# entry, start and end.
# NOTE(review): earlier code read opts[:fastq] and opts[:fastq_file] without
# using them; output always goes to $stdout.
def extract_reads(opts = {})
  unless opts[:region].class == Bio::DB::Fasta::Region
    opts[:region] = Bio::DB::Fasta::Region.parse_region(opts[:region].to_s)
  end
  region = opts[:region]

  out = $stdout

  print_fastq = Proc.new do |alignment|
    out.puts "@#{alignment.qname}"
    out.puts "#{alignment.seq}"
    out.puts "+#{alignment.qname}"
    out.puts "#{alignment.qual}"
  end

  fetch_with_function(region.entry, region.start, region.end, print_fastq)
end
265
+
266
+ end
267
+
268
+ class Bio::DB::Fasta::Region
269
+ attr_accessor :pileup, :average_coverage, :snps, :reference, :base_ratios, :consensus, :coverages, :bases
270
+
271
+ #TODO: Debug, as it hasnt been tested in the actual code.
272
# Ratio of one base at every position of this region.
#
# base:: key used in the per-position hashes of @base_ratios.
# Returns an Array with one entry per position (nil where the position's
# hash has no entry for +base+). Results are memoized per base in
# @all_ratios.
#
# Uses this region's own #size; there is no +region+ object in scope here.
def base_ratios_for_base(base)
  @all_ratios ||= Hash.new
  @all_ratios[base] ||= (0...size).map { |i| @base_ratios[i][base] }
end
283
+
284
+ end
@@ -0,0 +1,272 @@
1
+ # RYO %S\t%pi\t%ql\t%tl\t%g\t%V\n
2
+
3
+
4
+ module Bio::DB::Exonerate
5
+
6
+
7
+ #TODO: Make a proper object with generic parser
8
# Runs the external `exonerate` program and parses its "RESULT:" lines.
#
# opts:: :query and :target are interpolated into the command line as-is.
#        Defaults: :model 'affine:local', :bestn 20, and a tab-separated
#        :ryo format starting with "RESULT:". A :percentage default (50) is
#        merged in but is not part of the command line built here.
#
# With a block, every parsed alignment is yielded and nil is returned.
# Without a block, an Array of the parsed alignments is returned.
# Lines that Alignment.parse_custom does not recognise are skipped.
# Raises ExonerateException when exonerate exits with a non-zero status.
def self.align(opts = {})
  defaults = {
    :model => 'affine:local',
    :ryo => "RESULT:\\t%S\\t%pi\\t%ql\\t%tl\\t%g\\t%V\\n",
    :bestn => 20,
    :percentage => 50
  }
  opts = defaults.merge(opts)

  target = opts[:target]
  query = opts[:query]

  cmdline = "exonerate --verbose 0 --showalignment no --bestn #{opts[:bestn]} --showvulgar no --model #{opts[:model]} --ryo '#{opts[:ryo]}' #{query} #{target}"
  status, stdout, stderr = systemu cmdline

  unless status.exitstatus == 0
    raise ExonerateException.new(), "Error running exonerate. Command line was '#{cmdline}'\nExonerate STDERR was:\n#{stderr}"
  end

  collected = block_given? ? nil : Array.new
  stdout.each_line do |line|
    aln = Alignment.parse_custom(line)
    next unless aln

    if collected
      collected << aln
    else
      yield aln
    end
  end
  collected
end
41
+
42
+
43
# Error raised when exonerate fails or its output cannot be interpreted.
class ExonerateException < RuntimeError; end
45
+
46
+ class Alignment
47
+ attr_accessor :query_id, :query_start, :query_end, :query_strand
48
+ attr_accessor :target_id, :target_start, :target_end, :target_strand, :score
49
+ attr_accessor :vulgar_block, :pi, :ql, :tl, :g
50
+ attr_accessor :line
51
+
52
+ #This one day may grow to work with complex ryo....
53
# Parses one tab-separated output line of the form
#   RESULT: <sugar> <percent identity> <query length> <target length> <g> <vulgar>
# into an Alignment.
#
# Returns nil when the first field is not "RESULT:". The original line is
# kept in Alignment#line. The sugar field is parsed before the vulgar field,
# because the vulgar parser relies on the orientations found in the sugar.
def self.parse_custom(line)
  tag, sugar, identity, query_len, target_len, g_field, vulgar = line.split(/\t/)
  return nil unless tag == "RESULT:"

  al = Bio::DB::Exonerate::Alignment.new()
  al.parse_sugar(sugar)
  al.pi = identity.to_f
  al.ql = query_len.to_i
  al.tl = target_len.to_i
  al.g = g_field
  al.parse_vulgar(vulgar)
  al.line = line
  al
end
69
+
70
# Alias-style reader for the value stored in @pi (set by parse_custom from
# the second field of a RESULT line).
def identity
  @pi
end
73
+
74
# Fills the alignment from a whitespace-separated "sugar" string:
#   query_id query_start query_end query_strand
#   target_id target_start target_end target_strand score
#
# Coordinates become Integers, the score a Float, and the strands the
# symbols :forward ("+") or :reverse ("-").
#
# Returns self.
# Raises ExonerateException for any other strand value, or when start/end
# contradict the strand (forward with start > end, reverse with start < end).
def parse_sugar(sugar_str)
  @query_id, @query_start, @query_end, @query_strand,
    @target_id, @target_start, @target_end, @target_strand, @score = sugar_str.split(/\s+/)

  @query_start, @query_end = @query_start.to_i, @query_end.to_i
  @target_start, @target_end = @target_start.to_i, @target_end.to_i
  @score = @score.to_f

  orientations = { "+" => :forward, "-" => :reverse }

  unless orientations.key?(@target_strand)
    raise ExonerateException.new(), "Ivalid target orientation #{@target_strand} for line:\n#{sugar_str}"
  end
  @target_strand = orientations[@target_strand]

  unless orientations.key?(@query_strand)
    raise ExonerateException.new(), "Ivalid query orientation #{@query_strand} for line:\n#{sugar_str}"
  end
  @query_strand = orientations[@query_strand]

  if @query_strand == :forward && @query_start > @query_end
    raise ExonerateException.new(), "Inconsistent orientation (forward, query)"
  end
  if @query_strand == :reverse && @query_start < @query_end
    raise ExonerateException.new(), "Inconsistent orientation (reverse, query)"
  end
  if @target_strand == :forward && @target_start > @target_end
    raise ExonerateException.new(), "Inconsistent orientation (forward, target)"
  end
  if @target_strand == :reverse && @target_start < @target_end
    raise ExonerateException.new(), "Inconsistent orientation (reverse, target)"
  end

  self
end
108
+
109
+
110
+ #The vulgar has to be parsed AFTER the sugar, otherwise it is impossible to determine the orientations
111
# Splits a "vulgar" string into consecutive triplets (label, query length,
# target length) and builds one Vulgar per triplet in @vulgar_block.
#
# Each block starts where the previous one ended, beginning at @query_start
# and @target_start; lengths are counted downwards on a :reverse strand.
# parse_sugar must therefore have run first.
#
# Returns self.
def parse_vulgar(vulgar_str)
  target_step = @target_strand == :reverse ? -1 : 1
  query_step = @query_strand == :reverse ? -1 : 1

  target_cursor = @target_start
  query_cursor = @query_start

  @vulgar_block = Array.new
  vulgar_str.split(/\s/).each_slice(3) do |label, query_len, target_len|
    segment = Vulgar.new(label.to_sym, query_len.to_i, target_len.to_i,
                         target_cursor, target_step, query_cursor, query_step, self)
    query_cursor = segment.query_end
    target_cursor = segment.target_end
    @vulgar_block << segment
  end
  self
end
137
+
138
+ #This assumes that the gene is the query and the chromosome is the target
139
# First vulgar block whose query interval contains +position+ (bounds
# included), or nil when there is none.
#
# The test is position.between?(query_start, query_end), so a block whose
# query coordinates run downwards (start > end) is never matched.
def exon_on_gene_position(position)
  @vulgar_block.find do |vulgar|
    position.between?(vulgar.query_start, vulgar.query_end)
  end
end
147
+
148
# Unfinished stub: looks up the vulgar block for +position+ but does not use
# it, and always returns nil.
# TODO: presumably meant to translate the query position to a target
# position through the block that was found - confirm the intent before
# relying on this method.
def tarpostion_from_query_position(position)
  exon_on_gene_position(position)
  nil
end
153
+
154
# Returns a String with the to_s form of every vulgar block, one per line
# (each followed by "\n"). Despite the name, nothing is printed.
def print_features
  @vulgar_block.each_with_object(String.new) do |vulgar, text|
    text << vulgar.to_s << "\n"
  end
end
162
+ end
163
+
164
+
165
+ class Vulgar
166
+ attr_reader :label, :query_length, :target_length, :query_start, :query_end, :target_start, :target_end, :record, :snp_in_gap
167
# label::            vulgar label
# ql, tl::           query and target lengths of the block
# target_start::     target coordinate where the block begins
# target_multiply::  factor applied to tl when computing the target end
# query_start::      query coordinate where the block begins
# query_multiply::   factor applied to ql when computing the query end
# record::           object the block belongs to; query_id/target_id and the
#                    strands are read from it by other methods
#
# Ends are start + multiply * length. snp_in_gap starts out false.
def initialize(label, ql, tl, target_start, target_multiply, query_start, query_multiply, record)
  @label, @record = label, record
  @query_length, @target_length = ql, tl
  @query_start = query_start
  @query_end = query_start + (query_multiply * ql)
  @target_start = target_start
  @target_end = target_start + (target_multiply * tl)
  @snp_in_gap = false
end
178
+
179
# Tab-separated summary of the block: label, query length, target length,
# query start, query end, target start, target end.
def to_s
  columns = [@label, @query_length, @target_length,
             @query_start, @query_end, @target_start, @target_end]
  String.new << columns.map { |column| column.to_s }.join("\t")
end
184
+
185
# Query identifier, delegated to the record this block belongs to.
def query_id
  @record.query_id
end
188
+
189
# Target identifier, delegated to the record this block belongs to.
def target_id
  @record.target_id
end
192
+
193
# Region on the target centred on the position that corresponds to a query
# position, extended by +flanking_size+ on both sides.
#
# position::      query coordinate inside this block
# flanking_size:: number of positions added before and after
#
# Returns a Bio::DB::Fasta::Region (entry = target_id, orientation = the
# record's target strand), or nil when the position falls on a gap
# (snp_in_gap is set by target_position_from_query).
# Raises ExonerateException when +position+ lies outside the block. The
# bounds are accepted in either order, matching target_position_from_query,
# so blocks whose query coordinates run downwards are handled too.
def target_flanking_region_from_position(position, flanking_size)
  target_snp_pos = target_position_from_query(position)
  return nil if snp_in_gap

  unless position.between?(query_start, query_end) or position.between?(query_end, query_start)
    raise ExonerateException.new "Target Query out of bounds!"
  end

  reg = Bio::DB::Fasta::Region.new()
  reg.entry = target_id
  reg.orientation = record.target_strand
  reg.start = target_snp_pos - flanking_size
  reg.end = target_snp_pos + flanking_size
  reg
end
207
+
208
# Translates a query coordinate inside this block to a target coordinate.
#
# The distance of +position+ from query_start (measured along the record's
# query strand) is added to target_start on a :forward target, or subtracted
# from it - plus one - on a :reverse target.
# When the block is a gap (label :G with target length 0) snp_in_gap is set
# to true and target_start is returned.
#
# Raises ExonerateException when +position+ is outside the block (bounds
# accepted in either order), when a strand is neither :forward nor :reverse,
# or when the computed coordinate falls outside the target bounds.
# NOTE(review): with the "+ 1" a distance of 0 on a :reverse target yields
# target_start + 1, which the final check rejects - confirm this is intended.
def target_position_from_query(position)
  inside_query = position.between?(query_start, query_end) || position.between?(query_end, query_start)
  raise ExonerateException.new(), "Position: #{position} not in range (#{query_start}-#{query_end}) #{self.to_s} " unless inside_query

  offset =
    case record.query_strand
    when :forward then position - query_start
    when :reverse then query_start - position
    else
      raise ExonerateException.new(), "The strand is not forward or reverse (#{record.query_strand}) ! #{self.inspect}"
    end

  mapped =
    case record.target_strand
    when :forward then target_start + offset
    when :reverse then target_start - offset + 1
    else
      raise ExonerateException.new(), "The strand is not forward or reverse! #{self.inspect}"
    end

  # The position sits on a gap: flag it and fall back to the block start.
  if @target_length == 0 and label == :G
    @snp_in_gap = true
    mapped = target_start
  end

  inside_target = mapped.between?(target_start, target_end) || mapped.between?(target_end, target_start)
  raise ExonerateException.new(), "Return position #{mapped} outside block (#{target_start}-#{target_end}, #{self.inspect})" unless inside_target
  mapped
end
236
+
237
# Region covered by this block on the query.
#
# Returns a Bio::DB::Fasta::Region with entry = query_id and orientation =
# the record's query strand. The smaller of the two block coordinates, plus
# one, becomes the start and the larger one the end.
# Raises ExonerateException when the record's query strand is neither
# :forward nor :reverse; the message names the offending value (the strand
# lives on the record - this object has no @query_strand of its own).
def query_region
  strand = record.query_strand

  reg = Bio::DB::Fasta::Region.new()
  reg.entry = query_id
  reg.orientation = strand

  case strand
  when :forward
    reg.start = @query_start + 1
    reg.end = @query_end
  when :reverse
    reg.start = @query_end + 1
    reg.end = @query_start
  else
    raise ExonerateException.new(), "Ivalid query orientation #{strand}"
  end
  reg
end
252
+
253
# Region covered by this block on the target.
#
# Returns a Bio::DB::Fasta::Region with entry = target_id and orientation =
# the record's target strand. The smaller of the two block coordinates, plus
# one, becomes the start and the larger one the end.
# Raises ExonerateException when the record's target strand is neither
# :forward nor :reverse; the message names the offending value (the strand
# lives on the record - this object has no @target_strand of its own).
def target_region
  strand = record.target_strand

  reg = Bio::DB::Fasta::Region.new()
  reg.entry = target_id
  reg.orientation = strand

  case strand
  when :forward
    reg.start = @target_start + 1
    reg.end = @target_end
  when :reverse
    reg.start = @target_end + 1
    reg.end = @target_start
  else
    raise ExonerateException.new(), "Ivalid target orientation #{strand}"
  end
  reg
end
269
+
270
+ end
271
+
272
+ end