bio-polyploid-tools 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +16 -0
  3. data/Gemfile.lock +67 -0
  4. data/README +21 -0
  5. data/Rakefile +61 -0
  6. data/VERSION +1 -0
  7. data/bin/bfr.rb +133 -0
  8. data/bin/count_variations.rb +36 -0
  9. data/bin/filter_blat_by_target_coverage.rb +15 -0
  10. data/bin/find_best_blat_hit.rb +32 -0
  11. data/bin/hexaploid_primers.rb +168 -0
  12. data/bin/homokaryot_primers.rb +155 -0
  13. data/bin/map_markers_to_contigs.rb +66 -0
  14. data/bin/markers_in_region.rb +42 -0
  15. data/bin/polymarker.rb +219 -0
  16. data/bin/snps_between_bams.rb +106 -0
  17. data/bio-polyploid-tools.gemspec +139 -0
  18. data/conf/defaults.rb +1 -0
  19. data/conf/primer3_config/dangle.dh +128 -0
  20. data/conf/primer3_config/dangle.ds +128 -0
  21. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  22. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  23. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  24. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  25. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  26. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  27. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  28. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  29. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  30. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  31. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  32. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  33. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  34. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  35. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  36. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  37. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  38. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  39. data/conf/primer3_config/loops.dh +30 -0
  40. data/conf/primer3_config/loops.ds +30 -0
  41. data/conf/primer3_config/stack.dh +256 -0
  42. data/conf/primer3_config/stack.ds +256 -0
  43. data/conf/primer3_config/stackmm.dh +256 -0
  44. data/conf/primer3_config/stackmm.ds +256 -0
  45. data/conf/primer3_config/tetraloop.dh +77 -0
  46. data/conf/primer3_config/tetraloop.ds +77 -0
  47. data/conf/primer3_config/triloop.dh +16 -0
  48. data/conf/primer3_config/triloop.ds +16 -0
  49. data/conf/primer3_config/tstack.dh +256 -0
  50. data/conf/primer3_config/tstack2.dh +256 -0
  51. data/conf/primer3_config/tstack2.ds +256 -0
  52. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  53. data/lib/bio/BFRTools.rb +698 -0
  54. data/lib/bio/BIOExtensions.rb +186 -0
  55. data/lib/bio/PolyploidTools/ChromosomeArm.rb +52 -0
  56. data/lib/bio/PolyploidTools/ExonContainer.rb +194 -0
  57. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  58. data/lib/bio/PolyploidTools/PrimerRegion.rb +22 -0
  59. data/lib/bio/PolyploidTools/SNP.rb +681 -0
  60. data/lib/bio/PolyploidTools/SNPSequence.rb +56 -0
  61. data/lib/bio/SAMToolsExtensions.rb +284 -0
  62. data/lib/bio/db/exonerate.rb +272 -0
  63. data/lib/bio/db/fastadb.rb +164 -0
  64. data/lib/bio/db/primer3.rb +673 -0
  65. data/lib/bioruby-polyploid-tools.rb +25 -0
  66. data/test/data/BS00068396_51.fa +2 -0
  67. data/test/data/BS00068396_51_contigs.aln +1412 -0
  68. data/test/data/BS00068396_51_contigs.dnd +7 -0
  69. data/test/data/BS00068396_51_contigs.fa +8 -0
  70. data/test/data/BS00068396_51_exonerate.tab +6 -0
  71. data/test/data/BS00068396_51_genes.txt +14 -0
  72. data/test/data/LIB1716.bam +0 -0
  73. data/test/data/LIB1716.bam.bai +0 -0
  74. data/test/data/LIB1719.bam +0 -0
  75. data/test/data/LIB1719.bam.bai +0 -0
  76. data/test/data/LIB1721.bam +0 -0
  77. data/test/data/LIB1721.bam.bai +0 -0
  78. data/test/data/LIB1722.bam +0 -0
  79. data/test/data/LIB1722.bam.bai +0 -0
  80. data/test/data/S22380157.fa +16 -0
  81. data/test/data/S22380157.fa.fai +1 -0
  82. data/test/data/Test3Aspecific.csv +1 -0
  83. data/test/data/Test3Aspecific_contigs.fa +6 -0
  84. data/test/data/patological_cases5D.csv +1 -0
  85. data/test/data/short_primer_design_test.csv +10 -0
  86. data/test/data/test_primer3_error.csv +4 -0
  87. data/test/data/test_primer3_error_contigs.fa +10 -0
  88. data/test/test_bfr.rb +51 -0
  89. data/test/test_exon_container.rb +17 -0
  90. data/test/test_exonearate.rb +53 -0
  91. data/test/test_snp_parsing.rb +40 -0
  92. metadata +201 -0
@@ -0,0 +1,164 @@
1
+ #Module to hold the information about the fasta file
2
+
3
+ module Bio::DB::Fasta
4
# Ordered collection of fasta index entries that can also be looked up by id.
class Index
  include Enumerable
  attr_reader :entries

  def initialize
    @entries = []
    @entries_map = {}
  end

  # Appends an entry and registers it under entry.id.
  # This doesn't validate if you are adding the same entry twice: the list
  # keeps both, while the id lookup keeps the last one added.
  def <<(entry)
    @entries << entry
    @entries_map[entry.id] = entry
  end

  # Yields every entry in insertion order. This is what the methods mixed in
  # by Enumerable are built on, so it must actually iterate (the previous
  # implementation called Array#entries, which ignores the block).
  def each(&block)
    @entries.each(&block)
  end

  # Number of entries stored.
  def length
    @entries.length
  end

  # Returns a new Index just with the specified range, as if it was an Array.
  # The return object is always of type Index: a single integer position gives
  # an Index with one entry, and a position outside the list gives an empty one.
  def [](args)
    picked = @entries[args]
    # Array#[] hands back a bare element (or nil) for an integer argument.
    picked = [picked].compact unless picked.is_a?(Array)
    subset = Index.new
    picked.each { |entry| subset << entry }
    subset
  end

  # Returns the entry stored under the given id, or nil when the id is unknown.
  def region_for_entry(entry)
    @entries_map[entry]
  end
end
42
+
43
# One sequence listed in the fasta index: its identifier and its length.
class Entry
  attr_reader :id, :length

  # id     - identifier of the sequence.
  # length - length of the sequence; it is converted with to_i.
  def initialize(id, length)
    @id, @length = id, length.to_i
  end

  # Builds a forward Region that starts at 0 and ends at the entry length.
  def get_full_region
    Region.new.tap do |whole|
      whole.entry = id
      whole.start = 0
      whole.end = @length
      whole.orientation = :forward
    end
  end

  # Conversion hook used by callers that accept anything responding to
  # to_region; it is the same as get_full_region.
  def to_region
    get_full_region
  end
end
64
+
65
# Class to wrap a region of a chromosome.
class Region
  attr_accessor :entry, :start, :end, :orientation

  # Formats the region as "entry:start-end".
  # Interpolation is used so that a non-String entry does not raise.
  def to_s
    "#{@entry}:#{@start}-#{@end}"
  end

  # Parses a string with the format "entry:start-end" (single quotes are
  # removed first) into a Region.
  #
  # The orientation is :reverse when end is smaller than start, :forward
  # otherwise.
  #
  # Raises FastaDBException when the string does not have exactly one ":" and
  # exactly one "-" separated pair after it. The shape is checked before the
  # coordinates are split, so a string without ":" is reported as an invalid
  # region instead of failing on nil.
  def self.parse_region(reg_str)
    string = reg_str.delete("'")
    fields = string.split(":")
    bounds = fields.length == 2 ? fields[1].split("-") : []
    raise FastaDBException, "Invalid region. #{string}" unless bounds.length == 2

    reg = Region.new
    reg.entry = fields[0]
    reg.start = bounds[0].to_i
    reg.end = bounds[1].to_i
    reg.orientation = reg.end < reg.start ? :reverse : :forward
    reg
  end

  # Difference between end and start (negative for reverse regions).
  def size
    @end - @start
  end
end
98
+
99
# Error type raised by the fasta database classes of this module.
class FastaDBException < StandardError
end
100
+
101
# Class that holds the fasta file. It is used as a database. It heavily relies on samtools.
class FastaFile
  attr_reader :index, :fasta_path

  # Callback registered as finalizer for every instance. It currently does
  # nothing: the index has to be released explicitly with #close.
  def FastaFile.finalize(id)
    #id.close()
    #puts "Finalizing #{id} at #{Time.new}"
  end

  # Loads the samtools fasta index of fasta_filename, building it first when
  # it cannot be loaded.
  #
  # Raises FastaDBException when no path is given or when the index can not be
  # generated.
  def initialize(fasta_filename)
    @fasta_path = fasta_filename
    raise FastaDBException, "No path for the refernce fasta file. " if @fasta_path.nil?
    @fasta_index = Bio::DB::SAM::Tools.fai_load(@fasta_path)
    if @fasta_index.null?
      $stderr.puts "Generating index for: " + @fasta_path
      Bio::DB::SAM::Tools.fai_build(@fasta_path)
      @fasta_index = Bio::DB::SAM::Tools.fai_load(@fasta_path)
      raise FastaDBException, "Unable to generate fasta index for: " + @fasta_path if @fasta_index.nil? || @fasta_index.null?
    end
    ObjectSpace.define_finalizer(self, self.class.method(:finalize).to_proc)
  end

  # Reads "<fasta_path>.fai" into #index (one Entry per line, using the first
  # two tab separated columns) and returns the number of entries. The file is
  # read only once; later calls return the cached count.
  def load_fai_entries()
    return @index.length if @index
    # Fill a local Index and publish it only when the whole file was read, so
    # a failed read does not leave an empty index cached. File.foreach also
    # closes the file, which File.open without a block did not.
    loaded = Index.new
    File.foreach(@fasta_path + ".fai") do |line|
      fields = line.split("\t")
      loaded << Entry.new(fields[0], fields[1])
    end
    @index = loaded
    @index.length
  end

  # Releases the samtools index. Fetching sequences afterwards raises.
  def close()
    Bio::DB::SAM::Tools.fai_destroy(@fasta_index) unless @fasta_index.nil? || @fasta_index.null?
    @fasta_index = nil
  end

  # Returns the sequence of a region.
  #
  # The region needs to have a method to_region or a method to_s that has the
  # format "chromosome:start-end" as in samtools. When the object used for the
  # query exposes an orientation and it is :reverse, the sequence is reverse
  # complemented. Objects without an orientation (for example a plain String)
  # are returned as fetched.
  #
  # Raises FastaDBException when the index was closed or the sequence can not
  # be fetched.
  def fetch_sequence(region)
    raise FastaDBException, "No fasta index for " if @fasta_index.nil? || @fasta_index.null?
    # Resolve the region once, so the query string and the orientation come
    # from the same object (an Entry has to_region but no orientation).
    target = region.respond_to?(:to_region) ? region.to_region : region
    query = target.to_s

    len = FFI::MemoryPointer.new :int
    str = Bio::DB::SAM::Tools.fai_fetch(@fasta_index, query, len)
    raise FastaDBException, "Unable to get sequence for reference: " + query if str.nil?
    reference = Bio::Sequence.auto(str)

    if target.respond_to?(:orientation) && target.orientation == :reverse
      reference.reverse_complement!()
    end
    reference
  end
end
164
+ end
@@ -0,0 +1,673 @@
1
+
2
+ module Bio::DB::Primer3
3
# Error type raised by the primer3 wrapper classes of this module.
class Primer3Exception < RuntimeError; end
5
+
6
# Runs primer3_core through systemu, feeding it the content of the file
# opts[:in] on standard input and writing what it prints on standard output to
# the file opts[:out].
#
# Raises Primer3Exception, including the captured standard error, when the
# exit status is not 0.
def self.run(opts={})
  puts "Primer3.run running..."

  input_path = opts[:in]
  output_path = opts[:out]
  primer3_input = File.read(input_path)
  captured_out = ''
  captured_err = ''
  status = systemu "primer3_core", 0 => primer3_input, 1 => captured_out, 2 => captured_err
  unless status.exitstatus == 0
    raise Primer3Exception.new(), "Error running primer3. Command line was 'primer3_core'\nPrimer3 STDERR was:\n#{captured_err}"
  end
  File.open(output_path, 'w') { |out| out.write(captured_out) }
end
20
+
21
# A SNP between two lines together with the primer3 records designed for it.
#
# primers_line_1 / primers_line_2 hold every record with primers for each
# line; primer3_line_1 / primer3_line_2 hold the smallest record (according to
# the record comparison) seen so far for each line.
class SNP
  attr_accessor :gene, :original, :position, :snp, :chromosome, :line_1, :line_2
  attr_accessor :primer3_line_1, :primer3_line_2, :template_length
  attr_accessor :primers_line_1, :primers_line_2
  attr_accessor :used_contigs
  attr_accessor :snp_from
  attr_accessor :regions
  attr_accessor :primer3_errors

  def initialize
    @primers_line_1 = SortedSet.new
    @primers_line_2 = SortedSet.new
    # Previously a misspelled variable was assigned here, which left #regions
    # as nil and made #print_primers fail for SNPs without regions. An Array
    # is used because #print_primers needs size and join.
    @regions = []
  end

  # NOTE(review): the string ends with a literal "}" after the line name. It
  # looks like a typo, but it is kept because the value may be matched
  # elsewhere; confirm before removing it.
  def line_1_name
    "#{gene}:#{position}#{original}>#{snp} #{line_1}}"
  end

  # See the note on line_1_name about the trailing "}".
  def line_2_name
    "#{gene}:#{position}#{original}>#{snp} #{line_2}}"
  end

  # Key of the SNP: "gene:<original><position><snp>:<chromosome>".
  def to_s
    "#{gene}:#{original}#{position}#{snp}:#{source_chromosome}"
  end

  # Returns the melting temperature reported for the left primer sequence
  # +primer+ by the first record that knows it, looking in the records of
  # line 1 and then in the records of line 2. Returns "NA" when none does.
  def find_left_primer_temp(primer)
    [primers_line_1, primers_line_2].each do |records|
      records.each do |record|
        tm = record.find_left_tm(primer)
        return tm if tm
      end
    end
    "NA"
  end

  # First record of line 1 whose left primer, with the SNP base swapped, has a
  # known melting temperature. nil when there is none.
  def find_primer_pair_first
    primers_line_1.find { |pr| find_left_primer_temp(pr.left_primer_snp(self)) != "NA" }
  end

  # Same as find_primer_pair_first, for the records of line 2.
  def find_primer_pair_second
    primers_line_2.find { |pr| find_left_primer_temp(pr.left_primer_snp(self)) != "NA" }
  end

  # Returns one comma separated line describing the SNP and its primers.
  #
  # The line always starts with: gene, <original><position><snp>, template
  # length, chromosome, number of regions and the regions joined with "|".
  # When primers are available it continues with: polymorphism, left primer
  # for line 1, left primer for line 2, right primer, type, orientation, Tm of
  # the line 1 primer, Tm of the line 2 primer, Tm of the right primer, the
  # source of the pair ("first", "second", "first+" or "second+") and the
  # product size.
  def print_primers
    #TODO: Retrieve error messages
    values = [gene, "#{original}#{position}#{snp}", template_length,
              source_chromosome, regions.size, regions.join("|")]

    if primer3_line_1 && primer3_line_2
      values << primer3_line_1.polymorphism
      values.concat(columns_for_both_lines)
    elsif primer3_line_1
      values << primer3_line_1.polymorphism
      values.concat(first_line_columns(primer3_line_1, primer3_line_1.left_primer_snp(self), "NA", "first+"))
    elsif primer3_line_2
      values << primer3_line_2.polymorphism
      values.concat(second_line_columns(primer3_line_2, primer3_line_2.left_primer_snp(self), "NA", "second+"))
    end
    values.join(",")
  end

  # Parses a line "gene,original,position,snp". The bases are upcased and the
  # position is converted with to_i. The argument is not modified, and missing
  # fields become empty strings / position 0 instead of raising.
  def self.parse(reg_str)
    snp = SNP.new
    snp.gene, snp.original, snp.position, snp.snp = reg_str.chomp.split(",")
    snp.position = snp.position.to_i
    snp.original = snp.original.to_s.upcase
    snp.snp = snp.snp.to_s.upcase
    snp
  end

  # Yields a SNP for every line of the file with a position greater than 0.
  def self.parse_file(filename)
    File.open(filename) do |f|
      f.each_line do |line|
        snp = SNP.parse(line)
        yield snp if snp.position > 0
      end
    end
  end

  # Registers a primer3 record for this SNP.
  #
  # Records with a primer error are only stored in primer3_errors. Otherwise
  # the record must belong to line_1 or line_2 (Primer3Exception if not); when
  # it returned left primers it is added to the records of its own line and
  # becomes the preferred record of that line if it is smaller than the
  # current one.
  def add_record(primer3record)
    @primer3_errors ||= []
    @template_length = primer3record.sequence_template.size
    unless primer3record.primer_error.nil?
      primer3_errors << primer3record
      return
    end

    has_primers = primer3record.primer_left_num_returned.to_i > 0
    if primer3record.line == @line_1
      @line_1_template = primer3record.sequence_template
      return unless has_primers
      primers_line_1 << primer3record
      @primer3_line_1 = primer3record if @primer3_line_1.nil? || @primer3_line_1 > primer3record
    elsif primer3record.line == @line_2
      @line_2_template = primer3record.sequence_template
      return unless has_primers
      # Records of line 2 go to the line 2 set (they used to be added to the
      # line 1 set, so find_primer_pair_second never found anything).
      primers_line_2 << primer3record
      @primer3_line_2 = primer3record if @primer3_line_2.nil? || @primer3_line_2 > primer3record
    else
      raise Primer3Exception, "#{primer3record.line} is not recognized (#{line_1}, #{line_2})"
    end
  end

  private

  # Chromosome of the SNP this one was created from, or the own chromosome
  # when there is no source SNP (for example when parsed from a file).
  def source_chromosome
    snp_from ? snp_from.chromosome : chromosome
  end

  # Primer columns when both lines have a preferred record.
  def columns_for_both_lines
    # Left primer of each line at the coordinates chosen for the other line.
    primer_2 = primer3_line_2.left_primer_with_coordinates(primer3_line_1.left_coordinates, primer3_line_1.orientation)
    primer_2_tm = find_left_primer_temp(primer_2)
    primer_1 = primer3_line_1.left_primer_with_coordinates(primer3_line_2.left_coordinates, primer3_line_2.orientation)
    primer_1_tm = find_left_primer_temp(primer_1)

    if primer3_line_1 < primer3_line_2 && primer_2_tm != "NA"
      return first_line_columns(primer3_line_1, primer_2, primer_2_tm, "first")
    elsif primer_1_tm != "NA"
      return second_line_columns(primer3_line_2, primer_1, primer_1_tm, "second")
    end

    # Neither preferred record has a counterpart with a known Tm: look for any
    # record that does.
    first_candidate = find_primer_pair_first
    second_candidate = find_primer_pair_second
    if first_candidate
      primer_2 = primer3_line_2.left_primer_with_coordinates(first_candidate.left_coordinates, first_candidate.orientation)
      primer_2_tm = find_left_primer_temp(primer_2)
    end
    if second_candidate
      primer_1 = primer3_line_1.left_primer_with_coordinates(second_candidate.left_coordinates, second_candidate.orientation)
      primer_1_tm = find_left_primer_temp(primer_1)
    end

    if first_candidate && second_candidate && first_candidate < second_candidate
      first_line_columns(first_candidate, primer_2, primer_2_tm, "first")
    elsif second_candidate
      second_line_columns(second_candidate, primer_1, primer_1_tm, "second")
    elsif first_candidate
      # Same column order as every other "first" row: the line 1 primer and its
      # Tm come before the line 2 ones (they used to be swapped in this case).
      first_line_columns(first_candidate, primer_2, primer_2_tm, "first")
    else
      []
    end
  end

  # Columns for a pair taken from a record of line 1; other_primer/other_tm
  # are the matching left primer for line 2 and its Tm.
  def first_line_columns(record, other_primer, other_tm, label)
    primer_columns(record, record.left_primer, other_primer,
                   record.shortest_pair.left.tm, other_tm, label)
  end

  # Columns for a pair taken from a record of line 2; other_primer/other_tm
  # are the matching left primer for line 1 and its Tm.
  def second_line_columns(record, other_primer, other_tm, label)
    primer_columns(record, other_primer, record.left_primer,
                   other_tm, record.shortest_pair.left.tm, label)
  end

  # The ten primer columns in output order.
  def primer_columns(record, primer_a, primer_b, tm_a, tm_b, label)
    [primer_a, primer_b, record.right_primer, record.type.to_s, record.orientation.to_s,
     tm_a, tm_b, record.shortest_pair.right.tm, label, record.shortest_pair.product_size]
  end
end
262
+
263
# One record of a primer3 output file.
#
# The raw "KEY=value" lines are kept in #properties (keys downcased and turned
# into symbols) and can be read as methods, e.g. record.sequence_id. The
# sequence id is expected to hold seven space separated tokens: snp, line,
# type, exon/intron flag, polymorphism, chromosome and orientation.
#
# Records are Comparable: see #<=> for the ordering.
class Primer3Record
  include Comparable
  attr_accessor :properties, :polymorphism

  def initialize
    @properties = {}
  end

  # The primer pair with the smallest size (the first one on ties), memoised.
  # nil when the record has no pairs. parse_blocks must have been called.
  def shortest_pair
    @shortest_pair ||= @primerPairs.min_by(&:size)
  end

  # The primer error reported by primer3, or nil when there is none.
  def primer_error
    @properties[:primer_error] || nil
  end

  # Exposes the parsed properties as reader methods. Unknown names are logged
  # to standard error together with the available properties and raise
  # NoMethodError.
  def method_missing(method_name, *args)
    value = @properties[method_name]
    return value if value
    $stderr.puts "Missing #{method_name}"
    $stderr.puts @properties.inspect
    raise NoMethodError.new("undefined method `#{method_name}' for #{self.class}", method_name)
  end

  # Keeps respond_to? in agreement with method_missing.
  def respond_to_missing?(method_name, include_private = false)
    (@properties && @properties[method_name]) ? true : super
  end

  # Melting temperature of the returned left primer whose sequence is
  # +primer+, or nil when no returned left primer has that sequence.
  def find_left_tm(primer)
    size.times do |i|
      return @properties[:"primer_left_#{i}_tm"] if @properties[:"primer_left_#{i}_sequence"] == primer
    end
    nil
  end

  # Orders records by: snp, then type (chromosome_specific before
  # chromosome_semispecific before chromosome_nonspecific), then exon before
  # non exon, then shorter product first.
  def <=>(anOther)
    ret = snp <=> anOther.snp
    return ret if ret != 0

    #Sorting by the types.
    if type == :chromosome_specific
      return -1 if anOther.type != :chromosome_specific
    elsif type == :chromosome_semispecific
      if anOther.type == :chromosome_specific
        return 1
      elsif anOther.type == :chromosome_nonspecific
        # This used to be an unconditional else, so two semispecific records
        # never got to the exon / product length comparison.
        return -1
      end
    elsif type == :chromosome_nonspecific
      return 1 if anOther.type != :chromosome_nonspecific
    end

    #Sorting if it is in intron or not This will give priority
    #to the cases when we know for sure the sequence from the line
    #and reduce the chances of getting messed with a short indel
    if self.exon?
      return -1 unless anOther.exon?
    elsif anOther.exon?
      return 1
    end

    #Sorting for how long the product is, the shorter, the better
    product_length <=> anOther.product_length
  end

  # Turns "start,length" into an array whose first two items are integers.
  def parse_coordinates(str)
    coords = str.split(',')
    coords[0] = coords[0].to_i
    coords[1] = coords[1].to_i
    coords
  end

  # Coordinates of the left primer of the shortest pair.
  def left_coordinates
    @left_coordinates = shortest_pair.left.coordinates
  end

  # Coordinates of the right primer of the shortest pair, with the first item
  # moved back by the length (first - second + 1). Memoised. A copy is
  # adjusted, so the coordinates stored in the primer itself stay untouched.
  def right_coordinates
    unless @right_coordinates
      coords = shortest_pair.right.coordinates.dup
      coords[0] = coords[0] - coords[1] + 1
      @right_coordinates = coords
    end
    @right_coordinates
  end

  # Sequence of the left primer of the shortest pair.
  def left_primer
    @left_primer = shortest_pair.left.sequence
  end

  # Copy of the left primer with its last base swapped between the two
  # alleles of +snp+ (reverse complemented alleles for reverse records).
  #
  # Raises Primer3Exception when the orientation is neither :forward nor
  # :reverse, or when the primer does not end in one of the alleles.
  def left_primer_snp(snp)
    tmp_primer = String.new(left_primer)
    if self.orientation == :forward
      base_original = snp.original
      base_snp = snp.snp
    elsif self.orientation == :reverse
      base_original = reverse_complement_string(snp.original)
      base_snp = reverse_complement_string(snp.snp)
    else
      raise Primer3Exception, "#{self.orientation} is not a valid orientation"
    end

    if tmp_primer[-1] == base_original
      tmp_primer[-1] = base_snp
    elsif tmp_primer[-1] == base_snp
      tmp_primer[-1] = base_original
    else
      raise Primer3Exception, "#{tmp_primer} doesnt end in a base in the SNP #{snp.to_s}"
    end
    tmp_primer
  end

  # Slice of the template at coordinates [start, length]. The template is
  # reverse complemented first when other_orientation differs from the
  # orientation of this record.
  def left_primer_with_coordinates(coordinates, other_orientation)
    seq = self.sequence_template
    seq = reverse_complement_string(seq) if self.orientation != other_orientation
    seq[coordinates[0], coordinates[1]]
  end

  # Reverse complement of a nucleotide string (IUPAC codes, both cases).
  def reverse_complement_string(sequenc_str)
    complement = sequenc_str.tr('atgcrymkdhvbswnATGCRYMKDHVBSWN', 'tacgyrkmhdbvswnTACGYRKMHDBVSWN')
    complement.reverse!
  end

  # Right primer taken from the template and reverse complemented. The result
  # is memoised, so repeated calls return the same sequence (it used to be
  # complemented again on every call).
  def right_primer_delete
    @right_primer ||= reverse_complement_string(self.sequence_template[right_coordinates[0], right_coordinates[1]])
  end

  # Sequence of the right primer of the shortest pair.
  def right_primer
    shortest_pair.right.sequence
  end

  # Size of the shortest pair.
  def product_length
    shortest_pair.size
  end

  # First token of the sequence id.
  def snp
    return @snp if @snp
    parse_header
    @snp
  end

  #CL3339Contig1:T509C AvocetS chromosome_specific exon 4D forward
  # Splits the sequence id into snp, line, type, exon flag, polymorphism,
  # chromosome and orientation.
  def parse_header
    @snp, @line, @type, @in, @polymorphism, @chromosome, @orientation = self.sequence_id.split(" ")
    @type = @type.to_sym
    # The flag is stored in @exon, which is what exon? reads (it used to be
    # written back to @in, so exon? never returned true).
    @exon = !@in.nil? && @in.to_sym == :exon
    @homeologous = @polymorphism.to_sym == :homeologous
    @orientation = @orientation.to_sym
    @parsed = true
  end

  def orientation
    ensure_header_parsed
    @orientation
  end

  def chromosome
    ensure_header_parsed
    @chromosome
  end

  def homeologous?
    ensure_header_parsed
    @homeologous
  end

  def type
    ensure_header_parsed
    @type
  end

  def exon?
    ensure_header_parsed
    @exon
  end

  def line
    ensure_header_parsed
    @line
  end

  # Number of primer pairs returned by primer3 (0 when not reported).
  def size
    @properties[:primer_pair_num_returned].to_i
  end

  # Builds one PrimerPair per returned pair.
  def parse_blocks
    @primerPairs = Array.new(size) { |i| PrimerPair.new(self, i) }
  end

  # Reads a primer3 output file and yields every record in it. A record ends
  # at a line that only contains "=". Every other line is split at its first
  # "=" into key and value, so values keep any "=" they contain. Lines without
  # a key (for example blank lines) are ignored.
  def self.parse_file(filename)
    File.open(filename) do |f|
      record = Primer3Record.new
      f.each_line do |line|
        line.chomp!
        if line == "="
          record.parse_blocks
          yield record
          record = Primer3Record.new
        else
          key, _separator, value = line.partition("=")
          next if key.empty?
          record.properties[key.downcase.to_sym] = value
        end
      end
    end
  end

  private

  # Parses the sequence id the first time a header field is requested.
  def ensure_header_parsed
    parse_header unless @parsed
  end
end
530
+
531
+
532
# One primer of a pair. Its attributes are stored with set_value under string
# keys and can be read back as methods, e.g. primer.tm.
class Primer
  attr_accessor :pair

  def initialize
    @values = {}
  end

  # Returns the value stored under the method name. Stored false values are
  # returned as well; only names without a value raise NoMethodError.
  def method_missing(m, *args, &block)
    value = @values[m.to_s]
    return value unless value.nil?
    raise NoMethodError, "There's no method called #{m}, available: #{@values.keys.to_s}."
  end

  # Keeps respond_to? in agreement with method_missing.
  def respond_to_missing?(m, include_private = false)
    (@values && !@values[m.to_s].nil?) || super
  end

  # Stores +value+ under +key+. Keys are expected to be strings, as that is
  # what method_missing looks up.
  def set_value(key, value)
    @values[key] = value
  end
end
551
+
552
# One primer pair returned by primer3 for a record.
#
# The properties of the record whose third "_" separated token is the index of
# the pair are distributed between the left primer, the right primer and the
# pair itself. The values of the pair can be read as methods, e.g.
# pair.product_size.
class PrimerPair
  attr_reader :record
  attr_reader :left, :right

  # Turns "start,length" into an array whose first two items are integers.
  def parse_coordinates(str)
    coords = str.split(',')
    coords[0] = coords[0].to_i
    coords[1] = coords[1].to_i
    coords
  end

  # Product size of the pair, as an integer.
  def size
    product_size.to_i
  end

  # record - the primer3 record; it must respond to size and properties.
  # index  - position of the pair in the record.
  #
  # Raises Primer3Exception when the index is not smaller than record.size or
  # when the record has no property for the left or for the right primer.
  def initialize(record, index)
    raise Primer3Exception, "Index #{index} is greater than the number of records" unless index < record.size
    @record = record
    @left = Primer.new
    @right = Primer.new
    @values = {}

    @left.set_value("added", false)
    @right.set_value("added", false)
    @left.pair = self
    @right.pair = self
    index_s = index.to_s
    record.properties.each do |key, value|
      tokens = key.to_s.split("_")
      next unless tokens.size > 2 && tokens[2] == index_s

      primer = case tokens[1]
               when "left" then @left
               when "right" then @right
               end
      if primer.nil?
        # Not a left/right property: it belongs to the pair.
        @values[tokens[3..6].join("_")] = value
        next
      end

      primer.set_value("added", true)
      if tokens.size == 3
        # The bare "primer_<side>_<index>" property holds the coordinates.
        primer.set_value("coordinates", parse_coordinates(value))
      else
        # Everything but the sequence is stored as a float.
        primer.set_value(tokens[3..6].join("_"), tokens[3] == "sequence" ? value : value.to_f)
      end
    end

    raise Primer3Exception, "The pair is not complete (l:#{left.added}, r:#{right.added})" unless @left.added && @right.added
  end

  # Returns the value of the pair stored under the method name.
  def method_missing(m, *args, &block)
    value = @values[m.to_s]
    return value if value
    raise NoMethodError, "There's no method called #{m}. Available methods: #{@values.keys.to_s}"
  end

  # Keeps respond_to? in agreement with method_missing.
  def respond_to_missing?(m, include_private = false)
    (@values && @values[m.to_s]) ? true : super
  end
end
616
+
617
# Collects the SNPs between line_1 and line_2, indexed by SNP#to_s, attaches
# the primer3 records to them and prints the resulting primers.
class KASPContainer
  attr_accessor :line_1, :line_2
  attr_accessor :snp_hash

  # Adds every SNP parsed from the file, assigning the lines of the container
  # to each of them.
  def add_snp_file(filename)
    @snp_hash ||= {}
    SNP.parse_file(filename) do |snp|
      @snp_hash[snp.to_s] = snp
      snp.line_1 = @line_1
      snp.line_2 = @line_2
    end
  end

  # Creates a SNP from snp_in (gene, original, position and snp are copied,
  # snp_in is kept as snp_from and the target regions of its exon list become
  # the regions), stores it and returns it.
  def add_snp(snp_in)
    @snp_hash ||= {}
    snp = SNP.new
    snp.gene = snp_in.gene
    snp.original = snp_in.original
    snp.position = snp_in.position
    snp.snp = snp_in.snp
    snp.line_1 = @line_1
    snp.line_2 = @line_2
    snp.snp_from = snp_in
    snp.regions = snp_in.exon_list.values.map { |x| x.target_region.to_s }
    @snp_hash[snp.to_s] = snp
    snp
  end

  # Adds every record of a primer3 output file to the SNP stored under
  # "<record snp>:<record chromosome>".
  #
  # Raises Primer3Exception when no such SNP was added before, naming the
  # missing key (it used to fail with a NoMethodError on nil).
  def add_primers_file(filename)
    Primer3Record.parse_file(filename) do |primer3record|
      key = "#{primer3record.snp.to_s}:#{primer3record.chromosome}"
      current_snp = @snp_hash && @snp_hash[key]
      raise Primer3Exception, "No SNP registered for primer3 record '#{key}'" if current_snp.nil?
      current_snp.add_record(primer3record)
    end
  end

  # One line per SNP, as returned by SNP#print_primers, each ended with a
  # newline. An empty string when no SNP was added.
  def print_primers
    (snp_hash || {}).each_value.map { |snp| "#{snp.print_primers}\n" }.join
  end
end
671
+
672
+ end
673
+