bio-polyploid-tools 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +16 -0
  3. data/Gemfile.lock +67 -0
  4. data/README +21 -0
  5. data/Rakefile +61 -0
  6. data/VERSION +1 -0
  7. data/bin/bfr.rb +133 -0
  8. data/bin/count_variations.rb +36 -0
  9. data/bin/filter_blat_by_target_coverage.rb +15 -0
  10. data/bin/find_best_blat_hit.rb +32 -0
  11. data/bin/hexaploid_primers.rb +168 -0
  12. data/bin/homokaryot_primers.rb +155 -0
  13. data/bin/map_markers_to_contigs.rb +66 -0
  14. data/bin/markers_in_region.rb +42 -0
  15. data/bin/polymarker.rb +219 -0
  16. data/bin/snps_between_bams.rb +106 -0
  17. data/bio-polyploid-tools.gemspec +139 -0
  18. data/conf/defaults.rb +1 -0
  19. data/conf/primer3_config/dangle.dh +128 -0
  20. data/conf/primer3_config/dangle.ds +128 -0
  21. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  22. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  23. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  24. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  25. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  26. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  27. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  28. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  29. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  30. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  31. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  32. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  33. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  34. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  35. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  36. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  37. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  38. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  39. data/conf/primer3_config/loops.dh +30 -0
  40. data/conf/primer3_config/loops.ds +30 -0
  41. data/conf/primer3_config/stack.dh +256 -0
  42. data/conf/primer3_config/stack.ds +256 -0
  43. data/conf/primer3_config/stackmm.dh +256 -0
  44. data/conf/primer3_config/stackmm.ds +256 -0
  45. data/conf/primer3_config/tetraloop.dh +77 -0
  46. data/conf/primer3_config/tetraloop.ds +77 -0
  47. data/conf/primer3_config/triloop.dh +16 -0
  48. data/conf/primer3_config/triloop.ds +16 -0
  49. data/conf/primer3_config/tstack.dh +256 -0
  50. data/conf/primer3_config/tstack2.dh +256 -0
  51. data/conf/primer3_config/tstack2.ds +256 -0
  52. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  53. data/lib/bio/BFRTools.rb +698 -0
  54. data/lib/bio/BIOExtensions.rb +186 -0
  55. data/lib/bio/PolyploidTools/ChromosomeArm.rb +52 -0
  56. data/lib/bio/PolyploidTools/ExonContainer.rb +194 -0
  57. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  58. data/lib/bio/PolyploidTools/PrimerRegion.rb +22 -0
  59. data/lib/bio/PolyploidTools/SNP.rb +681 -0
  60. data/lib/bio/PolyploidTools/SNPSequence.rb +56 -0
  61. data/lib/bio/SAMToolsExtensions.rb +284 -0
  62. data/lib/bio/db/exonerate.rb +272 -0
  63. data/lib/bio/db/fastadb.rb +164 -0
  64. data/lib/bio/db/primer3.rb +673 -0
  65. data/lib/bioruby-polyploid-tools.rb +25 -0
  66. data/test/data/BS00068396_51.fa +2 -0
  67. data/test/data/BS00068396_51_contigs.aln +1412 -0
  68. data/test/data/BS00068396_51_contigs.dnd +7 -0
  69. data/test/data/BS00068396_51_contigs.fa +8 -0
  70. data/test/data/BS00068396_51_exonerate.tab +6 -0
  71. data/test/data/BS00068396_51_genes.txt +14 -0
  72. data/test/data/LIB1716.bam +0 -0
  73. data/test/data/LIB1716.bam.bai +0 -0
  74. data/test/data/LIB1719.bam +0 -0
  75. data/test/data/LIB1719.bam.bai +0 -0
  76. data/test/data/LIB1721.bam +0 -0
  77. data/test/data/LIB1721.bam.bai +0 -0
  78. data/test/data/LIB1722.bam +0 -0
  79. data/test/data/LIB1722.bam.bai +0 -0
  80. data/test/data/S22380157.fa +16 -0
  81. data/test/data/S22380157.fa.fai +1 -0
  82. data/test/data/Test3Aspecific.csv +1 -0
  83. data/test/data/Test3Aspecific_contigs.fa +6 -0
  84. data/test/data/patological_cases5D.csv +1 -0
  85. data/test/data/short_primer_design_test.csv +10 -0
  86. data/test/data/test_primer3_error.csv +4 -0
  87. data/test/data/test_primer3_error_contigs.fa +10 -0
  88. data/test/test_bfr.rb +51 -0
  89. data/test/test_exon_container.rb +17 -0
  90. data/test/test_exonearate.rb +53 -0
  91. data/test/test_snp_parsing.rb +40 -0
  92. metadata +201 -0
@@ -0,0 +1,164 @@
1
+ #Module to hold the information about the fasta file
2
+
3
+ module Bio::DB::Fasta
4
# Ordered list of fasta entries that can also be looked up by entry id.
class Index
  include Enumerable
  attr_reader :entries

  def initialize
    @entries = []
    @entries_map = {}
  end

  # Appends an entry and registers it under its id.
  # This does not validate whether the same entry is added twice: the list
  # keeps both copies while the id lookup keeps the most recent one.
  def <<(entry)
    @entries << entry
    @entries_map[entry.id] = entry
  end

  # Yields every entry in insertion order (this is what Enumerable builds on).
  # Returns an Enumerator when no block is given.
  def each(&block)
    return enum_for(:each) unless block
    @entries.each(&block)
    self
  end

  def length
    @entries.length
  end

  # Returns a new Index with just the specified range, as if it was an Array.
  # The returned object is of type Index. A single integer position yields an
  # Index with at most one entry; positions out of bounds yield an empty Index.
  def [](args)
    selected = @entries[args]
    # Array#[] returns nil or a single element for non-range arguments.
    selected = [selected].compact unless selected.is_a?(Array)
    new_index = Index.new
    selected.each { |entry| new_index << entry }
    new_index
  end

  # Returns the entry registered under the given id, or nil when unknown.
  def region_for_entry(entry)
    @entries_map[entry]
  end
end
42
+
43
# One sequence of the fasta file: its identifier and its length.
class Entry
  attr_reader :id, :length

  # The length is coerced with to_i, so a numeric string is accepted.
  def initialize(id, length)
    @id = id
    @length = length.to_i
  end

  # Builds a forward Region that starts at 0 and ends at the entry length.
  def get_full_region
    Region.new.tap do |whole|
      whole.entry = id
      whole.start = 0
      whole.end = @length
      whole.orientation = :forward
    end
  end

  # Same as get_full_region.
  def to_region
    get_full_region
  end
end
64
+
65
# Class to wrap a region of a chromosome.
class Region
  attr_accessor :entry, :start, :end, :orientation

  # Formats the region as "entry:start-end".
  def to_s
    "#{@entry}:#{@start}-#{@end}"
  end

  # Parses a string with the format "entry:start-end" (single quotes are
  # ignored) into a Region. The orientation is :reverse when end < start,
  # otherwise :forward.
  #
  # Raises FastaDBException when the string does not have exactly one ":"
  # separated pair whose second part is exactly one "-" separated pair.
  def self.parse_region(reg_str)
    string = reg_str.delete("'")
    fields_1 = string.split(":")
    # Validate the outer split first: without a ":" there is no second field
    # to split, and the caller should get the documented exception.
    fields_2 = fields_1.length == 2 ? fields_1[1].split("-") : []
    raise FastaDBException, "Invalid region. #{string}" if fields_1.length != 2 || fields_2.length != 2

    reg = Region.new
    reg.entry = fields_1[0]
    reg.start = fields_2[0].to_i
    reg.end = fields_2[1].to_i
    reg.orientation = reg.end < reg.start ? :reverse : :forward
    reg
  end

  # Difference between end and start (negative for reverse regions).
  def size
    @end - @start
  end
end
98
+
99
# Error raised by the fasta database classes (invalid regions, missing index,
# sequences that cannot be fetched).
class FastaDBException < StandardError
end
100
+
101
# Class that holds the fasta file. It is used as a database. It relies
# heavily on samtools (through Bio::DB::SAM::Tools).
class FastaFile

  attr_reader :index, :fasta_path

  # Finalizer registered for every instance. It currently does nothing.
  def FastaFile.finalize(id)
    #id.close()
    #puts "Finalizing #{id} at #{Time.new}"
  end

  # Loads the samtools index of the fasta file, building it first when it
  # cannot be loaded.
  #
  # Raises FastaDBException when no path is given or the index cannot be
  # generated.
  def initialize(fasta_filename)
    @fasta_path = fasta_filename
    raise FastaDBException, "No path for the refernce fasta file. " if @fasta_path.nil?
    @fasta_index = Bio::DB::SAM::Tools.fai_load(@fasta_path)
    if index_missing?
      $stderr.puts "Generating index for: " + @fasta_path
      Bio::DB::SAM::Tools.fai_build(@fasta_path)
      @fasta_index = Bio::DB::SAM::Tools.fai_load(@fasta_path)
      raise FastaDBException, "Unable to generate fasta index for: " + @fasta_path if index_missing?
    end
    ObjectSpace.define_finalizer(self, self.class.method(:finalize).to_proc)
  end

  # Reads "<fasta_path>.fai" into an Index of Entry objects (first column is
  # the id, second column the length) and returns the number of entries.
  # The index is read only once; later calls return the cached length.
  def load_fai_entries
    return @index.length if @index
    loaded = Index.new
    # File.foreach closes the file when done. The index is only cached once
    # the whole file was read, so a failed read can be retried.
    File.foreach(@fasta_path + ".fai") do |line|
      fields = line.split("\t")
      loaded << Entry.new(fields[0], fields[1])
    end
    @index = loaded
    @index.length
  end

  # Releases the samtools index, if there is one.
  def close
    Bio::DB::SAM::Tools.fai_destroy(@fasta_index) unless index_missing?
    @fasta_index = nil
  end

  # Fetches the sequence of a region. The region needs to have a method
  # to_region or a method to_s that has the format "chromosome:start-end" as
  # in samtools. The sequence is reverse complemented when the resolved
  # region reports the :reverse orientation.
  #
  # Raises FastaDBException when there is no index or nothing is returned for
  # the query.
  def fetch_sequence(region)
    raise FastaDBException, "No fasta index for " if index_missing?
    # Resolve the region first so that the orientation is read from the same
    # object the query is built from (an Entry only offers to_region).
    region = region.to_region if region.respond_to?(:to_region)
    query = region.to_s

    len = FFI::MemoryPointer.new :int
    str = Bio::DB::SAM::Tools.fai_fetch(@fasta_index, query, len)
    raise FastaDBException, "Unable to get sequence for reference: " + query if str.nil?
    reference = Bio::Sequence.auto(str)

    # Plain strings have no orientation; they are treated as forward.
    if region.respond_to?(:orientation) && region.orientation == :reverse
      reference.reverse_complement!()
    end
    reference
  end

  private

  # True when there is no usable samtools index handle.
  def index_missing?
    @fasta_index.nil? || @fasta_index.null?
  end
end
164
+ end
@@ -0,0 +1,673 @@
1
+
2
+ module Bio::DB::Primer3
3
# Error raised by the primer3 wrapper classes.
class Primer3Exception < RuntimeError; end
5
+
6
# Runs primer3_core, feeding it the content of the file opts[:in] on its
# standard input and writing its standard output to the file opts[:out].
#
# Raises Primer3Exception (including the captured standard error) when the
# exit status is not 0.
def self.run(opts={})
  puts "Primer3.run running..."

  input_text = File.read(opts[:in])
  captured_out = ''
  captured_err = ''
  status = systemu "primer3_core", 0=>input_text, 1=>captured_out, 2=>captured_err

  unless status.exitstatus == 0
    raise Primer3Exception.new(), "Error running primer3. Command line was 'primer3_core'\nPrimer3 STDERR was:\n#{captured_err}"
  end
  File.open(opts[:out], 'w') { |out| out.write(captured_out) }
end
20
+
21
# A SNP together with the primer3 records designed for its two lines.
class SNP

  attr_accessor :gene, :original, :position, :snp, :chromosome, :line_1, :line_2
  attr_accessor :primer3_line_1, :primer3_line_2, :template_length
  attr_accessor :primers_line_1, :primers_line_2
  attr_accessor :used_contigs
  attr_accessor :snp_from
  attr_accessor :regions
  attr_accessor :primer3_errors

  def initialize
    @primers_line_1 = SortedSet.new
    @primers_line_2 = SortedSet.new
    # This used to be assigned to a misspelled variable, leaving regions nil.
    @regions = SortedSet.new
  end

  # NOTE(review): the string ends with a doubled "}" — looks like a typo, but
  # it is kept because the consumers of this name are not visible here.
  def line_1_name
    "#{gene}:#{position}#{original}>#{snp} #{line_1}}"
  end

  # NOTE(review): same doubled "}" as in line_1_name — confirm before changing.
  def line_2_name
    "#{gene}:#{position}#{original}>#{snp} #{line_2}}"
  end

  # Key of the SNP: "gene:<original><position><snp>:<chromosome of snp_from>".
  def to_s
    "#{gene}:#{original}#{position}#{snp}:#{snp_from.chromosome}"
  end

  # Searches the records of line 1 and then of line 2 for the given left
  # primer and returns the first value found by find_left_tm, or "NA".
  def find_left_primer_temp(primer)
    [primers_line_1, primers_line_2].each do |records|
      records.each do |pr|
        tm = pr.find_left_tm(primer)
        return tm if tm
      end
    end
    "NA"
  end

  # First record of line 1 whose SNP-swapped left primer is also found by
  # find_left_primer_temp, or nil.
  def find_primer_pair_first
    primers_line_1.find { |pr| find_left_primer_temp(pr.left_primer_snp(self)) != "NA" }
  end

  # First record of line 2 whose SNP-swapped left primer is also found by
  # find_left_primer_temp, or nil.
  def find_primer_pair_second
    primers_line_2.find { |pr| find_left_primer_temp(pr.left_primer_snp(self)) != "NA" }
  end

  # Returns one comma separated row: gene, SNP, template length, chromosome,
  # number of regions, regions joined with "|", and, when primers are
  # available, the polymorphism followed by the primer columns (two left
  # primers, right primer, type, orientation, two left tm values, right tm,
  # which line was selected, product size).
  def print_primers
    #TODO: Retrieve error messages
    values = []
    values << gene
    values << "#{original}#{position}#{snp}"
    values << template_length
    values << snp_from.chromosome
    values << regions.size
    values << regions.to_a.join("|")

    if primer3_line_1 && primer3_line_2
      values << primer3_line_1.polymorphism
      append_paired_columns(values)
    elsif primer3_line_1
      values << primer3_line_1.polymorphism
      append_primer_columns(values, primer3_line_1,
                            [primer3_line_1.left_primer, primer3_line_1.left_primer_snp(self)],
                            [primer3_line_1.shortest_pair.left.tm, "NA"],
                            "first+")
    elsif primer3_line_2
      values << primer3_line_2.polymorphism
      append_primer_columns(values, primer3_line_2,
                            [primer3_line_2.left_primer_snp(self), primer3_line_2.left_primer],
                            ["NA", primer3_line_2.shortest_pair.left.tm],
                            "second+")
    end
    values.join(",")
  end

  # Builds a SNP from a line "gene,original,position,snp". The position is
  # converted with to_i and both bases are upper-cased. The argument is not
  # modified; missing fields are left as nil (position becomes 0).
  def self.parse(reg_str)
    snp = SNP.new
    snp.gene, snp.original, snp.position, snp.snp = reg_str.chomp.split(",")
    snp.position = snp.position.to_i
    snp.original = snp.original.upcase if snp.original
    snp.snp = snp.snp.upcase if snp.snp
    snp
  end

  # Yields a SNP for every line of the file whose position is greater than 0.
  def self.parse_file(filename)
    File.open(filename) do |f|
      f.each_line do |line|
        snp = SNP.parse(line)
        yield snp if snp.position > 0
      end
    end
  end

  # Registers a primer3 record for this SNP. Records with a primer error are
  # only collected in primer3_errors. Otherwise the record must belong to
  # line_1 or line_2; when it returned left primers it is added to the set of
  # its line and replaces the selected record of that line if it sorts lower.
  #
  # Raises Primer3Exception when the line of the record is neither line.
  def add_record(primer3record)
    @primer3_errors ||= []
    @template_length = primer3record.sequence_template.size
    if primer3record.primer_error != nil
      primer3_errors << primer3record
      return
    end

    if primer3record.line == @line_1
      @line_1_template = primer3record.sequence_template
    elsif primer3record.line == @line_2
      @line_2_template = primer3record.sequence_template
    else
      raise Primer3Exception.new "#{primer3record.line} is not recognized (#{line_1}, #{line_2})"
    end

    return unless primer3record.primer_left_num_returned.to_i > 0

    if primer3record.line == @line_1
      primers_line_1 << primer3record
      @primer3_line_1 = primer3record if !@primer3_line_1 || @primer3_line_1 > primer3record
    else
      # Records of line 2 used to be stored in primers_line_1, which left
      # primers_line_2 (and find_primer_pair_second) permanently empty.
      primers_line_2 << primer3record
      @primer3_line_2 = primer3record if !@primer3_line_2 || @primer3_line_2 > primer3record
    end
  end

  private

  # Appends the ten primer columns of a row for the given record.
  def append_primer_columns(values, record, left_primers, left_tms, label)
    values.concat(left_primers)
    values << record.right_primer
    values << record.type.to_s
    values << record.orientation.to_s
    values.concat(left_tms)
    values << record.shortest_pair.right.tm
    values << label
    values << record.shortest_pair.product_size
  end

  # Chooses the record to report when both lines have a selected record:
  # the selected records are tried first, then the first candidates of each
  # line. Nothing is appended when no candidate is found.
  def append_paired_columns(values)
    #Block that searches both if both pairs have a TM
    primer_2 = primer3_line_2.left_primer_with_coordinates(primer3_line_1.left_coordinates, primer3_line_1.orientation)
    primer_2_tm = find_left_primer_temp(primer_2)
    primer_1 = primer3_line_1.left_primer_with_coordinates(primer3_line_2.left_coordinates, primer3_line_2.orientation)
    primer_1_tm = find_left_primer_temp(primer_1)

    if primer3_line_1 < primer3_line_2 && primer_2_tm != "NA"
      append_primer_columns(values, primer3_line_1,
                            [primer3_line_1.left_primer, primer_2],
                            [primer3_line_1.shortest_pair.left.tm, primer_2_tm],
                            "first")
      return
    end
    if primer_1_tm != "NA"
      append_primer_columns(values, primer3_line_2,
                            [primer_1, primer3_line_2.left_primer],
                            [primer_1_tm, primer3_line_2.shortest_pair.left.tm],
                            "second")
      return
    end

    first_candidate = find_primer_pair_first
    second_candidate = find_primer_pair_second

    if first_candidate
      primer_2 = primer3_line_2.left_primer_with_coordinates(first_candidate.left_coordinates, first_candidate.orientation)
      primer_2_tm = find_left_primer_temp(primer_2)
    end
    if second_candidate
      primer_1 = primer3_line_1.left_primer_with_coordinates(second_candidate.left_coordinates, second_candidate.orientation)
      primer_1_tm = find_left_primer_temp(primer_1)
    end

    if first_candidate && second_candidate && first_candidate < second_candidate
      append_primer_columns(values, first_candidate,
                            [first_candidate.left_primer, primer_2],
                            [first_candidate.shortest_pair.left.tm, primer_2_tm],
                            "first")
    elsif second_candidate
      append_primer_columns(values, second_candidate,
                            [primer_1, second_candidate.left_primer],
                            [primer_1_tm, second_candidate.shortest_pair.left.tm],
                            "second")
    elsif first_candidate
      # NOTE(review): the column order here matches the "second" layout even
      # though the label is "first" — kept as in the original; confirm intent.
      append_primer_columns(values, first_candidate,
                            [primer_2, first_candidate.left_primer],
                            [primer_2_tm, first_candidate.shortest_pair.left.tm],
                            "first")
    end
  end
end
262
+
263
# One record of a primer3 output file. The raw KEY=value lines are kept in
# properties (lower-cased symbol keys, string values) and are reachable as
# methods through method_missing. The sequence id is expected to hold seven
# space separated fields: snp, line, type, exon/intron flag, polymorphism,
# chromosome and orientation.
class Primer3Record
  include Comparable
  attr_accessor :properties, :polymorphism

  def initialize
    @properties = {}
  end

  # The primer pair with the smallest size (the first one on ties), or nil
  # when the record has no pairs. parse_blocks has to be called before.
  def shortest_pair
    @shortest_pair ||= @primerPairs.min_by { |pair| pair.size }
  end

  # The primer_error property, or nil when there is none.
  def primer_error
    @properties[:primer_error] || nil
  end

  # Exposes the properties as methods. Raises NoMethodError for names that
  # are not a property with a value.
  def method_missing(method_name, *args)
    value = @properties[method_name]
    return value if value
    raise NoMethodError.new("Missing #{method_name}, available: #{@properties.keys}", method_name)
  end

  # Keeps respond_to? in sync with method_missing.
  def respond_to_missing?(method_name, include_private = false)
    (@properties[method_name] ? true : false) || super
  end

  # Returns the primer_left_<i>_tm property of the first left primer whose
  # sequence equals the given primer, or nil when none matches.
  def find_left_tm(primer)
    size.times do |i|
      return @properties[:"primer_left_#{i}_tm"] if @properties[:"primer_left_#{i}_sequence"] == primer
    end
    nil
  end

  # Orders records by snp, then by type (specific before semispecific before
  # nonspecific), then exon before non-exon, then by product length.
  def <=>(anOther)
    ret = snp <=> anOther.snp
    return ret if ret != 0

    #Sorting by the types.
    if type == :chromosome_specific
      return -1 if anOther.type != :chromosome_specific
    elsif type == :chromosome_semispecific
      # This used to be "else <condition>", which returned -1 even when both
      # records were semispecific.
      if anOther.type == :chromosome_specific
        return 1
      elsif anOther.type == :chromosome_nonspecific
        return -1
      end
    elsif type == :chromosome_nonspecific
      return 1 if anOther.type != :chromosome_nonspecific
    end

    #Sorting if it is in intron or not. This will give priority
    #to the cases when we know for sure the sequence from the line
    #and reduce the chances of getting messed with a short indel
    if self.exon?
      return -1 unless anOther.exon?
    elsif anOther.exon?
      return 1
    end

    #Sorting for how long the product is, the shorter, the better
    product_length <=> anOther.product_length
  end

  # Splits "a,b" into an array whose first two values are integers.
  def parse_coordinates(str)
    coords = str.split(',')
    coords[0] = coords[0].to_i
    coords[1] = coords[1].to_i
    coords
  end

  # Coordinates of the left primer of the shortest pair.
  def left_coordinates
    shortest_pair.left.coordinates
  end

  # Coordinates of the right primer of the shortest pair with the first value
  # replaced by first - second + 1. A new array is built so the coordinates
  # stored in the primer are not modified.
  def right_coordinates
    unless @right_coordinates
      first, second = shortest_pair.right.coordinates
      @right_coordinates = [first - second + 1, second]
    end
    @right_coordinates
  end

  # Sequence of the left primer of the shortest pair.
  def left_primer
    shortest_pair.left.sequence
  end

  # Returns a copy of the left primer with its last base swapped between the
  # two alleles of the SNP (complemented when the record is :reverse).
  #
  # Raises Primer3Exception when the orientation is unknown or the primer
  # does not end in one of the bases of the SNP.
  def left_primer_snp(snp)
    tmp_primer = String.new(left_primer)
    if self.orientation == :forward
      base_original = snp.original
      base_snp = snp.snp
    elsif self.orientation == :reverse
      base_original = reverse_complement_string(snp.original)
      base_snp = reverse_complement_string(snp.snp)
    else
      raise Primer3Exception.new "#{self.orientation} is not a valid orientation"
    end

    if tmp_primer[-1] == base_original
      tmp_primer[-1] = base_snp
    elsif tmp_primer[-1] == base_snp
      tmp_primer[-1] = base_original
    else
      raise Primer3Exception.new "#{tmp_primer} doesnt end in a base in the SNP #{snp.to_s}"
    end
    tmp_primer
  end

  # Slice [start, length] of the template, taken on the reverse complement
  # when the orientation of this record differs from other_orientation.
  def left_primer_with_coordinates(coordinates, other_orientation)
    seq = self.sequence_template
    seq = reverse_complement_string(seq) if self.orientation != other_orientation
    seq[coordinates[0], coordinates[1]]
  end

  # Reverse complement of a nucleotide string (IUPAC codes, case preserved).
  def reverse_complement_string(sequenc_str)
    sequenc_str.tr('atgcrymkdhvbswnATGCRYMKDHVBSWN', 'tacgyrkmhdbvswnTACGYRKMHDBVSWN').reverse
  end

  # Right primer cut from the template with right_coordinates and reverse
  # complemented. The result is computed once; it used to be complemented
  # again on every call.
  def right_primer_delete
    @right_primer ||= reverse_complement_string(self.sequence_template[right_coordinates[0], right_coordinates[1]])
  end

  # Sequence of the right primer of the shortest pair.
  def right_primer
    shortest_pair.right.sequence
  end

  # Size of the shortest pair.
  def product_length
    shortest_pair.size
  end

  def snp
    parse_header unless @snp
    @snp
  end

  # Splits the sequence id into its fields, e.g.
  # "CL3339Contig1:T509C AvocetS chromosome_specific exon homeologous 4D forward".
  def parse_header
    @snp, @line, @type, @in, @polymorphism, @chromosome, @orientation = self.sequence_id.split(" ")
    @type = @type.to_sym
    # The flag used to be written to @in only, so exon? never returned true.
    @exon = @in ? @in.to_sym == :exon : false
    @homeologous = @polymorphism.to_sym == :homeologous
    @parsed = true
    @orientation = @orientation.to_sym
  end

  def orientation
    parse_header unless @parsed
    @orientation
  end

  def chromosome
    parse_header unless @parsed
    @chromosome
  end

  def homeologous?
    parse_header unless @parsed
    @homeologous
  end

  def type
    parse_header unless @parsed
    @type
  end

  def exon?
    parse_header unless @parsed
    @exon
  end

  def line
    parse_header unless @parsed
    @line
  end

  # Number of primer pairs returned by primer3 (0 when the property is missing).
  def size
    @properties[:primer_pair_num_returned].to_i
  end

  # Builds one PrimerPair per returned pair.
  def parse_blocks
    @primerPairs = (0...size).map { |i| PrimerPair.new(self, i) }
  end

  # Reads a primer3 output file and yields one Primer3Record per block
  # terminated by a line containing only "=". Every other line is split at
  # its first "=" into a property; blank lines are skipped.
  def self.parse_file(filename)
    File.open(filename) do |f|
      record = Primer3Record.new
      f.each_line do |line|
        line.chomp!
        if line == "="
          record.parse_blocks
          yield record
          record = Primer3Record.new
        else
          # A limit of 2 keeps any further "=" inside the value.
          key, value = line.split("=", 2)
          next if key.nil?
          record.properties[key.downcase.to_sym] = value.to_s
        end
      end
    end
  end
end
530
+
531
+
532
# One primer of a pair. Its values are stored under string keys with
# set_value and read back as methods (e.g. primer.tm for the key "tm").
class Primer
  attr_accessor :pair

  def initialize
    @values = {}
  end

  # Returns the stored value named like the method (false is a valid value).
  # Raises NoMethodError when no value is stored under that name.
  def method_missing(m, *args, &block)
    value = @values[m.to_s]
    return value unless value.nil?
    raise NoMethodError.new(), "There's no method called #{m}, available: #{@values.keys.to_s}."
  end

  # Keeps respond_to? in sync with method_missing.
  def respond_to_missing?(m, include_private = false)
    !@values[m.to_s].nil? || super
  end

  def set_value(key, value)
    @values[key] = value
  end
end
551
+
552
# The primer pair number <index> of a primer3 record. The properties of the
# record named primer_left_<index>..., primer_right_<index>... are copied to
# the left and right Primer; any other property whose third token is the
# index (e.g. primer_pair_<index>_product_size) is kept as a value of the
# pair and is readable as a method.
class PrimerPair

  attr_reader :record
  attr_reader :left, :right

  # Splits "a,b" into an array whose first two values are integers.
  def parse_coordinates(str)
    coords = str.split(',')
    coords[0] = coords[0].to_i
    coords[1] = coords[1].to_i
    coords
  end

  # The product_size value as an integer.
  def size
    product_size.to_i
  end

  # Raises Primer3Exception when the index is not below record.size or when
  # the record has no property for the left or the right primer.
  def initialize(record, index)
    raise Primer3Exception.new(), "Index #{index} is greater than the number of records" unless index < record.size
    @record = record
    @left = Primer.new
    @right = Primer.new
    @values = {}

    [@left, @right].each do |primer|
      primer.set_value("added", false)
      primer.pair = self
    end

    index_s = index.to_s
    record.properties.each do |key, value|
      tokens = key.to_s.split("_")
      next unless tokens.size > 2 && tokens[2] == index_s

      primer = nil
      primer = @right if tokens[1] == "right"
      primer = @left if tokens[1] == "left"

      if primer.nil?
        @values[tokens[3..6].join("_")] = value
        next
      end

      primer.set_value("added", true)
      if tokens.size == 3
        # A key with only three tokens holds the "start,length" coordinates.
        primer.set_value("coordinates", parse_coordinates(value))
      else
        # Everything except the sequence is stored as a float.
        to_add = tokens[3] == "sequence" ? value : value.to_f
        primer.set_value(tokens[3..6].join("_"), to_add)
      end
    end

    raise Primer3Exception.new(), "The pair is not complete (l:#{left.added}, r:#{right.added})" if @left.added == false or @right.added == false
  end

  # Returns the value of the pair named like the method.
  # Raises NoMethodError when there is no such value.
  def method_missing(m, *args, &block)
    value = @values[m.to_s]
    return value if value
    raise NoMethodError.new(), "There's no method called #{m}. Available methods: #{@values.keys.to_s}"
  end

  # Keeps respond_to? in sync with method_missing.
  def respond_to_missing?(m, include_private = false)
    (@values[m.to_s] ? true : false) || super
  end
end
616
+
617
# Collects the SNPs of a run, keyed by SNP#to_s, attaches the primer3
# records to them and prints one row per SNP.
class KASPContainer

  attr_accessor :line_1, :line_2
  attr_accessor :snp_hash

  # Adds every SNP yielded by SNP.parse_file and assigns both lines to it.
  # NOTE(review): the key comes from SNP#to_s, which reads snp_from, and the
  # SNPs built by SNP.parse do not set snp_from — confirm this path is used.
  def add_snp_file(filename)
    @snp_hash ||= {}
    SNP.parse_file(filename) do |snp|
      @snp_hash[snp.to_s] = snp
      snp.line_1 = @line_1
      snp.line_2 = @line_2
    end
  end

  # Creates a SNP from snp_in (gene, original, position and snp are copied,
  # snp_in is kept as snp_from, and the regions are the target_region of
  # every value of snp_in.exon_list as strings), stores it and returns it.
  def add_snp(snp_in)
    @snp_hash ||= {}
    snp = SNP.new
    snp.gene = snp_in.gene
    snp.original = snp_in.original
    snp.position = snp_in.position
    snp.snp = snp_in.snp
    snp.line_1 = @line_1
    snp.line_2 = @line_2
    snp.snp_from = snp_in
    snp.regions = snp_in.exon_list.values.map { |x| x.target_region.to_s }
    @snp_hash[snp.to_s] = snp
    snp
  end

  # Adds every record of a primer3 output file to the SNP stored under
  # "<record snp>:<record chromosome>".
  #
  # Raises Primer3Exception when no SNP is stored under that key.
  def add_primers_file(filename)
    Primer3Record.parse_file(filename) do |primer3record|
      key = "#{primer3record.snp.to_s}:#{primer3record.chromosome}"
      current_snp = @snp_hash && @snp_hash[key]
      raise Primer3Exception, "No SNP registered for primer3 record '#{key}'" if current_snp.nil?
      current_snp.add_record(primer3record)
    end
  end

  # Returns the rows of all the SNPs, each one followed by a new line.
  # Returns an empty string when no SNP has been added.
  def print_primers
    (snp_hash || {}).values.map { |snp| "#{snp.print_primers}\n" }.join
  end

end
671
+
672
+ end
673
+