bio-polymarker 1.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (177) hide show
  1. checksums.yaml +7 -0
  2. data/.travis.yml +24 -0
  3. data/Gemfile +23 -0
  4. data/README.md +205 -0
  5. data/Rakefile +61 -0
  6. data/SECURITY.md +16 -0
  7. data/VERSION +1 -0
  8. data/bin/bfr.rb +128 -0
  9. data/bin/blast_triads.rb +166 -0
  10. data/bin/blast_triads_promoters.rb +192 -0
  11. data/bin/count_variations.rb +36 -0
  12. data/bin/filter_blat_by_target_coverage.rb +69 -0
  13. data/bin/filter_exonerate_by_identity.rb +38 -0
  14. data/bin/find_best_blat_hit.rb +33 -0
  15. data/bin/find_best_exonerate.rb +17 -0
  16. data/bin/get_longest_hsp_blastx_triads.rb +66 -0
  17. data/bin/hexaploid_primers.rb +168 -0
  18. data/bin/homokaryot_primers.rb +183 -0
  19. data/bin/mafft_triads.rb +120 -0
  20. data/bin/mafft_triads_promoters.rb +403 -0
  21. data/bin/map_markers_to_contigs.rb +66 -0
  22. data/bin/marker_to_vcf.rb +241 -0
  23. data/bin/markers_in_region.rb +42 -0
  24. data/bin/mask_triads.rb +169 -0
  25. data/bin/polymarker.rb +410 -0
  26. data/bin/polymarker_capillary.rb +443 -0
  27. data/bin/polymarker_deletions.rb +350 -0
  28. data/bin/snp_position_to_polymarker.rb +101 -0
  29. data/bin/snps_between_bams.rb +107 -0
  30. data/bin/tag_stats.rb +75 -0
  31. data/bin/vcfLineToTable.rb +56 -0
  32. data/bin/vcfToPolyMarker.rb +82 -0
  33. data/bio-polymarker.gemspec +227 -0
  34. data/conf/defaults.rb +1 -0
  35. data/conf/primer3_config/dangle.dh +128 -0
  36. data/conf/primer3_config/dangle.ds +128 -0
  37. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  38. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  39. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  40. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  41. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  42. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  43. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  44. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  45. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  46. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  47. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  48. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  49. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  50. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  51. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  52. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  53. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  54. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  55. data/conf/primer3_config/loops.dh +30 -0
  56. data/conf/primer3_config/loops.ds +30 -0
  57. data/conf/primer3_config/stack.dh +256 -0
  58. data/conf/primer3_config/stack.ds +256 -0
  59. data/conf/primer3_config/stackmm.dh +256 -0
  60. data/conf/primer3_config/stackmm.ds +256 -0
  61. data/conf/primer3_config/tetraloop.dh +77 -0
  62. data/conf/primer3_config/tetraloop.ds +77 -0
  63. data/conf/primer3_config/triloop.dh +16 -0
  64. data/conf/primer3_config/triloop.ds +16 -0
  65. data/conf/primer3_config/tstack.dh +256 -0
  66. data/conf/primer3_config/tstack2.dh +256 -0
  67. data/conf/primer3_config/tstack2.ds +256 -0
  68. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  69. data/lib/bio/BFRTools.rb +465 -0
  70. data/lib/bio/BIOExtensions.rb +153 -0
  71. data/lib/bio/PolyploidTools/ChromosomeArm.rb +63 -0
  72. data/lib/bio/PolyploidTools/ExonContainer.rb +245 -0
  73. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  74. data/lib/bio/PolyploidTools/Mask.rb +116 -0
  75. data/lib/bio/PolyploidTools/NoSNPSequence.rb +292 -0
  76. data/lib/bio/PolyploidTools/PrimerRegion.rb +30 -0
  77. data/lib/bio/PolyploidTools/SNP.rb +804 -0
  78. data/lib/bio/PolyploidTools/SNPMutant.rb +86 -0
  79. data/lib/bio/PolyploidTools/SNPSequence.rb +55 -0
  80. data/lib/bio/db/blast.rb +114 -0
  81. data/lib/bio/db/exonerate.rb +333 -0
  82. data/lib/bio/db/primer3.rb +820 -0
  83. data/lib/bio-polymarker.rb +28 -0
  84. data/test/data/7B_amplicon_test.fa +12 -0
  85. data/test/data/7B_amplicon_test.fa.fai +1 -0
  86. data/test/data/7B_amplicon_test_reference.fa +110 -0
  87. data/test/data/7B_amplicon_test_reference.fa.fai +3 -0
  88. data/test/data/7B_marker_test.txt +1 -0
  89. data/test/data/BS00068396_51.fa +2 -0
  90. data/test/data/BS00068396_51_blast.tab +4 -0
  91. data/test/data/BS00068396_51_contigs.aln +1412 -0
  92. data/test/data/BS00068396_51_contigs.dnd +7 -0
  93. data/test/data/BS00068396_51_contigs.fa +8 -0
  94. data/test/data/BS00068396_51_contigs.fa.fai +4 -0
  95. data/test/data/BS00068396_51_contigs.fa.nhr +0 -0
  96. data/test/data/BS00068396_51_contigs.fa.nin +0 -0
  97. data/test/data/BS00068396_51_contigs.fa.nsq +0 -0
  98. data/test/data/BS00068396_51_contigs.nhr +0 -0
  99. data/test/data/BS00068396_51_contigs.nin +0 -0
  100. data/test/data/BS00068396_51_contigs.nsq +0 -0
  101. data/test/data/BS00068396_51_exonerate.tab +6 -0
  102. data/test/data/BS00068396_51_for_polymarker.txt +1 -0
  103. data/test/data/BS00068396_51_genes.txt +14 -0
  104. data/test/data/IWGSC_CSS_1AL_scaff_1455974.fa +112 -0
  105. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa +2304 -0
  106. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
  107. data/test/data/LIB1716.bam +0 -0
  108. data/test/data/LIB1716.bam.bai +0 -0
  109. data/test/data/LIB1719.bam +0 -0
  110. data/test/data/LIB1719.bam.bai +0 -0
  111. data/test/data/LIB1721.bam +0 -0
  112. data/test/data/LIB1721.bam.bai +0 -0
  113. data/test/data/LIB1722.bam +0 -0
  114. data/test/data/LIB1722.bam.bai +0 -0
  115. data/test/data/PST130_7067.csv +1 -0
  116. data/test/data/PST130_7067.fa +2 -0
  117. data/test/data/PST130_7067.fa.fai +1 -0
  118. data/test/data/PST130_7067.fa.ndb +0 -0
  119. data/test/data/PST130_7067.fa.nhr +0 -0
  120. data/test/data/PST130_7067.fa.nin +0 -0
  121. data/test/data/PST130_7067.fa.not +0 -0
  122. data/test/data/PST130_7067.fa.nsq +0 -0
  123. data/test/data/PST130_7067.fa.ntf +0 -0
  124. data/test/data/PST130_7067.fa.nto +0 -0
  125. data/test/data/PST130_reverse_primer.csv +1 -0
  126. data/test/data/S22380157.fa +16 -0
  127. data/test/data/S22380157.fa.fai +1 -0
  128. data/test/data/S22380157.vcf +67 -0
  129. data/test/data/S58861868/LIB1716.bam +0 -0
  130. data/test/data/S58861868/LIB1716.sam +651 -0
  131. data/test/data/S58861868/LIB1719.bam +0 -0
  132. data/test/data/S58861868/LIB1719.sam +805 -0
  133. data/test/data/S58861868/LIB1721.bam +0 -0
  134. data/test/data/S58861868/LIB1721.sam +1790 -0
  135. data/test/data/S58861868/LIB1722.bam +0 -0
  136. data/test/data/S58861868/LIB1722.sam +1271 -0
  137. data/test/data/S58861868/S58861868.fa +16 -0
  138. data/test/data/S58861868/S58861868.fa.fai +1 -0
  139. data/test/data/S58861868/S58861868.vcf +76 -0
  140. data/test/data/S58861868/header.txt +9 -0
  141. data/test/data/S58861868/merged.bam +0 -0
  142. data/test/data/S58861868/merged_reheader.bam +0 -0
  143. data/test/data/S58861868/merged_reheader.bam.bai +0 -0
  144. data/test/data/Test3Aspecific.csv +2 -0
  145. data/test/data/Test3Aspecific_contigs.fa +6 -0
  146. data/test/data/bfr_out_test.csv +5 -0
  147. data/test/data/chr1A_C1145499T/chr1A_C1145499T.csv +1 -0
  148. data/test/data/chr1A_G540414846C/chr1A_G540414846C.csv +1 -0
  149. data/test/data/chr1A_G540414846C/chr1A_G540414846C.fa +2 -0
  150. data/test/data/chr1A_T517634750C/chr1A_T517634750C.csv +1 -0
  151. data/test/data/chr2D_C112180134A/chr2D_C112180134A.csv +1 -0
  152. data/test/data/chr4D_C14473543T/chr4D_C14473543T.csv +1 -0
  153. data/test/data/chr4D_C14473543T/chr4D_C14473543T.fa +2 -0
  154. data/test/data/headerMergeed.txt +9 -0
  155. data/test/data/headerS2238015 +1 -0
  156. data/test/data/mergedLibs.bam +0 -0
  157. data/test/data/mergedLibsReheader.bam +0 -0
  158. data/test/data/mergedLibsSorted.bam +0 -0
  159. data/test/data/mergedLibsSorted.bam.bai +0 -0
  160. data/test/data/patological_cases5D.csv +1 -0
  161. data/test/data/primer_3_input_header_test +5 -0
  162. data/test/data/short_primer_design_test.csv +10 -0
  163. data/test/data/some_tests/some_tests.csv +201 -0
  164. data/test/data/test_from_mutant.csv +3 -0
  165. data/test/data/test_iselect.csv +196 -0
  166. data/test/data/test_iselect_reference.fa +1868 -0
  167. data/test/data/test_iselect_reference.fa.fai +934 -0
  168. data/test/data/test_primer3_error.csv +4 -0
  169. data/test/data/test_primer3_error_contigs.fa +10 -0
  170. data/test/test_bfr.rb +135 -0
  171. data/test/test_blast.rb +47 -0
  172. data/test/test_exon_container.rb +17 -0
  173. data/test/test_exonearate.rb +48 -0
  174. data/test/test_integration.rb +76 -0
  175. data/test/test_snp_parsing.rb +121 -0
  176. data/test/test_wrong_selection.sh +5 -0
  177. metadata +356 -0
@@ -0,0 +1,820 @@
1
+ require 'pp'
2
+ module Bio::DB::Primer3
3
# Error type raised by the code in this module (for example when primer3_core
# exits with a non-zero status or a primer pair is incomplete).
class Primer3Exception < RuntimeError; end
5
+
6
# Loads primer3 preferences from a KEY=VALUE text file into +defaults+.
#
# file     - path of the preferences file.
# defaults - Hash that is updated in place; existing keys are overwritten.
#
# Keys are downcased and stored as symbols, values are kept as strings.
# Only the first "=" separates key from value, so a value may itself contain
# "=". Blank lines are skipped. A key with an empty value ("KEY=") or with no
# "=" at all is stored with a nil value.
#
# Returns the same +defaults+ Hash.
def self.read_primer_preferences(file, defaults)
  File.open(file) do |f|
    f.each_line do |line|
      entry = line.chomp
      next if entry.strip.empty?

      key, value = entry.split("=", 2)
      next if key.nil? || key.empty?

      # Keep the historical contract: an empty value is reported as nil.
      value = nil if value && value.empty?
      defaults[key.downcase.to_sym] = value
    end
  end

  defaults
end
18
+
19
# Writes the global primer3 settings, one KEY=VALUE line each, to +file+.
#
# file  - any object responding to #puts (an open File, a StringIO, ...).
# opts2 - Hash of settings that override or extend the defaults below. Keys
#         are upcased when written.
#
# The default thermodynamic parameters path is the conf/primer3_config
# directory three levels above this file's directory, with a trailing "/".
#
# Returns the merged settings Hash.
def self.prepare_input_file(file, opts2={})
  # Resolve relative to this file's directory explicitly. Gluing '../..'
  # straight onto dirname (with no separator) only worked by accident and
  # resolved to the wrong place when dirname was ".".
  config_dir = File.expand_path(
    File.join('..', '..', '..', 'conf', 'primer3_config'),
    File.dirname(__FILE__)
  ) + '/'

  defaults = {
    :primer_product_size_range => "50-150",
    :primer_max_size => 25,
    :primer_lib_ambiguity_codes_consensus => 1,
    :primer_liberal_base => 1,
    :primer_num_return => 5,
    :primer_explain_flag => 1,
    :primer_thermodynamic_parameters_path => config_dir
  }

  defaults.merge(opts2).each do |key, value|
    file.puts "#{key.to_s.upcase}=#{value}\n"
  end
end
34
+
35
# Runs primer3_core on an input file and stores its standard output.
#
# opts - Hash with:
#        :in      - path of the primer3 input file (read completely and fed
#                   to the process on stdin).
#        :out     - path the captured stdout is written to on success.
#        :timeout - seconds to wait before the child is killed (default 600).
#
# The caller's Hash is left untouched.
#
# Raises Primer3Exception, including the captured stderr, when the exit
# status is not 0.
def self.run(opts={})
  puts "Primer3.run running..."
  f_in = opts[:in]
  f_out = opts[:out]
  timeout = opts[:timeout] || 600

  primer_3_in = File.read(f_in)
  stdout = ''
  stderr = ''
  # The block receives the child pid from systemu and acts as a watchdog:
  # after +timeout+ seconds it sends signal 9 to the child.
  status = systemu "primer3_core", 0 => primer_3_in, 1 => stdout, 2 => stderr do |cid|
    sleep timeout
    Process.kill 9, cid
  end

  unless status.exitstatus == 0
    raise Primer3Exception.new(), "Error running primer3. Command line was 'primer3_core'\nPrimer3 STDERR was:\n#{stderr}"
  end

  File.open(f_out, 'w') { |f| f.write(stdout) }
end
55
+
56
# A SNP marker plus the primer3 results collected for its two lines.
#
# Primer3 records are added with #add_record. #values flattens the selected
# primers into one row and #print_primers renders that row as a CSV line.
class SNP

  attr_accessor :gene, :original, :position, :snp, :chromosome, :line_1, :line_2
  attr_accessor :primer3_line_1, :primer3_line_2, :template_length
  attr_accessor :primers_line_1, :primers_line_2
  attr_accessor :used_contigs
  attr_accessor :snp_from
  attr_accessor :regions
  attr_accessor :primer3_errors
  attr_accessor :repetitive
  attr_accessor :hit_count
  attr_accessor :snp_type

  # Builds a SNP from a "gene,original,position,snp" line.
  #
  # The argument is not modified. Missing fields are left nil and a missing
  # position becomes 0, so blank lines can be filtered out by the caller.
  def self.parse(reg_str)
    snp = SNP.new
    snp.gene, snp.original, snp.position, snp.snp = reg_str.chomp.split(",")
    snp.position = snp.position.to_i
    snp.original = snp.original.upcase if snp.original
    snp.snp = snp.snp.upcase if snp.snp
    snp
  end

  # Yields one SNP per line of +filename+, skipping lines whose position is
  # not greater than 0.
  def self.parse_file(filename)
    File.open(filename) do |f|
      f.each_line do |line|
        snp = SNP.parse(line)
        yield snp if snp.position > 0
      end
    end
  end

  def initialize
    @primers_line_1 = SortedSet.new
    @primers_line_2 = SortedSet.new
    @regions = SortedSet.new
    @primer3_errors = Set.new
  end

  # Display name of the marker for line 1.
  def line_1_name
    "#{gene}:#{position}#{original}>#{snp} #{line_1}"
  end

  # Display name of the marker for line 2.
  def line_2_name
    "#{gene}:#{position}#{original}>#{snp} #{line_2}"
  end

  # Key used by KASPContainer: "gene:<original><position><snp>:<chromosome>".
  def to_s
    "#{gene}:#{original}#{position}#{snp}:#{marker_chromosome}"
  end

  # Looks +primer+ up among the left primers of every record of line 1 and
  # then line 2. Returns the first Tm found, or "NA".
  def find_left_primer_temp(primer)
    [primers_line_1, primers_line_2].each do |records|
      records.each do |pr|
        tm = pr.find_left_tm(primer)
        return tm if tm
      end
    end
    "NA"
  end

  # First record of line 1 whose allele-swapped left primer has a known Tm.
  def find_primer_pair_first
    primers_line_1.find { |pr| find_left_primer_temp(pr.left_primer_snp(self)) != "NA" }
  end

  # First record of line 2 whose allele-swapped left primer has a known Tm.
  def find_primer_pair_second
    primers_line_2.find { |pr| find_left_primer_temp(pr.left_primer_snp(self)) != "NA" }
  end

  # Output row for this SNP (memoized once built successfully).
  #
  # Layout: 0 gene, 1 SNP, 2 template length, 3 chromosome, 4 number of
  # regions, 5 regions joined by "|", 6 snp_type, 7 primer for line 1,
  # 8 primer for line 2, 9 common (right) primer, 10 type, 11 orientation,
  # 12 Tm of column 7, 13 Tm of column 8, 14 Tm of column 9, 15 label of the
  # record the primers were taken from, 16 product size, 17 primer3 errors
  # (only filled in when no primers were found, nil otherwise).
  def values
    return @values if @values
    total_columns_before_messages = 17

    # Built in a local array so an exception cannot leave a partial row
    # memoized in @values.
    row = [
      gene,
      "#{original}#{position}#{snp}",
      template_length,
      marker_chromosome,
      regions.size,
      regions.to_a.join("|"), # to_a: Set only gained #join in newer Rubies
      snp_type
    ]
    row.concat(primer_columns)

    if row.size < total_columns_before_messages
      row[total_columns_before_messages] = primer3_errors.to_a.join("|")
    else
      row << nil
    end
    @values = row
  end

  # CSV line: #values followed by the repetitive flag and the hit count.
  def print_primers
    (values + [@repetitive, @hit_count]).join(",")
  end

  # True when a primer for line 1 is present in #values.
  def found_primers?
    !values[7].nil?
  end

  def first_primer
    values[7] || ""
  end

  def second_primer
    values[8] || ""
  end

  def common_primer
    values[9] || ""
  end

  def product_size
    values[16].to_i
  end

  def orientation
    values[11] || 'unknown'
  end

  # Amplicon sketch for the first primer: primer, 'n' padding up to the
  # product size, reverse-complemented common primer.
  def first_product
    build_product(first_primer)
  end

  # Same as #first_product, using the second primer.
  def second_product
    build_product(second_primer)
  end

  # FASTA text with one entry per sequence returned by #realigned_primers.
  def realigned_primers_fasta
    ret_str = ""
    realigned_primers.each_pair do |name, seq|
      ret_str << ">#{self.to_s}-#{name}\n#{seq}\n"
    end
    ret_str
  end

  # Aligns both products with the surrounding exon sequences of +snp_from+
  # using MAFFT (memoized).
  def realigned_primers
    return @realigned_primers if @realigned_primers

    sequences_to_align = Hash.new
    sequences_to_align["first_product"] = first_product
    sequences_to_align["second_product"] = second_product
    sequences_to_align.merge!(snp_from.surrounding_exon_sequences)
    # NOTE(review): two entries are always present at this point, so this
    # guard looks unreachable - confirm the intended threshold.
    if sequences_to_align.size == 1
      @realigned_primers = sequences_to_align
      return @realigned_primers
    end

    options = ['--maxiterate', '1000', '--localpair', '--quiet']
    mafft = Bio::MAFFT.new("mafft", options)
    report = mafft.query_align(sequences_to_align)
    @realigned_primers = report.alignment
    @realigned_primers
  end

  # Registers one primer3 result.
  #
  # A record carrying a primer_error only contributes that error. Otherwise
  # the record must belong to line_1 or line_2 (Primer3Exception if not).
  # When it returned left primers it is stored and may become the best record
  # of its line; when it returned none, its three "explain" strings are added
  # to primer3_errors with commas turned into semicolons.
  def add_record(primer3record)
    @primer3_errors = Set.new unless @primer3_errors
    @template_length = primer3record.sequence_template.size
    if primer3record.primer_error != nil
      primer3_errors << primer3record.primer_error
      return
    end

    record_line = primer3record.line
    if record_line == @line_1
      @line_1_template = primer3record.sequence_template
    elsif record_line == @line_2
      @line_2_template = primer3record.sequence_template
    else
      raise Primer3Exception, "#{primer3record.line} is not recognized (#{line_1}, #{line_2})"
    end

    if primer3record.primer_left_num_returned.to_i > 0
      if record_line == @line_1
        primers_line_1 << primer3record
        @primer3_line_1 = primer3record if not @primer3_line_1 or @primer3_line_1 > primer3record
      else
        primers_line_2 << primer3record
        @primer3_line_2 = primer3record if not @primer3_line_2 or @primer3_line_2 > primer3record
      end
    else
      # gsub, not gsub!: the bang form returns nil when there is no comma to
      # replace, which used to blank out the explanation.
      primer3_errors << "#{primer3record.line}(#{primer3record.orientation}):#{primer3record.primer_left_explain.gsub(',', ';')}"
      primer3_errors << "common(#{primer3record.orientation}#{primer3record.type}):#{primer3record.primer_right_explain.gsub(',', ';')}"
      primer3_errors << "pair(#{primer3record.orientation}#{primer3record.type}):#{primer3record.primer_pair_explain.gsub(',', ';')}"
    end
  end

  private

  # Chromosome of the originating SNP when there is one, otherwise the value
  # assigned to this object.
  def marker_chromosome
    snp_from ? snp_from.chromosome : chromosome
  end

  # The ten primer columns (7..16 of #values), or [] when nothing was found.
  def primer_columns
    one = primer3_line_1
    two = primer3_line_2
    if one && two
      paired_primer_columns(one, two)
    elsif one
      columns_for(one, one.left_primer, one.left_primer_snp(self), one.best_pair.left.tm, "NA", "first+")
    elsif two
      columns_for(two, two.left_primer_snp(self), two.left_primer, "NA", two.best_pair.left.tm, "second+")
    else
      []
    end
  end

  # Primer columns when both lines have a best record.
  def paired_primer_columns(one, two)
    # Cut the equivalent primer out of each line at the coordinates of the
    # other line's best left primer and look up whether primer3 reported it.
    primer_1 = one.left_primer_with_coordinates(two.left_coordinates, two.orientation)
    primer_1_tm = find_left_primer_temp(primer_1)
    primer_2 = two.left_primer_with_coordinates(one.left_coordinates, one.orientation)
    primer_2_tm = find_left_primer_temp(primer_2)

    if one < two && primer_2_tm != "NA"
      return columns_for(one, one.left_primer, primer_2, one.best_pair.left.tm, primer_2_tm, "first")
    elsif primer_1_tm != "NA"
      return columns_for(two, primer_1, two.left_primer, primer_1_tm, two.best_pair.left.tm, "second")
    end

    # Neither best record works for both lines: fall back to any record whose
    # allele-swapped primer is known.
    first_candidate = find_primer_pair_first
    second_candidate = find_primer_pair_second

    if first_candidate
      primer_2 = two.left_primer_with_coordinates(first_candidate.left_coordinates, first_candidate.orientation)
      primer_2_tm = find_left_primer_temp(primer_2)
    end
    if second_candidate
      primer_1 = one.left_primer_with_coordinates(second_candidate.left_coordinates, second_candidate.orientation)
      primer_1_tm = find_left_primer_temp(primer_1)
    end

    if first_candidate && second_candidate && first_candidate < second_candidate
      columns_for(first_candidate, first_candidate.left_primer, primer_2,
                  first_candidate.best_pair.left.tm, primer_2_tm, "first-")
    elsif second_candidate
      columns_for(second_candidate, primer_1, second_candidate.left_primer,
                  primer_1_tm, second_candidate.best_pair.left.tm, "second-")
    elsif first_candidate
      # Tm columns follow the primer columns (line 1 first); they used to be
      # emitted in swapped order in this branch only.
      columns_for(first_candidate, first_candidate.left_primer, primer_2,
                  first_candidate.best_pair.left.tm, primer_2_tm, "first/")
    else
      []
    end
  end

  # Assembles the ten primer columns taken from +record+.
  def columns_for(record, primer_a, primer_b, tm_a, tm_b, label)
    [
      primer_a,
      primer_b,
      record.right_primer,
      record.type.to_s,
      record.orientation.to_s,
      tm_a,
      tm_b,
      record.best_pair.right.tm,
      label,
      record.best_pair.product_size
    ]
  end

  # Shared body of #first_product and #second_product.
  def build_product(left)
    right = common_primer
    # Never ask for a negative repeat count (String#* raises on it).
    gap = [product_size - left.size - right.size, 0].max
    product = left + ('n' * gap) + Primer3Record.reverse_complement_string(right)
    product = Primer3Record.reverse_complement_string(product) if orientation == 'reverse'
    product
  end
end
401
+
402
+ class Primer3Record
403
+ include Comparable
404
+ attr_accessor :properties, :polymorphism
405
+ attr_accessor :scores
406
+
407
+
408
+ def best_pair
409
+ return @best_pair if @best_pair
410
+ @best_pair = nil
411
+ @primerPairs.each do | primer |
412
+ @best_pair = primer if @best_pair.nil?
413
+ @best_pair = primer if primer.size < @best_pair.size
414
+ end
415
+ #@best_pair = @primerPairs.min
416
+ @best_pair
417
+ end
418
+
419
+ def primer_error
420
+ return @properties[:primer_error] if @properties[:primer_error]
421
+ return nil
422
+ end
423
+
424
+ def method_missing(method_name, *args)
425
+ return @properties[method_name] if @properties[method_name]
426
+ $stderr.puts "Missing #{method_name}"
427
+ $stderr.puts @properties.inspect
428
+ return "" #if a property is missing, return blank.
429
+ raise NoMethodError.new()
430
+ end
431
+
432
+ def find_left_tm(primer)
433
+ last = size - 1
434
+ (0..last).each do | i |
435
+ seq_prop = "primer_left_#{i}_sequence".to_sym
436
+ # $stderr.puts seq_prop
437
+ temp_property = "primer_left_#{i}_tm".to_sym
438
+ # $stderr.puts "comparing #{@properties[seq_prop] } == #{primer}"
439
+ return @properties[temp_property] if @properties[seq_prop] == primer
440
+
441
+ end
442
+ return nil
443
+ end
444
+
445
+ def score
446
+ ret = 0
447
+ ret += @scores[type]
448
+ ret += @scores[:exon] if exon?
449
+ ret -= product_length
450
+ ret
451
+ end
452
+
453
+ def <=>(anOther)
454
+ return anOther.score <=> score
455
+ end
456
+
457
+ def parse_coordinates(str)
458
+ coords = str.split(',')
459
+ coords[0] = coords[0].to_i
460
+ coords[1] = coords[1].to_i
461
+ coords
462
+ end
463
+
464
+
465
+ def left_coordinates
466
+ #@left_coordinates = parse_coordinates(self.primer_left_0) unless @left_coordinates
467
+ @left_coordinates = best_pair.left.coordinates
468
+ @left_coordinates
469
+ end
470
+
471
+ def right_coordinates
472
+ unless @right_coordinates
473
+ @right_coordinates = best_pair.right.coordinates
474
+ @right_coordinates[0] = @right_coordinates[0] - @right_coordinates[1] + 1
475
+ end
476
+ @right_coordinates
477
+ end
478
+
479
+ def left_primer
480
+ #@left_primer = self.sequence_template[left_coordinates[0],left_coordinates[1]] unless @left_primer
481
+ @left_primer = best_pair.left.sequence
482
+ @left_primer
483
+ end
484
+
485
+ def left_primer_snp(snp)
486
+ tmp_primer = String.new(left_primer)
487
+ if self.orientation == :forward
488
+ base_original = snp.original
489
+ base_snp = snp.snp
490
+ elsif self.orientation == :reverse
491
+ #puts self.inspect
492
+ base_original =Primer3Record.reverse_complement_string(snp.original )
493
+ base_snp = Primer3Record.reverse_complement_string(snp.snp)
494
+ else
495
+ raise Primer3Exception.new "#{self.orientation} is not a valid orientation"
496
+ end
497
+
498
+ #puts "#{snp.to_s} #{self.orientation} #{tmp_primer[-1] } #{base_original} #{base_snp}"
499
+ if tmp_primer[-1] == base_original
500
+ tmp_primer[-1] = base_snp
501
+ elsif tmp_primer[-1] == base_snp
502
+ tmp_primer[-1] = base_original
503
+ else
504
+ raise Primer3Exception.new "#{tmp_primer} doesnt end in a base in the SNP #{snp.to_s}"
505
+ end
506
+ #puts "tmp_primer: #{tmp_primer}"
507
+ return tmp_primer
508
+ end
509
+
510
+ def left_primer_with_coordinates(coordinates, other_orientation)
511
+
512
+ seq = self.sequence_template
513
+ seq = Primer3Record.reverse_complement_string(seq) if self.orientation != other_orientation
514
+ seq[coordinates[0],coordinates[1]]
515
+ end
516
+
517
+ def self.reverse_complement_string(sequenc_str)
518
+ complement = sequenc_str.tr('atgcrymkdhvbswnATGCRYMKDHVBSWN', 'tacgyrkmhdbvswnTACGYRKMHDBVSWN')
519
+ complement.reverse!
520
+ end
521
+
522
+ def right_primer_delete
523
+ @right_primer = self.sequence_template[right_coordinates[0],right_coordinates[1]] unless @right_primer
524
+ @right_primer = Primer3Record.reverse_complement_string(@right_primer)
525
+ @right_primer
526
+ end
527
+
528
+ def right_primer
529
+ return best_pair.right.sequence
530
+ end
531
+
532
+ def product_length
533
+ return best_pair.size
534
+ end
535
+
536
+ def initialize
537
+ @properties = Hash.new
538
+ @scores = Hash.new
539
+ @scores[:chromosome_specific] = 1000
540
+ @scores[:chromosome_semispecific] = 100
541
+ @scores[:chromosome_nonspecific] = 0
542
+ @scores[:exon] = 50
543
+
544
+ end
545
+
546
+ def snp
547
+ return @snp if @snp
548
+ parse_header
549
+ @snp
550
+ end
551
+
552
+ #CL3339Contig1:T509C AvocetS chromosome_specific exon 4D forward
553
+ def parse_header
554
+ #puts "Parsing header: '#{self.sequence_id}'"
555
+ arr = self.sequence_id.split(" ")
556
+
557
+ #if arr.size == 7 This validation can be useful to get the best primers regardless of the chromosome,
558
+ #But it is commented as it will require further testing.
559
+ @snp, @line, @type, @in, @polymorphism, @chromosome, @orientation = arr
560
+ #else
561
+ # if arr.size == 6
562
+ # @snp, @line, @type, @in, @polymorphism, @orientation = arr
563
+ # @chromosome = ""
564
+ # end
565
+ #end
566
+
567
+ @type = @type.to_sym
568
+ if @in
569
+ @in = @in.to_sym == :exon
570
+ else
571
+ @exon = false
572
+ end
573
+
574
+ if @polymorphism.to_sym == :homoeologous
575
+ @homoeologous = true
576
+ else
577
+ @homoeologous = false
578
+ end
579
+ @parsed = true
580
+ @orientation = @orientation.to_sym
581
+ end
582
+
583
+ def orientation
584
+ return @orientation if @parsed
585
+ parse_header
586
+ @orientation
587
+ end
588
+
589
+ def chromosome
590
+ return @chromosome if @parsed
591
+ parse_header
592
+ @chromosome
593
+ end
594
+
595
+ def homoeologous?
596
+ return @homoeologous if @parsed
597
+ parse_header
598
+ @homoeologous
599
+ end
600
+
601
+ def type
602
+ return @type if @parsed
603
+ parse_header
604
+ @type
605
+ end
606
+
607
+ def exon?
608
+ return @exon if @parsed
609
+ parse_header
610
+ @exon
611
+ end
612
+
613
+ def line
614
+ return @line if @parsed
615
+ parse_header
616
+ @line
617
+ end
618
+
619
+ def size
620
+ @properties[:primer_pair_num_returned].to_i
621
+ end
622
+
623
+ def parse_blocks
624
+ total_blocks = size - 1
625
+ @primerPairs = Array.new
626
+ for i in 0..total_blocks
627
+ @primerPairs << PrimerPair.new(self, i)
628
+ end
629
+
630
+ end
631
+
632
+ def self.parse_file(filename, scores: nil)
633
+ File.open(filename) do | f |
634
+ record = Primer3Record.new
635
+ record.scores = scores if scores
636
+ f.each_line do | line |
637
+ line.chomp!
638
+ if line == "="
639
+
640
+ record.parse_blocks
641
+ yield record
642
+ record = Primer3Record.new
643
+ record.scores = scores if scores
644
+ else
645
+ tokens = line.split("=")
646
+ i = 0
647
+ reg = ""
648
+ #TODO: Look if there is a join function or something similar to go around this...
649
+ tokens.each do |tok|
650
+ if i > 0
651
+ if i > 1
652
+ reg << "="
653
+ end
654
+ reg << tok
655
+ end
656
+ i+=1
657
+ end
658
+ record.properties[tokens[0].downcase.to_sym] = reg
659
+ end
660
+ end
661
+ end
662
+ end
663
+ end
664
+
665
+
666
# One primer of a PrimerPair. Its attributes live in a string-keyed Hash
# filled through #set_value and are read back as methods (primer.tm,
# primer.sequence, ...).
class Primer
  attr_accessor :pair

  def initialize
    @values = Hash.new
  end

  # Stores +value+ under +key+ (callers in this file use String keys).
  def set_value(key, value)
    @values[key] = value
  end

  # Returns the stored value named like the method, including false.
  # Raises NoMethodError when nothing (or nil) is stored under that name.
  def method_missing(m, *args, &block)
    stored = @values[m.to_s]
    return stored unless stored.nil?

    raise NoMethodError, "There's no method called #{m}, available: #{@values.keys.to_s}."
  end

  # Keeps respond_to? in step with #method_missing.
  def respond_to_missing?(m, include_private = false)
    !@values[m.to_s].nil? || super
  end
end
684
+
685
# One primer pair of a primer3 record.
#
# Properties named primer_left_<index>... and primer_right_<index>... are
# copied into the #left and #right Primer objects. Every other property
# whose third underscore-separated token is <index> is kept on the pair
# itself and read back as a method (pair.penalty, pair.product_size, ...).
# Pairs are Comparable by penalty.
class PrimerPair
  include Comparable
  attr_reader :record
  attr_reader :left, :right

  # record - object exposing #size and #properties (a Primer3Record).
  # index  - zero-based pair number inside the record.
  #
  # Raises Primer3Exception when +index+ is not below record.size or when
  # either side has no properties for that index.
  def initialize(record, index)
    raise Primer3Exception.new(), "Index #{index} is greater than the number of records" unless index < record.size
    @record = record
    @left = Primer.new
    @right = Primer.new
    @values = Hash.new

    [@left, @right].each do |primer|
      primer.set_value("added", false)
      primer.pair = self
    end

    index_s = index.to_s
    record.properties.each do |key, value|
      tokens = key.to_s.split("_")
      next unless tokens.size > 2 && tokens[2] == index_s

      primer = case tokens[1]
               when "left" then @left
               when "right" then @right
               end

      if primer.nil?
        @values[tokens[3..6].join("_")] = value
        next
      end

      primer.set_value("added", true)
      if tokens.size == 3
        # Bare primer_<side>_<index> holds "start,length".
        primer.set_value("coordinates", parse_coordinates(value))
      else
        # Everything except the sequence is stored as a Float.
        converted = tokens[3] == "sequence" ? value : value.to_f
        primer.set_value(tokens[3..6].join("_"), converted)
      end
    end

    raise Primer3Exception.new(), "The pair is not complete (l:#{left.added}, r:#{right.added})" if @left.added == false or @right.added == false
  end

  # "start,length" -> [start, length] as integers.
  def parse_coordinates(str)
    coords = str.split(',')
    coords[0] = coords[0].to_i
    coords[1] = coords[1].to_i
    coords
  end

  # Product size as an Integer.
  def size
    product_size.to_i
  end

  def <=>(anOther)
    penalty.to_f <=> anOther.penalty.to_f
  end

  # Returns the pair-level property named like the method.
  # Raises NoMethodError when it is absent.
  def method_missing(m, *args, &block)
    stored = @values[m.to_s]
    return stored if stored

    raise NoMethodError, "There's no method called #{m}. Available methods: #{@values.keys.to_s}"
  end

  # Keeps respond_to? in step with #method_missing.
  def respond_to_missing?(m, include_private = false)
    (@values[m.to_s] ? true : false) || super
  end
end
753
+
754
# Collects SNP objects keyed by SNP#to_s, attaches primer3 records to them
# and prints the resulting primer tables.
class KASPContainer

  attr_accessor :line_1, :line_2
  attr_accessor :snp_hash
  attr_accessor :scores

  # Registers every SNP parsed from +filename+, tagging it with this
  # container's line names.
  def add_snp_file(filename)
    @snp_hash = Hash.new unless @snp_hash
    SNP.parse_file(filename) do |snp|
      snp.line_1 = @line_1
      snp.line_2 = @line_2
      @snp_hash[snp.to_s] = snp
    end
  end

  # Registers a primer-design SNP built from +snp_in+ and returns it.
  #
  # +snp_in+ must respond to gene, original, errors, position, snp,
  # repetitive, hit_count, snp_type and exon_list; it is kept as snp_from.
  def add_snp(snp_in)
    #TODO: Here we need to also copy the errors that will be printed.
    @snp_hash = Hash.new unless @snp_hash
    snp = SNP.new
    snp.gene = snp_in.gene
    snp.original = snp_in.original
    snp.primer3_errors = Set.new snp_in.errors
    snp.position = snp_in.position
    snp.snp = snp_in.snp
    snp.repetitive = snp_in.repetitive
    snp.hit_count = snp_in.hit_count
    snp.snp_type = snp_in.snp_type
    snp.line_1 = @line_1
    snp.line_2 = @line_2
    snp.snp_from = snp_in
    # One inner list of target-region strings per entry of exon_list.
    snp.regions = snp_in.exon_list.values.map { |exons| exons.map { |exon| exon.target_region.to_s } }
    @snp_hash[snp.to_s] = snp
    snp
  end

  # Parses primer3 output from +filename+ and hands every record to the SNP
  # registered under "<record snp>:<record chromosome>".
  #
  # Raises Primer3Exception when no SNP is registered under that key.
  def add_primers_file(filename)
    @snp_hash = Hash.new unless @snp_hash
    Primer3Record.parse_file(filename, scores: @scores) do |primer3record|
      key = "#{primer3record.snp.to_s}:#{primer3record.chromosome}"
      current_snp = @snp_hash[key]
      raise Primer3Exception, "No SNP registered for primer3 record '#{key}'" unless current_snp
      current_snp.add_record(primer3record)
    end
  end

  # One line per SNP as produced by SNP#print_primers ("" when the container
  # is empty).
  def print_primers
    (snp_hash || {}).values.map { |snp| "#{snp.print_primers}\n" }.join
  end

  # Tab-separated primer list for the SNPs that have primers: the first
  # primer prefixed by +tail_a+, the second by +tail_b+, then the common one.
  def print_primers_with_tails(tail_a: "GAAGGTCGGAGTCAACGGATT", tail_b: "GAAGGTGACCAAGTTCATGCT")
    str = ""
    (snp_hash || {}).each_value do |snp|
      next unless snp.found_primers?

      str << "#{snp.gene}#{snp.original}_1st\t#{tail_a}#{snp.first_primer}\n"
      str << "#{snp.gene}#{snp.snp}_2nd\t#{tail_b}#{snp.second_primer}\n"
      str << "#{snp.gene}_common\t#{snp.common_primer}\n"
    end
    str
  end

end
819
+ end
820
+