bio-polymarker 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. checksums.yaml +7 -0
  2. data/.travis.yml +24 -0
  3. data/Gemfile +23 -0
  4. data/README.md +205 -0
  5. data/Rakefile +61 -0
  6. data/SECURITY.md +16 -0
  7. data/VERSION +1 -0
  8. data/bin/bfr.rb +128 -0
  9. data/bin/blast_triads.rb +166 -0
  10. data/bin/blast_triads_promoters.rb +192 -0
  11. data/bin/count_variations.rb +36 -0
  12. data/bin/filter_blat_by_target_coverage.rb +69 -0
  13. data/bin/filter_exonerate_by_identity.rb +38 -0
  14. data/bin/find_best_blat_hit.rb +33 -0
  15. data/bin/find_best_exonerate.rb +17 -0
  16. data/bin/get_longest_hsp_blastx_triads.rb +66 -0
  17. data/bin/hexaploid_primers.rb +168 -0
  18. data/bin/homokaryot_primers.rb +183 -0
  19. data/bin/mafft_triads.rb +120 -0
  20. data/bin/mafft_triads_promoters.rb +403 -0
  21. data/bin/map_markers_to_contigs.rb +66 -0
  22. data/bin/marker_to_vcf.rb +241 -0
  23. data/bin/markers_in_region.rb +42 -0
  24. data/bin/mask_triads.rb +169 -0
  25. data/bin/polymarker.rb +410 -0
  26. data/bin/polymarker_capillary.rb +443 -0
  27. data/bin/polymarker_deletions.rb +350 -0
  28. data/bin/snp_position_to_polymarker.rb +101 -0
  29. data/bin/snps_between_bams.rb +107 -0
  30. data/bin/tag_stats.rb +75 -0
  31. data/bin/vcfLineToTable.rb +56 -0
  32. data/bin/vcfToPolyMarker.rb +82 -0
  33. data/bio-polymarker.gemspec +227 -0
  34. data/conf/defaults.rb +1 -0
  35. data/conf/primer3_config/dangle.dh +128 -0
  36. data/conf/primer3_config/dangle.ds +128 -0
  37. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  38. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  39. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  40. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  41. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  42. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  43. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  44. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  45. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  46. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  47. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  48. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  49. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  50. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  51. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  52. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  53. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  54. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  55. data/conf/primer3_config/loops.dh +30 -0
  56. data/conf/primer3_config/loops.ds +30 -0
  57. data/conf/primer3_config/stack.dh +256 -0
  58. data/conf/primer3_config/stack.ds +256 -0
  59. data/conf/primer3_config/stackmm.dh +256 -0
  60. data/conf/primer3_config/stackmm.ds +256 -0
  61. data/conf/primer3_config/tetraloop.dh +77 -0
  62. data/conf/primer3_config/tetraloop.ds +77 -0
  63. data/conf/primer3_config/triloop.dh +16 -0
  64. data/conf/primer3_config/triloop.ds +16 -0
  65. data/conf/primer3_config/tstack.dh +256 -0
  66. data/conf/primer3_config/tstack2.dh +256 -0
  67. data/conf/primer3_config/tstack2.ds +256 -0
  68. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  69. data/lib/bio/BFRTools.rb +465 -0
  70. data/lib/bio/BIOExtensions.rb +153 -0
  71. data/lib/bio/PolyploidTools/ChromosomeArm.rb +63 -0
  72. data/lib/bio/PolyploidTools/ExonContainer.rb +245 -0
  73. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  74. data/lib/bio/PolyploidTools/Mask.rb +116 -0
  75. data/lib/bio/PolyploidTools/NoSNPSequence.rb +292 -0
  76. data/lib/bio/PolyploidTools/PrimerRegion.rb +30 -0
  77. data/lib/bio/PolyploidTools/SNP.rb +804 -0
  78. data/lib/bio/PolyploidTools/SNPMutant.rb +86 -0
  79. data/lib/bio/PolyploidTools/SNPSequence.rb +55 -0
  80. data/lib/bio/db/blast.rb +114 -0
  81. data/lib/bio/db/exonerate.rb +333 -0
  82. data/lib/bio/db/primer3.rb +820 -0
  83. data/lib/bio-polymarker.rb +28 -0
  84. data/test/data/7B_amplicon_test.fa +12 -0
  85. data/test/data/7B_amplicon_test.fa.fai +1 -0
  86. data/test/data/7B_amplicon_test_reference.fa +110 -0
  87. data/test/data/7B_amplicon_test_reference.fa.fai +3 -0
  88. data/test/data/7B_marker_test.txt +1 -0
  89. data/test/data/BS00068396_51.fa +2 -0
  90. data/test/data/BS00068396_51_blast.tab +4 -0
  91. data/test/data/BS00068396_51_contigs.aln +1412 -0
  92. data/test/data/BS00068396_51_contigs.dnd +7 -0
  93. data/test/data/BS00068396_51_contigs.fa +8 -0
  94. data/test/data/BS00068396_51_contigs.fa.fai +4 -0
  95. data/test/data/BS00068396_51_contigs.fa.nhr +0 -0
  96. data/test/data/BS00068396_51_contigs.fa.nin +0 -0
  97. data/test/data/BS00068396_51_contigs.fa.nsq +0 -0
  98. data/test/data/BS00068396_51_contigs.nhr +0 -0
  99. data/test/data/BS00068396_51_contigs.nin +0 -0
  100. data/test/data/BS00068396_51_contigs.nsq +0 -0
  101. data/test/data/BS00068396_51_exonerate.tab +6 -0
  102. data/test/data/BS00068396_51_for_polymarker.txt +1 -0
  103. data/test/data/BS00068396_51_genes.txt +14 -0
  104. data/test/data/IWGSC_CSS_1AL_scaff_1455974.fa +112 -0
  105. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa +2304 -0
  106. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
  107. data/test/data/LIB1716.bam +0 -0
  108. data/test/data/LIB1716.bam.bai +0 -0
  109. data/test/data/LIB1719.bam +0 -0
  110. data/test/data/LIB1719.bam.bai +0 -0
  111. data/test/data/LIB1721.bam +0 -0
  112. data/test/data/LIB1721.bam.bai +0 -0
  113. data/test/data/LIB1722.bam +0 -0
  114. data/test/data/LIB1722.bam.bai +0 -0
  115. data/test/data/PST130_7067.csv +1 -0
  116. data/test/data/PST130_7067.fa +2 -0
  117. data/test/data/PST130_7067.fa.fai +1 -0
  118. data/test/data/PST130_7067.fa.ndb +0 -0
  119. data/test/data/PST130_7067.fa.nhr +0 -0
  120. data/test/data/PST130_7067.fa.nin +0 -0
  121. data/test/data/PST130_7067.fa.not +0 -0
  122. data/test/data/PST130_7067.fa.nsq +0 -0
  123. data/test/data/PST130_7067.fa.ntf +0 -0
  124. data/test/data/PST130_7067.fa.nto +0 -0
  125. data/test/data/PST130_reverse_primer.csv +1 -0
  126. data/test/data/S22380157.fa +16 -0
  127. data/test/data/S22380157.fa.fai +1 -0
  128. data/test/data/S22380157.vcf +67 -0
  129. data/test/data/S58861868/LIB1716.bam +0 -0
  130. data/test/data/S58861868/LIB1716.sam +651 -0
  131. data/test/data/S58861868/LIB1719.bam +0 -0
  132. data/test/data/S58861868/LIB1719.sam +805 -0
  133. data/test/data/S58861868/LIB1721.bam +0 -0
  134. data/test/data/S58861868/LIB1721.sam +1790 -0
  135. data/test/data/S58861868/LIB1722.bam +0 -0
  136. data/test/data/S58861868/LIB1722.sam +1271 -0
  137. data/test/data/S58861868/S58861868.fa +16 -0
  138. data/test/data/S58861868/S58861868.fa.fai +1 -0
  139. data/test/data/S58861868/S58861868.vcf +76 -0
  140. data/test/data/S58861868/header.txt +9 -0
  141. data/test/data/S58861868/merged.bam +0 -0
  142. data/test/data/S58861868/merged_reheader.bam +0 -0
  143. data/test/data/S58861868/merged_reheader.bam.bai +0 -0
  144. data/test/data/Test3Aspecific.csv +2 -0
  145. data/test/data/Test3Aspecific_contigs.fa +6 -0
  146. data/test/data/bfr_out_test.csv +5 -0
  147. data/test/data/chr1A_C1145499T/chr1A_C1145499T.csv +1 -0
  148. data/test/data/chr1A_G540414846C/chr1A_G540414846C.csv +1 -0
  149. data/test/data/chr1A_G540414846C/chr1A_G540414846C.fa +2 -0
  150. data/test/data/chr1A_T517634750C/chr1A_T517634750C.csv +1 -0
  151. data/test/data/chr2D_C112180134A/chr2D_C112180134A.csv +1 -0
  152. data/test/data/chr4D_C14473543T/chr4D_C14473543T.csv +1 -0
  153. data/test/data/chr4D_C14473543T/chr4D_C14473543T.fa +2 -0
  154. data/test/data/headerMergeed.txt +9 -0
  155. data/test/data/headerS2238015 +1 -0
  156. data/test/data/mergedLibs.bam +0 -0
  157. data/test/data/mergedLibsReheader.bam +0 -0
  158. data/test/data/mergedLibsSorted.bam +0 -0
  159. data/test/data/mergedLibsSorted.bam.bai +0 -0
  160. data/test/data/patological_cases5D.csv +1 -0
  161. data/test/data/primer_3_input_header_test +5 -0
  162. data/test/data/short_primer_design_test.csv +10 -0
  163. data/test/data/some_tests/some_tests.csv +201 -0
  164. data/test/data/test_from_mutant.csv +3 -0
  165. data/test/data/test_iselect.csv +196 -0
  166. data/test/data/test_iselect_reference.fa +1868 -0
  167. data/test/data/test_iselect_reference.fa.fai +934 -0
  168. data/test/data/test_primer3_error.csv +4 -0
  169. data/test/data/test_primer3_error_contigs.fa +10 -0
  170. data/test/test_bfr.rb +135 -0
  171. data/test/test_blast.rb +47 -0
  172. data/test/test_exon_container.rb +17 -0
  173. data/test/test_exonearate.rb +48 -0
  174. data/test/test_integration.rb +76 -0
  175. data/test/test_snp_parsing.rb +121 -0
  176. data/test/test_wrong_selection.sh +5 -0
  177. metadata +356 -0
@@ -0,0 +1,820 @@
1
+ require 'pp'
2
+ module Bio::DB::Primer3
3
# Error type raised by this module, e.g. when primer3_core exits with a
# non-zero status or when a primer3 record cannot be interpreted.
class Primer3Exception < RuntimeError; end
5
+
6
# Reads KEY=VALUE lines from +file+ and stores them in +defaults+.
#
# Keys are downcased and converted to symbols. Only the first "=" separates
# the key from the value, so values may themselves contain "=". A line with
# no value ("KEY" or "KEY=") stores nil. Blank lines are skipped.
#
# @param file [String] path of the preferences file
# @param defaults [Hash] hash that is updated in place
# @return [Hash] the same +defaults+ object
def self.read_primer_preferences(file, defaults)
  File.open(file) do |f|
    f.each_line do |line|
      line = line.chomp
      # A blank line has no key; it used to fail on nil.downcase.
      next if line.strip.empty?

      key, value = line.split("=", 2)
      # Keep the historical result for "KEY=": the value is nil, not "".
      value = nil if value == ""
      defaults[key.downcase.to_sym] = value
    end
  end

  defaults
end
18
+
19
# Writes the primer3 global settings to +file+, one "KEY=value" line each.
#
# The built-in settings below are merged with +opts2+ (entries in +opts2+
# win); every key is written upper-cased.
#
# @param file [#puts] destination, e.g. an open File
# @param opts2 [Hash] extra or overriding settings
# @return [Hash] the merged settings that were written
def self.prepare_input_file(file, opts2={})
  # Directory with the primer3 thermodynamic tables, relative to this file.
  config_dir = File.expand_path(File.dirname(__FILE__) + '../../../../conf/primer3_config/') + '/'
  builtin = {
    :primer_product_size_range => "50-150",
    :primer_max_size => 25,
    :primer_lib_ambiguity_codes_consensus => 1,
    :primer_liberal_base => 1,
    :primer_num_return => 5,
    :primer_explain_flag => 1,
    :primer_thermodynamic_parameters_path => config_dir
  }
  settings = builtin.merge(opts2)

  settings.each_pair do |name, setting|
    file.puts "#{name.to_s.upcase}=#{setting}\n"
  end
end
34
+
35
# Runs the external "primer3_core" program through systemu.
#
# The contents of the file named by opts[:in] are passed on standard input.
# When the process exits with status 0 its standard output is written to the
# file named by opts[:out]; otherwise a Primer3Exception carrying the captured
# standard error is raised. The process is sent signal 9 after opts[:timeout]
# seconds (600 when not given).
#
# Note: the :in and :out entries are removed from the +opts+ hash passed in.
#
# @param opts [Hash] :in (input path), :out (output path), :timeout (seconds)
# @raise [Primer3Exception] when the exit status is not 0
def self.run(opts={})
  puts "Primer3.run running..."
  timeout = opts[:timeout] || 600
  f_in = opts.delete(:in)
  f_out = opts.delete(:out)
  primer_3_in = File.read(f_in)

  stdout = ''
  stderr = ''
  status = systemu("primer3_core", 0 => primer_3_in, 1 => stdout, 2 => stderr) do |cid|
    # Watchdog block: kill the child if it is still running after the timeout.
    sleep timeout
    Process.kill 9, cid
  end

  unless status.exitstatus == 0
    raise Primer3Exception.new(), "Error running primer3. Command line was 'primer3_core'\nPrimer3 STDERR was:\n#{stderr}"
  end

  File.open(f_out, 'w') { |f| f.write(stdout) }
end
55
+
56
# A SNP together with the primer3 records designed for its two lines.
#
# Records are attached with #add_record. #values flattens the SNP
# description, the selected primers and the collected primer3 messages into
# one row, which #print_primers renders as a comma separated line.
class SNP
  # Index of the message column in #values: 7 descriptive columns followed by
  # 10 primer columns.
  TOTAL_COLUMNS_BEFORE_MESSAGES = 17

  attr_accessor :gene, :original, :position, :snp, :chromosome, :line_1, :line_2
  attr_accessor :primer3_line_1, :primer3_line_2, :template_length
  attr_accessor :primers_line_1, :primers_line_2
  attr_accessor :used_contigs
  attr_accessor :snp_from
  attr_accessor :regions
  attr_accessor :primer3_errors
  attr_accessor :repetitive
  attr_accessor :hit_count
  attr_accessor :snp_type

  def initialize
    @primers_line_1 = SortedSet.new
    @primers_line_2 = SortedSet.new
    @regions = SortedSet.new
    @primer3_errors = Set.new
  end

  # NOTE(review): the trailing "}" looks like a typo, but it is kept because
  # other code may match on the exact name.
  def line_1_name
    "#{gene}:#{position}#{original}>#{snp} #{line_1}}"
  end

  # NOTE(review): same trailing "}" as in #line_1_name, kept on purpose.
  def line_2_name
    "#{gene}:#{position}#{original}>#{snp} #{line_2}}"
  end

  # Identifier of the form "gene:<original><position><snp>:<chromosome>".
  def to_s
    "#{gene}:#{original}#{position}#{snp}:#{source_chromosome}"
  end

  # Searches the records of line 1, then line 2, for a left primer with the
  # given sequence.
  #
  # @param primer [String] primer sequence
  # @return the first truthy result of find_left_tm, or "NA" when none matches
  def find_left_primer_temp(primer)
    [primers_line_1, primers_line_2].each do |records|
      records.each do |pr|
        tm = pr.find_left_tm(primer)
        return tm if tm
      end
    end
    "NA"
  end

  # First record of line 1 whose SNP-swapped left primer has a known Tm.
  def find_primer_pair_first
    primers_line_1.each do |pr|
      return pr if find_left_primer_temp(pr.left_primer_snp(self)) != "NA"
    end
    nil
  end

  # First record of line 2 whose SNP-swapped left primer has a known Tm.
  def find_primer_pair_second
    primers_line_2.each do |pr|
      return pr if find_left_primer_temp(pr.left_primer_snp(self)) != "NA"
    end
    nil
  end

  # Row describing this SNP and its primers (memoised).
  #
  # Columns 0-6: gene, "<original><position><snp>", template length,
  # chromosome, number of regions, regions joined with "|", SNP type.
  # Columns 7-16, only when primers were selected: first primer, second
  # primer, common primer, type, orientation, Tm of the three primers, a label
  # naming the selection branch, product size.
  # Column 17: primer3 messages joined with "|" when no primers were
  # selected, nil otherwise.
  #
  # The row is memoised only once it is complete, so an exception raised while
  # building it no longer leaves a partial row behind.
  def values
    return @values if @values

    row = [
      gene,
      "#{original}#{position}#{snp}",
      template_length,
      source_chromosome,
      regions.size,
      regions.join("|"),
      snp_type
    ]
    row.concat(primer_columns)
    if row.size < TOTAL_COLUMNS_BEFORE_MESSAGES
      row[TOTAL_COLUMNS_BEFORE_MESSAGES] = primer3_errors.to_a.join("|")
    else
      row << nil
    end
    @values = row
  end

  # #values plus the repetitive flag and hit count, joined with commas.
  def print_primers
    (values + [@repetitive, @hit_count]).join(",")
  end

  # @return [Boolean] true when #values holds a first primer
  def found_primers?
    !values[7].nil?
  end

  def first_primer
    values[7] || ""
  end

  def second_primer
    values[8] || ""
  end

  def common_primer
    values[9] || ""
  end

  def product_size
    values[16] ? values[16].to_i : 0
  end

  # Orientation column of #values, or 'unknown' when no primers were selected.
  def orientation
    values[11] || 'unknown'
  end

  # Placeholder amplicon for the first primer: primer, padding of 'n' up to
  # the product size, reverse complement of the common primer.
  def first_product
    build_product(first_primer)
  end

  # Same as #first_product, built from the second primer.
  def second_product
    build_product(second_primer)
  end

  # FASTA text with one entry per sequence in #realigned_primers.
  def realigned_primers_fasta
    ret_str = ""
    realigned_primers.each_pair do |name, seq|
      ret_str << ">#{self.to_s}-#{name}\n#{seq}\n"
    end
    ret_str
  end

  # Aligns both products with the surrounding exon sequences of snp_from
  # using MAFFT (memoised).
  def realigned_primers
    return @realigned_primers if @realigned_primers

    sequences_to_align = Hash.new
    sequences_to_align["first_product"] = first_product
    sequences_to_align["second_product"] = second_product
    sequences_to_align.merge!(snp_from.surrounding_exon_sequences)
    # NOTE(review): two entries are always present at this point, so this
    # branch looks unreachable; kept as in the original until the intended
    # threshold is confirmed.
    if sequences_to_align.size == 1
      @realigned_primers = sequences_to_align
      return @realigned_primers
    end
    options = ['--maxiterate', '1000', '--localpair', '--quiet']
    mafft = Bio::MAFFT.new("mafft", options)
    report = mafft.query_align(sequences_to_align)
    @realigned_primers = report.alignment
  end

  # Builds a SNP from a "gene,original,position,snp" line.
  #
  # The argument is not modified. Missing fields are left as nil (position
  # becomes 0) instead of raising.
  #
  # @param reg_str [String]
  # @return [SNP]
  def self.parse(reg_str)
    snp = SNP.new
    snp.gene, snp.original, snp.position, snp.snp = reg_str.chomp.split(",")
    snp.position = snp.position.to_i
    snp.original = snp.original.upcase if snp.original
    snp.snp = snp.snp.upcase if snp.snp
    snp
  end

  # Yields one SNP per line of +filename+ whose position is greater than 0.
  def self.parse_file(filename)
    File.open(filename) do |f|
      f.each_line do |line|
        snp = SNP.parse(line)
        yield snp if snp.position > 0
      end
    end
  end

  # Attaches a primer3 record to this SNP.
  #
  # A record carrying a primer_error only contributes that error. Otherwise
  # the record must belong to line_1 or line_2. Records that returned left
  # primers are stored and compete (with >) for the best record of their
  # line; records without primers contribute their explain strings, with
  # commas replaced by semicolons, to primer3_errors.
  #
  # @raise [Primer3Exception] when the record's line is neither line_1 nor line_2
  def add_record(primer3record)
    @primer3_errors ||= Set.new
    @template_length = primer3record.sequence_template.size
    unless primer3record.primer_error.nil?
      primer3_errors << primer3record.primer_error
      return
    end

    record_line = primer3record.line
    if record_line == @line_1
      @line_1_template = primer3record.sequence_template
    elsif record_line == @line_2
      @line_2_template = primer3record.sequence_template
    else
      raise Primer3Exception.new "#{primer3record.line} is not recognized (#{line_1}, #{line_2})"
    end

    if primer3record.primer_left_num_returned.to_i > 0
      if record_line == @line_1
        primers_line_1 << primer3record
        @primer3_line_1 = primer3record if !@primer3_line_1 || @primer3_line_1 > primer3record
      else
        primers_line_2 << primer3record
        @primer3_line_2 = primer3record if !@primer3_line_2 || @primer3_line_2 > primer3record
      end
    else
      # tr always returns the string; gsub! returned nil when there was no
      # comma, which dropped the explanation from the message.
      primer3_errors << "#{primer3record.line}(#{primer3record.orientation}):#{primer3record.primer_left_explain.tr(',', ';')}"
      primer3_errors << "common(#{primer3record.orientation}#{primer3record.type}):#{primer3record.primer_right_explain.tr(',', ';')}"
      primer3_errors << "pair(#{primer3record.orientation}#{primer3record.type}):#{primer3record.primer_pair_explain.tr(',', ';')}"
    end
  end

  private

  # Chromosome of the originating SNP; falls back to this object's own
  # chromosome attribute when snp_from was never set (e.g. after SNP.parse).
  def source_chromosome
    snp_from ? snp_from.chromosome : chromosome
  end

  # The ten primer columns of #values, or [] when no primers are available.
  def primer_columns
    if primer3_line_1 && primer3_line_2
      paired_primer_columns
    elsif primer3_line_1
      rec = primer3_line_1
      primer_row(rec, rec.left_primer, rec.left_primer_snp(self), rec.best_pair.left.tm, "NA", "first+")
    elsif primer3_line_2
      rec = primer3_line_2
      primer_row(rec, rec.left_primer_snp(self), rec.left_primer, "NA", rec.best_pair.left.tm, "second+")
    else
      []
    end
  end

  # Primer columns when both lines have a best record: use the better record
  # if the matching primer on the other line has a known Tm, otherwise fall
  # back to searching all records.
  def paired_primer_columns
    first = primer3_line_1
    second = primer3_line_2
    primer_1 = first.left_primer_with_coordinates(second.left_coordinates, second.orientation)
    primer_1_tm = find_left_primer_temp(primer_1)
    primer_2 = second.left_primer_with_coordinates(first.left_coordinates, first.orientation)
    primer_2_tm = find_left_primer_temp(primer_2)

    if first < second && primer_2_tm != "NA"
      primer_row(first, first.left_primer, primer_2, first.best_pair.left.tm, primer_2_tm, "first")
    elsif primer_1_tm != "NA"
      primer_row(second, primer_1, second.left_primer, primer_1_tm, second.best_pair.left.tm, "second")
    else
      candidate_primer_columns
    end
  end

  # Primer columns built from the first record of each line whose SNP-swapped
  # left primer has a known Tm.
  def candidate_primer_columns
    first_candidate = find_primer_pair_first
    second_candidate = find_primer_pair_second

    if first_candidate
      primer_2 = primer3_line_2.left_primer_with_coordinates(first_candidate.left_coordinates, first_candidate.orientation)
      primer_2_tm = find_left_primer_temp(primer_2)
    end
    if second_candidate
      primer_1 = primer3_line_1.left_primer_with_coordinates(second_candidate.left_coordinates, second_candidate.orientation)
      primer_1_tm = find_left_primer_temp(primer_1)
    end

    if first_candidate && second_candidate && first_candidate < second_candidate
      primer_row(first_candidate, first_candidate.left_primer, primer_2,
                 first_candidate.best_pair.left.tm, primer_2_tm, "first-")
    elsif second_candidate
      primer_row(second_candidate, primer_1, second_candidate.left_primer,
                 primer_1_tm, second_candidate.best_pair.left.tm, "second-")
    elsif first_candidate
      # The Tm columns follow the primer columns (first, then second); the
      # original wrote them swapped in this branch only.
      primer_row(first_candidate, first_candidate.left_primer, primer_2,
                 first_candidate.best_pair.left.tm, primer_2_tm, "first/")
    else
      []
    end
  end

  # One set of primer columns; common primer, type, orientation, common Tm
  # and product size are taken from +record+.
  def primer_row(record, first, second, first_tm, second_tm, label)
    [
      first,
      second,
      record.right_primer,
      record.type.to_s,
      record.orientation.to_s,
      first_tm,
      second_tm,
      record.best_pair.right.tm,
      label,
      record.best_pair.product_size
    ]
  end

  # Shared implementation of #first_product and #second_product.
  def build_product(left)
    right = common_primer
    nlen = product_size - left.size - right.size
    product = left + ('n' * nlen) + Primer3Record.reverse_complement_string(right)
    product = Primer3Record.reverse_complement_string(product) if orientation == 'reverse'
    product
  end
end
401
+
402
# One record of primer3 output (the tag=value lines up to a "=" line).
#
# Tags are stored in #properties under downcased symbol keys and can be read
# as methods (record.sequence_template). The sequence id is expected to hold
# seven space separated fields: snp, line, type, "exon" or another region
# name, polymorphism, chromosome and orientation.
#
# Records compare by #score with the order reversed, so the record with the
# highest score is the smallest.
class Primer3Record
  include Comparable
  attr_accessor :properties, :polymorphism
  attr_accessor :scores

  def initialize
    @properties = Hash.new
    @scores = Hash.new
    @scores[:chromosome_specific] = 1000
    @scores[:chromosome_semispecific] = 100
    @scores[:chromosome_nonspecific] = 0
    @scores[:exon] = 50
  end

  # Pair with the smallest product size; the earliest wins ties (memoised).
  # Requires #parse_blocks to have been called.
  def best_pair
    @best_pair ||= @primerPairs.min_by(&:size)
  end

  # @return the :primer_error property, or nil when absent
  def primer_error
    @properties[:primer_error] || nil
  end

  # Exposes properties as methods. A missing (or falsy) property is reported
  # on standard error and read as "".
  def method_missing(method_name, *args)
    value = @properties[method_name]
    return value if value

    $stderr.puts "Missing #{method_name}"
    $stderr.puts @properties.inspect
    "" # if a property is missing, return blank.
  end

  # Keeps respond_to? in line with the properties that are actually present,
  # so implicit conversion probes such as to_ary are not answered with "".
  def respond_to_missing?(method_name, include_private = false)
    (@properties && @properties.key?(method_name)) || super
  end

  # Tm of the returned left primer whose sequence equals +primer+.
  #
  # @return the primer_left_<i>_tm property, or nil when no sequence matches
  def find_left_tm(primer)
    size.times do |i|
      return @properties[:"primer_left_#{i}_tm"] if @properties[:"primer_left_#{i}_sequence"] == primer
    end
    nil
  end

  # Score for the record type, plus the exon bonus, minus the product length.
  def score
    ret = @scores[type]
    ret += @scores[:exon] if exon?
    ret - product_length
  end

  # Reversed on purpose: a higher score sorts first.
  def <=>(anOther)
    anOther.score <=> score
  end

  # Splits "start,length" and converts both fields to integers.
  def parse_coordinates(str)
    coords = str.split(',')
    coords[0] = coords[0].to_i
    coords[1] = coords[1].to_i
    coords
  end

  def left_coordinates
    @left_coordinates = best_pair.left.coordinates
  end

  # Coordinates of the right primer with the start moved from the last base
  # to the first one (start - length + 1). Works on a copy so the coordinates
  # stored in the primer are left untouched.
  def right_coordinates
    unless @right_coordinates
      @right_coordinates = best_pair.right.coordinates.dup
      @right_coordinates[0] = @right_coordinates[0] - @right_coordinates[1] + 1
    end
    @right_coordinates
  end

  def left_primer
    @left_primer = best_pair.left.sequence
  end

  # Left primer with its last base swapped between the two alleles of +snp+
  # (alleles are reverse complemented for reverse records).
  #
  # @raise [Primer3Exception] on an unknown orientation, or when the primer
  #   does not end in either allele
  def left_primer_snp(snp)
    tmp_primer = String.new(left_primer)
    case orientation
    when :forward
      base_original = snp.original
      base_snp = snp.snp
    when :reverse
      base_original = Primer3Record.reverse_complement_string(snp.original)
      base_snp = Primer3Record.reverse_complement_string(snp.snp)
    else
      raise Primer3Exception.new "#{self.orientation} is not a valid orientation"
    end

    if tmp_primer[-1] == base_original
      tmp_primer[-1] = base_snp
    elsif tmp_primer[-1] == base_snp
      tmp_primer[-1] = base_original
    else
      raise Primer3Exception.new "#{tmp_primer} doesnt end in a base in the SNP #{snp.to_s}"
    end
    tmp_primer
  end

  # Slice of this record's template at +coordinates+ ([start, length]); the
  # template is reverse complemented first when the orientations differ.
  def left_primer_with_coordinates(coordinates, other_orientation)
    seq = self.sequence_template
    seq = Primer3Record.reverse_complement_string(seq) if self.orientation != other_orientation
    seq[coordinates[0], coordinates[1]]
  end

  # Reverse complement of a nucleotide string, including ambiguity codes, in
  # both cases. Other characters are kept as they are.
  def self.reverse_complement_string(sequenc_str)
    sequenc_str.tr('atgcrymkdhvbswnATGCRYMKDHVBSWN', 'tacgyrkmhdbvswnTACGYRKMHDBVSWN').reverse
  end

  # Right primer taken from the template. The result is memoised; the
  # original complemented the cached value again on every call, so repeated
  # calls alternated between the two strands.
  def right_primer_delete
    @right_primer ||= Primer3Record.reverse_complement_string(
      self.sequence_template[right_coordinates[0], right_coordinates[1]]
    )
  end

  def right_primer
    best_pair.right.sequence
  end

  def product_length
    best_pair.size
  end

  def snp
    return @snp if @snp

    parse_header
    @snp
  end

  # Splits the sequence id into its fields, e.g.
  # "CL3339Contig1:T509C AvocetS chromosome_specific exon homoeologous 4D forward".
  def parse_header
    @snp, @line, @type, @in, @polymorphism, @chromosome, @orientation = self.sequence_id.split(" ")

    @type = @type.to_sym
    # exon? reads @exon; the original only ever assigned false to it, so the
    # exon bonus in #score could never apply.
    @exon = @in ? @in.to_sym == :exon : false
    @in = @exon if @in
    @homoeologous = @polymorphism.to_sym == :homoeologous
    @parsed = true
    @orientation = @orientation.to_sym
  end

  def orientation
    parse_header unless @parsed
    @orientation
  end

  def chromosome
    parse_header unless @parsed
    @chromosome
  end

  def homoeologous?
    parse_header unless @parsed
    @homoeologous
  end

  def type
    parse_header unless @parsed
    @type
  end

  def exon?
    parse_header unless @parsed
    @exon
  end

  def line
    parse_header unless @parsed
    @line
  end

  # Number of primer pairs returned (primer_pair_num_returned as integer).
  def size
    @properties[:primer_pair_num_returned].to_i
  end

  # Builds one PrimerPair per returned pair.
  def parse_blocks
    @primerPairs = (0...size).map { |i| PrimerPair.new(self, i) }
  end

  # Parses a primer3 output file and yields each record that is closed by a
  # "=" line, after its primer pairs have been built.
  #
  # Lines without a tag (blank lines) are skipped.
  #
  # @param filename [String]
  # @param scores [Hash, nil] replaces the default score table when given
  def self.parse_file(filename, scores: nil)
    File.open(filename) do |f|
      record = Primer3Record.new
      record.scores = scores if scores
      f.each_line do |line|
        line.chomp!
        if line == "="
          record.parse_blocks
          yield record
          record = Primer3Record.new
          record.scores = scores if scores
        else
          tokens = line.split("=")
          next if tokens.empty?

          # Everything after the first "=" is the value.
          record.properties[tokens[0].downcase.to_sym] = tokens.drop(1).join("=")
        end
      end
    end
  end
end
664
+
665
+
666
# A single primer of a PrimerPair. Its attributes live in a string-keyed
# hash filled through #set_value and are read as methods (primer.tm,
# primer.sequence, primer.coordinates, ...).
class Primer
  attr_accessor :pair

  def initialize
    @values = Hash.new
  end

  # Returns the stored value named after the method. A stored false is
  # returned as is; only nil counts as missing.
  #
  # @raise [NoMethodError] when no value is stored under that name
  def method_missing(m, *args, &block)
    stored = @values[m.to_s]
    return stored unless stored.nil?

    raise NoMethodError.new(), "There's no method called #{m}, available: #{@values.keys.to_s}."
  end

  # Keeps respond_to? consistent with #method_missing.
  def respond_to_missing?(m, include_private = false)
    !@values[m.to_s].nil? || super
  end

  # Stores +value+ under +key+; string keys are the ones readable as methods.
  def set_value(key, value)
    @values[key] = value
  end
end
684
+
685
# The primer pair with a given index inside a primer3 record.
#
# Properties named primer_left_<index>... and primer_right_<index>... are
# copied into the #left and #right Primer objects; every other property
# whose third name component is the index (e.g. primer_pair_<index>_penalty)
# is stored on the pair itself and read as a method (pair.penalty).
# Pairs compare by penalty.
class PrimerPair
  include Comparable
  attr_reader :record
  attr_reader :left, :right

  # @param record [#size, #properties] the owning record
  # @param index [Integer] zero based pair index
  # @raise [Primer3Exception] when the index is out of range, or when the
  #   record holds no property for the left or the right primer
  def initialize(record, index)
    raise Primer3Exception.new(), "Index #{index} is greater than the number of records" unless index < record.size

    @record = record
    @left = Primer.new
    @right = Primer.new
    @values = Hash.new

    [@left, @right].each do |primer|
      primer.set_value("added", false)
      primer.pair = self
    end

    index_s = index.to_s
    record.properties.each do |key, value|
      tokens = key.to_s.split("_")
      next unless tokens.size > 2 && tokens[2] == index_s

      primer = case tokens[1]
               when "left" then @left
               when "right" then @right
               end

      if primer.nil?
        @values[tokens[3..6].join("_")] = value
        next
      end

      primer.set_value("added", true)
      if tokens.size == 3
        # "primer_left_0" itself holds "start,length".
        primer.set_value("coordinates", parse_coordinates(value))
      else
        # Everything except the sequence is stored as a float.
        to_add = tokens[3] == "sequence" ? value : value.to_f
        primer.set_value(tokens[3..6].join("_"), to_add)
      end
    end

    raise Primer3Exception.new(), "The pair is not complete (l:#{left.added}, r:#{right.added})" if @left.added == false || @right.added == false
  end

  # Splits "start,length" and converts both fields to integers.
  def parse_coordinates(str)
    coords = str.split(',')
    coords[0] = coords[0].to_i
    coords[1] = coords[1].to_i
    coords
  end

  # Product size as an integer.
  def size
    product_size.to_i
  end

  def <=>(anOther)
    penalty.to_f <=> anOther.penalty.to_f
  end

  # Returns the pair level value named after the method.
  #
  # @raise [NoMethodError] when no such value is stored
  def method_missing(m, *args, &block)
    stored = @values[m.to_s]
    return stored if stored

    raise NoMethodError.new(), "There's no method called #{m}. Available methods: #{@values.keys.to_s}"
  end

  # Keeps respond_to? consistent with #method_missing.
  def respond_to_missing?(m, include_private = false)
    (@values && @values[m.to_s] ? true : false) || super
  end
end
753
+
754
# Collects SNPs keyed by SNP#to_s, attaches primer3 records to them and
# prints the resulting primers.
class KASPContainer
  attr_accessor :line_1, :line_2
  attr_accessor :snp_hash
  attr_accessor :scores

  # Registers every SNP read by SNP.parse_file and assigns it the container's
  # two line names.
  def add_snp_file(filename)
    @snp_hash ||= {}
    SNP.parse_file(filename) do |snp|
      @snp_hash[snp.to_s] = snp
      snp.line_1 = @line_1
      snp.line_2 = @line_2
    end
  end

  # Registers a primer3 SNP built from +snp_in+, copying its description,
  # errors, repetitive flag, hit count, SNP type and the target regions of
  # its exon list.
  #
  # @return [SNP] the SNP stored in the container
  def add_snp(snp_in)
    # TODO: Here we need to also copy the errors that will be printed.
    @snp_hash ||= {}
    snp = SNP.new
    snp.gene = snp_in.gene
    snp.original = snp_in.original
    snp.primer3_errors = Set.new snp_in.errors
    snp.position = snp_in.position
    snp.snp = snp_in.snp
    snp.repetitive = snp_in.repetitive
    snp.hit_count = snp_in.hit_count
    snp.snp_type = snp_in.snp_type
    snp.line_1 = @line_1
    snp.line_2 = @line_2
    snp.snp_from = snp_in
    snp.regions = snp_in.exon_list.values.map { |exons| exons.map { |exon| exon.target_region.to_s } }
    @snp_hash[snp.to_s] = snp
    snp
  end

  # Reads a primer3 output file and hands every record to the SNP registered
  # under "<record snp>:<record chromosome>".
  #
  # @raise [Primer3Exception] when a record refers to a SNP that was never
  #   added (this used to surface as a NoMethodError on nil)
  def add_primers_file(filename)
    Primer3Record.parse_file(filename, scores: @scores) do |primer3record|
      key = "#{primer3record.snp.to_s}:#{primer3record.chromosome}"
      current_snp = @snp_hash && @snp_hash[key]
      raise Primer3Exception, "No SNP registered for primer3 record '#{key}'" unless current_snp

      current_snp.add_record(primer3record)
    end
  end

  # One line per SNP as produced by SNP#print_primers; "" when the container
  # holds no SNP.
  def print_primers
    (snp_hash || {}).each_value.map { |snp| "#{snp.print_primers}\n" }.join
  end

  # Tab separated primer list for the SNPs that have primers: the first and
  # second primers prefixed with their tails, then the common primer.
  #
  # @param tail_a [String] tail prepended to the first primer
  # @param tail_b [String] tail prepended to the second primer
  def print_primers_with_tails(tail_a: "GAAGGTCGGAGTCAACGGATT", tail_b: "GAAGGTGACCAAGTTCATGCT")
    str = ""
    (snp_hash || {}).each_value do |snp|
      next unless snp.found_primers?

      str << "#{snp.gene}#{snp.original}_1st\t#{tail_a}#{snp.first_primer}\n"
      str << "#{snp.gene}#{snp.snp}_2nd\t#{tail_b}#{snp.second_primer}\n"
      str << "#{snp.gene}_common\t#{snp.common_primer}\n"
    end
    str
  end
end
819
+ end
820
+