bio-polymarker 1.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.travis.yml +24 -0
- data/Gemfile +23 -0
- data/README.md +205 -0
- data/Rakefile +61 -0
- data/SECURITY.md +16 -0
- data/VERSION +1 -0
- data/bin/bfr.rb +128 -0
- data/bin/blast_triads.rb +166 -0
- data/bin/blast_triads_promoters.rb +192 -0
- data/bin/count_variations.rb +36 -0
- data/bin/filter_blat_by_target_coverage.rb +69 -0
- data/bin/filter_exonerate_by_identity.rb +38 -0
- data/bin/find_best_blat_hit.rb +33 -0
- data/bin/find_best_exonerate.rb +17 -0
- data/bin/get_longest_hsp_blastx_triads.rb +66 -0
- data/bin/hexaploid_primers.rb +168 -0
- data/bin/homokaryot_primers.rb +183 -0
- data/bin/mafft_triads.rb +120 -0
- data/bin/mafft_triads_promoters.rb +403 -0
- data/bin/map_markers_to_contigs.rb +66 -0
- data/bin/marker_to_vcf.rb +241 -0
- data/bin/markers_in_region.rb +42 -0
- data/bin/mask_triads.rb +169 -0
- data/bin/polymarker.rb +410 -0
- data/bin/polymarker_capillary.rb +443 -0
- data/bin/polymarker_deletions.rb +350 -0
- data/bin/snp_position_to_polymarker.rb +101 -0
- data/bin/snps_between_bams.rb +107 -0
- data/bin/tag_stats.rb +75 -0
- data/bin/vcfLineToTable.rb +56 -0
- data/bin/vcfToPolyMarker.rb +82 -0
- data/bio-polymarker.gemspec +227 -0
- data/conf/defaults.rb +1 -0
- data/conf/primer3_config/dangle.dh +128 -0
- data/conf/primer3_config/dangle.ds +128 -0
- data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
- data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
- data/conf/primer3_config/interpretations/loops_i.dh +34 -0
- data/conf/primer3_config/interpretations/loops_i.ds +31 -0
- data/conf/primer3_config/interpretations/stack_i.dh +257 -0
- data/conf/primer3_config/interpretations/stack_i.ds +256 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
- data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
- data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
- data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
- data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
- data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
- data/conf/primer3_config/loops.dh +30 -0
- data/conf/primer3_config/loops.ds +30 -0
- data/conf/primer3_config/stack.dh +256 -0
- data/conf/primer3_config/stack.ds +256 -0
- data/conf/primer3_config/stackmm.dh +256 -0
- data/conf/primer3_config/stackmm.ds +256 -0
- data/conf/primer3_config/tetraloop.dh +77 -0
- data/conf/primer3_config/tetraloop.ds +77 -0
- data/conf/primer3_config/triloop.dh +16 -0
- data/conf/primer3_config/triloop.ds +16 -0
- data/conf/primer3_config/tstack.dh +256 -0
- data/conf/primer3_config/tstack2.dh +256 -0
- data/conf/primer3_config/tstack2.ds +256 -0
- data/conf/primer3_config/tstack_tm_inf.ds +256 -0
- data/lib/bio/BFRTools.rb +465 -0
- data/lib/bio/BIOExtensions.rb +153 -0
- data/lib/bio/PolyploidTools/ChromosomeArm.rb +63 -0
- data/lib/bio/PolyploidTools/ExonContainer.rb +245 -0
- data/lib/bio/PolyploidTools/Marker.rb +175 -0
- data/lib/bio/PolyploidTools/Mask.rb +116 -0
- data/lib/bio/PolyploidTools/NoSNPSequence.rb +292 -0
- data/lib/bio/PolyploidTools/PrimerRegion.rb +30 -0
- data/lib/bio/PolyploidTools/SNP.rb +804 -0
- data/lib/bio/PolyploidTools/SNPMutant.rb +86 -0
- data/lib/bio/PolyploidTools/SNPSequence.rb +55 -0
- data/lib/bio/db/blast.rb +114 -0
- data/lib/bio/db/exonerate.rb +333 -0
- data/lib/bio/db/primer3.rb +820 -0
- data/lib/bio-polymarker.rb +28 -0
- data/test/data/7B_amplicon_test.fa +12 -0
- data/test/data/7B_amplicon_test.fa.fai +1 -0
- data/test/data/7B_amplicon_test_reference.fa +110 -0
- data/test/data/7B_amplicon_test_reference.fa.fai +3 -0
- data/test/data/7B_marker_test.txt +1 -0
- data/test/data/BS00068396_51.fa +2 -0
- data/test/data/BS00068396_51_blast.tab +4 -0
- data/test/data/BS00068396_51_contigs.aln +1412 -0
- data/test/data/BS00068396_51_contigs.dnd +7 -0
- data/test/data/BS00068396_51_contigs.fa +8 -0
- data/test/data/BS00068396_51_contigs.fa.fai +4 -0
- data/test/data/BS00068396_51_contigs.fa.nhr +0 -0
- data/test/data/BS00068396_51_contigs.fa.nin +0 -0
- data/test/data/BS00068396_51_contigs.fa.nsq +0 -0
- data/test/data/BS00068396_51_contigs.nhr +0 -0
- data/test/data/BS00068396_51_contigs.nin +0 -0
- data/test/data/BS00068396_51_contigs.nsq +0 -0
- data/test/data/BS00068396_51_exonerate.tab +6 -0
- data/test/data/BS00068396_51_for_polymarker.txt +1 -0
- data/test/data/BS00068396_51_genes.txt +14 -0
- data/test/data/IWGSC_CSS_1AL_scaff_1455974.fa +112 -0
- data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa +2304 -0
- data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
- data/test/data/LIB1716.bam +0 -0
- data/test/data/LIB1716.bam.bai +0 -0
- data/test/data/LIB1719.bam +0 -0
- data/test/data/LIB1719.bam.bai +0 -0
- data/test/data/LIB1721.bam +0 -0
- data/test/data/LIB1721.bam.bai +0 -0
- data/test/data/LIB1722.bam +0 -0
- data/test/data/LIB1722.bam.bai +0 -0
- data/test/data/PST130_7067.csv +1 -0
- data/test/data/PST130_7067.fa +2 -0
- data/test/data/PST130_7067.fa.fai +1 -0
- data/test/data/PST130_7067.fa.ndb +0 -0
- data/test/data/PST130_7067.fa.nhr +0 -0
- data/test/data/PST130_7067.fa.nin +0 -0
- data/test/data/PST130_7067.fa.not +0 -0
- data/test/data/PST130_7067.fa.nsq +0 -0
- data/test/data/PST130_7067.fa.ntf +0 -0
- data/test/data/PST130_7067.fa.nto +0 -0
- data/test/data/PST130_reverse_primer.csv +1 -0
- data/test/data/S22380157.fa +16 -0
- data/test/data/S22380157.fa.fai +1 -0
- data/test/data/S22380157.vcf +67 -0
- data/test/data/S58861868/LIB1716.bam +0 -0
- data/test/data/S58861868/LIB1716.sam +651 -0
- data/test/data/S58861868/LIB1719.bam +0 -0
- data/test/data/S58861868/LIB1719.sam +805 -0
- data/test/data/S58861868/LIB1721.bam +0 -0
- data/test/data/S58861868/LIB1721.sam +1790 -0
- data/test/data/S58861868/LIB1722.bam +0 -0
- data/test/data/S58861868/LIB1722.sam +1271 -0
- data/test/data/S58861868/S58861868.fa +16 -0
- data/test/data/S58861868/S58861868.fa.fai +1 -0
- data/test/data/S58861868/S58861868.vcf +76 -0
- data/test/data/S58861868/header.txt +9 -0
- data/test/data/S58861868/merged.bam +0 -0
- data/test/data/S58861868/merged_reheader.bam +0 -0
- data/test/data/S58861868/merged_reheader.bam.bai +0 -0
- data/test/data/Test3Aspecific.csv +2 -0
- data/test/data/Test3Aspecific_contigs.fa +6 -0
- data/test/data/bfr_out_test.csv +5 -0
- data/test/data/chr1A_C1145499T/chr1A_C1145499T.csv +1 -0
- data/test/data/chr1A_G540414846C/chr1A_G540414846C.csv +1 -0
- data/test/data/chr1A_G540414846C/chr1A_G540414846C.fa +2 -0
- data/test/data/chr1A_T517634750C/chr1A_T517634750C.csv +1 -0
- data/test/data/chr2D_C112180134A/chr2D_C112180134A.csv +1 -0
- data/test/data/chr4D_C14473543T/chr4D_C14473543T.csv +1 -0
- data/test/data/chr4D_C14473543T/chr4D_C14473543T.fa +2 -0
- data/test/data/headerMergeed.txt +9 -0
- data/test/data/headerS2238015 +1 -0
- data/test/data/mergedLibs.bam +0 -0
- data/test/data/mergedLibsReheader.bam +0 -0
- data/test/data/mergedLibsSorted.bam +0 -0
- data/test/data/mergedLibsSorted.bam.bai +0 -0
- data/test/data/patological_cases5D.csv +1 -0
- data/test/data/primer_3_input_header_test +5 -0
- data/test/data/short_primer_design_test.csv +10 -0
- data/test/data/some_tests/some_tests.csv +201 -0
- data/test/data/test_from_mutant.csv +3 -0
- data/test/data/test_iselect.csv +196 -0
- data/test/data/test_iselect_reference.fa +1868 -0
- data/test/data/test_iselect_reference.fa.fai +934 -0
- data/test/data/test_primer3_error.csv +4 -0
- data/test/data/test_primer3_error_contigs.fa +10 -0
- data/test/test_bfr.rb +135 -0
- data/test/test_blast.rb +47 -0
- data/test/test_exon_container.rb +17 -0
- data/test/test_exonearate.rb +48 -0
- data/test/test_integration.rb +76 -0
- data/test/test_snp_parsing.rb +121 -0
- data/test/test_wrong_selection.sh +5 -0
- metadata +356 -0
@@ -0,0 +1,820 @@
|
|
1
|
+
require 'pp'
|
2
|
+
module Bio::DB::Primer3
|
3
|
+
# Error type raised by the primer3 helpers in this module.
class Primer3Exception < RuntimeError; end
|
5
|
+
|
6
|
+
# Reads KEY=VALUE lines from +file+ into +defaults+.
#
# Keys are downcased and stored as symbols; values are kept as strings.
# Only the first "=" separates key from value, so values may themselves
# contain "=". Lines without a key (blank lines, lines starting with "=")
# are skipped. A key with nothing after the "=" (or with no "=" at all)
# is stored with a nil value.
#
# @param file [String] path of the preferences file
# @param defaults [Hash] hash that is updated in place
# @return [Hash] the same +defaults+ hash
def self.read_primer_preferences(file, defaults)
  File.foreach(file) do |line|
    key, value = line.chomp.split("=", 2)
    next if key.nil? || key.strip.empty?

    value = nil if value && value.empty?
    defaults[key.downcase.to_sym] = value
  end

  defaults
end
|
18
|
+
|
19
|
+
# Writes the global primer3 settings to +file+, one "KEY=value" line per
# option, with the key upcased.
#
# The built-in defaults below are merged with +opts2+; entries in +opts2+
# win. The thermodynamic parameters path points at conf/primer3_config
# three directories above this file and always ends with "/".
#
# @param file [#puts] open, writable IO-like object
# @param opts2 [Hash] overrides / extra primer3 options
# @return [Hash] the merged option hash that was written
def self.prepare_input_file(file, opts2={})
  # Join the components explicitly; plain string concatenation of
  # dirname and "../.." only resolved correctly by accident.
  config_dir = File.expand_path(
    File.join(File.dirname(__FILE__), '..', '..', '..', 'conf', 'primer3_config')
  )

  opts = {
    :primer_product_size_range => "50-150",
    :primer_max_size => 25,
    :primer_lib_ambiguity_codes_consensus => 1,
    :primer_liberal_base => 1,
    :primer_num_return => 5,
    :primer_explain_flag => 1,
    :primer_thermodynamic_parameters_path => config_dir + '/'
  }.merge(opts2)

  opts.each do |key, value|
    file.puts "#{key.to_s.upcase}=#{value}"
  end
end
|
34
|
+
|
35
|
+
# Runs the "primer3_core" executable through systemu, feeding it the
# contents of opts[:in] on stdin and writing its stdout to opts[:out].
#
# The block passed to systemu receives the child id; it sleeps for the
# timeout and then sends signal 9 to that id.
# NOTE(review): this relies on systemu stopping the block once the
# command has finished - confirm against the systemu documentation.
#
# The caller's hash is no longer modified (the original deleted :in and
# :out from it even though the hash was not used afterwards).
#
# @param opts [Hash] :in (input path), :out (output path) and an
#   optional :timeout in seconds (default 600)
# @raise [Primer3Exception] when the exit status is not 0; the message
#   carries primer3's stderr
def self.run(opts={})
  puts "Primer3.run running..."
  f_in = opts[:in]
  f_out = opts[:out]
  timeout = opts[:timeout] || 600

  primer_3_in = File.read(f_in)
  stdout = ''
  stderr = ''
  status = systemu("primer3_core", 0 => primer_3_in, 1 => stdout, 2 => stderr) do |cid|
    sleep timeout
    Process.kill 9, cid
  end

  unless status.exitstatus == 0
    raise Primer3Exception, "Error running primer3. Command line was 'primer3_core'\nPrimer3 STDERR was:\n#{stderr}"
  end

  File.open(f_out, 'w') { |f| f.write(stdout) }
end
|
55
|
+
|
56
|
+
# Collects the primer3 records produced for one SNP (two lines/alleles)
# and turns them into one output row of primers.
class SNP
  # Number of columns (7 marker columns + 10 primer columns) that precede
  # the primer3 messages column in #values.
  TOTAL_COLUMNS_BEFORE_MESSAGES = 17

  attr_accessor :gene, :original, :position, :snp, :chromosome, :line_1, :line_2
  attr_accessor :primer3_line_1, :primer3_line_2, :template_length
  attr_accessor :primers_line_1, :primers_line_2
  attr_accessor :used_contigs
  attr_accessor :snp_from
  attr_accessor :regions
  attr_accessor :primer3_errors
  attr_accessor :repetitive
  attr_accessor :hit_count
  attr_accessor :snp_type

  def initialize
    @primers_line_1 = SortedSet.new
    @primers_line_2 = SortedSet.new
    @regions = SortedSet.new
    @primer3_errors = Set.new
  end

  # NOTE(review): the trailing "}" looks like a typo, but it is part of
  # the emitted string and is kept unchanged here - confirm before fixing.
  def line_1_name
    "#{gene}:#{position}#{original}>#{snp} #{line_1}}"
  end

  # NOTE(review): same trailing "}" as in #line_1_name.
  def line_2_name
    "#{gene}:#{position}#{original}>#{snp} #{line_2}}"
  end

  def to_s
    "#{gene}:#{original}#{position}#{snp}:#{snp_from.chromosome}"
  end

  # Looks +primer+ up among the left primers of every stored record
  # (line 1 records first, then line 2).
  #
  # @return the Tm reported by the first record that knows the sequence,
  #   or the string "NA" when none does
  def find_left_primer_temp(primer)
    [primers_line_1, primers_line_2].each do |records|
      records.each do |record|
        tm = record.find_left_tm(primer)
        return tm if tm
      end
    end
    "NA"
  end

  # First line-1 record whose SNP-swapped left primer has a known Tm, or nil.
  def find_primer_pair_first
    primers_line_1.each do |record|
      return record if find_left_primer_temp(record.left_primer_snp(self)) != "NA"
    end
    nil
  end

  # First line-2 record whose SNP-swapped left primer has a known Tm, or nil.
  def find_primer_pair_second
    primers_line_2.each do |record|
      return record if find_left_primer_temp(record.left_primer_snp(self)) != "NA"
    end
    nil
  end

  # Builds (once) and returns the output row for this SNP.
  #
  # Layout: 0 gene, 1 "<original><position><snp>", 2 template length,
  # 3 snp_from.chromosome, 4 number of regions, 5 regions joined by "|",
  # 6 snp_type, 7-16 the primer columns (see #record_columns), 17 the
  # primer3 messages joined by "|" when no primer columns were produced,
  # otherwise nil.
  #
  # The row is memoised only after it has been built completely, so an
  # exception while building no longer leaves a truncated cached row.
  def values
    return @values if @values

    row = [
      gene,
      "#{original}#{position}#{snp}",
      template_length,
      snp_from.chromosome,
      regions.size,
      regions.join("|"),
      snp_type
    ]
    row.concat(primer_columns)

    if row.size < TOTAL_COLUMNS_BEFORE_MESSAGES
      row[TOTAL_COLUMNS_BEFORE_MESSAGES] = primer3_errors.to_a.join("|")
    else
      row << nil
    end

    @values = row
  end

  # The row from #values plus the repetitive flag and hit count, comma separated.
  def print_primers
    to_print = values.dup
    to_print << @repetitive
    to_print << @hit_count
    to_print.join(",")
  end

  def found_primers?
    !values[7].nil?
  end

  def first_primer
    values[7] || ""
  end

  def second_primer
    values[8] || ""
  end

  def common_primer
    values[9] || ""
  end

  def product_size
    values[16] ? values[16].to_i : 0
  end

  def orientation
    values[11] || 'unknown'
  end

  # Product for the first allele primer: the primer, 'n' padding up to the
  # product size and the reverse complement of the common primer.
  def first_product
    build_product(first_primer)
  end

  # Same as #first_product, for the second allele primer.
  def second_product
    build_product(second_primer)
  end

  # FASTA text with one entry per sequence in #realigned_primers.
  def realigned_primers_fasta
    ret_str = ""
    realigned_primers.each_pair do |name, seq|
      ret_str << ">#{self.to_s}-#{name}\n#{seq}\n"
    end
    ret_str
  end

  # Aligns both products with snp_from.surrounding_exon_sequences using
  # MAFFT and memoises the alignment.
  def realigned_primers
    return @realigned_primers if @realigned_primers

    sequences_to_align = Hash.new
    sequences_to_align["first_product"] = first_product
    sequences_to_align["second_product"] = second_product
    sequences_to_align.merge!(snp_from.surrounding_exon_sequences)
    # NOTE(review): two keys are always inserted above, so this guard
    # looks unreachable - confirm the intended condition.
    if sequences_to_align.size == 1
      @realigned_primers = sequences_to_align
      return @realigned_primers
    end

    options = ['--maxiterate', '1000', '--localpair', '--quiet']
    mafft = Bio::MAFFT.new("mafft", options)
    report = mafft.query_align(sequences_to_align)
    @realigned_primers = report.alignment
  end

  # Parses a "gene,original,position,snp" line. The argument is not
  # modified. Missing fields stay nil; a missing position becomes 0.
  def self.parse(reg_str)
    gene, original, position, allele = reg_str.chomp.split(",")
    snp = SNP.new
    snp.gene = gene
    snp.original = original ? original.upcase : nil
    snp.position = position.to_i
    snp.snp = allele ? allele.upcase : nil
    snp
  end

  # Yields one SNP per line of +filename+ whose position is greater than 0.
  def self.parse_file(filename)
    File.open(filename) do |f|
      f.each_line do |line|
        snp = SNP.parse(line)
        yield snp if snp.position > 0
      end
    end
  end

  # Registers one primer3 record for this SNP.
  #
  # A record carrying a primer error only contributes that error. Otherwise
  # the record must belong to line_1 or line_2; if it returned left primers
  # it is stored and becomes the preferred record for its line when it
  # compares lower than the current one, else its explain strings are added
  # to the messages.
  #
  # @raise [Primer3Exception] when the record's line is neither line
  def add_record(primer3record)
    @primer3_errors = Set.new unless @primer3_errors
    @template_length = primer3record.sequence_template.size
    if primer3record.primer_error != nil
      primer3_errors << primer3record.primer_error
      return
    end

    if primer3record.line == @line_1
      first_line = true
      @line_1_template = primer3record.sequence_template
    elsif primer3record.line == @line_2
      first_line = false
      @line_2_template = primer3record.sequence_template
    else
      raise Primer3Exception, "#{primer3record.line} is not recognized (#{line_1}, #{line_2})"
    end

    if primer3record.primer_left_num_returned.to_i > 0
      if first_line
        primers_line_1 << primer3record
        @primer3_line_1 = primer3record if !@primer3_line_1 || @primer3_line_1 > primer3record
      else
        primers_line_2 << primer3record
        @primer3_line_2 = primer3record if !@primer3_line_2 || @primer3_line_2 > primer3record
      end
    else
      tag = "#{primer3record.orientation}#{primer3record.type}"
      primer3_errors << "#{primer3record.line}(#{primer3record.orientation}):#{explain_for_row(primer3record.primer_left_explain)}"
      primer3_errors << "common(#{tag}):#{explain_for_row(primer3record.primer_right_explain)}"
      primer3_errors << "pair(#{tag}):#{explain_for_row(primer3record.primer_pair_explain)}"
    end
  end

  private

  # Replaces commas so the text fits in one comma separated column.
  # Unlike gsub!, this also returns the text when it has no comma and does
  # not modify the record's own string.
  def explain_for_row(text)
    text.to_s.tr(',', ';')
  end

  # The ten primer columns: A primer, B primer, common primer, type,
  # orientation, A Tm, B Tm, common Tm, label, product size.
  def record_columns(record, primer_a, primer_b, tm_a, tm_b, label)
    [
      primer_a,
      primer_b,
      record.right_primer,
      record.type.to_s,
      record.orientation.to_s,
      tm_a,
      tm_b,
      record.best_pair.right.tm,
      label,
      record.best_pair.product_size
    ]
  end

  # Primer columns for the available records, or [] when there are none.
  def primer_columns
    if primer3_line_1 && primer3_line_2
      paired_primer_columns
    elsif primer3_line_1
      rec = primer3_line_1
      record_columns(rec, rec.left_primer, rec.left_primer_snp(self),
                     rec.best_pair.left.tm, "NA", "first+")
    elsif primer3_line_2
      rec = primer3_line_2
      record_columns(rec, rec.left_primer_snp(self), rec.left_primer,
                     "NA", rec.best_pair.left.tm, "second+")
    else
      []
    end
  end

  # Both lines have a preferred record: use the one whose counterpart
  # primer (taken at the same coordinates on the other line) has a Tm.
  def paired_primer_columns
    first = primer3_line_1
    second = primer3_line_2

    primer_1 = first.left_primer_with_coordinates(second.left_coordinates, second.orientation)
    primer_1_tm = find_left_primer_temp(primer_1)

    primer_2 = second.left_primer_with_coordinates(first.left_coordinates, first.orientation)
    primer_2_tm = find_left_primer_temp(primer_2)

    if first < second && primer_2_tm != "NA"
      record_columns(first, first.left_primer, primer_2,
                     first.best_pair.left.tm, primer_2_tm, "first")
    elsif primer_1_tm != "NA"
      record_columns(second, primer_1, second.left_primer,
                     primer_1_tm, second.best_pair.left.tm, "second")
    else
      fallback_primer_columns
    end
  end

  # Neither preferred record worked: search all stored records of each line.
  def fallback_primer_columns
    first_candidate = find_primer_pair_first
    second_candidate = find_primer_pair_second

    if first_candidate
      primer_2 = primer3_line_2.left_primer_with_coordinates(first_candidate.left_coordinates, first_candidate.orientation)
      primer_2_tm = find_left_primer_temp(primer_2)
    end
    if second_candidate
      primer_1 = primer3_line_1.left_primer_with_coordinates(second_candidate.left_coordinates, second_candidate.orientation)
      primer_1_tm = find_left_primer_temp(primer_1)
    end

    if first_candidate && second_candidate && first_candidate < second_candidate
      record_columns(first_candidate, first_candidate.left_primer, primer_2,
                     first_candidate.best_pair.left.tm, primer_2_tm, "first-")
    elsif second_candidate
      record_columns(second_candidate, primer_1, second_candidate.left_primer,
                     primer_1_tm, second_candidate.best_pair.left.tm, "second-")
    elsif first_candidate
      # The Tm columns follow the primer columns (A then B); the original
      # wrote them in swapped order in this branch only.
      record_columns(first_candidate, first_candidate.left_primer, primer_2,
                     first_candidate.best_pair.left.tm, primer_2_tm, "first/")
    else
      []
    end
  end

  # allele primer + 'n' padding + reverse complement of the common primer,
  # reverse complemented as a whole when the orientation is 'reverse'.
  def build_product(allele_primer)
    common = common_primer
    nlen = product_size - allele_primer.size - common.size
    product = allele_primer + ('n' * nlen) + Primer3Record.reverse_complement_string(common)
    product = Primer3Record.reverse_complement_string(product) if orientation == 'reverse'
    product
  end
end
|
401
|
+
|
402
|
+
# One record of primer3 output: the raw KEY=value properties plus the
# fields parsed from the SEQUENCE_ID header and the primer pairs built
# from the properties. Records compare by descending #score.
class Primer3Record
  include Comparable
  attr_accessor :properties, :polymorphism
  attr_accessor :scores

  def initialize
    @properties = Hash.new
    @scores = Hash.new
    @scores[:chromosome_specific] = 1000
    @scores[:chromosome_semispecific] = 100
    @scores[:chromosome_nonspecific] = 0
    @scores[:exon] = 50
  end

  # The pair with the smallest product size (the first one on ties); memoised.
  def best_pair
    return @best_pair if @best_pair

    @best_pair = @primerPairs.min_by { |pair| pair.size }
  end

  # The PRIMER_ERROR property, or nil when absent.
  def primer_error
    @properties[:primer_error] || nil
  end

  # Exposes every property as a reader method. An unknown property is
  # reported on stderr and read as an empty string.
  def method_missing(method_name, *args)
    value = @properties[method_name]
    return value if value

    $stderr.puts "Missing #{method_name}"
    $stderr.puts @properties.inspect
    "" # if a property is missing, return blank.
  end

  # Keeps respond_to? in line with the properties served by method_missing.
  def respond_to_missing?(method_name, include_private = false)
    (@properties && @properties[method_name]) ? true : super
  end

  # Tm of the returned left primer whose sequence equals +primer+, or nil.
  def find_left_tm(primer)
    size.times do |i|
      seq_prop = "primer_left_#{i}_sequence".to_sym
      temp_property = "primer_left_#{i}_tm".to_sym
      return @properties[temp_property] if @properties[seq_prop] == primer
    end
    nil
  end

  # Score for the record type, plus the exon bonus, minus the product length.
  def score
    ret = 0
    ret += @scores[type]
    ret += @scores[:exon] if exon?
    ret -= product_length
    ret
  end

  # Reversed on purpose: the record with the higher score sorts first.
  def <=>(anOther)
    anOther.score <=> score
  end

  # "start,length" -> [start, length] as integers.
  def parse_coordinates(str)
    coords = str.split(',')
    coords[0] = coords[0].to_i
    coords[1] = coords[1].to_i
    coords
  end

  def left_coordinates
    @left_coordinates = best_pair.left.coordinates
  end

  # Coordinates of the right primer with the first element moved to
  # (first - length + 1). Works on a copy so the coordinates stored in the
  # primer object are left untouched.
  def right_coordinates
    unless @right_coordinates
      coords = best_pair.right.coordinates.dup
      coords[0] = coords[0] - coords[1] + 1
      @right_coordinates = coords
    end
    @right_coordinates
  end

  def left_primer
    @left_primer = best_pair.left.sequence
  end

  # Copy of the left primer with its last base switched to the other
  # allele of +snp+ (alleles are reverse complemented for :reverse records).
  #
  # @raise [Primer3Exception] on an unknown orientation or when the last
  #   base is neither allele
  def left_primer_snp(snp)
    tmp_primer = String.new(left_primer)
    case self.orientation
    when :forward
      base_original = snp.original
      base_snp = snp.snp
    when :reverse
      base_original = Primer3Record.reverse_complement_string(snp.original)
      base_snp = Primer3Record.reverse_complement_string(snp.snp)
    else
      raise Primer3Exception, "#{self.orientation} is not a valid orientation"
    end

    if tmp_primer[-1] == base_original
      tmp_primer[-1] = base_snp
    elsif tmp_primer[-1] == base_snp
      tmp_primer[-1] = base_original
    else
      raise Primer3Exception, "#{tmp_primer} doesnt end in a base in the SNP #{snp.to_s}"
    end
    tmp_primer
  end

  # Slice of this record's template at +coordinates+ ([start, length]);
  # the template is reverse complemented first when the orientations differ.
  def left_primer_with_coordinates(coordinates, other_orientation)
    seq = self.sequence_template
    seq = Primer3Record.reverse_complement_string(seq) if self.orientation != other_orientation
    seq[coordinates[0], coordinates[1]]
  end

  # Reverse complement, including IUPAC ambiguity codes; case is preserved.
  def self.reverse_complement_string(sequenc_str)
    sequenc_str.tr('atgcrymkdhvbswnATGCRYMKDHVBSWN', 'tacgyrkmhdbvswnTACGYRKMHDBVSWN').reverse
  end

  # Right primer cut from the template and reverse complemented.
  # Computed once; the original reverse complemented the cached value
  # again on every call, so repeated calls alternated between strands.
  def right_primer_delete
    unless @right_primer
      slice = self.sequence_template[right_coordinates[0], right_coordinates[1]]
      @right_primer = Primer3Record.reverse_complement_string(slice)
    end
    @right_primer
  end

  def right_primer
    best_pair.right.sequence
  end

  def product_length
    best_pair.size
  end

  def snp
    return @snp if @snp

    parse_header
    @snp
  end

  # Splits SEQUENCE_ID on spaces into, in order: snp, line, type, in,
  # polymorphism, chromosome, orientation.
  # NOTE(review): an older example header found next to this method has
  # only six fields; with six fields orientation is nil and to_sym fails.
  def parse_header
    @snp, @line, @type, @in, @polymorphism, @chromosome, @orientation = self.sequence_id.split(" ")

    @type = @type.to_sym
    if @in
      @in = @in.to_sym == :exon
      # exon? reads @exon, which was only ever set to false before, so the
      # exon bonus in #score could never apply.
      @exon = @in
    else
      @exon = false
    end

    @homoeologous = @polymorphism.to_sym == :homoeologous
    @parsed = true
    @orientation = @orientation.to_sym
  end

  def orientation
    parse_header unless @parsed
    @orientation
  end

  def chromosome
    parse_header unless @parsed
    @chromosome
  end

  def homoeologous?
    parse_header unless @parsed
    @homoeologous
  end

  def type
    parse_header unless @parsed
    @type
  end

  def exon?
    parse_header unless @parsed
    @exon
  end

  def line
    parse_header unless @parsed
    @line
  end

  # Number of primer pairs returned (PRIMER_PAIR_NUM_RETURNED).
  def size
    @properties[:primer_pair_num_returned].to_i
  end

  # Builds one PrimerPair per returned pair.
  def parse_blocks
    @primerPairs = Array.new
    size.times do |i|
      @primerPairs << PrimerPair.new(self, i)
    end
  end

  # Reads a primer3 output file and yields one record per "=" terminated
  # block. Keys are downcased symbols; the value is everything after the
  # first "=" (it may contain further "=" characters). Blank lines are skipped.
  #
  # @param filename [String] primer3 output file
  # @param scores [Hash, nil] replaces the default scores of every record
  def self.parse_file(filename, scores: nil)
    File.open(filename) do |f|
      record = Primer3Record.new
      record.scores = scores if scores
      f.each_line do |line|
        line.chomp!
        if line == "="
          record.parse_blocks
          yield record
          record = Primer3Record.new
          record.scores = scores if scores
        else
          key, value = line.split("=", 2)
          next if key.nil?

          record.properties[key.downcase.to_sym] = value.to_s
        end
      end
    end
  end
end
|
664
|
+
|
665
|
+
|
666
|
+
# One primer of a pair. Its attributes are stored under string keys via
# #set_value and read back through method calls of the same name.
class Primer
  attr_accessor :pair

  def initialize
    @values = Hash.new
  end

  # Returns the stored value named like the called method.
  #
  # @raise [NoMethodError] when nothing (or nil) is stored under that name
  def method_missing(m, *args, &block)
    value = @values[m.to_s]
    return value unless value.nil?

    raise NoMethodError, "There's no method called #{m}, available: #{@values.keys.to_s}."
  end

  # Keeps respond_to? in line with the values served by method_missing.
  def respond_to_missing?(m, include_private = false)
    !@values[m.to_s].nil? || super
  end

  def set_value(key, value)
    @values[key] = value
  end
end
|
684
|
+
|
685
|
+
# One primer pair of a primer3 record: a left and a right Primer plus the
# pair-level values, all taken from the record properties whose third
# "_"-separated token is the pair index. Pairs compare by penalty.
class PrimerPair
  include Comparable
  attr_reader :record
  attr_reader :left, :right

  # "start,length" -> [start, length] as integers.
  def parse_coordinates(str)
    coords = str.split(',')
    coords[0] = coords[0].to_i
    coords[1] = coords[1].to_i
    coords
  end

  # Product size as an integer.
  def size
    product_size.to_i
  end

  def <=>(anOther)
    penalty.to_f <=> anOther.penalty.to_f
  end

  # @param record [#properties, #size] parsed primer3 record
  # @param index [Integer] zero-based pair number within the record
  # @raise [Primer3Exception] when +index+ is not below record.size or
  #   when no left or no right property exists for that index
  def initialize(record, index)
    raise Primer3Exception, "Index #{index} is greater than the number of records" unless index < record.size

    @record = record
    @left = Primer.new
    @right = Primer.new
    @values = Hash.new

    @left.set_value("added", false)
    @right.set_value("added", false)
    @left.pair = self
    @right.pair = self

    index_s = index.to_s
    record.properties.each do |key, value|
      tokens = key.to_s.split("_")
      store_property(tokens, value) if tokens.size > 2 && tokens[2] == index_s
    end

    if @left.added == false || @right.added == false
      raise Primer3Exception, "The pair is not complete (l:#{left.added}, r:#{right.added})"
    end
  end

  # Reads pair-level values (e.g. penalty, product_size) by name.
  #
  # @raise [NoMethodError] when no truthy value is stored under that name
  def method_missing(m, *args, &block)
    value = @values[m.to_s]
    return value if value

    raise NoMethodError, "There's no method called #{m}. Available methods: #{@values.keys.to_s}"
  end

  # Keeps respond_to? in line with the values served by method_missing.
  def respond_to_missing?(m, include_private = false)
    (@values && @values[m.to_s]) ? true : super
  end

  private

  # Files one property under the left primer, the right primer or the
  # pair itself, depending on the second token of its key.
  def store_property(tokens, value)
    primer = nil
    primer = @right if tokens[1] == "right"
    primer = @left if tokens[1] == "left"
    n_key = tokens[3..6].join("_")

    if primer.nil?
      @values[n_key] = value
      return
    end

    primer.set_value("added", true)
    if tokens.size == 3
      # A key with no suffix (e.g. primer_left_0) holds "start,length".
      primer.set_value("coordinates", parse_coordinates(value))
    else
      # Everything except the sequence is stored as a float.
      to_add = tokens[3] == "sequence" ? value : value.to_f
      primer.set_value(n_key, to_add)
    end
  end
end
|
753
|
+
|
754
|
+
# Collects SNP objects keyed by their +to_s+ value, attaches Primer3 records
# to them, and renders the resulting primers as text.
class KASPContainer
  attr_accessor :line_1, :line_2
  attr_accessor :snp_hash
  attr_accessor :scores

  # Registers every SNP yielded by SNP.parse_file under its +to_s+ key and
  # assigns this container's line_1 / line_2 to it.
  #
  # @param filename path handed to SNP.parse_file
  def add_snp_file(filename)
    @snp_hash ||= {}
    SNP.parse_file(filename) do |snp|
      # The key is computed before line_1 / line_2 are assigned.
      @snp_hash[snp.to_s] = snp
      snp.line_1 = @line_1
      snp.line_2 = @line_2
    end
  end

  # Builds a new SNP from the fields of +snp_in+, registers it under its
  # +to_s+ key and returns it.
  #
  # @param snp_in source object; must respond to gene, original, errors,
  #   position, snp, repetitive, hit_count, snp_type and exon_list
  # @return the newly created SNP
  def add_snp(snp_in)
    #TODO: Here we need to also copy the errors that will be printed.
    @snp_hash ||= {}
    snp = SNP.new
    snp.gene = snp_in.gene
    snp.original = snp_in.original
    snp.primer3_errors = Set.new snp_in.errors
    snp.position = snp_in.position
    snp.snp = snp_in.snp
    snp.repetitive = snp_in.repetitive
    snp.hit_count = snp_in.hit_count
    snp.snp_type = snp_in.snp_type
    snp.line_1 = @line_1
    snp.line_2 = @line_2
    snp.snp_from = snp_in
    # One list of target-region strings per exon_list entry.
    snp.regions = snp_in.exon_list.values.map do |exons|
      exons.map { |exon| exon.target_region.to_s }
    end
    @snp_hash[snp.to_s] = snp
    snp
  end

  # Parses a Primer3 output file and adds each record to the SNP registered
  # under "<record.snp>:<record.chromosome>".
  #
  # NOTE(review): if no SNP is registered under that key (or no SNP was ever
  # added), this raises NoMethodError on nil; confirm whether such records
  # should be skipped or reported instead.
  #
  # @param filename path handed to Primer3Record.parse_file
  def add_primers_file(filename)
    Primer3Record.parse_file(filename, scores: @scores) do |primer3record|
      current_snp = @snp_hash["#{primer3record.snp.to_s}:#{primer3record.chromosome}"]
      current_snp.add_record(primer3record)
    end
  end

  # Concatenates +print_primers+ of every registered SNP, one per line.
  #
  # @return [String] empty when no SNP has been added
  def print_primers
    str = ""
    return str unless snp_hash

    snp_hash.each_value { |snp| str << snp.print_primers << "\n" }
    str
  end

  # Renders three tab-separated lines (first, second and common primer) for
  # every SNP whose +found_primers?+ is true. +tail_a+ is prepended to the
  # first primer and +tail_b+ to the second.
  #
  # @param tail_a [String] prefix for the first primer
  # @param tail_b [String] prefix for the second primer
  # @return [String] empty when no SNP has been added or none has primers
  def print_primers_with_tails(tail_a: "GAAGGTCGGAGTCAACGGATT", tail_b: "GAAGGTGACCAAGTTCATGCT")
    str = ""
    return str unless snp_hash

    snp_hash.each_value do |snp|
      next unless snp.found_primers?

      str << snp.gene << snp.original << "_1st\t" << tail_a << snp.first_primer << "\n"
      str << snp.gene << snp.snp << "_2nd\t" << tail_b << snp.second_primer << "\n"
      str << snp.gene << "_common\t" << snp.common_primer << "\n"
    end
    str
  end
end
|
819
|
+
end
|
820
|
+
|