bio-polymarker 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.travis.yml +24 -0
- data/Gemfile +23 -0
- data/README.md +205 -0
- data/Rakefile +61 -0
- data/SECURITY.md +16 -0
- data/VERSION +1 -0
- data/bin/bfr.rb +128 -0
- data/bin/blast_triads.rb +166 -0
- data/bin/blast_triads_promoters.rb +192 -0
- data/bin/count_variations.rb +36 -0
- data/bin/filter_blat_by_target_coverage.rb +69 -0
- data/bin/filter_exonerate_by_identity.rb +38 -0
- data/bin/find_best_blat_hit.rb +33 -0
- data/bin/find_best_exonerate.rb +17 -0
- data/bin/get_longest_hsp_blastx_triads.rb +66 -0
- data/bin/hexaploid_primers.rb +168 -0
- data/bin/homokaryot_primers.rb +183 -0
- data/bin/mafft_triads.rb +120 -0
- data/bin/mafft_triads_promoters.rb +403 -0
- data/bin/map_markers_to_contigs.rb +66 -0
- data/bin/marker_to_vcf.rb +241 -0
- data/bin/markers_in_region.rb +42 -0
- data/bin/mask_triads.rb +169 -0
- data/bin/polymarker.rb +410 -0
- data/bin/polymarker_capillary.rb +443 -0
- data/bin/polymarker_deletions.rb +350 -0
- data/bin/snp_position_to_polymarker.rb +101 -0
- data/bin/snps_between_bams.rb +107 -0
- data/bin/tag_stats.rb +75 -0
- data/bin/vcfLineToTable.rb +56 -0
- data/bin/vcfToPolyMarker.rb +82 -0
- data/bio-polymarker.gemspec +227 -0
- data/conf/defaults.rb +1 -0
- data/conf/primer3_config/dangle.dh +128 -0
- data/conf/primer3_config/dangle.ds +128 -0
- data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
- data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
- data/conf/primer3_config/interpretations/loops_i.dh +34 -0
- data/conf/primer3_config/interpretations/loops_i.ds +31 -0
- data/conf/primer3_config/interpretations/stack_i.dh +257 -0
- data/conf/primer3_config/interpretations/stack_i.ds +256 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
- data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
- data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
- data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
- data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
- data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
- data/conf/primer3_config/loops.dh +30 -0
- data/conf/primer3_config/loops.ds +30 -0
- data/conf/primer3_config/stack.dh +256 -0
- data/conf/primer3_config/stack.ds +256 -0
- data/conf/primer3_config/stackmm.dh +256 -0
- data/conf/primer3_config/stackmm.ds +256 -0
- data/conf/primer3_config/tetraloop.dh +77 -0
- data/conf/primer3_config/tetraloop.ds +77 -0
- data/conf/primer3_config/triloop.dh +16 -0
- data/conf/primer3_config/triloop.ds +16 -0
- data/conf/primer3_config/tstack.dh +256 -0
- data/conf/primer3_config/tstack2.dh +256 -0
- data/conf/primer3_config/tstack2.ds +256 -0
- data/conf/primer3_config/tstack_tm_inf.ds +256 -0
- data/lib/bio/BFRTools.rb +465 -0
- data/lib/bio/BIOExtensions.rb +153 -0
- data/lib/bio/PolyploidTools/ChromosomeArm.rb +63 -0
- data/lib/bio/PolyploidTools/ExonContainer.rb +245 -0
- data/lib/bio/PolyploidTools/Marker.rb +175 -0
- data/lib/bio/PolyploidTools/Mask.rb +116 -0
- data/lib/bio/PolyploidTools/NoSNPSequence.rb +292 -0
- data/lib/bio/PolyploidTools/PrimerRegion.rb +30 -0
- data/lib/bio/PolyploidTools/SNP.rb +804 -0
- data/lib/bio/PolyploidTools/SNPMutant.rb +86 -0
- data/lib/bio/PolyploidTools/SNPSequence.rb +55 -0
- data/lib/bio/db/blast.rb +114 -0
- data/lib/bio/db/exonerate.rb +333 -0
- data/lib/bio/db/primer3.rb +820 -0
- data/lib/bio-polymarker.rb +28 -0
- data/test/data/7B_amplicon_test.fa +12 -0
- data/test/data/7B_amplicon_test.fa.fai +1 -0
- data/test/data/7B_amplicon_test_reference.fa +110 -0
- data/test/data/7B_amplicon_test_reference.fa.fai +3 -0
- data/test/data/7B_marker_test.txt +1 -0
- data/test/data/BS00068396_51.fa +2 -0
- data/test/data/BS00068396_51_blast.tab +4 -0
- data/test/data/BS00068396_51_contigs.aln +1412 -0
- data/test/data/BS00068396_51_contigs.dnd +7 -0
- data/test/data/BS00068396_51_contigs.fa +8 -0
- data/test/data/BS00068396_51_contigs.fa.fai +4 -0
- data/test/data/BS00068396_51_contigs.fa.nhr +0 -0
- data/test/data/BS00068396_51_contigs.fa.nin +0 -0
- data/test/data/BS00068396_51_contigs.fa.nsq +0 -0
- data/test/data/BS00068396_51_contigs.nhr +0 -0
- data/test/data/BS00068396_51_contigs.nin +0 -0
- data/test/data/BS00068396_51_contigs.nsq +0 -0
- data/test/data/BS00068396_51_exonerate.tab +6 -0
- data/test/data/BS00068396_51_for_polymarker.txt +1 -0
- data/test/data/BS00068396_51_genes.txt +14 -0
- data/test/data/IWGSC_CSS_1AL_scaff_1455974.fa +112 -0
- data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa +2304 -0
- data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
- data/test/data/LIB1716.bam +0 -0
- data/test/data/LIB1716.bam.bai +0 -0
- data/test/data/LIB1719.bam +0 -0
- data/test/data/LIB1719.bam.bai +0 -0
- data/test/data/LIB1721.bam +0 -0
- data/test/data/LIB1721.bam.bai +0 -0
- data/test/data/LIB1722.bam +0 -0
- data/test/data/LIB1722.bam.bai +0 -0
- data/test/data/PST130_7067.csv +1 -0
- data/test/data/PST130_7067.fa +2 -0
- data/test/data/PST130_7067.fa.fai +1 -0
- data/test/data/PST130_7067.fa.ndb +0 -0
- data/test/data/PST130_7067.fa.nhr +0 -0
- data/test/data/PST130_7067.fa.nin +0 -0
- data/test/data/PST130_7067.fa.not +0 -0
- data/test/data/PST130_7067.fa.nsq +0 -0
- data/test/data/PST130_7067.fa.ntf +0 -0
- data/test/data/PST130_7067.fa.nto +0 -0
- data/test/data/PST130_reverse_primer.csv +1 -0
- data/test/data/S22380157.fa +16 -0
- data/test/data/S22380157.fa.fai +1 -0
- data/test/data/S22380157.vcf +67 -0
- data/test/data/S58861868/LIB1716.bam +0 -0
- data/test/data/S58861868/LIB1716.sam +651 -0
- data/test/data/S58861868/LIB1719.bam +0 -0
- data/test/data/S58861868/LIB1719.sam +805 -0
- data/test/data/S58861868/LIB1721.bam +0 -0
- data/test/data/S58861868/LIB1721.sam +1790 -0
- data/test/data/S58861868/LIB1722.bam +0 -0
- data/test/data/S58861868/LIB1722.sam +1271 -0
- data/test/data/S58861868/S58861868.fa +16 -0
- data/test/data/S58861868/S58861868.fa.fai +1 -0
- data/test/data/S58861868/S58861868.vcf +76 -0
- data/test/data/S58861868/header.txt +9 -0
- data/test/data/S58861868/merged.bam +0 -0
- data/test/data/S58861868/merged_reheader.bam +0 -0
- data/test/data/S58861868/merged_reheader.bam.bai +0 -0
- data/test/data/Test3Aspecific.csv +2 -0
- data/test/data/Test3Aspecific_contigs.fa +6 -0
- data/test/data/bfr_out_test.csv +5 -0
- data/test/data/chr1A_C1145499T/chr1A_C1145499T.csv +1 -0
- data/test/data/chr1A_G540414846C/chr1A_G540414846C.csv +1 -0
- data/test/data/chr1A_G540414846C/chr1A_G540414846C.fa +2 -0
- data/test/data/chr1A_T517634750C/chr1A_T517634750C.csv +1 -0
- data/test/data/chr2D_C112180134A/chr2D_C112180134A.csv +1 -0
- data/test/data/chr4D_C14473543T/chr4D_C14473543T.csv +1 -0
- data/test/data/chr4D_C14473543T/chr4D_C14473543T.fa +2 -0
- data/test/data/headerMergeed.txt +9 -0
- data/test/data/headerS2238015 +1 -0
- data/test/data/mergedLibs.bam +0 -0
- data/test/data/mergedLibsReheader.bam +0 -0
- data/test/data/mergedLibsSorted.bam +0 -0
- data/test/data/mergedLibsSorted.bam.bai +0 -0
- data/test/data/patological_cases5D.csv +1 -0
- data/test/data/primer_3_input_header_test +5 -0
- data/test/data/short_primer_design_test.csv +10 -0
- data/test/data/some_tests/some_tests.csv +201 -0
- data/test/data/test_from_mutant.csv +3 -0
- data/test/data/test_iselect.csv +196 -0
- data/test/data/test_iselect_reference.fa +1868 -0
- data/test/data/test_iselect_reference.fa.fai +934 -0
- data/test/data/test_primer3_error.csv +4 -0
- data/test/data/test_primer3_error_contigs.fa +10 -0
- data/test/test_bfr.rb +135 -0
- data/test/test_blast.rb +47 -0
- data/test/test_exon_container.rb +17 -0
- data/test/test_exonearate.rb +48 -0
- data/test/test_integration.rb +76 -0
- data/test/test_snp_parsing.rb +121 -0
- data/test/test_wrong_selection.sh +5 -0
- metadata +356 -0
@@ -0,0 +1,820 @@
|
|
1
|
+
require 'pp'
|
2
|
+
module Bio::DB::Primer3
|
3
|
+
class Primer3Exception < RuntimeError
|
4
|
+
end
|
5
|
+
|
6
|
+
# Loads primer3 preferences from a plain-text file of KEY=VALUE lines into
# +defaults+, overriding any entries that are already present.
#
# file     - path of the preferences file.
# defaults - Hash that is updated in place; keys are the downcased names as
#            Symbols, values are Strings (nil when a line carries no value).
#
# Blank lines are skipped; previously they raised NoMethodError because the
# split produced no key. Only the first "=" separates key from value, so a
# value that itself contains "=" is kept whole instead of being truncated.
#
# Returns the same +defaults+ Hash.
def self.read_primer_preferences(file, defaults)
  File.open(file) do |f|
    f.each_line do |line|
      line = line.chomp
      next if line.strip.empty?

      key, value = line.split("=", 2)
      # "KEY=" and "KEY" both used to store nil; keep that contract.
      value = nil if value && value.empty?
      defaults[key.downcase.to_sym] = value
    end
  end

  defaults
end
|
18
|
+
|
19
|
+
def self.prepare_input_file(file, opts2={})
|
20
|
+
opts = {
|
21
|
+
:primer_product_size_range => "50-150" ,
|
22
|
+
:primer_max_size => 25 ,
|
23
|
+
:primer_lib_ambiguity_codes_consensus => 1,
|
24
|
+
:primer_liberal_base => 1,
|
25
|
+
:primer_num_return => 5,
|
26
|
+
:primer_explain_flag => 1,
|
27
|
+
:primer_thermodynamic_parameters_path => File.expand_path(File.dirname(__FILE__) + '../../../../conf/primer3_config/') + '/'
|
28
|
+
}.merge(opts2)
|
29
|
+
|
30
|
+
opts.each do |key,value|
|
31
|
+
file.puts "#{key.to_s.upcase}=#{value}\n"
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def self.run(opts={})
|
36
|
+
puts "Primer3.run running..."
|
37
|
+
timeout = 600
|
38
|
+
f_in=opts[:in]
|
39
|
+
f_out=opts[:out]
|
40
|
+
timeout = opts[:timeout] if opts[:timeout]
|
41
|
+
opts.delete(:in)
|
42
|
+
opts.delete(:out)
|
43
|
+
primer_3_in = File.read(f_in)
|
44
|
+
status = systemu "primer3_core", 0=>primer_3_in, 1=>stdout='', 2=>stderr='' do |cid|
|
45
|
+
sleep timeout
|
46
|
+
Process.kill 9, cid
|
47
|
+
end
|
48
|
+
# $stderr.puts cmdline
|
49
|
+
if status.exitstatus == 0
|
50
|
+
File.open(f_out, 'w') { |f| f.write(stdout) }
|
51
|
+
else
|
52
|
+
raise Primer3Exception.new(), "Error running primer3. Command line was 'primer3_core'\nPrimer3 STDERR was:\n#{stderr}"
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
class SNP
|
57
|
+
|
58
|
+
attr_accessor :gene, :original, :position, :snp, :chromosome, :line_1, :line_2
|
59
|
+
attr_accessor :primer3_line_1, :primer3_line_2, :template_length
|
60
|
+
attr_accessor :primers_line_1, :primers_line_2
|
61
|
+
attr_accessor :used_contigs
|
62
|
+
attr_accessor :snp_from
|
63
|
+
attr_accessor :regions
|
64
|
+
attr_accessor :primer3_errors
|
65
|
+
attr_accessor :repetitive
|
66
|
+
attr_accessor :hit_count
|
67
|
+
attr_accessor :snp_type
|
68
|
+
|
69
|
+
def line_1_name
|
70
|
+
"#{gene}:#{position}#{original}>#{snp} #{line_1}}"
|
71
|
+
end
|
72
|
+
|
73
|
+
def initialize
|
74
|
+
@primers_line_1 = SortedSet.new
|
75
|
+
@primers_line_2 = SortedSet.new
|
76
|
+
@regions = SortedSet.new
|
77
|
+
@primer3_errors = Set.new
|
78
|
+
end
|
79
|
+
|
80
|
+
def line_2_name
|
81
|
+
"#{gene}:#{position}#{original}>#{snp} #{line_2}}"
|
82
|
+
end
|
83
|
+
|
84
|
+
def to_s
|
85
|
+
"#{gene}:#{original}#{position}#{snp}:#{snp_from.chromosome}"
|
86
|
+
end
|
87
|
+
|
88
|
+
def find_left_primer_temp(primer)
|
89
|
+
primers_line_1.each do |pr|
|
90
|
+
return pr.find_left_tm(primer) if pr.find_left_tm(primer)
|
91
|
+
end
|
92
|
+
primers_line_2.each do |pr|
|
93
|
+
return pr.find_left_tm(primer) if pr.find_left_tm(primer)
|
94
|
+
end
|
95
|
+
return "NA"
|
96
|
+
end
|
97
|
+
|
98
|
+
|
99
|
+
def find_primer_pair_first
|
100
|
+
primers_line_1.each do |pr|
|
101
|
+
primer = pr.left_primer_snp(self)
|
102
|
+
return pr if find_left_primer_temp(primer) != "NA"
|
103
|
+
end
|
104
|
+
nil
|
105
|
+
end
|
106
|
+
|
107
|
+
def find_primer_pair_second
|
108
|
+
primers_line_2.each do |pr|
|
109
|
+
primer = pr.left_primer_snp(self)
|
110
|
+
return pr if find_left_primer_temp(primer) != "NA"
|
111
|
+
end
|
112
|
+
nil
|
113
|
+
end
|
114
|
+
|
115
|
+
def values
|
116
|
+
return @values if @values
|
117
|
+
left_start = 0
|
118
|
+
left_end = 0
|
119
|
+
right_start = 0
|
120
|
+
right_end = 0
|
121
|
+
total_columns_before_messages=17
|
122
|
+
#puts "Values in primer3"
|
123
|
+
#puts snp_from.inspect
|
124
|
+
@values = Array.new
|
125
|
+
#@values << "#{gene},,#{template_length},"
|
126
|
+
@values << gene
|
127
|
+
@values << "#{original}#{position}#{snp}"
|
128
|
+
@values << template_length
|
129
|
+
@values << snp_from.chromosome
|
130
|
+
@values << regions.size
|
131
|
+
@values << regions.join("|")
|
132
|
+
@values << snp_type
|
133
|
+
if primer3_line_1 and primer3_line_2
|
134
|
+
#Block that searches both if both pairs have a TM
|
135
|
+
primer_1 = primer3_line_1.left_primer_with_coordinates(primer3_line_2.left_coordinates, primer3_line_2.orientation)
|
136
|
+
primer_1_tm = find_left_primer_temp(primer_1)
|
137
|
+
|
138
|
+
primer_2 = primer3_line_2.left_primer_with_coordinates(primer3_line_1.left_coordinates, primer3_line_1.orientation)
|
139
|
+
primer_2_tm = find_left_primer_temp(primer_2)
|
140
|
+
|
141
|
+
if primer3_line_1 < primer3_line_2 and primer_2_tm != "NA"
|
142
|
+
@values << primer3_line_1.left_primer
|
143
|
+
@values << primer_2
|
144
|
+
@values << primer3_line_1.right_primer
|
145
|
+
@values << primer3_line_1.type.to_s
|
146
|
+
@values << primer3_line_1.orientation.to_s
|
147
|
+
@values << primer3_line_1.best_pair.left.tm
|
148
|
+
@values << primer_2_tm
|
149
|
+
@values << primer3_line_1.best_pair.right.tm
|
150
|
+
@values << "first"
|
151
|
+
@values << primer3_line_1.best_pair.product_size
|
152
|
+
elsif primer_1_tm != "NA"
|
153
|
+
@values << primer_1
|
154
|
+
@values << primer3_line_2.left_primer
|
155
|
+
@values << primer3_line_2.right_primer
|
156
|
+
@values << primer3_line_2.type.to_s
|
157
|
+
@values << primer3_line_2.orientation.to_s
|
158
|
+
@values << primer_1_tm
|
159
|
+
@values << primer3_line_2.best_pair.left.tm
|
160
|
+
@values << primer3_line_2.best_pair.right.tm
|
161
|
+
@values << "second"
|
162
|
+
@values << primer3_line_2.best_pair.product_size
|
163
|
+
else
|
164
|
+
|
165
|
+
first_candidate = find_primer_pair_first
|
166
|
+
second_candidate = find_primer_pair_second
|
167
|
+
|
168
|
+
if first_candidate
|
169
|
+
primer_2 = primer3_line_2.left_primer_with_coordinates(first_candidate.left_coordinates, first_candidate.orientation)
|
170
|
+
primer_2_tm = find_left_primer_temp(primer_2)
|
171
|
+
end
|
172
|
+
if second_candidate
|
173
|
+
#puts "input to search #{first_candidate.left_coordinates}"
|
174
|
+
primer_1 = primer3_line_1.left_primer_with_coordinates(second_candidate.left_coordinates, second_candidate.orientation)
|
175
|
+
primer_1_tm = find_left_primer_temp(primer_1)
|
176
|
+
#puts "In the other funky if #{primer_2}"
|
177
|
+
end
|
178
|
+
|
179
|
+
if first_candidate and second_candidate and first_candidate < second_candidate
|
180
|
+
#puts "A"
|
181
|
+
@values << first_candidate.left_primer
|
182
|
+
@values << primer_2
|
183
|
+
@values << first_candidate.right_primer
|
184
|
+
@values << first_candidate.type.to_s
|
185
|
+
@values << first_candidate.orientation.to_s
|
186
|
+
@values << first_candidate.best_pair.left.tm
|
187
|
+
@values << primer_2_tm
|
188
|
+
@values << first_candidate.best_pair.right.tm
|
189
|
+
@values << "first-"
|
190
|
+
@values << first_candidate.best_pair.product_size
|
191
|
+
elsif second_candidate
|
192
|
+
#puts "B"
|
193
|
+
@values << primer_1
|
194
|
+
@values << second_candidate.left_primer
|
195
|
+
@values << second_candidate.right_primer
|
196
|
+
@values << second_candidate.type.to_s
|
197
|
+
@values << second_candidate.orientation.to_s
|
198
|
+
@values << primer_1_tm
|
199
|
+
@values << second_candidate.best_pair.left.tm
|
200
|
+
@values << second_candidate.best_pair.right.tm
|
201
|
+
@values << "second-"
|
202
|
+
@values << second_candidate.best_pair.product_size
|
203
|
+
elsif first_candidate
|
204
|
+
#puts "C"
|
205
|
+
@values << first_candidate.left_primer
|
206
|
+
@values << primer_2
|
207
|
+
@values << first_candidate.right_primer
|
208
|
+
@values << first_candidate.type.to_s
|
209
|
+
@values << first_candidate.orientation.to_s
|
210
|
+
@values << primer_2_tm
|
211
|
+
@values << first_candidate.best_pair.left.tm
|
212
|
+
@values << first_candidate.best_pair.right.tm
|
213
|
+
@values << "first/"
|
214
|
+
@values << first_candidate.best_pair.product_size
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
elsif primer3_line_1
|
219
|
+
@values << primer3_line_1.left_primer
|
220
|
+
@values << primer3_line_1.left_primer_snp(self)
|
221
|
+
@values << primer3_line_1.right_primer
|
222
|
+
@values << primer3_line_1.type.to_s
|
223
|
+
@values << primer3_line_1.orientation.to_s
|
224
|
+
@values << primer3_line_1.best_pair.left.tm
|
225
|
+
@values << "NA"
|
226
|
+
@values << primer3_line_1.best_pair.right.tm
|
227
|
+
|
228
|
+
@values << "first+"
|
229
|
+
@values << primer3_line_1.best_pair.product_size
|
230
|
+
elsif primer3_line_2
|
231
|
+
@values << primer3_line_2.left_primer_snp(self)
|
232
|
+
@values << primer3_line_2.left_primer
|
233
|
+
@values << primer3_line_2.right_primer
|
234
|
+
@values << primer3_line_2.type.to_s
|
235
|
+
@values << primer3_line_2.orientation.to_s
|
236
|
+
@values << "NA"
|
237
|
+
@values << primer3_line_2.best_pair.left.tm
|
238
|
+
@values << primer3_line_2.best_pair.right.tm
|
239
|
+
@values << "second+"
|
240
|
+
@values << primer3_line_2.best_pair.product_size
|
241
|
+
|
242
|
+
end
|
243
|
+
if @values.size < total_columns_before_messages
|
244
|
+
@values[total_columns_before_messages] = primer3_errors.to_a.join("|")
|
245
|
+
else
|
246
|
+
@values << nil
|
247
|
+
end
|
248
|
+
return @values
|
249
|
+
end
|
250
|
+
|
251
|
+
def print_primers
|
252
|
+
to_print = values.dup
|
253
|
+
to_print << @repetitive
|
254
|
+
to_print << @hit_count
|
255
|
+
to_print.join(",")
|
256
|
+
end
|
257
|
+
|
258
|
+
def found_primers?
|
259
|
+
return self.values[7] && self.values[7] != nil
|
260
|
+
end
|
261
|
+
|
262
|
+
def first_primer
|
263
|
+
return self.values[7] if self.values[7] && self.values[7] != nil
|
264
|
+
return ""
|
265
|
+
end
|
266
|
+
|
267
|
+
def second_primer
|
268
|
+
return self.values[8] if self.values[8] && self.values[8] != nil
|
269
|
+
return ""
|
270
|
+
end
|
271
|
+
|
272
|
+
def common_primer
|
273
|
+
return self.values[9] if self.values[9] && self.values[9] != nil
|
274
|
+
return ""
|
275
|
+
end
|
276
|
+
|
277
|
+
def product_size
|
278
|
+
return self.values[16].to_i if self.values[16]&& self.values[16] != nil
|
279
|
+
return 0
|
280
|
+
end
|
281
|
+
|
282
|
+
def orientation
|
283
|
+
return self.values[11] if self.values[11]&& self.values[11] != nil
|
284
|
+
return 'unknown'
|
285
|
+
end
|
286
|
+
|
287
|
+
|
288
|
+
def first_product
|
289
|
+
left = first_primer
|
290
|
+
right = common_primer
|
291
|
+
nlen = product_size - left.size - right.size
|
292
|
+
product = left + ('n' * nlen) + Primer3Record.reverse_complement_string(right)
|
293
|
+
#puts "orientation: #{orientation}"
|
294
|
+
|
295
|
+
product = Primer3Record.reverse_complement_string(product) if orientation == 'reverse'
|
296
|
+
|
297
|
+
product
|
298
|
+
end
|
299
|
+
|
300
|
+
def second_product
|
301
|
+
left = second_primer
|
302
|
+
right = common_primer
|
303
|
+
nlen = product_size - left.size - right.size
|
304
|
+
product = left + ('n' * nlen) + Primer3Record.reverse_complement_string(right)
|
305
|
+
product = Primer3Record.reverse_complement_string(product) if orientation == 'reverse'
|
306
|
+
|
307
|
+
|
308
|
+
product
|
309
|
+
end
|
310
|
+
|
311
|
+
|
312
|
+
def realigned_primers_fasta
|
313
|
+
ret_str = ""
|
314
|
+
realigned_primers.each_pair do |name, seq|
|
315
|
+
ret_str << ">#{self.to_s}-#{name}\n#{seq}\n"
|
316
|
+
end
|
317
|
+
ret_str
|
318
|
+
end
|
319
|
+
|
320
|
+
|
321
|
+
def realigned_primers
|
322
|
+
|
323
|
+
return @realigned_primers if @realigned_primers
|
324
|
+
sequences_to_align = Hash.new
|
325
|
+
sequences_to_align["first_product"] = first_product
|
326
|
+
sequences_to_align["second_product"] = second_product
|
327
|
+
sequences_to_align.merge!(snp_from.surrounding_exon_sequences)
|
328
|
+
if sequences_to_align.size == 1
|
329
|
+
@realigned_primers = sequences_to_align
|
330
|
+
return @realigned_primers
|
331
|
+
end
|
332
|
+
options = ['--maxiterate', '1000', '--localpair', '--quiet']
|
333
|
+
mafft = Bio::MAFFT.new( "mafft" , options)
|
334
|
+
#puts "Before MAFT:#{sequences_to_align.inspect}"
|
335
|
+
report = mafft.query_align(sequences_to_align)
|
336
|
+
@realigned_primers = report.alignment
|
337
|
+
#puts "MAFFT: #{report.alignment.inspect}"
|
338
|
+
@realigned_primers
|
339
|
+
end
|
340
|
+
|
341
|
+
def self.parse(reg_str)
|
342
|
+
reg_str.chomp!
|
343
|
+
snp = SNP.new
|
344
|
+
snp.gene, snp.original, snp.position, snp.snp = reg_str.split(",")
|
345
|
+
snp.position = snp.position.to_i
|
346
|
+
snp.original.upcase!
|
347
|
+
snp.snp.upcase!
|
348
|
+
snp
|
349
|
+
end
|
350
|
+
|
351
|
+
def self.parse_file(filename)
|
352
|
+
File.open(filename) do | f |
|
353
|
+
f.each_line do | line |
|
354
|
+
snp = SNP.parse(line)
|
355
|
+
if snp.position > 0
|
356
|
+
yield snp
|
357
|
+
end
|
358
|
+
end
|
359
|
+
end
|
360
|
+
end
|
361
|
+
|
362
|
+
|
363
|
+
# Registers one parsed primer3 record for this SNP.
#
# primer3record - object responding to sequence_template, primer_error, line,
#                 orientation, type, primer_left_num_returned and the three
#                 primer_*_explain readers (a Primer3Record in this file).
#
# Effects:
# * @template_length is set from the record's template.
# * If the record carries a primer_error, it is stored in primer3_errors and
#   nothing else happens.
# * The template is remembered per line (@line_1_template / @line_2_template).
# * If primer3 returned at least one left primer, the record is added to the
#   matching primers_line_N collection, and it replaces @primer3_line_N when
#   none is stored yet or the stored one compares greater (Primer3Record#<=>).
# * Otherwise the primer3 "explain" strings are stored in primer3_errors with
#   commas replaced by semicolons, so they fit in one CSV column.
#
# Raises Primer3Exception when the record's line is neither @line_1 nor @line_2.
def add_record(primer3record)
  @primer3_errors = Set.new unless @primer3_errors
  @template_length = primer3record.sequence_template.size

  unless primer3record.primer_error.nil?
    primer3_errors << primer3record.primer_error
    return
  end

  if primer3record.line == @line_1
    @line_1_template = primer3record.sequence_template
  elsif primer3record.line == @line_2
    @line_2_template = primer3record.sequence_template
  else
    raise Primer3Exception, "#{primer3record.line} is not recognized (#{line_1}, #{line_2})"
  end

  if primer3record.primer_left_num_returned.to_i > 0
    if primer3record.line == @line_1
      primers_line_1 << primer3record
      @primer3_line_1 = primer3record if !@primer3_line_1 || @primer3_line_1 > primer3record
    elsif primer3record.line == @line_2
      primers_line_2 << primer3record
      @primer3_line_2 = primer3record if !@primer3_line_2 || @primer3_line_2 > primer3record
    else
      raise Primer3Exception, "#{primer3record.line} is not recognized (#{line_1}, #{line_2})"
    end
  else
    # gsub, not gsub!: the bang form returns nil when there is no comma to
    # replace, which silently dropped the explanation from the message, and it
    # also modified the record's own property strings.
    orientation_and_type = "#{primer3record.orientation}#{primer3record.type}"
    primer3_errors << "#{primer3record.line}(#{primer3record.orientation}):#{primer3record.primer_left_explain.gsub(',', ';')}"
    primer3_errors << "common(#{orientation_and_type}):#{primer3record.primer_right_explain.gsub(',', ';')}"
    primer3_errors << "pair(#{orientation_and_type}):#{primer3record.primer_pair_explain.gsub(',', ';')}"
  end
end
|
400
|
+
end
|
401
|
+
|
402
|
+
class Primer3Record
|
403
|
+
include Comparable
|
404
|
+
attr_accessor :properties, :polymorphism
|
405
|
+
attr_accessor :scores
|
406
|
+
|
407
|
+
|
408
|
+
# Returns the primer pair with the smallest +size+ among @primerPairs and
# memoises it in @best_pair.
#
# When several pairs share the smallest size, the first one in @primerPairs is
# returned. With an empty @primerPairs the result is nil and is not cached, so
# the lookup is repeated on the next call.
def best_pair
  @best_pair ||= @primerPairs.min_by(&:size)
end
|
418
|
+
|
419
|
+
def primer_error
|
420
|
+
return @properties[:primer_error] if @properties[:primer_error]
|
421
|
+
return nil
|
422
|
+
end
|
423
|
+
|
424
|
+
def method_missing(method_name, *args)
|
425
|
+
return @properties[method_name] if @properties[method_name]
|
426
|
+
$stderr.puts "Missing #{method_name}"
|
427
|
+
$stderr.puts @properties.inspect
|
428
|
+
return "" #if a property is missing, return blank.
|
429
|
+
raise NoMethodError.new()
|
430
|
+
end
|
431
|
+
|
432
|
+
def find_left_tm(primer)
|
433
|
+
last = size - 1
|
434
|
+
(0..last).each do | i |
|
435
|
+
seq_prop = "primer_left_#{i}_sequence".to_sym
|
436
|
+
# $stderr.puts seq_prop
|
437
|
+
temp_property = "primer_left_#{i}_tm".to_sym
|
438
|
+
# $stderr.puts "comparing #{@properties[seq_prop] } == #{primer}"
|
439
|
+
return @properties[temp_property] if @properties[seq_prop] == primer
|
440
|
+
|
441
|
+
end
|
442
|
+
return nil
|
443
|
+
end
|
444
|
+
|
445
|
+
def score
|
446
|
+
ret = 0
|
447
|
+
ret += @scores[type]
|
448
|
+
ret += @scores[:exon] if exon?
|
449
|
+
ret -= product_length
|
450
|
+
ret
|
451
|
+
end
|
452
|
+
|
453
|
+
def <=>(anOther)
|
454
|
+
return anOther.score <=> score
|
455
|
+
end
|
456
|
+
|
457
|
+
def parse_coordinates(str)
|
458
|
+
coords = str.split(',')
|
459
|
+
coords[0] = coords[0].to_i
|
460
|
+
coords[1] = coords[1].to_i
|
461
|
+
coords
|
462
|
+
end
|
463
|
+
|
464
|
+
|
465
|
+
def left_coordinates
|
466
|
+
#@left_coordinates = parse_coordinates(self.primer_left_0) unless @left_coordinates
|
467
|
+
@left_coordinates = best_pair.left.coordinates
|
468
|
+
@left_coordinates
|
469
|
+
end
|
470
|
+
|
471
|
+
# Returns the coordinates of the best pair's right primer as [start, length],
# where start is rewritten as (original first value - length + 1).
#
# The result is memoised in @right_coordinates. The adjustment is made on a
# copy: previously the array owned by best_pair.right was edited in place, so
# that primer's own +coordinates+ changed as a side effect of calling this.
def right_coordinates
  unless @right_coordinates
    adjusted = best_pair.right.coordinates.dup
    adjusted[0] = adjusted[0] - adjusted[1] + 1
    @right_coordinates = adjusted
  end
  @right_coordinates
end
|
478
|
+
|
479
|
+
def left_primer
|
480
|
+
#@left_primer = self.sequence_template[left_coordinates[0],left_coordinates[1]] unless @left_primer
|
481
|
+
@left_primer = best_pair.left.sequence
|
482
|
+
@left_primer
|
483
|
+
end
|
484
|
+
|
485
|
+
def left_primer_snp(snp)
|
486
|
+
tmp_primer = String.new(left_primer)
|
487
|
+
if self.orientation == :forward
|
488
|
+
base_original = snp.original
|
489
|
+
base_snp = snp.snp
|
490
|
+
elsif self.orientation == :reverse
|
491
|
+
#puts self.inspect
|
492
|
+
base_original =Primer3Record.reverse_complement_string(snp.original )
|
493
|
+
base_snp = Primer3Record.reverse_complement_string(snp.snp)
|
494
|
+
else
|
495
|
+
raise Primer3Exception.new "#{self.orientation} is not a valid orientation"
|
496
|
+
end
|
497
|
+
|
498
|
+
#puts "#{snp.to_s} #{self.orientation} #{tmp_primer[-1] } #{base_original} #{base_snp}"
|
499
|
+
if tmp_primer[-1] == base_original
|
500
|
+
tmp_primer[-1] = base_snp
|
501
|
+
elsif tmp_primer[-1] == base_snp
|
502
|
+
tmp_primer[-1] = base_original
|
503
|
+
else
|
504
|
+
raise Primer3Exception.new "#{tmp_primer} doesnt end in a base in the SNP #{snp.to_s}"
|
505
|
+
end
|
506
|
+
#puts "tmp_primer: #{tmp_primer}"
|
507
|
+
return tmp_primer
|
508
|
+
end
|
509
|
+
|
510
|
+
def left_primer_with_coordinates(coordinates, other_orientation)
|
511
|
+
|
512
|
+
seq = self.sequence_template
|
513
|
+
seq = Primer3Record.reverse_complement_string(seq) if self.orientation != other_orientation
|
514
|
+
seq[coordinates[0],coordinates[1]]
|
515
|
+
end
|
516
|
+
|
517
|
+
def self.reverse_complement_string(sequenc_str)
|
518
|
+
complement = sequenc_str.tr('atgcrymkdhvbswnATGCRYMKDHVBSWN', 'tacgyrkmhdbvswnTACGYRKMHDBVSWN')
|
519
|
+
complement.reverse!
|
520
|
+
end
|
521
|
+
|
522
|
+
# Returns the reverse complement of the template slice described by
# right_coordinates ([start, length]), memoised in @right_primer.
#
# The reverse complement is computed once. Previously the cached value was
# reverse-complemented again on every call, so consecutive calls alternated
# between the two strands.
def right_primer_delete
  @right_primer ||= Primer3Record.reverse_complement_string(
    self.sequence_template[right_coordinates[0], right_coordinates[1]]
  )
end
|
527
|
+
|
528
|
+
def right_primer
|
529
|
+
return best_pair.right.sequence
|
530
|
+
end
|
531
|
+
|
532
|
+
def product_length
|
533
|
+
return best_pair.size
|
534
|
+
end
|
535
|
+
|
536
|
+
def initialize
|
537
|
+
@properties = Hash.new
|
538
|
+
@scores = Hash.new
|
539
|
+
@scores[:chromosome_specific] = 1000
|
540
|
+
@scores[:chromosome_semispecific] = 100
|
541
|
+
@scores[:chromosome_nonspecific] = 0
|
542
|
+
@scores[:exon] = 50
|
543
|
+
|
544
|
+
end
|
545
|
+
|
546
|
+
def snp
|
547
|
+
return @snp if @snp
|
548
|
+
parse_header
|
549
|
+
@snp
|
550
|
+
end
|
551
|
+
|
552
|
+
#CL3339Contig1:T509C AvocetS chromosome_specific exon 4D forward
|
553
|
+
# Splits +sequence_id+ on spaces and stores the fields, in order, as
# @snp, @line, @type, @in, @polymorphism, @chromosome and @orientation.
#
# After parsing:
# * @type and @orientation are Symbols.
# * @exon is true when the fourth field is "exon", otherwise false.
# * @homoeologous is true when the fifth field is "homoeologous".
# * @parsed is true, which the reader methods use to skip re-parsing.
#
# Seven fields are required. A six-field header (no chromosome) is not
# supported here; the earlier attempt to handle it was left disabled pending
# further testing, so such a header shifts the fields and raises
# NoMethodError when the missing orientation is converted.
def parse_header
  arr = self.sequence_id.split(" ")
  @snp, @line, @type, @in, @polymorphism, @chromosome, @orientation = arr

  @type = @type.to_sym
  if @in
    # exon? returns @exon, so the flag has to be stored there. It used to be
    # written to @in only, which left @exon unset and exon? never true.
    @exon = @in.to_sym == :exon
    @in = @exon # @in keeps holding the boolean, as before
  else
    @exon = false
  end

  @homoeologous = @polymorphism.to_sym == :homoeologous
  @parsed = true
  @orientation = @orientation.to_sym
end
|
582
|
+
|
583
|
+
def orientation
|
584
|
+
return @orientation if @parsed
|
585
|
+
parse_header
|
586
|
+
@orientation
|
587
|
+
end
|
588
|
+
|
589
|
+
def chromosome
|
590
|
+
return @chromosome if @parsed
|
591
|
+
parse_header
|
592
|
+
@chromosome
|
593
|
+
end
|
594
|
+
|
595
|
+
def homoeologous?
|
596
|
+
return @homoeologous if @parsed
|
597
|
+
parse_header
|
598
|
+
@homoeologous
|
599
|
+
end
|
600
|
+
|
601
|
+
def type
|
602
|
+
return @type if @parsed
|
603
|
+
parse_header
|
604
|
+
@type
|
605
|
+
end
|
606
|
+
|
607
|
+
def exon?
|
608
|
+
return @exon if @parsed
|
609
|
+
parse_header
|
610
|
+
@exon
|
611
|
+
end
|
612
|
+
|
613
|
+
def line
|
614
|
+
return @line if @parsed
|
615
|
+
parse_header
|
616
|
+
@line
|
617
|
+
end
|
618
|
+
|
619
|
+
def size
|
620
|
+
@properties[:primer_pair_num_returned].to_i
|
621
|
+
end
|
622
|
+
|
623
|
+
def parse_blocks
|
624
|
+
total_blocks = size - 1
|
625
|
+
@primerPairs = Array.new
|
626
|
+
for i in 0..total_blocks
|
627
|
+
@primerPairs << PrimerPair.new(self, i)
|
628
|
+
end
|
629
|
+
|
630
|
+
end
|
631
|
+
|
632
|
+
# Reads a primer3 output file (Boulder-IO style: KEY=VALUE lines, with a line
# holding only "=" ending each record) and yields one Primer3Record per
# terminated record.
#
# filename - path of the primer3 output file.
# scores   - optional Hash assigned to every record's +scores+; when nil the
#            record keeps the scores set by its constructor.
#
# For each record the properties are stored under the downcased key as a
# Symbol, and parse_blocks is called before the record is yielded. Lines after
# the last "=" terminator are collected but never yielded.
#
# The key is separated from the value at the first "=" only, so any further
# "=" characters (including trailing ones, which the old token re-join lost)
# stay in the value. Empty lines are skipped instead of raising NoMethodError.
# A line without "=" stores an empty string, as before.
def self.parse_file(filename, scores: nil)
  new_record = lambda do
    fresh = Primer3Record.new
    fresh.scores = scores if scores
    fresh
  end

  File.open(filename) do |f|
    record = new_record.call
    f.each_line do |line|
      line.chomp!
      if line == "="
        record.parse_blocks
        yield record
        record = new_record.call
      elsif !line.empty?
        key, value = line.split("=", 2)
        record.properties[key.downcase.to_sym] = value || ""
      end
    end
  end
end
|
663
|
+
end
|
664
|
+
|
665
|
+
|
666
|
+
# One primer (left or right) of a primer pair.
#
# Attributes are kept in a String-keyed Hash filled through set_value and are
# read back through dynamically resolved reader methods, e.g. a value stored
# under "tm" is read with +primer.tm+.
class Primer
  # The pair object this primer belongs to; assigned by the owner.
  attr_accessor :pair

  def initialize
    @values = Hash.new
  end

  # Resolves +primer.name+ to the value stored under "name".
  #
  # A stored +false+ is returned as-is; only a missing (or nil) value counts
  # as "no such method".
  #
  # Raises NoMethodError listing the available keys when nothing is stored.
  def method_missing(m, *args, &block)
    stored = @values[m.to_s]
    return stored unless stored.nil?

    raise NoMethodError, "There's no method called #{m}, available: #{@values.keys.to_s}."
  end

  # Keeps respond_to? consistent with method_missing: true exactly for the
  # names that currently hold a non-nil value.
  def respond_to_missing?(name, include_private = false)
    !@values[name.to_s].nil? || super
  end

  # Stores +value+ under +key+. Use String keys, since the dynamic readers
  # look values up by the method name converted with to_s.
  def set_value(key, value)
    @values[key] = value
  end
end
|
684
|
+
|
685
|
+
# A left/right pair of primers built from the properties of a record.
#
# Property keys are split on "_"; a key belongs to this pair when it has at
# least three tokens and the third token equals the pair index. The second
# token selects where the value goes: "left" or "right" store it on the
# corresponding Primer, anything else stores it on the pair itself, where it
# can be read back as a method (e.g. +penalty+, +product_size+).
#
# Pairs are Comparable and ordered by +penalty+.
class PrimerPair
  include Comparable
  attr_reader :record
  attr_reader :left, :right

  # record - object responding to +size+ and +properties+ (a Hash-like
  #          collection of key/value pairs).
  # index  - zero-based pair number; must be lower than record.size.
  #
  # Raises Primer3Exception when the index is out of range or when the
  # record holds no property for the left or for the right primer.
  def initialize(record, index)
    unless index < record.size
      raise Primer3Exception.new(), "Index #{index} is greater than the number of records"
    end

    @record = record
    @values = {}
    @left = new_primer
    @right = new_primer

    index_s = index.to_s
    record.properties.each do |key, value|
      tokens = key.to_s.split("_")
      next unless tokens.size > 2 && tokens[2] == index_s

      store_property(tokens, value)
    end

    if @left.added == false || @right.added == false
      raise Primer3Exception.new(), "The pair is not complete (l:#{left.added}, r:#{right.added})"
    end
  end

  # Splits +str+ on "," and converts the first two fields to Integer.
  # Missing fields become 0; any further fields are left as Strings.
  def parse_coordinates(str)
    coords = str.split(',')
    coords[0] = coords[0].to_i
    coords[1] = coords[1].to_i
    coords
  end

  # The pair's +product_size+ value as an Integer.
  def size
    product_size.to_i
  end

  # Orders pairs by their +penalty+ value, compared as Floats.
  def <=>(anOther)
    penalty.to_f <=> anOther.penalty.to_f
  end

  # Dynamic reader for the pair-level values collected in #initialize.
  #
  # Raises NoMethodError listing the available keys when no truthy value is
  # stored under the requested name.
  def method_missing(m, *args, &block)
    stored = @values[m.to_s]
    return stored if stored

    raise NoMethodError, "There's no method called #{m}. Available methods: #{@values.keys.to_s}"
  end

  # Keeps respond_to? and method() in agreement with method_missing.
  def respond_to_missing?(name, include_private = false)
    @values[name.to_s] ? true : super
  end

  private

  # Creates a Primer linked back to this pair and flagged as not yet seen.
  def new_primer
    primer = Primer.new
    primer.set_value("added", false)
    primer.pair = self
    primer
  end

  # Files one property (already known to belong to this pair) either on the
  # left/right primer or on the pair itself.
  def store_property(tokens, value)
    primer = case tokens[1]
             when "left" then @left
             when "right" then @right
             end
    # Name without the three-token prefix; only tokens 3..6 are kept.
    name = tokens[3..6].join("_")

    if primer.nil?
      @values[name] = value
      return
    end

    primer.set_value("added", true)
    if tokens.size == 3
      # A bare three-token key carries the primer coordinates.
      primer.set_value("coordinates", parse_coordinates(value))
    else
      # Everything except the sequence is stored as a Float.
      primer.set_value(name, tokens[3] == "sequence" ? value : value.to_f)
    end
  end
end
|
753
|
+
|
754
|
+
# Collects SNP objects keyed by their to_s representation, attaches primer
# records to them and renders the resulting primers as text.
#
# line_1 / line_2 are copied onto every SNP that is added; scores is passed
# on to Primer3Record.parse_file.
class KASPContainer
  attr_accessor :line_1, :line_2
  attr_accessor :snp_hash
  attr_accessor :scores

  # Registers every SNP yielded by SNP.parse_file(filename) and stamps it
  # with this container's line_1 and line_2.
  def add_snp_file(filename)
    @snp_hash ||= {}
    SNP.parse_file(filename) do |snp|
      @snp_hash[snp.to_s] = snp
      snp.line_1 = @line_1
      snp.line_2 = @line_2
    end
  end

  # Builds a new SNP from the fields of +snp_in+, registers it under its
  # to_s key and returns it. The new SNP keeps a reference to +snp_in+ in
  # snp_from and receives the string form of every target region found in
  # snp_in.exon_list.
  def add_snp(snp_in)
    #TODO: Here we need to also copy the errors that will be printed.
    @snp_hash ||= {}
    snp = SNP.new
    snp.gene = snp_in.gene
    snp.original = snp_in.original
    snp.primer3_errors = Set.new snp_in.errors
    snp.position = snp_in.position
    snp.snp = snp_in.snp
    snp.repetitive = snp_in.repetitive
    snp.hit_count = snp_in.hit_count
    snp.snp_type = snp_in.snp_type
    snp.line_1 = @line_1
    snp.line_2 = @line_2
    snp.snp_from = snp_in
    snp.regions = snp_in.exon_list.values.map do |exons|
      exons.map { |exon| exon.target_region.to_s }
    end
    @snp_hash[snp.to_s] = snp
    snp
  end

  # Parses +filename+ with Primer3Record.parse_file and hands each record to
  # the SNP registered under "<record.snp>:<record.chromosome>".
  #
  # NOTE(review): a record whose key matches no registered SNP (or a call
  # made before any SNP was added) still fails with NoMethodError on nil, as
  # before -- confirm whether such records should be skipped or reported.
  def add_primers_file(filename)
    Primer3Record.parse_file(filename, scores: @scores) do |primer3record|
      key = "#{primer3record.snp.to_s}:#{primer3record.chromosome}"
      current_snp = @snp_hash[key]
      current_snp.add_record(primer3record)
    end
  end

  # Concatenates snp.print_primers for every registered SNP, one per line.
  # Returns an empty String when no SNP has been added yet.
  def print_primers
    str = ""
    return str unless snp_hash

    snp_hash.each_value do |snp|
      str << snp.print_primers << "\n"
    end
    str
  end

  # Lists, for every SNP with found_primers?, three tab-separated lines:
  # the first primer prefixed with +tail_a+, the second primer prefixed with
  # +tail_b+, and the common primer without a tail.
  # Returns an empty String when no SNP has been added yet.
  def print_primers_with_tails(tail_a: "GAAGGTCGGAGTCAACGGATT", tail_b: "GAAGGTGACCAAGTTCATGCT")
    str = ""
    return str unless snp_hash

    snp_hash.each_value do |snp|
      next unless snp.found_primers?

      str << snp.gene << snp.original << "_1st\t" << tail_a << snp.first_primer << "\n"
      str << snp.gene << snp.snp << "_2nd\t" << tail_b << snp.second_primer << "\n"
      str << snp.gene << "_common\t" << snp.common_primer << "\n"
    end
    str
  end
end
|
819
|
+
end
|
820
|
+
|