bio-polyploid-tools 0.5.2 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/bin/polymarker.rb +44 -15
- data/bin/snp_position_to_polymarker.rb +1 -1
- data/bio-polyploid-tools.gemspec +7 -3
- data/lib/bio/PolyploidTools/SNP.rb +8 -2
- data/lib/bio/PolyploidTools/SNPMutant.rb +85 -0
- data/lib/bio/PolyploidTools/SNPSequence.rb +8 -8
- data/lib/bio/db/primer3.rb +139 -108
- data/test/data/IWGSC_CSS_1AL_scaff_1455974.fa +112 -0
- data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa +2304 -0
- data/test/data/test_from_mutant.csv +3 -0
- data/test/test_snp_parsing.rb +27 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 76d57ca94d667e3aff5324977e57d8b7a5f0d91e
|
4
|
+
data.tar.gz: ccd2fad348fbc8a68927b944adc8e13b7d776edf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 99edd393245dc13ebf8b10abab165c86a9e808f5cabfe95fccd6607360383d79c277a84e37c831782412817ac35b82dddde717448510dbd5f89b4e5a268640d9
|
7
|
+
data.tar.gz: c387e7b9a9b8d5c6bbb8e286a2fa71e4ecd92093578e58d04d260f5d8f7ba66f47d03856486c030f91488f39078129b27f600329596faec2183e0c521eaf6785
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.6.0
|
data/bin/polymarker.rb
CHANGED
@@ -36,6 +36,8 @@ options[:bucket] = 1
|
|
36
36
|
options[:model] = "est2genome"
|
37
37
|
options[:arm_selection] = arm_selection_functions[:arm_selection_embl] ;
|
38
38
|
options[:flanking_size] = 150;
|
39
|
+
options[:variation_free_region] = 0
|
40
|
+
options[:extract_found_contigs] = false
|
39
41
|
options[:primer_3_preferences] = {
|
40
42
|
:primer_product_size_range => "50-150" ,
|
41
43
|
:primer_max_size => 25 ,
|
@@ -60,6 +62,11 @@ OptionParser.new do |opts|
|
|
60
62
|
opts.on("-s", "--snp_list FILE", "File with the list of snps to search from, requires --reference to get the sequence using a position") do |o|
|
61
63
|
options[:snp_list] = o
|
62
64
|
end
|
65
|
+
|
66
|
+
opts.on("-t", "--mutant_list FILE", "File with the list of positions with mutation and the mutation line.\n\
|
67
|
+
requires --reference to get the sequence using a position") do |o|
|
68
|
+
options[:mutant_list] = o
|
69
|
+
end
|
63
70
|
|
64
71
|
opts.on("-r", "--reference FILE", "Fasta file with the sequence for the markers (to complement --snp_list)") do |o|
|
65
72
|
options[:reference] = o
|
@@ -71,9 +78,9 @@ OptionParser.new do |opts|
|
|
71
78
|
|
72
79
|
opts.on("-e", "--exonerate_model MODEL", "Model to be used in exonerate to search for the contigs") do |o|
|
73
80
|
options[:model] = o
|
74
|
-
|
81
|
+
end
|
75
82
|
|
76
|
-
|
83
|
+
opts.on("-a", "--arm_selection arm_selection_embl|arm_selection_morex|arm_selection_first_two", "Function to decide the chromome arm") do |o|
|
77
84
|
options[:arm_selection] = arm_selection_functions[o.to_sym];
|
78
85
|
end
|
79
86
|
|
@@ -81,8 +88,20 @@ OptionParser.new do |opts|
|
|
81
88
|
options[:primer_3_preferences] = Bio::DB::Primer3.read_primer_preferences(o, options[:primer_3_preferences] )
|
82
89
|
end
|
83
90
|
|
91
|
+
opts.on("-v", "--variation_free_region INT", "If present, avoid generating the common primer if there are homoeologous SNPs within the specified distance") do |o|
|
92
|
+
options[:variation_free_region] = o.to_i
|
93
|
+
end
|
94
|
+
|
95
|
+
opts.on("-x", "--extract_found_contigs", "If present, save in a separate file the contigs with matches. Useful to debug.") do |o|
|
96
|
+
options[:extract_found_contigs] = true
|
97
|
+
end
|
98
|
+
|
99
|
+
opts.on("-P", "--primers_to_order")do
|
100
|
+
#TODO: have a string with the tails, optional.
|
101
|
+
options[:primers_to_order] = true
|
102
|
+
end
|
103
|
+
|
84
104
|
|
85
|
-
|
86
105
|
end.parse!
|
87
106
|
|
88
107
|
if options[:primer_3_preferences][:primer_product_size_range]
|
@@ -108,8 +127,9 @@ snp_in="B"
|
|
108
127
|
|
109
128
|
fasta_reference = nil
|
110
129
|
#test_file="/Users/ramirezr/Dropbox/JIC/PrimersToTest/test_primers_nick_and_james_1.csv"
|
111
|
-
test_file=options[:marker_list]
|
130
|
+
test_file=options[:marker_list] if options[:marker_list]
|
112
131
|
test_file=options[:snp_list] if options[:snp_list]
|
132
|
+
test_file=options[:mutant_list] if options[:mutant_list]
|
113
133
|
fasta_reference = options[:reference]
|
114
134
|
output_folder="#{test_file}_primer_design_#{Time.now.strftime('%Y%m%d-%H%M%S')}"
|
115
135
|
output_folder= options[:output_folder] if options[:output_folder]
|
@@ -122,12 +142,12 @@ primer_3_input="#{output_folder}/primer_3_input_temp"
|
|
122
142
|
primer_3_output="#{output_folder}/primer_3_output_temp"
|
123
143
|
exons_filename="#{output_folder}/exons_genes_and_contigs.fa"
|
124
144
|
output_primers="#{output_folder}/primers.csv"
|
145
|
+
output_to_order="#{output_folder}/primers_to_order.csv"
|
125
146
|
@status_file="#{output_folder}/status.txt"
|
126
147
|
|
127
148
|
primer_3_config=File.expand_path(File.dirname(__FILE__) + '/../conf/primer3_config')
|
128
149
|
model=options[:model]
|
129
150
|
|
130
|
-
|
131
151
|
def write_status(status)
|
132
152
|
f=File.open(@status_file, "a")
|
133
153
|
f.puts "#{Time.now.to_s},#{status}"
|
@@ -148,9 +168,6 @@ if fasta_reference
|
|
148
168
|
p "Fasta reference: #{fasta_reference}"
|
149
169
|
end
|
150
170
|
|
151
|
-
|
152
|
-
|
153
|
-
|
154
171
|
#1. Read all the SNP files
|
155
172
|
#chromosome = nil
|
156
173
|
write_status "Reading SNPs"
|
@@ -167,13 +184,22 @@ File.open(test_file) do | f |
|
|
167
184
|
region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
|
168
185
|
snp.template_sequence = fasta_reference_db.fetch_sequence(region)
|
169
186
|
else
|
170
|
-
|
187
|
+
write_status "WARN: Unable to find entry for #{snp.gene}"
|
188
|
+
end
|
189
|
+
elsif options[:mutant_list] and options[:reference] #List and fasta file
|
190
|
+
snp = Bio::PolyploidTools::SNPMutant.parse(line)
|
191
|
+
entry = fasta_reference_db.index.region_for_entry(snp.contig)
|
192
|
+
if entry
|
193
|
+
region = fasta_reference_db.index.region_for_entry(snp.contig).get_full_region
|
194
|
+
snp.full_sequence = fasta_reference_db.fetch_sequence(region)
|
195
|
+
else
|
196
|
+
write_status "WARN: Unable to find entry for #{snp.gene}"
|
171
197
|
end
|
172
|
-
|
173
198
|
else
|
174
199
|
rise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
|
175
200
|
end
|
176
201
|
rise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
|
202
|
+
|
177
203
|
snp.snp_in = snp_in
|
178
204
|
snp.original_name = original_name
|
179
205
|
if snp.position
|
@@ -206,7 +232,7 @@ file.close
|
|
206
232
|
#chr_group = chromosome[0]
|
207
233
|
write_status "Searching markers in genome"
|
208
234
|
exo_f = File.open(exonerate_file, "w")
|
209
|
-
contigs_f = File.open(temp_contigs, "w")
|
235
|
+
contigs_f = File.open(temp_contigs, "w") if options[:extract_found_contigs]
|
210
236
|
filename=path_to_contigs
|
211
237
|
puts filename
|
212
238
|
target=filename
|
@@ -224,13 +250,13 @@ Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>mod
|
|
224
250
|
raise ExonerateException.new, "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil
|
225
251
|
region = entry.get_full_region
|
226
252
|
seq = fasta_file.fetch_sequence(region)
|
227
|
-
contigs_f.puts(">#{aln.target_id}\n#{seq}")
|
253
|
+
contigs_f.puts(">#{aln.target_id}\n#{seq}") if options[:extract_found_contigs]
|
228
254
|
end
|
229
255
|
end
|
230
256
|
end
|
231
257
|
|
232
|
-
exo_f.close()
|
233
|
-
contigs_f.close()
|
258
|
+
exo_f.close()
|
259
|
+
contigs_f.close() if options[:extract_found_contigs]
|
234
260
|
|
235
261
|
#4. Load all the results from exonerate and get the input filename for primer3
|
236
262
|
#Custom arm selection function that only uses the first two characters. Maybe
|
@@ -241,12 +267,13 @@ write_status "Reading best alignment on each chromosome"
|
|
241
267
|
container= Bio::PolyploidTools::ExonContainer.new
|
242
268
|
container.flanking_size=options[:flanking_size]
|
243
269
|
container.gene_models(temp_fasta_query)
|
244
|
-
container.chromosomes(
|
270
|
+
container.chromosomes(fasta_reference)
|
245
271
|
container.add_parental({:name=>snp_in})
|
246
272
|
container.add_parental({:name=>original_name})
|
247
273
|
snps.each do |snp|
|
248
274
|
snp.container = container
|
249
275
|
snp.flanking_size = container.flanking_size
|
276
|
+
snp.variation_free_region = options[:variation_free_region]
|
250
277
|
container.add_snp(snp)
|
251
278
|
end
|
252
279
|
container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>options[:arm_selection] , :min_identity=>min_identity})
|
@@ -281,6 +308,8 @@ kasp_container.add_primers_file(primer_3_output) if added_exons > 0
|
|
281
308
|
header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors"
|
282
309
|
File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }
|
283
310
|
|
311
|
+
File.open(output_to_order, "w") { |io| io.write(kasp_container.print_primers_with_tails()) }
|
312
|
+
|
284
313
|
write_status "DONE"
|
285
314
|
rescue StandardError => e
|
286
315
|
write_status "ERROR\t#{e.message}"
|
@@ -28,7 +28,7 @@ OptionParser.new do |opts|
|
|
28
28
|
|
29
29
|
opts.banner = "Usage: snp_postion_to_polymarker.rb [options]"
|
30
30
|
|
31
|
-
opts.on("-s", "--
|
31
|
+
opts.on("-s", "--snp_file CSV", "CSV file with the following columnns:\nID,Allele_1,position,Allele_1,target_chromosome") do |o|
|
32
32
|
options[:snp_file] = o
|
33
33
|
end
|
34
34
|
opts.on("-r", "--reference FASTA", "reference with the genes/contings/marker seuqnece") do |o|
|
data/bio-polyploid-tools.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: bio-polyploid-tools 0.
|
5
|
+
# stub: bio-polyploid-tools 0.6.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "bio-polyploid-tools"
|
9
|
-
s.version = "0.
|
9
|
+
s.version = "0.6.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Ricardo H. Ramirez-Gonzalez"]
|
14
|
-
s.date = "
|
14
|
+
s.date = "2015-02-15"
|
15
15
|
s.description = "Repository of tools developed in TGAC and Crop Genetics in JIC to work with polyploid wheat"
|
16
16
|
s.email = "ricardo.ramirez-gonzalez@tgac.ac.uk"
|
17
17
|
s.executables = ["bfr.rb", "count_variations.rb", "filter_blat_by_target_coverage.rb", "find_best_blat_hit.rb", "find_best_exonerate.rb", "hexaploid_primers.rb", "homokaryot_primers.rb", "map_markers_to_contigs.rb", "markers_in_region.rb", "polymarker.rb", "snp_position_to_polymarker.rb", "snps_between_bams.rb"]
|
@@ -81,6 +81,7 @@ Gem::Specification.new do |s|
|
|
81
81
|
"lib/bio/PolyploidTools/Marker.rb",
|
82
82
|
"lib/bio/PolyploidTools/PrimerRegion.rb",
|
83
83
|
"lib/bio/PolyploidTools/SNP.rb",
|
84
|
+
"lib/bio/PolyploidTools/SNPMutant.rb",
|
84
85
|
"lib/bio/PolyploidTools/SNPSequence.rb",
|
85
86
|
"lib/bio/db/exonerate.rb",
|
86
87
|
"lib/bio/db/primer3.rb",
|
@@ -91,6 +92,8 @@ Gem::Specification.new do |s|
|
|
91
92
|
"test/data/BS00068396_51_contigs.fa",
|
92
93
|
"test/data/BS00068396_51_exonerate.tab",
|
93
94
|
"test/data/BS00068396_51_genes.txt",
|
95
|
+
"test/data/IWGSC_CSS_1AL_scaff_1455974.fa",
|
96
|
+
"test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa",
|
94
97
|
"test/data/LIB1716.bam",
|
95
98
|
"test/data/LIB1716.bam.bai",
|
96
99
|
"test/data/LIB1719.bam",
|
@@ -111,6 +114,7 @@ Gem::Specification.new do |s|
|
|
111
114
|
"test/data/patological_cases5D.csv",
|
112
115
|
"test/data/primer_3_input_header_test",
|
113
116
|
"test/data/short_primer_design_test.csv",
|
117
|
+
"test/data/test_from_mutant.csv",
|
114
118
|
"test/data/test_iselect.csv",
|
115
119
|
"test/data/test_iselect_reference.fa",
|
116
120
|
"test/data/test_iselect_reference.fa.fai",
|
@@ -14,6 +14,7 @@ module Bio::PolyploidTools
|
|
14
14
|
attr_accessor :genomes_count
|
15
15
|
attr_accessor :primer_3_min_seq_length
|
16
16
|
attr_accessor :chromosome
|
17
|
+
attr_accessor :variation_free_region
|
17
18
|
|
18
19
|
#Format:
|
19
20
|
#Gene_name,Original,SNP_Pos,pos,chromosome
|
@@ -37,6 +38,7 @@ module Bio::PolyploidTools
|
|
37
38
|
def initialize
|
38
39
|
@genomes_count = 3 #TODO: if we want to use this with other polyploids, me need to set this as a variable in the main script.
|
39
40
|
@primer_3_min_seq_length = 50
|
41
|
+
@variation_free_region = 0
|
40
42
|
end
|
41
43
|
|
42
44
|
def to_polymarker_sequence(flanking_size)
|
@@ -260,8 +262,14 @@ module Bio::PolyploidTools
|
|
260
262
|
sequence = reverse_complement_string(sequence)
|
261
263
|
orientation = "reverse"
|
262
264
|
end
|
265
|
+
if @variation_free_region > 0
|
266
|
+
check_str = sequence[right+1, @variation_free_region]
|
267
|
+
return nil if check_str != check_str.downcase
|
268
|
+
end
|
269
|
+
|
263
270
|
end
|
264
271
|
|
272
|
+
|
265
273
|
str = "SEQUENCE_ID=#{opts[:name]} #{orientation}\n"
|
266
274
|
str << "SEQUENCE_FORCE_LEFT_END=#{left}\n"
|
267
275
|
str << "SEQUENCE_FORCE_RIGHT_END=#{right}\n" if opts[:right_pos]
|
@@ -279,8 +287,6 @@ module Bio::PolyploidTools
|
|
279
287
|
str << "SEQUENCE_FORCE_LEFT_END=#{left}\n"
|
280
288
|
str << "SEQUENCE_TEMPLATE=#{sequence}\n"
|
281
289
|
str << "=\n"
|
282
|
-
else
|
283
|
-
|
284
290
|
end
|
285
291
|
|
286
292
|
str
|
@@ -0,0 +1,85 @@
|
|
1
|
+
|
2
|
+
require_relative "SNPSequence"
|
3
|
+
require 'bio-samtools'
|
4
|
+
module Bio::PolyploidTools
|
5
|
+
class SNPSequenceException < RuntimeError
|
6
|
+
end
|
7
|
+
|
8
|
+
class SNPMutant < SNPSequence
|
9
|
+
|
10
|
+
attr_accessor :library, :contig, :chr
|
11
|
+
#Format:
|
12
|
+
#seqid,library,position,wt_base,mut_base
|
13
|
+
#IWGSC_CSS_1AL_scaff_1455974,Kronos2281,127,C,T
|
14
|
+
def self.parse(reg_str)
|
15
|
+
reg_str.chomp!
|
16
|
+
snp = SNPMutant.new
|
17
|
+
|
18
|
+
arr = reg_str.split(",")
|
19
|
+
|
20
|
+
throw SNPSequenceException.new "Need five fields to parse, and got #{arr.size} in #{reg_str}" unless arr.size == 5
|
21
|
+
|
22
|
+
snp.contig, snp.library, snp.position, snp.original, snp.snp = reg_str.split(",")
|
23
|
+
snp.position = snp.position.to_i
|
24
|
+
snp.gene = "EMPTY"
|
25
|
+
begin
|
26
|
+
toks = snp.contig.split('_')
|
27
|
+
#1AL_1455974_Kronos2281_127C
|
28
|
+
#snp.chr = contig.split('_')[2][0,2] #This parses the default from the IWGSC. We may want to make this a lambda
|
29
|
+
#snp.chr = toks[2][0,2]
|
30
|
+
name = toks[2] + "_" + toks[4] + "_" + snp.library + "_" + snp.position.to_s
|
31
|
+
snp.gene = name
|
32
|
+
snp.chromosome = toks[2][0,2]
|
33
|
+
snp.chr = snp.chromosome
|
34
|
+
|
35
|
+
rescue Exception => e
|
36
|
+
$stderr.puts "WARN: snp.chr couldnt be set, the sequence id to parse was #{snp.contig}. We expect something like: IWGSC_CSS_1AL_scaff_1455974"
|
37
|
+
snp.gene = "Error"
|
38
|
+
$stderr.puts e
|
39
|
+
end
|
40
|
+
|
41
|
+
snp.exon_list = Hash.new()
|
42
|
+
snp.flanking_size=100
|
43
|
+
snp
|
44
|
+
end
|
45
|
+
|
46
|
+
def full_sequence=(seq)
|
47
|
+
self.template_sequence = seq
|
48
|
+
self.sequence_original = self.to_polymarker_sequence(self.flanking_size)
|
49
|
+
self.parse_sequence_snp
|
50
|
+
end
|
51
|
+
|
52
|
+
def full_sequence()
|
53
|
+
self.template_sequence
|
54
|
+
end
|
55
|
+
|
56
|
+
def chromosome_group
|
57
|
+
chr[0]
|
58
|
+
end
|
59
|
+
|
60
|
+
def chromosome_genome
|
61
|
+
chr[1]
|
62
|
+
end
|
63
|
+
|
64
|
+
def chromosome_genome
|
65
|
+
return chr[3] if chr[3]
|
66
|
+
return nil
|
67
|
+
end
|
68
|
+
|
69
|
+
def parse_sequence_snp
|
70
|
+
pos = 0
|
71
|
+
match_data = /(?<pre>\w*)\[(?<org>[ACGT])\/(?<snp>[ACGT])\](?<pos>\w*)/.match(sequence_original.strip)
|
72
|
+
if match_data
|
73
|
+
@position = Regexp.last_match(:pre).size + 1
|
74
|
+
@original = Regexp.last_match(:org)
|
75
|
+
@snp = Regexp.last_match(:snp)
|
76
|
+
amb_base = Bio::NucleicAcid.to_IUAPC("#{@original}#{@snp}")
|
77
|
+
@template_sequence = "#{Regexp.last_match(:pre)}#{amb_base}#{Regexp.last_match(:pos)}"
|
78
|
+
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
|
83
|
+
|
84
|
+
end
|
85
|
+
end
|
@@ -41,14 +41,14 @@ module Bio::PolyploidTools
|
|
41
41
|
pos = 0
|
42
42
|
match_data = /(?<pre>\w*)\[(?<org>[ACGT])\/(?<snp>[ACGT])\](?<pos>\w*)/.match(sequence_original.strip)
|
43
43
|
if match_data
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
44
|
+
@position = Regexp.last_match(:pre).size + 1
|
45
|
+
@original = Regexp.last_match(:org)
|
46
|
+
@snp = Regexp.last_match(:snp)
|
47
|
+
|
48
|
+
amb_base = Bio::NucleicAcid.to_IUAPC("#{@original}#{@snp}")
|
49
|
+
|
50
|
+
@template_sequence = "#{Regexp.last_match(:pre)}#{amb_base}#{Regexp.last_match(:pos)}"
|
51
|
+
|
52
52
|
end
|
53
53
|
end
|
54
54
|
|
data/lib/bio/db/primer3.rb
CHANGED
@@ -17,18 +17,18 @@ module Bio::DB::Primer3
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def self.prepare_input_file(file, opts2={})
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
20
|
+
opts = {
|
21
|
+
:primer_product_size_range => "50-150" ,
|
22
|
+
:primer_max_size => 25 ,
|
23
|
+
:primer_lib_ambiguity_codes_consensus => 1,
|
24
|
+
:primer_liberal_base => 1,
|
25
|
+
:primer_num_return => 5,
|
26
|
+
:primer_explain_flag => 1,
|
27
|
+
:primer_thermodynamic_parameters_path => File.expand_path(File.dirname(__FILE__) + '../../../../conf/primer3_config/') + '/'
|
28
28
|
}.merge(opts2)
|
29
29
|
|
30
30
|
opts.each do |key,value|
|
31
|
-
|
31
|
+
file.puts "#{key.to_s.upcase}=#{value}\n"
|
32
32
|
end
|
33
33
|
# file.puts "="
|
34
34
|
end
|
@@ -68,7 +68,7 @@ module Bio::DB::Primer3
|
|
68
68
|
@primers_line_1 = SortedSet.new
|
69
69
|
@primers_line_2 = SortedSet.new
|
70
70
|
@regions = SortedSet.new
|
71
|
-
|
71
|
+
@primer3_errors = Set.new
|
72
72
|
end
|
73
73
|
|
74
74
|
def line_2_name
|
@@ -106,24 +106,23 @@ module Bio::DB::Primer3
|
|
106
106
|
nil
|
107
107
|
end
|
108
108
|
|
109
|
-
|
110
|
-
|
111
|
-
#TODO: Retrieve error messages
|
109
|
+
def values
|
110
|
+
return @values if @values
|
112
111
|
left_start = 0
|
113
112
|
left_end = 0
|
114
113
|
right_start = 0
|
115
114
|
right_end = 0
|
116
115
|
total_columns_before_messages=17
|
117
|
-
values = Array.new
|
118
|
-
|
119
|
-
values << gene
|
120
|
-
values << "#{original}#{position}#{snp}"
|
121
|
-
values << template_length
|
122
|
-
values << snp_from.chromosome
|
123
|
-
values << regions.size
|
124
|
-
values << regions.join("|")
|
116
|
+
@values = Array.new
|
117
|
+
#@values << "#{gene},,#{template_length},"
|
118
|
+
@values << gene
|
119
|
+
@values << "#{original}#{position}#{snp}"
|
120
|
+
@values << template_length
|
121
|
+
@values << snp_from.chromosome
|
122
|
+
@values << regions.size
|
123
|
+
@values << regions.join("|")
|
125
124
|
if primer3_line_1 and primer3_line_2
|
126
|
-
values << primer3_line_1.polymorphism
|
125
|
+
@values << primer3_line_1.polymorphism
|
127
126
|
|
128
127
|
#Block that searches both if both pairs have a TM
|
129
128
|
primer_2 = primer3_line_2.left_primer_with_coordinates(primer3_line_1.left_coordinates, primer3_line_1.orientation)
|
@@ -133,27 +132,27 @@ module Bio::DB::Primer3
|
|
133
132
|
# $stderr.puts primer_1
|
134
133
|
# $stderr.puts primer_2
|
135
134
|
if primer3_line_1 < primer3_line_2 and primer_2_tm != "NA"
|
136
|
-
values << primer3_line_1.left_primer
|
137
|
-
values << primer_2
|
138
|
-
values << primer3_line_1.right_primer
|
139
|
-
values << primer3_line_1.type.to_s
|
140
|
-
values << primer3_line_1.orientation.to_s
|
141
|
-
values << primer3_line_1.best_pair.left.tm
|
142
|
-
values << primer_2_tm
|
143
|
-
values << primer3_line_1.best_pair.right.tm
|
144
|
-
values << "first"
|
145
|
-
values << primer3_line_1.best_pair.product_size
|
135
|
+
@values << primer3_line_1.left_primer
|
136
|
+
@values << primer_2
|
137
|
+
@values << primer3_line_1.right_primer
|
138
|
+
@values << primer3_line_1.type.to_s
|
139
|
+
@values << primer3_line_1.orientation.to_s
|
140
|
+
@values << primer3_line_1.best_pair.left.tm
|
141
|
+
@values << primer_2_tm
|
142
|
+
@values << primer3_line_1.best_pair.right.tm
|
143
|
+
@values << "first"
|
144
|
+
@values << primer3_line_1.best_pair.product_size
|
146
145
|
elsif primer_1_tm != "NA"
|
147
|
-
values << primer_1
|
148
|
-
values << primer3_line_2.left_primer
|
149
|
-
values << primer3_line_2.right_primer
|
150
|
-
values << primer3_line_2.type.to_s
|
151
|
-
values << primer3_line_2.orientation.to_s
|
152
|
-
values << primer_1_tm
|
153
|
-
values << primer3_line_2.best_pair.left.tm
|
154
|
-
values << primer3_line_2.best_pair.right.tm
|
155
|
-
values << "second"
|
156
|
-
values << primer3_line_2.best_pair.product_size
|
146
|
+
@values << primer_1
|
147
|
+
@values << primer3_line_2.left_primer
|
148
|
+
@values << primer3_line_2.right_primer
|
149
|
+
@values << primer3_line_2.type.to_s
|
150
|
+
@values << primer3_line_2.orientation.to_s
|
151
|
+
@values << primer_1_tm
|
152
|
+
@values << primer3_line_2.best_pair.left.tm
|
153
|
+
@values << primer3_line_2.best_pair.right.tm
|
154
|
+
@values << "second"
|
155
|
+
@values << primer3_line_2.best_pair.product_size
|
157
156
|
else
|
158
157
|
|
159
158
|
first_candidate = find_primer_pair_first
|
@@ -172,80 +171,99 @@ module Bio::DB::Primer3
|
|
172
171
|
|
173
172
|
if first_candidate and second_candidate and first_candidate < second_candidate
|
174
173
|
#puts "A"
|
175
|
-
values << first_candidate.left_primer
|
176
|
-
values << primer_2
|
177
|
-
values << first_candidate.right_primer
|
178
|
-
values << first_candidate.type.to_s
|
179
|
-
values << first_candidate.orientation.to_s
|
180
|
-
values << first_candidate.best_pair.left.tm
|
181
|
-
values << primer_2_tm
|
182
|
-
values << first_candidate.best_pair.right.tm
|
183
|
-
values << "first"
|
184
|
-
values << first_candidate.best_pair.product_size
|
174
|
+
@values << first_candidate.left_primer
|
175
|
+
@values << primer_2
|
176
|
+
@values << first_candidate.right_primer
|
177
|
+
@values << first_candidate.type.to_s
|
178
|
+
@values << first_candidate.orientation.to_s
|
179
|
+
@values << first_candidate.best_pair.left.tm
|
180
|
+
@values << primer_2_tm
|
181
|
+
@values << first_candidate.best_pair.right.tm
|
182
|
+
@values << "first"
|
183
|
+
@values << first_candidate.best_pair.product_size
|
185
184
|
elsif second_candidate
|
186
185
|
#puts "B"
|
187
|
-
values << primer_1
|
188
|
-
values << second_candidate.left_primer
|
189
|
-
values << second_candidate.right_primer
|
190
|
-
values << second_candidate.type.to_s
|
191
|
-
values << second_candidate.orientation.to_s
|
192
|
-
values << primer_1_tm
|
193
|
-
values << second_candidate.best_pair.left.tm
|
194
|
-
values << second_candidate.best_pair.right.tm
|
195
|
-
values << "second"
|
196
|
-
values << second_candidate.best_pair.product_size
|
186
|
+
@values << primer_1
|
187
|
+
@values << second_candidate.left_primer
|
188
|
+
@values << second_candidate.right_primer
|
189
|
+
@values << second_candidate.type.to_s
|
190
|
+
@values << second_candidate.orientation.to_s
|
191
|
+
@values << primer_1_tm
|
192
|
+
@values << second_candidate.best_pair.left.tm
|
193
|
+
@values << second_candidate.best_pair.right.tm
|
194
|
+
@values << "second"
|
195
|
+
@values << second_candidate.best_pair.product_size
|
197
196
|
elsif first_candidate
|
198
197
|
#puts "C"
|
199
|
-
values << first_candidate.left_primer
|
200
|
-
values << primer_2
|
201
|
-
values << first_candidate.right_primer
|
202
|
-
values << first_candidate.type.to_s
|
203
|
-
values << first_candidate.orientation.to_s
|
204
|
-
values << primer_2_tm
|
205
|
-
values << first_candidate.best_pair.left.tm
|
206
|
-
values << first_candidate.best_pair.right.tm
|
207
|
-
values << "first"
|
208
|
-
values << first_candidate.best_pair.product_size
|
209
|
-
# else
|
210
|
-
# values << ""
|
198
|
+
@values << first_candidate.left_primer
|
199
|
+
@values << primer_2
|
200
|
+
@values << first_candidate.right_primer
|
201
|
+
@values << first_candidate.type.to_s
|
202
|
+
@values << first_candidate.orientation.to_s
|
203
|
+
@values << primer_2_tm
|
204
|
+
@values << first_candidate.best_pair.left.tm
|
205
|
+
@values << first_candidate.best_pair.right.tm
|
206
|
+
@values << "first"
|
207
|
+
@values << first_candidate.best_pair.product_size
|
211
208
|
end
|
212
|
-
|
213
209
|
end
|
214
210
|
|
215
211
|
elsif primer3_line_1
|
216
|
-
values << primer3_line_1.polymorphism
|
217
|
-
values << primer3_line_1.left_primer
|
218
|
-
values << primer3_line_1.left_primer_snp(self)
|
219
|
-
values << primer3_line_1.right_primer
|
220
|
-
values << primer3_line_1.type.to_s
|
221
|
-
values << primer3_line_1.orientation.to_s
|
222
|
-
values << primer3_line_1.best_pair.left.tm
|
223
|
-
values << "NA"
|
224
|
-
values << primer3_line_1.best_pair.right.tm
|
225
|
-
|
226
|
-
values << "first+"
|
227
|
-
values << primer3_line_1.best_pair.product_size
|
212
|
+
@values << primer3_line_1.polymorphism
|
213
|
+
@values << primer3_line_1.left_primer
|
214
|
+
@values << primer3_line_1.left_primer_snp(self)
|
215
|
+
@values << primer3_line_1.right_primer
|
216
|
+
@values << primer3_line_1.type.to_s
|
217
|
+
@values << primer3_line_1.orientation.to_s
|
218
|
+
@values << primer3_line_1.best_pair.left.tm
|
219
|
+
@values << "NA"
|
220
|
+
@values << primer3_line_1.best_pair.right.tm
|
221
|
+
|
222
|
+
@values << "first+"
|
223
|
+
@values << primer3_line_1.best_pair.product_size
|
228
224
|
elsif primer3_line_2
|
229
|
-
values << primer3_line_2.polymorphism
|
230
|
-
values << primer3_line_2.left_primer_snp(self)
|
231
|
-
values << primer3_line_2.left_primer
|
232
|
-
values << primer3_line_2.right_primer
|
233
|
-
values << primer3_line_2.type.to_s
|
234
|
-
values << primer3_line_2.orientation.to_s
|
235
|
-
values << "NA"
|
236
|
-
values << primer3_line_2.best_pair.left.tm
|
237
|
-
values << primer3_line_2.best_pair.right.tm
|
238
|
-
values << "second+"
|
239
|
-
values << primer3_line_2.best_pair.product_size
|
225
|
+
@values << primer3_line_2.polymorphism
|
226
|
+
@values << primer3_line_2.left_primer_snp(self)
|
227
|
+
@values << primer3_line_2.left_primer
|
228
|
+
@values << primer3_line_2.right_primer
|
229
|
+
@values << primer3_line_2.type.to_s
|
230
|
+
@values << primer3_line_2.orientation.to_s
|
231
|
+
@values << "NA"
|
232
|
+
@values << primer3_line_2.best_pair.left.tm
|
233
|
+
@values << primer3_line_2.best_pair.right.tm
|
234
|
+
@values << "second+"
|
235
|
+
@values << primer3_line_2.best_pair.product_size
|
240
236
|
|
241
237
|
end
|
242
|
-
if values.size < total_columns_before_messages
|
243
|
-
values[total_columns_before_messages] = primer3_errors.to_a.join("|")
|
238
|
+
if @values.size < total_columns_before_messages
|
239
|
+
@values[total_columns_before_messages] = primer3_errors.to_a.join("|")
|
244
240
|
else
|
245
|
-
values << nil
|
241
|
+
@values << nil
|
246
242
|
end
|
243
|
+
return @values
|
244
|
+
end
|
245
|
+
|
246
|
+
def print_primers
|
247
|
+
self.values.join(",")
|
248
|
+
end
|
249
|
+
|
250
|
+
def found_primers?
|
251
|
+
return self.values[7] && self.values[7] != nil
|
252
|
+
end
|
253
|
+
|
254
|
+
def first_primer
|
255
|
+
return self.values[7] if self.values[7] && self.values[7] != nil
|
256
|
+
return ""
|
257
|
+
end
|
258
|
+
|
259
|
+
def second_primer
|
260
|
+
return self.values[8] if self.values[8] && self.values[8] != nil
|
261
|
+
return ""
|
262
|
+
end
|
247
263
|
|
248
|
-
|
264
|
+
def common_primer
|
265
|
+
return self.values[9] if self.values[9]&& self.values[9] != nil
|
266
|
+
return ""
|
249
267
|
end
|
250
268
|
|
251
269
|
def self.parse(reg_str)
|
@@ -273,10 +291,10 @@ module Bio::DB::Primer3
|
|
273
291
|
def add_record(primer3record)
|
274
292
|
@primer3_errors = Set.new unless @primer3_errors
|
275
293
|
@template_length = primer3record.sequence_template.size
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
294
|
+
if primer3record.primer_error != nil
|
295
|
+
primer3_errors << primer3record.primer_error
|
296
|
+
return
|
297
|
+
end
|
280
298
|
case
|
281
299
|
|
282
300
|
when primer3record.line == @line_1
|
@@ -451,7 +469,7 @@ module Bio::DB::Primer3
|
|
451
469
|
@scores[:chromosome_semispecific] = 100
|
452
470
|
@scores[:chromosome_nonspecific] = 0
|
453
471
|
@scores[:exon] = 50
|
454
|
-
|
472
|
+
|
455
473
|
end
|
456
474
|
|
457
475
|
def snp
|
@@ -654,7 +672,7 @@ module Bio::DB::Primer3
|
|
654
672
|
|
655
673
|
attr_accessor :line_1, :line_2
|
656
674
|
attr_accessor :snp_hash
|
657
|
-
|
675
|
+
|
658
676
|
|
659
677
|
def add_snp_file(filename)
|
660
678
|
@snp_hash=Hash.new unless @snp_hash
|
@@ -696,6 +714,19 @@ module Bio::DB::Primer3
|
|
696
714
|
end
|
697
715
|
return str
|
698
716
|
end
|
717
|
+
|
718
|
+
def print_primers_with_tails(tail_a: "GAAGGTCGGAGTCAACGGATT", tail_b: "GAAGGTGACCAAGTTCATGCT")
|
719
|
+
str = ""
|
720
|
+
snp_hash.each do |k, snp|
|
721
|
+
if snp.found_primers?
|
722
|
+
str << snp.gene << snp.original << "\t" << tail_a << snp.first_primer << "\n"
|
723
|
+
str << snp.gene << snp.snp << "\t" << tail_b << snp.second_primer << "\n"
|
724
|
+
str << snp.gene << "\t" << snp.common_primer << "\n"
|
725
|
+
end
|
726
|
+
end
|
727
|
+
return str
|
728
|
+
end
|
729
|
+
|
699
730
|
end
|
700
731
|
end
|
701
732
|
|