bio-polyploid-tools 0.7.3 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +17 -0
- data/Gemfile +10 -7
- data/README.md +44 -0
- data/Rakefile +14 -14
- data/VERSION +1 -1
- data/bin/bfr.rb +2 -2
- data/bin/blast_triads.rb +166 -0
- data/bin/blast_triads_promoters.rb +192 -0
- data/bin/find_homoeologue_variations.rb +385 -0
- data/bin/get_longest_hsp_blastx_triads.rb +66 -0
- data/bin/hexaploid_primers.rb +2 -2
- data/bin/homokaryot_primers.rb +2 -2
- data/bin/mafft_triads.rb +120 -0
- data/bin/mafft_triads_promoters.rb +403 -0
- data/bin/polymarker.rb +73 -17
- data/bin/polymarker_capillary.rb +416 -0
- data/bin/snp_position_to_polymarker.rb +5 -3
- data/bin/snps_between_bams.rb +0 -29
- data/bin/vcfLineToTable.rb +56 -0
- data/bio-polyploid-tools.gemspec +74 -32
- data/lib/bio/BFRTools.rb +1 -0
- data/lib/bio/PolyploidTools/ChromosomeArm.rb +2 -6
- data/lib/bio/PolyploidTools/ExonContainer.rb +31 -8
- data/lib/bio/PolyploidTools/NoSNPSequence.rb +286 -0
- data/lib/bio/PolyploidTools/PrimerRegion.rb +9 -1
- data/lib/bio/PolyploidTools/SNP.rb +58 -18
- data/lib/bio/PolyploidTools/SNPMutant.rb +5 -3
- data/lib/bio/db/blast.rb +112 -0
- data/lib/bio/db/exonerate.rb +4 -5
- data/lib/bio/db/primer3.rb +83 -14
- data/test/data/BS00068396_51_blast.tab +4 -0
- data/test/data/BS00068396_51_contigs.nhr +0 -0
- data/test/data/BS00068396_51_contigs.nin +0 -0
- data/test/data/BS00068396_51_contigs.nsq +0 -0
- data/test/data/BS00068396_51_for_polymarker.fa +1 -0
- data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
- data/test/data/S22380157.vcf +67 -0
- data/test/data/S58861868/LIB1716.bam +0 -0
- data/test/data/S58861868/LIB1716.sam +651 -0
- data/test/data/S58861868/LIB1719.bam +0 -0
- data/test/data/S58861868/LIB1719.sam +805 -0
- data/test/data/S58861868/LIB1721.bam +0 -0
- data/test/data/S58861868/LIB1721.sam +1790 -0
- data/test/data/S58861868/LIB1722.bam +0 -0
- data/test/data/S58861868/LIB1722.sam +1271 -0
- data/test/data/S58861868/S58861868.fa +16 -0
- data/test/data/S58861868/S58861868.fa.fai +1 -0
- data/test/data/S58861868/S58861868.vcf +76 -0
- data/test/data/S58861868/header.txt +9 -0
- data/test/data/S58861868/merged.bam +0 -0
- data/test/data/S58861868/merged_reheader.bam +0 -0
- data/test/data/S58861868/merged_reheader.bam.bai +0 -0
- data/test/data/bfr_out_test.csv +5 -5
- data/test/data/headerMergeed.txt +9 -0
- data/test/data/headerS2238015 +1 -0
- data/test/data/mergedLibs.bam +0 -0
- data/test/data/mergedLibsReheader.bam +0 -0
- data/test/data/mergedLibsSorted.bam +0 -0
- data/test/data/mergedLibsSorted.bam.bai +0 -0
- data/test/test_bfr.rb +26 -34
- data/test/test_blast.rb +47 -0
- data/test/test_exonearate.rb +4 -9
- data/test/test_snp_parsing.rb +42 -22
- metadata +81 -20
- data/Gemfile.lock +0 -67
@@ -1,6 +1,10 @@
|
|
1
1
|
module Bio::PolyploidTools
|
2
2
|
class PrimerRegion
|
3
|
-
attr_accessor :snp_pos, :
|
3
|
+
attr_accessor :snp_pos, :almost_chromosome_specific_in_mask
|
4
|
+
attr_accessor :chromosome_specific_in_mask, :sequence
|
5
|
+
attr_accessor :chromosome_specific, :almost_chromosome_specific
|
6
|
+
attr_accessor :crhomosome_specific_intron , :almost_crhomosome_specific_intron
|
7
|
+
attr_accessor :homoeologous, :position_in_mask_from_template
|
4
8
|
|
5
9
|
def initialize
|
6
10
|
|
@@ -8,6 +12,10 @@ module Bio::PolyploidTools
|
|
8
12
|
@almost_chromosome_specific = Array.new
|
9
13
|
@crhomosome_specific_intron = Array.new
|
10
14
|
@almost_crhomosome_specific_intron = Array.new
|
15
|
+
#For deletions
|
16
|
+
@chromosome_specific_in_mask = Array.new
|
17
|
+
@almost_chromosome_specific_in_mask = Array.new
|
18
|
+
@position_in_mask_from_template = Hash.new
|
11
19
|
end
|
12
20
|
|
13
21
|
def tail_candidates
|
@@ -18,7 +18,7 @@ module Bio::PolyploidTools
|
|
18
18
|
|
19
19
|
#Format:
|
20
20
|
#Gene_name,Original,SNP_Pos,pos,chromosome
|
21
|
-
#A_comp0_c0_seq1,C,519,A
|
21
|
+
#A_comp0_c0_seq1,C,519,A,2A
|
22
22
|
def self.parse(reg_str)
|
23
23
|
reg_str.chomp!
|
24
24
|
snp = SNP.new
|
@@ -35,19 +35,43 @@ module Bio::PolyploidTools
|
|
35
35
|
snp
|
36
36
|
end
|
37
37
|
|
38
|
+
# Builds template_sequence for this SNP from an indexed FASTA file.
#
# The requested region runs from (position - flanking_size) to
# (position + flanking_size + 1) and is clamped to [1, entry length].
# @position is then rewritten so that it is relative to the fetched region
# (never below 1), and the base at that position is replaced with the value
# returned by Bio::NucleicAcid.to_IUAPC for "original + snp"
# (presumably the IUPAC ambiguity code -- confirm against that helper).
#
# fastaFile::     object providing +index.region_for_entry+ and +fetch_sequence+
# flanking_size:: bases requested on each side of the SNP (default 100)
def setTemplateFromFastaFile(fastaFile ,flanking_size = 100)
  reg = Bio::DB::Fasta::Region.new
  reg.entry = gene
  reg.entry = @contig if @contig
  reg.start = position - flanking_size
  reg.end = position + flanking_size + 1
  reg.orientation = :forward

  # Look up the length of the entry that is actually fetched below. The
  # previous code always used `gene`, which is wrong whenever @contig is set.
  entry = fastaFile.index.region_for_entry(reg.entry)
  reg.start = 1 if reg.start < 1
  reg.end = entry.length if reg.end > entry.length

  amb = Bio::NucleicAcid.to_IUAPC("#{original}#{snp}")

  # Convert the absolute position into a 1-based offset inside the region.
  @position = @position - reg.start + 1
  @position = 1 if @position < 1

  self.template_sequence = fastaFile.fetch_sequence(reg)
  template_sequence[position - 1] = amb
end
|
57
|
+
|
38
58
|
def initialize
|
39
|
-
@genomes_count = 3
|
59
|
+
@genomes_count = 3
|
40
60
|
@primer_3_min_seq_length = 50
|
41
61
|
@variation_free_region = 0
|
62
|
+
@contig = false
|
42
63
|
end
|
43
64
|
|
44
|
-
def to_polymarker_sequence(flanking_size)
|
65
|
+
def to_polymarker_sequence(flanking_size, total:nil)
|
45
66
|
out = template_sequence.clone
|
67
|
+
#puts "changing: #{position} #{flanking_size} len: #{total}"
|
46
68
|
out[position-1] = "[#{original}/#{snp}]"
|
47
|
-
|
48
69
|
start = position - flanking_size - 1
|
70
|
+
#puts "Start: #{start}"
|
49
71
|
start = 0 if start < 0
|
50
|
-
total = flanking_size * 2
|
72
|
+
total = flanking_size * 2 unless total
|
73
|
+
total += 5
|
74
|
+
#puts "Total: #{total}"
|
51
75
|
out[start , total ]
|
52
76
|
end
|
53
77
|
|
@@ -172,10 +196,6 @@ module Bio::PolyploidTools
|
|
172
196
|
ret_str << "#{seq}\n"
|
173
197
|
end
|
174
198
|
|
175
|
-
#self.exon_sequences.each do | chromosome, exon_seq |
|
176
|
-
# ex_seq = cut_sequence_to_primer_region(exon_seq)
|
177
|
-
# ret_str << ">#{chromosome}\n#{ex_seq}\n"
|
178
|
-
#end
|
179
199
|
self.surrounding_exon_sequences.each do |chromosome, exon_seq|
|
180
200
|
ret_str << ">#{chromosome}\n#{exon_seq}\n"
|
181
201
|
end
|
@@ -253,6 +273,8 @@ module Bio::PolyploidTools
|
|
253
273
|
left = opts[:left_pos]
|
254
274
|
right = opts[:right_pos]
|
255
275
|
sequence = opts[:sequence]
|
276
|
+
extra = opts[:extra]
|
277
|
+
|
256
278
|
orientation = "forward"
|
257
279
|
if opts[:right_pos]
|
258
280
|
orientation = "forward"
|
@@ -271,9 +293,13 @@ module Bio::PolyploidTools
|
|
271
293
|
|
272
294
|
|
273
295
|
str = "SEQUENCE_ID=#{opts[:name]} #{orientation}\n"
|
274
|
-
str << "SEQUENCE_FORCE_LEFT_END=#{left}\n"
|
296
|
+
str << "SEQUENCE_FORCE_LEFT_END=#{left}\n" unless opts[:extra_f]
|
275
297
|
str << "SEQUENCE_FORCE_RIGHT_END=#{right}\n" if opts[:right_pos]
|
298
|
+
str << extra if extra
|
299
|
+
str << opts[:extra_f] if opts[:extra_f]
|
276
300
|
str << "SEQUENCE_TEMPLATE=#{sequence}\n"
|
301
|
+
|
302
|
+
|
277
303
|
str << "=\n"
|
278
304
|
|
279
305
|
|
@@ -284,8 +310,10 @@ module Bio::PolyploidTools
|
|
284
310
|
orientation = "reverse"
|
285
311
|
sequence = reverse_complement_string(sequence)
|
286
312
|
str << "SEQUENCE_ID=#{opts[:name]} #{orientation}\n"
|
287
|
-
str << "SEQUENCE_FORCE_LEFT_END=#{left}\n"
|
313
|
+
str << "SEQUENCE_FORCE_LEFT_END=#{left}\n" unless opts[:extra_r]
|
314
|
+
str << opts[:extra_r] if opts[:extra_r]
|
288
315
|
str << "SEQUENCE_TEMPLATE=#{sequence}\n"
|
316
|
+
str << extra if extra
|
289
317
|
str << "=\n"
|
290
318
|
end
|
291
319
|
|
@@ -298,8 +326,10 @@ module Bio::PolyploidTools
|
|
298
326
|
primer_3_propertes = Array.new
|
299
327
|
|
300
328
|
seq_original = String.new(pr.sequence)
|
301
|
-
|
329
|
+
puts seq_original.size.to_s << "-" << primer_3_min_seq_length.to_s
|
302
330
|
return primer_3_propertes if seq_original.size < primer_3_min_seq_length
|
331
|
+
#puts self.inspect
|
332
|
+
puts pr.snp_pos.to_s << "(" << seq_original.length.to_s << ")"
|
303
333
|
|
304
334
|
seq_original[pr.snp_pos] = self.original
|
305
335
|
seq_original_reverse = reverse_complement_string(seq_original)
|
@@ -402,8 +432,8 @@ module Bio::PolyploidTools
|
|
402
432
|
|
403
433
|
seq[local_pos_in_gene] = self.snp if name == self.snp_in
|
404
434
|
@parental_sequences [name] = seq
|
405
|
-
|
406
|
-
|
435
|
+
puts name
|
436
|
+
puts seq
|
407
437
|
end
|
408
438
|
@parental_sequences
|
409
439
|
end
|
@@ -420,8 +450,9 @@ module Bio::PolyploidTools
|
|
420
450
|
seq = bam.consensus_with_ambiguities({:region=>gene_region}).to_s
|
421
451
|
else
|
422
452
|
seq = container.gene_model_sequence(gene_region)
|
453
|
+
|
423
454
|
unless name == self.snp_in
|
424
|
-
|
455
|
+
#puts "Modifing original: #{name} #{seq}"
|
425
456
|
seq[local_pos_in_gene] = self.original
|
426
457
|
end
|
427
458
|
end
|
@@ -466,9 +497,12 @@ module Bio::PolyploidTools
|
|
466
497
|
def aligned_sequences
|
467
498
|
|
468
499
|
return @aligned_sequences if @aligned_sequences
|
500
|
+
|
501
|
+
|
469
502
|
options = ['--maxiterate', '1000', '--localpair', '--quiet']
|
470
503
|
mafft = Bio::MAFFT.new( "mafft" , options)
|
471
|
-
|
504
|
+
#puts "Before MAFT:#{sequences_to_align.inspect}"
|
505
|
+
|
472
506
|
report = mafft.query_align(sequences_to_align)
|
473
507
|
@aligned_sequences = report.alignment
|
474
508
|
# puts "MAFFT: #{report.alignment.inspect}"
|
@@ -508,7 +542,11 @@ module Bio::PolyploidTools
|
|
508
542
|
local_pos = 0
|
509
543
|
started = false
|
510
544
|
#TODO: Validate the cases when the alignment has padding on the left on all the chromosomes
|
511
|
-
|
545
|
+
#unless parental_strings[0]
|
546
|
+
#puts "parental hash: #{parental_sequences}"
|
547
|
+
#puts "Aligned sequences: #{aligned_sequences.to_fasta}"
|
548
|
+
# puts "parental_strings: #{parental_strings.to_s}"
|
549
|
+
#end
|
512
550
|
while i < parental_strings[0].size do
|
513
551
|
if local_pos_in_gene == local_pos
|
514
552
|
pos = i
|
@@ -541,10 +579,11 @@ module Bio::PolyploidTools
|
|
541
579
|
different = 0
|
542
580
|
cov = 0
|
543
581
|
from_group = 0
|
582
|
+
nCount = 0
|
544
583
|
names.each do | chr |
|
545
584
|
if aligned_sequences[chr] and aligned_sequences[chr][i] != "-"
|
546
585
|
cov += 1
|
547
|
-
|
586
|
+
nCount += 1 if aligned_sequences[chr][i] == 'N' or aligned_sequences[chr][i] == 'n' # maybe fix this to use ambiguity codes instead.
|
548
587
|
from_group += 1 if chr[0] == chromosome_group
|
549
588
|
#puts "Comparing #{chromosome_group} and #{chr[0]} as chromosomes"
|
550
589
|
if chr != chromosome
|
@@ -556,6 +595,7 @@ module Bio::PolyploidTools
|
|
556
595
|
end
|
557
596
|
masked_snps[i] = "-" if different == 0
|
558
597
|
masked_snps[i] = "-" if cov == 1
|
598
|
+
masked_snps[i] = "-" if nCount > 0
|
559
599
|
masked_snps[i] = "*" if cov == 0
|
560
600
|
expected_snps = names.size - 1
|
561
601
|
# puts "Diferences: #{different} to expected: #{ expected_snps } [#{i}] Genome count (#{from_group} == #{genomes_count})"
|
@@ -7,7 +7,7 @@ module Bio::PolyploidTools
|
|
7
7
|
|
8
8
|
class SNPMutant < SNPSequence
|
9
9
|
|
10
|
-
attr_accessor :library, :contig, :chr
|
10
|
+
attr_accessor :library, :contig, :chr, :parsed_start, :parsed_flanking, :region_size
|
11
11
|
#Format:
|
12
12
|
#seqid,library,position,wt_base,mut_base
|
13
13
|
#IWGSC_CSS_1AL_scaff_1455974,Kronos2281,127,C,T
|
@@ -17,9 +17,9 @@ module Bio::PolyploidTools
|
|
17
17
|
|
18
18
|
arr = reg_str.split(",")
|
19
19
|
|
20
|
-
throw SNPSequenceException.new "Need five fields to parse, and got #{arr.size} in #{reg_str}"
|
20
|
+
throw SNPSequenceException.new "Need five fields to parse, and got #{arr.size} in #{reg_str}" if arr.size < 5
|
21
21
|
|
22
|
-
snp.contig, snp.library, snp.position, snp.original, snp.snp = reg_str.split(",")
|
22
|
+
snp.contig, snp.library, snp.position, snp.original, snp.snp, parsed_flanking, region_size = reg_str.split(",")
|
23
23
|
snp.position = snp.position.to_i
|
24
24
|
snp.gene = "EMPTY"
|
25
25
|
begin
|
@@ -40,6 +40,8 @@ module Bio::PolyploidTools
|
|
40
40
|
|
41
41
|
snp.exon_list = Hash.new()
|
42
42
|
snp.flanking_size=100
|
43
|
+
snp.region_size = region_size.to_i if region_size
|
44
|
+
snp.flanking_size = parsed_flanking.to_i if parsed_flanking
|
43
45
|
snp
|
44
46
|
end
|
45
47
|
|
data/lib/bio/db/blast.rb
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
module Bio::DB::Blast
|
2
|
+
|
3
|
+
# Turns the first nine tab-separated columns of a BLAST row
# (qseqid qstart qend qframe sseqid sstart send sframe score) into a single
# space-separated string.
#
# For the query columns and then the target columns: a frame of "-1" is
# rewritten as strand "-" and the end coordinate is decremented by one; any
# other frame is rewritten as "+" and the start coordinate is decremented by
# one.
def self.to_sugar(line)
  cols = line.split("\t")[0..8]

  # Each triple is [start column, end column, frame/strand column].
  [[1, 2, 3], [5, 6, 7]].each do |first, last, strand|
    if cols[strand] == "-1"
      cols[strand] = "-"
      cols[last] = cols[last].to_i - 1
    else
      cols[strand] = "+"
      cols[first] = cols[first].to_i - 1
    end
  end

  cols.join(" ")
end
|
22
|
+
|
23
|
+
# Builds a vulgar-style description of the alignment from the aligned query
# and subject strings found in columns 12 and 13 of a tab-separated BLAST row.
#
# Every alignment column is classified as:
#   "D" - gap ("-") in the query string
#   "I" - gap ("-") in the subject string
#   "M" - anything else
# Consecutive columns of the same class are collapsed into one block:
#   M run of n -> "M n n"
#   I run of n -> "G n 0"
#   D run of n -> "G 0 n"
# Blocks are joined with single spaces.
#
# Only the columns of the query string are visited. The earlier version
# iterated one column too far and subtracted one from the last run, which
# produced a spurious "M 0 0" block whenever the alignment ended in a gap.
# An empty query string now yields an empty string.
def self.to_vulgar(line)
  qseq, sseq = line.split("\t")[12..13]

  labels = (0...qseq.length).map do |i|
    if qseq[i] == "-"
      "D"
    elsif sseq[i] == "-"
      "I"
    else
      "M"
    end
  end

  blocks = labels.chunk { |label| label }.flat_map do |label, run|
    case label
    when "M" then ["M", run.size, run.size]
    when "I" then ["G", run.size, 0]
    else ["G", 0, run.size]
    end
  end

  blocks.join(" ")
end
|
68
|
+
|
69
|
+
# Assembles one tab-separated record from a BLAST row:
# "RESULT:", the to_sugar string, columns 9..11 of the row, a literal "."
# and the to_vulgar string, in that order.
def self.to_exo(line)
  # Array#join also flattens the nested three-column slice with the same
  # separator, so the slice can be placed in the list as-is.
  ["RESULT:", to_sugar(line), line.split("\t")[9..11], ".", to_vulgar(line)].join("\t")
end
|
78
|
+
|
79
|
+
# Runs blastn for opts[:query] against the database opts[:target] and parses
# every output row into an alignment via to_exo and
# Bio::DB::Exonerate::Alignment.parse_custom.
#
# With a block, each parsed alignment is yielded and nil is returned.
# Without a block, an Array of the parsed alignments is returned.
# Rows for which parse_custom returns a falsy value are skipped.
#
# Raises BlasteException when the command exits with a non-zero status.
#
# NOTE(review): query and target are interpolated into the shell command
# without escaping; callers must pass trusted paths.
def self.align(opts={})
  target = opts[:target]
  query = opts[:query]
  cmdline = "blastn -query #{query} -db #{target} -outfmt '6 qseqid qstart qend qframe sseqid sstart send sframe score pident qlen slen qseq sseq'"

  status, stdout, stderr = systemu cmdline
  unless status.exitstatus == 0
    # The message previously named exonerate, although blastn is what ran.
    raise BlasteException.new(), "Error running blastn. Command line was '#{cmdline}'\n Blast STDERR was:\n#{stderr}"
  end

  alns = block_given? ? nil : Array.new
  stdout.each_line do |e_l|
    aln = Bio::DB::Exonerate::Alignment.parse_custom(to_exo(e_l))
    next unless aln
    if block_given?
      yield aln
    else
      alns << aln
    end
  end
  alns
end
|
106
|
+
|
107
|
+
# Error type raised when the blastn command line exits with a failure status.
BlasteException = Class.new(RuntimeError)
|
109
|
+
|
110
|
+
end
|
111
|
+
|
112
|
+
|
data/lib/bio/db/exonerate.rb
CHANGED
@@ -16,10 +16,9 @@ module Bio::DB::Exonerate
|
|
16
16
|
|
17
17
|
target=opts[:target]
|
18
18
|
query=opts[:query]
|
19
|
-
#
|
20
19
|
|
21
|
-
|
22
|
-
|
20
|
+
cmdline = "exonerate --verbose 0 --showalignment no --bestn #{opts[:bestn]} --showvulgar no --model #{opts[:model]} --ryo '#{opts[:ryo]}' #{query} #{target}"
|
21
|
+
status, stdout, stderr = systemu cmdline
|
23
22
|
#$stderr.puts cmdline
|
24
23
|
if status.exitstatus == 0
|
25
24
|
alns = Array.new unless block_given?
|
@@ -170,9 +169,9 @@ module Bio::DB::Exonerate
|
|
170
169
|
end
|
171
170
|
|
172
171
|
@vulgar_block = Array.new
|
173
|
-
#
|
172
|
+
#p "VULGAR #{vulgar_str}"
|
174
173
|
vulgar_str.split(/\s/).each_slice(3) do | block |
|
175
|
-
#
|
174
|
+
#p block
|
176
175
|
vulgar = Vulgar.new(block[0].to_sym, block[1].to_i, block[2].to_i, tarcurrent, target_multiply, query_current, query_multiply, self)
|
177
176
|
query_current = vulgar.query_end
|
178
177
|
tarcurrent = vulgar.target_end
|
data/lib/bio/db/primer3.rb
CHANGED
@@ -261,11 +261,76 @@ module Bio::DB::Primer3
|
|
261
261
|
return ""
|
262
262
|
end
|
263
263
|
|
264
|
-
def common_primer
|
265
|
-
return self.values[9] if self.values[9]&& self.values[9] != nil
|
264
|
+
def common_primer
|
265
|
+
return self.values[9] if self.values[9] && self.values[9] != nil
|
266
266
|
return ""
|
267
267
|
end
|
268
268
|
|
269
|
+
# Integer value of column 16 of this record's values.
# Returns 0 when that column is nil or false.
def product_size
  raw_size = values[16]
  raw_size ? raw_size.to_i : 0
end
|
273
|
+
|
274
|
+
# Value of column 11 of this record's values, or 'unknown' when that column
# is nil or false.
#
# The earlier version printed "insideOrientation: ..." to stdout on every
# call; that debugging output has been removed so that reading the
# orientation no longer writes to the program's output.
def orientation
  values[11] || 'unknown'
end
|
279
|
+
|
280
|
+
|
281
|
+
# Predicted product for the first primer: first_primer, then a run of 'n'
# filling the remaining length, then the reverse complement of common_primer.
# The whole string is reverse-complemented when orientation is 'reverse'.
#
# The filler length is product_size minus both primer lengths. When that
# difference is negative (for example product_size is 0 because the column is
# missing) no filler is inserted; previously String#* raised ArgumentError.
def first_product
  left = first_primer
  right = common_primer
  filler = [product_size - left.size - right.size, 0].max
  product = left + ('n' * filler) + Primer3Record.reverse_complement_string(right)
  product = Primer3Record.reverse_complement_string(product) if orientation == 'reverse'
  product
end
|
292
|
+
|
293
|
+
# Predicted product for the second primer: second_primer, then a run of 'n'
# filling the remaining length, then the reverse complement of common_primer.
# The whole string is reverse-complemented when orientation is 'reverse'.
#
# The filler length is product_size minus both primer lengths. When that
# difference is negative (for example product_size is 0 because the column is
# missing) no filler is inserted; previously String#* raised ArgumentError.
def second_product
  left = second_primer
  right = common_primer
  filler = [product_size - left.size - right.size, 0].max
  product = left + ('n' * filler) + Primer3Record.reverse_complement_string(right)
  product = Primer3Record.reverse_complement_string(product) if orientation == 'reverse'
  product
end
|
303
|
+
|
304
|
+
|
305
|
+
# FASTA-style text for every (name, sequence) pair in realigned_primers.
# Each record header is ">" + this object's to_s + "-" + the pair's name.
def realigned_primers_fasta
  fasta = ""
  realigned_primers.each_pair do |label, aligned|
    fasta << ">#{self}-#{label}\n" << "#{aligned}\n"
  end
  fasta
end
|
312
|
+
|
313
|
+
|
314
|
+
# Memoised MAFFT alignment of first_product, second_product and the sequences
# returned by snp_from.surrounding_exon_sequences.
#
# The result of the first call is cached in @realigned_primers and returned
# on later calls without running mafft again.
#
# The former `size == 1` shortcut was dropped: two keys ("first_product" and
# "second_product") are always inserted before the check, so it could never
# be taken.
def realigned_primers
  return @realigned_primers if @realigned_primers

  to_align = {
    "first_product" => first_product,
    "second_product" => second_product
  }
  to_align.merge!(snp_from.surrounding_exon_sequences)

  options = ['--maxiterate', '1000', '--localpair', '--quiet']
  mafft = Bio::MAFFT.new("mafft", options)
  @realigned_primers = mafft.query_align(to_align).alignment
end
|
333
|
+
|
269
334
|
def self.parse(reg_str)
|
270
335
|
reg_str.chomp!
|
271
336
|
snp = SNP.new
|
@@ -330,14 +395,14 @@ module Bio::DB::Primer3
|
|
330
395
|
class Primer3Record
|
331
396
|
include Comparable
|
332
397
|
attr_accessor :properties, :polymorphism
|
333
|
-
attr_accessor :
|
398
|
+
attr_accessor :scores
|
334
399
|
|
335
400
|
|
336
401
|
def best_pair
|
337
402
|
return @best_pair if @best_pair
|
338
403
|
@best_pair = nil
|
339
404
|
@primerPairs.each do | primer |
|
340
|
-
@best_pair = primer if @best_pair
|
405
|
+
@best_pair = primer if @best_pair.nil?
|
341
406
|
@best_pair = primer if primer.size < @best_pair.size
|
342
407
|
end
|
343
408
|
#@best_pair = @primerPairs.min
|
@@ -354,7 +419,7 @@ module Bio::DB::Primer3
|
|
354
419
|
$stderr.puts "Missing #{method_name}"
|
355
420
|
$stderr.puts @properties.inspect
|
356
421
|
return "" #if a property is missing, return blank.
|
357
|
-
|
422
|
+
raise NoMethodError.new()
|
358
423
|
end
|
359
424
|
|
360
425
|
def find_left_tm(primer)
|
@@ -416,13 +481,14 @@ module Bio::DB::Primer3
|
|
416
481
|
base_original = snp.original
|
417
482
|
base_snp = snp.snp
|
418
483
|
elsif self.orientation == :reverse
|
419
|
-
|
420
|
-
|
484
|
+
#puts self.inspect
|
485
|
+
base_original =Primer3Record.reverse_complement_string(snp.original )
|
486
|
+
base_snp = Primer3Record.reverse_complement_string(snp.snp)
|
421
487
|
else
|
422
488
|
raise Primer3Exception.new "#{self.orientation} is not a valid orientation"
|
423
489
|
end
|
424
490
|
|
425
|
-
#
|
491
|
+
#puts "#{snp.to_s} #{self.orientation} #{tmp_primer[-1] } #{base_original} #{base_snp}"
|
426
492
|
if tmp_primer[-1] == base_original
|
427
493
|
tmp_primer[-1] = base_snp
|
428
494
|
elsif tmp_primer[-1] == base_snp
|
@@ -438,19 +504,19 @@ module Bio::DB::Primer3
|
|
438
504
|
|
439
505
|
seq = self.sequence_template
|
440
506
|
#puts "Left coordinates: #{seq}"
|
441
|
-
seq = reverse_complement_string(seq) if self.orientation != other_orientation
|
507
|
+
seq = Primer3Record.reverse_complement_string(seq) if self.orientation != other_orientation
|
442
508
|
|
443
509
|
seq[coordinates[0],coordinates[1]]
|
444
510
|
end
|
445
511
|
|
446
|
-
def reverse_complement_string(sequenc_str)
|
512
|
+
def self.reverse_complement_string(sequenc_str)
|
447
513
|
complement = sequenc_str.tr('atgcrymkdhvbswnATGCRYMKDHVBSWN', 'tacgyrkmhdbvswnTACGYRKMHDBVSWN')
|
448
514
|
complement.reverse!
|
449
515
|
end
|
450
516
|
|
451
517
|
def right_primer_delete
|
452
518
|
@right_primer = self.sequence_template[right_coordinates[0],right_coordinates[1]] unless @right_primer
|
453
|
-
@right_primer = reverse_complement_string(@right_primer)
|
519
|
+
@right_primer = Primer3Record.reverse_complement_string(@right_primer)
|
454
520
|
@right_primer
|
455
521
|
end
|
456
522
|
|
@@ -547,9 +613,10 @@ module Bio::DB::Primer3
|
|
547
613
|
|
548
614
|
end
|
549
615
|
|
550
|
-
def self.parse_file(filename)
|
616
|
+
def self.parse_file(filename, scores: nil)
|
551
617
|
File.open(filename) do | f |
|
552
618
|
record = Primer3Record.new
|
619
|
+
record.scores = scores if scores
|
553
620
|
f.each_line do | line |
|
554
621
|
line.chomp!
|
555
622
|
if line == "="
|
@@ -557,6 +624,7 @@ module Bio::DB::Primer3
|
|
557
624
|
record.parse_blocks
|
558
625
|
yield record
|
559
626
|
record = Primer3Record.new
|
627
|
+
record.scores = scores if scores
|
560
628
|
else
|
561
629
|
tokens = line.split("=")
|
562
630
|
i = 0
|
@@ -672,7 +740,7 @@ module Bio::DB::Primer3
|
|
672
740
|
|
673
741
|
attr_accessor :line_1, :line_2
|
674
742
|
attr_accessor :snp_hash
|
675
|
-
|
743
|
+
attr_accessor :scores
|
676
744
|
|
677
745
|
def add_snp_file(filename)
|
678
746
|
@snp_hash=Hash.new unless @snp_hash
|
@@ -701,7 +769,8 @@ module Bio::DB::Primer3
|
|
701
769
|
end
|
702
770
|
|
703
771
|
def add_primers_file(filename)
|
704
|
-
|
772
|
+
#primer3record.scores = @scores if @scores
|
773
|
+
Primer3Record.parse_file(filename, scores: @scores) do | primer3record |
|
705
774
|
current_snp = @snp_hash["#{primer3record.snp.to_s}:#{primer3record.chromosome}"]
|
706
775
|
current_snp.add_record(primer3record)
|
707
776
|
end
|