bio-polyploid-tools 0.7.3 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.travis.yml +17 -0
- data/Gemfile +10 -7
- data/README.md +44 -0
- data/Rakefile +14 -14
- data/VERSION +1 -1
- data/bin/bfr.rb +2 -2
- data/bin/blast_triads.rb +166 -0
- data/bin/blast_triads_promoters.rb +192 -0
- data/bin/find_homoeologue_variations.rb +385 -0
- data/bin/get_longest_hsp_blastx_triads.rb +66 -0
- data/bin/hexaploid_primers.rb +2 -2
- data/bin/homokaryot_primers.rb +2 -2
- data/bin/mafft_triads.rb +120 -0
- data/bin/mafft_triads_promoters.rb +403 -0
- data/bin/polymarker.rb +73 -17
- data/bin/polymarker_capillary.rb +416 -0
- data/bin/snp_position_to_polymarker.rb +5 -3
- data/bin/snps_between_bams.rb +0 -29
- data/bin/vcfLineToTable.rb +56 -0
- data/bio-polyploid-tools.gemspec +74 -32
- data/lib/bio/BFRTools.rb +1 -0
- data/lib/bio/PolyploidTools/ChromosomeArm.rb +2 -6
- data/lib/bio/PolyploidTools/ExonContainer.rb +31 -8
- data/lib/bio/PolyploidTools/NoSNPSequence.rb +286 -0
- data/lib/bio/PolyploidTools/PrimerRegion.rb +9 -1
- data/lib/bio/PolyploidTools/SNP.rb +58 -18
- data/lib/bio/PolyploidTools/SNPMutant.rb +5 -3
- data/lib/bio/db/blast.rb +112 -0
- data/lib/bio/db/exonerate.rb +4 -5
- data/lib/bio/db/primer3.rb +83 -14
- data/test/data/BS00068396_51_blast.tab +4 -0
- data/test/data/BS00068396_51_contigs.nhr +0 -0
- data/test/data/BS00068396_51_contigs.nin +0 -0
- data/test/data/BS00068396_51_contigs.nsq +0 -0
- data/test/data/BS00068396_51_for_polymarker.fa +1 -0
- data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
- data/test/data/S22380157.vcf +67 -0
- data/test/data/S58861868/LIB1716.bam +0 -0
- data/test/data/S58861868/LIB1716.sam +651 -0
- data/test/data/S58861868/LIB1719.bam +0 -0
- data/test/data/S58861868/LIB1719.sam +805 -0
- data/test/data/S58861868/LIB1721.bam +0 -0
- data/test/data/S58861868/LIB1721.sam +1790 -0
- data/test/data/S58861868/LIB1722.bam +0 -0
- data/test/data/S58861868/LIB1722.sam +1271 -0
- data/test/data/S58861868/S58861868.fa +16 -0
- data/test/data/S58861868/S58861868.fa.fai +1 -0
- data/test/data/S58861868/S58861868.vcf +76 -0
- data/test/data/S58861868/header.txt +9 -0
- data/test/data/S58861868/merged.bam +0 -0
- data/test/data/S58861868/merged_reheader.bam +0 -0
- data/test/data/S58861868/merged_reheader.bam.bai +0 -0
- data/test/data/bfr_out_test.csv +5 -5
- data/test/data/headerMergeed.txt +9 -0
- data/test/data/headerS2238015 +1 -0
- data/test/data/mergedLibs.bam +0 -0
- data/test/data/mergedLibsReheader.bam +0 -0
- data/test/data/mergedLibsSorted.bam +0 -0
- data/test/data/mergedLibsSorted.bam.bai +0 -0
- data/test/test_bfr.rb +26 -34
- data/test/test_blast.rb +47 -0
- data/test/test_exonearate.rb +4 -9
- data/test/test_snp_parsing.rb +42 -22
- metadata +81 -20
- data/Gemfile.lock +0 -67
@@ -1,6 +1,10 @@
|
|
1
1
|
module Bio::PolyploidTools
|
2
2
|
class PrimerRegion
|
3
|
-
attr_accessor :snp_pos, :
|
3
|
+
attr_accessor :snp_pos, :almost_chromosome_specific_in_mask
|
4
|
+
attr_accessor :chromosome_specific_in_mask, :sequence
|
5
|
+
attr_accessor :chromosome_specific, :almost_chromosome_specific
|
6
|
+
attr_accessor :crhomosome_specific_intron , :almost_crhomosome_specific_intron
|
7
|
+
attr_accessor :homoeologous, :position_in_mask_from_template
|
4
8
|
|
5
9
|
def initialize
|
6
10
|
|
@@ -8,6 +12,10 @@ module Bio::PolyploidTools
|
|
8
12
|
@almost_chromosome_specific = Array.new
|
9
13
|
@crhomosome_specific_intron = Array.new
|
10
14
|
@almost_crhomosome_specific_intron = Array.new
|
15
|
+
#For deletions
|
16
|
+
@chromosome_specific_in_mask = Array.new
|
17
|
+
@almost_chromosome_specific_in_mask = Array.new
|
18
|
+
@position_in_mask_from_template = Hash.new
|
11
19
|
end
|
12
20
|
|
13
21
|
def tail_candidates
|
@@ -18,7 +18,7 @@ module Bio::PolyploidTools
|
|
18
18
|
|
19
19
|
#Format:
|
20
20
|
#Gene_name,Original,SNP_Pos,pos,chromosome
|
21
|
-
#A_comp0_c0_seq1,C,519,A
|
21
|
+
#A_comp0_c0_seq1,C,519,A,2A
|
22
22
|
def self.parse(reg_str)
|
23
23
|
reg_str.chomp!
|
24
24
|
snp = SNP.new
|
@@ -35,19 +35,43 @@ module Bio::PolyploidTools
|
|
35
35
|
snp
|
36
36
|
end
|
37
37
|
|
38
|
+
# Loads the template sequence for this SNP from an indexed FASTA file.
#
# A region of +flanking_size+ bases on each side of +position+ is requested
# (clamped to 1 and to the length of the indexed entry), fetched with
# +fastaFile.fetch_sequence+ and stored in +template_sequence+. @position is
# rewritten so that it is relative to the fetched region, and the base at
# that position is replaced by the code returned from
# Bio::NucleicAcid.to_IUAPC for the original/SNP base pair.
#
# fastaFile     - object responding to +index.region_for_entry+ and
#                 +fetch_sequence+.
# flanking_size - number of bases requested on each side (default 100).
#
# Returns the ambiguity code written into the template.
def setTemplateFromFastaFile(fastaFile ,flanking_size = 100)
  window = Bio::DB::Fasta::Region.new
  window.entry = gene
  # When a contig was recorded it takes precedence over the gene name.
  window.entry = @contig if @contig
  window.start = position - flanking_size
  window.end = position + flanking_size + 1
  window.orientation = :forward

  # NOTE(review): the length is looked up by +gene+ even when the region
  # entry was switched to @contig above -- confirm both name the same entry.
  indexed_entry = fastaFile.index.region_for_entry(gene)
  window.start = 1 if window.start < 1
  window.end = indexed_entry.length if window.end > indexed_entry.length

  ambiguity_code = Bio::NucleicAcid.to_IUAPC("#{original}#{snp}")

  # Re-express the SNP position relative to the start of the fetched window.
  @position = @position - window.start + 1
  @position = 1 if @position < 1

  self.template_sequence = fastaFile.fetch_sequence(window)
  template_sequence[position - 1] = ambiguity_code
end
|
57
|
+
|
38
58
|
def initialize
|
39
|
-
@genomes_count = 3
|
59
|
+
@genomes_count = 3
|
40
60
|
@primer_3_min_seq_length = 50
|
41
61
|
@variation_free_region = 0
|
62
|
+
@contig = false
|
42
63
|
end
|
43
64
|
|
44
|
-
def to_polymarker_sequence(flanking_size)
|
65
|
+
def to_polymarker_sequence(flanking_size, total:nil)
|
45
66
|
out = template_sequence.clone
|
67
|
+
#puts "changing: #{position} #{flanking_size} len: #{total}"
|
46
68
|
out[position-1] = "[#{original}/#{snp}]"
|
47
|
-
|
48
69
|
start = position - flanking_size - 1
|
70
|
+
#puts "Start: #{start}"
|
49
71
|
start = 0 if start < 0
|
50
|
-
total = flanking_size * 2
|
72
|
+
total = flanking_size * 2 unless total
|
73
|
+
total += 5
|
74
|
+
#puts "Total: #{total}"
|
51
75
|
out[start , total ]
|
52
76
|
end
|
53
77
|
|
@@ -172,10 +196,6 @@ module Bio::PolyploidTools
|
|
172
196
|
ret_str << "#{seq}\n"
|
173
197
|
end
|
174
198
|
|
175
|
-
#self.exon_sequences.each do | chromosome, exon_seq |
|
176
|
-
# ex_seq = cut_sequence_to_primer_region(exon_seq)
|
177
|
-
# ret_str << ">#{chromosome}\n#{ex_seq}\n"
|
178
|
-
#end
|
179
199
|
self.surrounding_exon_sequences.each do |chromosome, exon_seq|
|
180
200
|
ret_str << ">#{chromosome}\n#{exon_seq}\n"
|
181
201
|
end
|
@@ -253,6 +273,8 @@ module Bio::PolyploidTools
|
|
253
273
|
left = opts[:left_pos]
|
254
274
|
right = opts[:right_pos]
|
255
275
|
sequence = opts[:sequence]
|
276
|
+
extra = opts[:extra]
|
277
|
+
|
256
278
|
orientation = "forward"
|
257
279
|
if opts[:right_pos]
|
258
280
|
orientation = "forward"
|
@@ -271,9 +293,13 @@ module Bio::PolyploidTools
|
|
271
293
|
|
272
294
|
|
273
295
|
str = "SEQUENCE_ID=#{opts[:name]} #{orientation}\n"
|
274
|
-
str << "SEQUENCE_FORCE_LEFT_END=#{left}\n"
|
296
|
+
str << "SEQUENCE_FORCE_LEFT_END=#{left}\n" unless opts[:extra_f]
|
275
297
|
str << "SEQUENCE_FORCE_RIGHT_END=#{right}\n" if opts[:right_pos]
|
298
|
+
str << extra if extra
|
299
|
+
str << opts[:extra_f] if opts[:extra_f]
|
276
300
|
str << "SEQUENCE_TEMPLATE=#{sequence}\n"
|
301
|
+
|
302
|
+
|
277
303
|
str << "=\n"
|
278
304
|
|
279
305
|
|
@@ -284,8 +310,10 @@ module Bio::PolyploidTools
|
|
284
310
|
orientation = "reverse"
|
285
311
|
sequence = reverse_complement_string(sequence)
|
286
312
|
str << "SEQUENCE_ID=#{opts[:name]} #{orientation}\n"
|
287
|
-
str << "SEQUENCE_FORCE_LEFT_END=#{left}\n"
|
313
|
+
str << "SEQUENCE_FORCE_LEFT_END=#{left}\n" unless opts[:extra_r]
|
314
|
+
str << opts[:extra_r] if opts[:extra_r]
|
288
315
|
str << "SEQUENCE_TEMPLATE=#{sequence}\n"
|
316
|
+
str << extra if extra
|
289
317
|
str << "=\n"
|
290
318
|
end
|
291
319
|
|
@@ -298,8 +326,10 @@ module Bio::PolyploidTools
|
|
298
326
|
primer_3_propertes = Array.new
|
299
327
|
|
300
328
|
seq_original = String.new(pr.sequence)
|
301
|
-
|
329
|
+
puts seq_original.size.to_s << "-" << primer_3_min_seq_length.to_s
|
302
330
|
return primer_3_propertes if seq_original.size < primer_3_min_seq_length
|
331
|
+
#puts self.inspect
|
332
|
+
puts pr.snp_pos.to_s << "(" << seq_original.length.to_s << ")"
|
303
333
|
|
304
334
|
seq_original[pr.snp_pos] = self.original
|
305
335
|
seq_original_reverse = reverse_complement_string(seq_original)
|
@@ -402,8 +432,8 @@ module Bio::PolyploidTools
|
|
402
432
|
|
403
433
|
seq[local_pos_in_gene] = self.snp if name == self.snp_in
|
404
434
|
@parental_sequences [name] = seq
|
405
|
-
|
406
|
-
|
435
|
+
puts name
|
436
|
+
puts seq
|
407
437
|
end
|
408
438
|
@parental_sequences
|
409
439
|
end
|
@@ -420,8 +450,9 @@ module Bio::PolyploidTools
|
|
420
450
|
seq = bam.consensus_with_ambiguities({:region=>gene_region}).to_s
|
421
451
|
else
|
422
452
|
seq = container.gene_model_sequence(gene_region)
|
453
|
+
|
423
454
|
unless name == self.snp_in
|
424
|
-
|
455
|
+
#puts "Modifing original: #{name} #{seq}"
|
425
456
|
seq[local_pos_in_gene] = self.original
|
426
457
|
end
|
427
458
|
end
|
@@ -466,9 +497,12 @@ module Bio::PolyploidTools
|
|
466
497
|
def aligned_sequences
|
467
498
|
|
468
499
|
return @aligned_sequences if @aligned_sequences
|
500
|
+
|
501
|
+
|
469
502
|
options = ['--maxiterate', '1000', '--localpair', '--quiet']
|
470
503
|
mafft = Bio::MAFFT.new( "mafft" , options)
|
471
|
-
|
504
|
+
#puts "Before MAFT:#{sequences_to_align.inspect}"
|
505
|
+
|
472
506
|
report = mafft.query_align(sequences_to_align)
|
473
507
|
@aligned_sequences = report.alignment
|
474
508
|
# puts "MAFFT: #{report.alignment.inspect}"
|
@@ -508,7 +542,11 @@ module Bio::PolyploidTools
|
|
508
542
|
local_pos = 0
|
509
543
|
started = false
|
510
544
|
#TODO: Validate the cases when the alignment has padding on the left on all the chromosomes
|
511
|
-
|
545
|
+
#unless parental_strings[0]
|
546
|
+
#puts "parental hash: #{parental_sequences}"
|
547
|
+
#puts "Aligned sequences: #{aligned_sequences.to_fasta}"
|
548
|
+
# puts "parental_strings: #{parental_strings.to_s}"
|
549
|
+
#end
|
512
550
|
while i < parental_strings[0].size do
|
513
551
|
if local_pos_in_gene == local_pos
|
514
552
|
pos = i
|
@@ -541,10 +579,11 @@ module Bio::PolyploidTools
|
|
541
579
|
different = 0
|
542
580
|
cov = 0
|
543
581
|
from_group = 0
|
582
|
+
nCount = 0
|
544
583
|
names.each do | chr |
|
545
584
|
if aligned_sequences[chr] and aligned_sequences[chr][i] != "-"
|
546
585
|
cov += 1
|
547
|
-
|
586
|
+
nCount += 1 if aligned_sequences[chr][i] == 'N' or aligned_sequences[chr][i] == 'n' # maybe fix this to use ambiguity codes instead.
|
548
587
|
from_group += 1 if chr[0] == chromosome_group
|
549
588
|
#puts "Comparing #{chromosome_group} and #{chr[0]} as chromosomes"
|
550
589
|
if chr != chromosome
|
@@ -556,6 +595,7 @@ module Bio::PolyploidTools
|
|
556
595
|
end
|
557
596
|
masked_snps[i] = "-" if different == 0
|
558
597
|
masked_snps[i] = "-" if cov == 1
|
598
|
+
masked_snps[i] = "-" if nCount > 0
|
559
599
|
masked_snps[i] = "*" if cov == 0
|
560
600
|
expected_snps = names.size - 1
|
561
601
|
# puts "Diferences: #{different} to expected: #{ expected_snps } [#{i}] Genome count (#{from_group} == #{genomes_count})"
|
@@ -7,7 +7,7 @@ module Bio::PolyploidTools
|
|
7
7
|
|
8
8
|
class SNPMutant < SNPSequence
|
9
9
|
|
10
|
-
attr_accessor :library, :contig, :chr
|
10
|
+
attr_accessor :library, :contig, :chr, :parsed_start, :parsed_flanking, :region_size
|
11
11
|
#Format:
|
12
12
|
#seqid,library,position,wt_base,mut_base
|
13
13
|
#IWGSC_CSS_1AL_scaff_1455974,Kronos2281,127,C,T
|
@@ -17,9 +17,9 @@ module Bio::PolyploidTools
|
|
17
17
|
|
18
18
|
arr = reg_str.split(",")
|
19
19
|
|
20
|
-
throw SNPSequenceException.new "Need five fields to parse, and got #{arr.size} in #{reg_str}"
|
20
|
+
throw SNPSequenceException.new "Need five fields to parse, and got #{arr.size} in #{reg_str}" if arr.size < 5
|
21
21
|
|
22
|
-
snp.contig, snp.library, snp.position, snp.original, snp.snp = reg_str.split(",")
|
22
|
+
snp.contig, snp.library, snp.position, snp.original, snp.snp, parsed_flanking, region_size = reg_str.split(",")
|
23
23
|
snp.position = snp.position.to_i
|
24
24
|
snp.gene = "EMPTY"
|
25
25
|
begin
|
@@ -40,6 +40,8 @@ module Bio::PolyploidTools
|
|
40
40
|
|
41
41
|
snp.exon_list = Hash.new()
|
42
42
|
snp.flanking_size=100
|
43
|
+
snp.region_size = region_size.to_i if region_size
|
44
|
+
snp.flanking_size = parsed_flanking.to_i if parsed_flanking
|
43
45
|
snp
|
44
46
|
end
|
45
47
|
|
data/lib/bio/db/blast.rb
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
module Bio::DB::Blast
|
2
|
+
|
3
|
+
# Builds the nine space-separated "sugar" columns from one tab-separated
# line of BLAST output.
#
# Only the first nine fields are used; with the column layout requested by
# +align+ these are:
#   qseqid qstart qend qframe sseqid sstart send sframe score
# For the query and then the subject, a frame of "-1" is rewritten as "-"
# and the end coordinate is lowered by one; any other frame is rewritten as
# "+" and the start coordinate is lowered by one.
#
# Returns the nine columns joined with single spaces.
def self.to_sugar(line)
  columns = line.split("\t")[0..8]

  # [frame, start, end] column indexes: query first, then subject.
  [[3, 1, 2], [7, 5, 6]].each do |frame_at, start_at, end_at|
    if columns[frame_at] == "-1"
      columns[frame_at] = "-"
      columns[end_at] = columns[end_at].to_i - 1
    else
      columns[frame_at] = "+"
      columns[start_at] = columns[start_at].to_i - 1
    end
  end

  columns.join(" ")
end
|
22
|
+
|
23
|
+
# Builds a "vulgar"-style block string from the aligned query and subject
# sequences found in fields 13 and 14 (indexes 12 and 13) of a tab-separated
# BLAST line.
#
# Each alignment column is classified as:
#   "D" - the query has a gap ("-")
#   "I" - the subject has a gap ("-")
#   "M" - anything else
# Consecutive columns of the same class are merged into one block, emitted as
#   M -> "M <n> <n>",  I -> "G <n> 0",  D -> "G 0 <n>"
#
# Exactly qseq.length columns are inspected, so an alignment that ends in a
# gap (or is empty) no longer gets a trailing zero-length "M 0 0" block.
#
# Returns the blocks joined with single spaces ("" for an empty alignment).
# Raises ArgumentError when the line has no qseq/sseq fields.
def self.to_vulgar(line)
  qseq, sseq = line.chomp.split("\t")[12..13]
  if qseq.nil? || sseq.nil?
    raise ArgumentError, "Expected at least 14 tab-separated fields (qseq and sseq last), got: #{line.inspect}"
  end

  states = qseq.each_char.with_index.map do |query_base, column|
    if query_base == "-"
      "D"
    elsif sseq[column] == "-"
      "I"
    else
      "M"
    end
  end

  # Collapse runs of identical states into [label, query_len, target_len].
  blocks = states.chunk { |state| state }.flat_map do |state, run|
    case state
    when "M" then ["M", run.size, run.size]
    when "I" then ["G", run.size, 0]
    else          ["G", 0, run.size]
    end
  end

  blocks.join(" ")
end
|
68
|
+
|
69
|
+
# Converts one tab-separated BLAST line into a tab-separated "RESULT:" record.
#
# The record is made of, in order: the literal "RESULT:", the output of
# +to_sugar+, fields 10-12 of the line (indexes 9..11), a literal "." and the
# output of +to_vulgar+.
def self.to_exo(line)
  sugar  = to_sugar(line)
  extras = line.split("\t")[9..11]
  vulgar = to_vulgar(line)

  # Array#join flattens the nested +extras+ array with the same separator.
  ["RESULT:", sugar, extras, ".", vulgar].join("\t")
end
|
78
|
+
|
79
|
+
# Runs +blastn+ and turns every hit into an alignment object.
#
# opts - Hash with:
#        :query  - value passed to blastn's -query option.
#        :target - value passed to blastn's -db option.
#
# Every line of blastn's standard output is converted with +to_exo+ and
# parsed with Bio::DB::Exonerate::Alignment.parse_custom; lines for which the
# parser returns nil/false are ignored.
#
# With a block, each alignment is yielded and nil is returned. Without a
# block, an Array with all the alignments is returned.
#
# Raises BlasteException when blastn exits with a non-zero status.
def self.align(opts={})
  target = opts[:target]
  query = opts[:query]
  # NOTE(review): query and target are interpolated unescaped into a command
  # line handed to systemu; values containing spaces or shell metacharacters
  # will not behave as a single argument.
  cmdline = "blastn -query #{query} -db #{target} -outfmt '6 qseqid qstart qend qframe sseqid sstart send sframe score pident qlen slen qseq sseq'"

  status, stdout, stderr = systemu cmdline
  unless status.exitstatus == 0
    raise BlasteException, "Error running blastn. Command line was '#{cmdline}'\n Blast STDERR was:\n#{stderr}"
  end

  alns = block_given? ? nil : []
  stdout.each_line do |blast_line|
    aln = Bio::DB::Exonerate::Alignment.parse_custom(to_exo(blast_line))
    next unless aln

    if block_given?
      yield aln
    else
      alns << aln
    end
  end
  alns
end
|
106
|
+
|
107
|
+
# Raised by +align+ when the blastn command exits with a non-zero status.
class BlasteException < RuntimeError; end
|
109
|
+
|
110
|
+
end
|
111
|
+
|
112
|
+
|
data/lib/bio/db/exonerate.rb
CHANGED
@@ -16,10 +16,9 @@ module Bio::DB::Exonerate
|
|
16
16
|
|
17
17
|
target=opts[:target]
|
18
18
|
query=opts[:query]
|
19
|
-
#
|
20
19
|
|
21
|
-
|
22
|
-
|
20
|
+
cmdline = "exonerate --verbose 0 --showalignment no --bestn #{opts[:bestn]} --showvulgar no --model #{opts[:model]} --ryo '#{opts[:ryo]}' #{query} #{target}"
|
21
|
+
status, stdout, stderr = systemu cmdline
|
23
22
|
#$stderr.puts cmdline
|
24
23
|
if status.exitstatus == 0
|
25
24
|
alns = Array.new unless block_given?
|
@@ -170,9 +169,9 @@ module Bio::DB::Exonerate
|
|
170
169
|
end
|
171
170
|
|
172
171
|
@vulgar_block = Array.new
|
173
|
-
#
|
172
|
+
#p "VULGAR #{vulgar_str}"
|
174
173
|
vulgar_str.split(/\s/).each_slice(3) do | block |
|
175
|
-
#
|
174
|
+
#p block
|
176
175
|
vulgar = Vulgar.new(block[0].to_sym, block[1].to_i, block[2].to_i, tarcurrent, target_multiply, query_current, query_multiply, self)
|
177
176
|
query_current = vulgar.query_end
|
178
177
|
tarcurrent = vulgar.target_end
|
data/lib/bio/db/primer3.rb
CHANGED
@@ -261,11 +261,76 @@ module Bio::DB::Primer3
|
|
261
261
|
return ""
|
262
262
|
end
|
263
263
|
|
264
|
-
def common_primer
|
265
|
-
return self.values[9] if self.values[9]&& self.values[9] != nil
|
264
|
+
def common_primer
|
265
|
+
return self.values[9] if self.values[9] && self.values[9] != nil
|
266
266
|
return ""
|
267
267
|
end
|
268
268
|
|
269
|
+
# Product size stored at index 16 of +values+, converted with +to_i+.
# Returns 0 when that slot is nil or false.
def product_size
  raw_size = values[16]
  raw_size ? raw_size.to_i : 0
end
|
273
|
+
|
274
|
+
# Orientation stored at index 11 of +values+.
#
# Returns 'unknown' when that slot is nil or false. The leftover debug
# print to STDOUT was removed: this reader is called from +first_product+
# and +second_product+, so it wrote a line on every product built.
def orientation
  values[11] || 'unknown'
end
|
279
|
+
|
280
|
+
|
281
|
+
# Builds a placeholder amplicon for the first allele-specific primer.
#
# The product is +first_primer+, followed by 'n' padding, followed by the
# reverse complement of +common_primer+, so that the total length equals
# +product_size+. When +orientation+ is 'reverse' the whole product is
# reverse-complemented.
#
# If +product_size+ is smaller than the two primers together (for example
# 0 because it was not recorded) no padding is added, instead of raising
# ArgumentError from String#* with a negative count.
def first_product
  left = first_primer
  right = common_primer
  padding = [product_size - left.size - right.size, 0].max

  product = left + ('n' * padding) + Primer3Record.reverse_complement_string(right)
  product = Primer3Record.reverse_complement_string(product) if orientation == 'reverse'
  product
end
|
292
|
+
|
293
|
+
# Builds a placeholder amplicon for the second allele-specific primer.
#
# The product is +second_primer+, followed by 'n' padding, followed by the
# reverse complement of +common_primer+, so that the total length equals
# +product_size+. When +orientation+ is 'reverse' the whole product is
# reverse-complemented.
#
# If +product_size+ is smaller than the two primers together (for example
# 0 because it was not recorded) no padding is added, instead of raising
# ArgumentError from String#* with a negative count.
def second_product
  left = second_primer
  right = common_primer
  padding = [product_size - left.size - right.size, 0].max

  product = left + ('n' * padding) + Primer3Record.reverse_complement_string(right)
  product = Primer3Record.reverse_complement_string(product) if orientation == 'reverse'
  product
end
|
303
|
+
|
304
|
+
|
305
|
+
# FASTA-formatted text of everything returned by +realigned_primers+.
#
# Each entry is written as a ">" header made of this object's +to_s+, a dash
# and the entry name, followed by the sequence on its own line.
def realigned_primers_fasta
  fasta = ""
  realigned_primers.each_pair { |label, aligned| fasta << ">#{self.to_s}-#{label}\n#{aligned}\n" }
  fasta
end
|
312
|
+
|
313
|
+
|
314
|
+
# Aligns both placeholder products against the surrounding exon sequences.
#
# The sequences passed to the aligner are "first_product", "second_product"
# and every entry of +snp_from.surrounding_exon_sequences+. The alignment is
# produced by Bio::MAFFT (program "mafft") and memoised in
# @realigned_primers, so the aligner runs at most once per object.
#
# NOTE(review): two keys are always inserted before the merge, so the
# "only one sequence" shortcut below cannot be taken -- confirm whether the
# intended threshold was different.
def realigned_primers
  return @realigned_primers if @realigned_primers

  inputs = { "first_product" => first_product, "second_product" => second_product }
  inputs.merge!(snp_from.surrounding_exon_sequences)

  @realigned_primers =
    if inputs.size == 1
      inputs
    else
      aligner = Bio::MAFFT.new("mafft", ['--maxiterate', '1000', '--localpair', '--quiet'])
      aligner.query_align(inputs).alignment
    end
end
|
333
|
+
|
269
334
|
def self.parse(reg_str)
|
270
335
|
reg_str.chomp!
|
271
336
|
snp = SNP.new
|
@@ -330,14 +395,14 @@ module Bio::DB::Primer3
|
|
330
395
|
class Primer3Record
|
331
396
|
include Comparable
|
332
397
|
attr_accessor :properties, :polymorphism
|
333
|
-
attr_accessor :
|
398
|
+
attr_accessor :scores
|
334
399
|
|
335
400
|
|
336
401
|
def best_pair
|
337
402
|
return @best_pair if @best_pair
|
338
403
|
@best_pair = nil
|
339
404
|
@primerPairs.each do | primer |
|
340
|
-
@best_pair = primer if @best_pair
|
405
|
+
@best_pair = primer if @best_pair.nil?
|
341
406
|
@best_pair = primer if primer.size < @best_pair.size
|
342
407
|
end
|
343
408
|
#@best_pair = @primerPairs.min
|
@@ -354,7 +419,7 @@ module Bio::DB::Primer3
|
|
354
419
|
$stderr.puts "Missing #{method_name}"
|
355
420
|
$stderr.puts @properties.inspect
|
356
421
|
return "" #if a property is missing, return blank.
|
357
|
-
|
422
|
+
raise NoMethodError.new()
|
358
423
|
end
|
359
424
|
|
360
425
|
def find_left_tm(primer)
|
@@ -416,13 +481,14 @@ module Bio::DB::Primer3
|
|
416
481
|
base_original = snp.original
|
417
482
|
base_snp = snp.snp
|
418
483
|
elsif self.orientation == :reverse
|
419
|
-
|
420
|
-
|
484
|
+
#puts self.inspect
|
485
|
+
base_original =Primer3Record.reverse_complement_string(snp.original )
|
486
|
+
base_snp = Primer3Record.reverse_complement_string(snp.snp)
|
421
487
|
else
|
422
488
|
raise Primer3Exception.new "#{self.orientation} is not a valid orientation"
|
423
489
|
end
|
424
490
|
|
425
|
-
#
|
491
|
+
#puts "#{snp.to_s} #{self.orientation} #{tmp_primer[-1] } #{base_original} #{base_snp}"
|
426
492
|
if tmp_primer[-1] == base_original
|
427
493
|
tmp_primer[-1] = base_snp
|
428
494
|
elsif tmp_primer[-1] == base_snp
|
@@ -438,19 +504,19 @@ module Bio::DB::Primer3
|
|
438
504
|
|
439
505
|
seq = self.sequence_template
|
440
506
|
#puts "Left coordinates: #{seq}"
|
441
|
-
seq = reverse_complement_string(seq) if self.orientation != other_orientation
|
507
|
+
seq = Primer3Record.reverse_complement_string(seq) if self.orientation != other_orientation
|
442
508
|
|
443
509
|
seq[coordinates[0],coordinates[1]]
|
444
510
|
end
|
445
511
|
|
446
|
-
def reverse_complement_string(sequenc_str)
|
512
|
+
def self.reverse_complement_string(sequenc_str)
|
447
513
|
complement = sequenc_str.tr('atgcrymkdhvbswnATGCRYMKDHVBSWN', 'tacgyrkmhdbvswnTACGYRKMHDBVSWN')
|
448
514
|
complement.reverse!
|
449
515
|
end
|
450
516
|
|
451
517
|
def right_primer_delete
|
452
518
|
@right_primer = self.sequence_template[right_coordinates[0],right_coordinates[1]] unless @right_primer
|
453
|
-
@right_primer = reverse_complement_string(@right_primer)
|
519
|
+
@right_primer = Primer3Record.reverse_complement_string(@right_primer)
|
454
520
|
@right_primer
|
455
521
|
end
|
456
522
|
|
@@ -547,9 +613,10 @@ module Bio::DB::Primer3
|
|
547
613
|
|
548
614
|
end
|
549
615
|
|
550
|
-
def self.parse_file(filename)
|
616
|
+
def self.parse_file(filename, scores: nil)
|
551
617
|
File.open(filename) do | f |
|
552
618
|
record = Primer3Record.new
|
619
|
+
record.scores = scores if scores
|
553
620
|
f.each_line do | line |
|
554
621
|
line.chomp!
|
555
622
|
if line == "="
|
@@ -557,6 +624,7 @@ module Bio::DB::Primer3
|
|
557
624
|
record.parse_blocks
|
558
625
|
yield record
|
559
626
|
record = Primer3Record.new
|
627
|
+
record.scores = scores if scores
|
560
628
|
else
|
561
629
|
tokens = line.split("=")
|
562
630
|
i = 0
|
@@ -672,7 +740,7 @@ module Bio::DB::Primer3
|
|
672
740
|
|
673
741
|
attr_accessor :line_1, :line_2
|
674
742
|
attr_accessor :snp_hash
|
675
|
-
|
743
|
+
attr_accessor :scores
|
676
744
|
|
677
745
|
def add_snp_file(filename)
|
678
746
|
@snp_hash=Hash.new unless @snp_hash
|
@@ -701,7 +769,8 @@ module Bio::DB::Primer3
|
|
701
769
|
end
|
702
770
|
|
703
771
|
def add_primers_file(filename)
|
704
|
-
|
772
|
+
#primer3record.scores = @scores if @scores
|
773
|
+
Primer3Record.parse_file(filename, scores: @scores) do | primer3record |
|
705
774
|
current_snp = @snp_hash["#{primer3record.snp.to_s}:#{primer3record.chromosome}"]
|
706
775
|
current_snp.add_record(primer3record)
|
707
776
|
end
|