bio-polymarker 1.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.travis.yml +24 -0
- data/Gemfile +23 -0
- data/README.md +205 -0
- data/Rakefile +61 -0
- data/SECURITY.md +16 -0
- data/VERSION +1 -0
- data/bin/bfr.rb +128 -0
- data/bin/blast_triads.rb +166 -0
- data/bin/blast_triads_promoters.rb +192 -0
- data/bin/count_variations.rb +36 -0
- data/bin/filter_blat_by_target_coverage.rb +69 -0
- data/bin/filter_exonerate_by_identity.rb +38 -0
- data/bin/find_best_blat_hit.rb +33 -0
- data/bin/find_best_exonerate.rb +17 -0
- data/bin/get_longest_hsp_blastx_triads.rb +66 -0
- data/bin/hexaploid_primers.rb +168 -0
- data/bin/homokaryot_primers.rb +183 -0
- data/bin/mafft_triads.rb +120 -0
- data/bin/mafft_triads_promoters.rb +403 -0
- data/bin/map_markers_to_contigs.rb +66 -0
- data/bin/marker_to_vcf.rb +241 -0
- data/bin/markers_in_region.rb +42 -0
- data/bin/mask_triads.rb +169 -0
- data/bin/polymarker.rb +410 -0
- data/bin/polymarker_capillary.rb +443 -0
- data/bin/polymarker_deletions.rb +350 -0
- data/bin/snp_position_to_polymarker.rb +101 -0
- data/bin/snps_between_bams.rb +107 -0
- data/bin/tag_stats.rb +75 -0
- data/bin/vcfLineToTable.rb +56 -0
- data/bin/vcfToPolyMarker.rb +82 -0
- data/bio-polymarker.gemspec +227 -0
- data/conf/defaults.rb +1 -0
- data/conf/primer3_config/dangle.dh +128 -0
- data/conf/primer3_config/dangle.ds +128 -0
- data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
- data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
- data/conf/primer3_config/interpretations/loops_i.dh +34 -0
- data/conf/primer3_config/interpretations/loops_i.ds +31 -0
- data/conf/primer3_config/interpretations/stack_i.dh +257 -0
- data/conf/primer3_config/interpretations/stack_i.ds +256 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
- data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
- data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
- data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
- data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
- data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
- data/conf/primer3_config/loops.dh +30 -0
- data/conf/primer3_config/loops.ds +30 -0
- data/conf/primer3_config/stack.dh +256 -0
- data/conf/primer3_config/stack.ds +256 -0
- data/conf/primer3_config/stackmm.dh +256 -0
- data/conf/primer3_config/stackmm.ds +256 -0
- data/conf/primer3_config/tetraloop.dh +77 -0
- data/conf/primer3_config/tetraloop.ds +77 -0
- data/conf/primer3_config/triloop.dh +16 -0
- data/conf/primer3_config/triloop.ds +16 -0
- data/conf/primer3_config/tstack.dh +256 -0
- data/conf/primer3_config/tstack2.dh +256 -0
- data/conf/primer3_config/tstack2.ds +256 -0
- data/conf/primer3_config/tstack_tm_inf.ds +256 -0
- data/lib/bio/BFRTools.rb +465 -0
- data/lib/bio/BIOExtensions.rb +153 -0
- data/lib/bio/PolyploidTools/ChromosomeArm.rb +63 -0
- data/lib/bio/PolyploidTools/ExonContainer.rb +245 -0
- data/lib/bio/PolyploidTools/Marker.rb +175 -0
- data/lib/bio/PolyploidTools/Mask.rb +116 -0
- data/lib/bio/PolyploidTools/NoSNPSequence.rb +292 -0
- data/lib/bio/PolyploidTools/PrimerRegion.rb +30 -0
- data/lib/bio/PolyploidTools/SNP.rb +804 -0
- data/lib/bio/PolyploidTools/SNPMutant.rb +86 -0
- data/lib/bio/PolyploidTools/SNPSequence.rb +55 -0
- data/lib/bio/db/blast.rb +114 -0
- data/lib/bio/db/exonerate.rb +333 -0
- data/lib/bio/db/primer3.rb +820 -0
- data/lib/bio-polymarker.rb +28 -0
- data/test/data/7B_amplicon_test.fa +12 -0
- data/test/data/7B_amplicon_test.fa.fai +1 -0
- data/test/data/7B_amplicon_test_reference.fa +110 -0
- data/test/data/7B_amplicon_test_reference.fa.fai +3 -0
- data/test/data/7B_marker_test.txt +1 -0
- data/test/data/BS00068396_51.fa +2 -0
- data/test/data/BS00068396_51_blast.tab +4 -0
- data/test/data/BS00068396_51_contigs.aln +1412 -0
- data/test/data/BS00068396_51_contigs.dnd +7 -0
- data/test/data/BS00068396_51_contigs.fa +8 -0
- data/test/data/BS00068396_51_contigs.fa.fai +4 -0
- data/test/data/BS00068396_51_contigs.fa.nhr +0 -0
- data/test/data/BS00068396_51_contigs.fa.nin +0 -0
- data/test/data/BS00068396_51_contigs.fa.nsq +0 -0
- data/test/data/BS00068396_51_contigs.nhr +0 -0
- data/test/data/BS00068396_51_contigs.nin +0 -0
- data/test/data/BS00068396_51_contigs.nsq +0 -0
- data/test/data/BS00068396_51_exonerate.tab +6 -0
- data/test/data/BS00068396_51_for_polymarker.txt +1 -0
- data/test/data/BS00068396_51_genes.txt +14 -0
- data/test/data/IWGSC_CSS_1AL_scaff_1455974.fa +112 -0
- data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa +2304 -0
- data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
- data/test/data/LIB1716.bam +0 -0
- data/test/data/LIB1716.bam.bai +0 -0
- data/test/data/LIB1719.bam +0 -0
- data/test/data/LIB1719.bam.bai +0 -0
- data/test/data/LIB1721.bam +0 -0
- data/test/data/LIB1721.bam.bai +0 -0
- data/test/data/LIB1722.bam +0 -0
- data/test/data/LIB1722.bam.bai +0 -0
- data/test/data/PST130_7067.csv +1 -0
- data/test/data/PST130_7067.fa +2 -0
- data/test/data/PST130_7067.fa.fai +1 -0
- data/test/data/PST130_7067.fa.ndb +0 -0
- data/test/data/PST130_7067.fa.nhr +0 -0
- data/test/data/PST130_7067.fa.nin +0 -0
- data/test/data/PST130_7067.fa.not +0 -0
- data/test/data/PST130_7067.fa.nsq +0 -0
- data/test/data/PST130_7067.fa.ntf +0 -0
- data/test/data/PST130_7067.fa.nto +0 -0
- data/test/data/PST130_reverse_primer.csv +1 -0
- data/test/data/S22380157.fa +16 -0
- data/test/data/S22380157.fa.fai +1 -0
- data/test/data/S22380157.vcf +67 -0
- data/test/data/S58861868/LIB1716.bam +0 -0
- data/test/data/S58861868/LIB1716.sam +651 -0
- data/test/data/S58861868/LIB1719.bam +0 -0
- data/test/data/S58861868/LIB1719.sam +805 -0
- data/test/data/S58861868/LIB1721.bam +0 -0
- data/test/data/S58861868/LIB1721.sam +1790 -0
- data/test/data/S58861868/LIB1722.bam +0 -0
- data/test/data/S58861868/LIB1722.sam +1271 -0
- data/test/data/S58861868/S58861868.fa +16 -0
- data/test/data/S58861868/S58861868.fa.fai +1 -0
- data/test/data/S58861868/S58861868.vcf +76 -0
- data/test/data/S58861868/header.txt +9 -0
- data/test/data/S58861868/merged.bam +0 -0
- data/test/data/S58861868/merged_reheader.bam +0 -0
- data/test/data/S58861868/merged_reheader.bam.bai +0 -0
- data/test/data/Test3Aspecific.csv +2 -0
- data/test/data/Test3Aspecific_contigs.fa +6 -0
- data/test/data/bfr_out_test.csv +5 -0
- data/test/data/chr1A_C1145499T/chr1A_C1145499T.csv +1 -0
- data/test/data/chr1A_G540414846C/chr1A_G540414846C.csv +1 -0
- data/test/data/chr1A_G540414846C/chr1A_G540414846C.fa +2 -0
- data/test/data/chr1A_T517634750C/chr1A_T517634750C.csv +1 -0
- data/test/data/chr2D_C112180134A/chr2D_C112180134A.csv +1 -0
- data/test/data/chr4D_C14473543T/chr4D_C14473543T.csv +1 -0
- data/test/data/chr4D_C14473543T/chr4D_C14473543T.fa +2 -0
- data/test/data/headerMergeed.txt +9 -0
- data/test/data/headerS2238015 +1 -0
- data/test/data/mergedLibs.bam +0 -0
- data/test/data/mergedLibsReheader.bam +0 -0
- data/test/data/mergedLibsSorted.bam +0 -0
- data/test/data/mergedLibsSorted.bam.bai +0 -0
- data/test/data/patological_cases5D.csv +1 -0
- data/test/data/primer_3_input_header_test +5 -0
- data/test/data/short_primer_design_test.csv +10 -0
- data/test/data/some_tests/some_tests.csv +201 -0
- data/test/data/test_from_mutant.csv +3 -0
- data/test/data/test_iselect.csv +196 -0
- data/test/data/test_iselect_reference.fa +1868 -0
- data/test/data/test_iselect_reference.fa.fai +934 -0
- data/test/data/test_primer3_error.csv +4 -0
- data/test/data/test_primer3_error_contigs.fa +10 -0
- data/test/test_bfr.rb +135 -0
- data/test/test_blast.rb +47 -0
- data/test/test_exon_container.rb +17 -0
- data/test/test_exonearate.rb +48 -0
- data/test/test_integration.rb +76 -0
- data/test/test_snp_parsing.rb +121 -0
- data/test/test_wrong_selection.sh +5 -0
- metadata +356 -0
@@ -0,0 +1,116 @@
|
|
1
|
+
require 'bio'
|
2
|
+
|
3
|
+
# Numeric helpers added to Array for the mask statistics below.
#
# NOTE: this reopens a core class. Ruby >= 2.4 already ships Array#sum, and
# the definition here replaces it for the whole process once this file is
# loaded.
class Array
  # Adds up the elements, starting from a Float so that the total (and the
  # division in #mean) is floating point for numeric arrays.
  #
  # The optional +init+ argument and block mirror the signature of the
  # built-in Array#sum, so code that calls +sum(0)+ or +sum { ... }+ keeps
  # working after this override is loaded.
  #
  # @param init [Object] starting value of the accumulation (default 0.0)
  # @yield [el] optional mapping applied to each element before it is added
  # @return [Object] +init+ plus every (mapped) element, combined with +
  def sum(init = 0.0)
    if block_given?
      inject(init) { |result, el| result + yield(el) }
    else
      inject(init) { |result, el| result + el }
    end
  end

  # Arithmetic mean of the elements.
  #
  # @return [Float] sum / size; NaN for an empty array (0.0 / 0)
  def mean
    sum / size
  end
end
|
12
|
+
|
13
|
+
module Bio::PolyploidTools::Mask
|
14
|
+
# Scans the alignment columns from the right and returns the index of the
# first column (seen from the end) in which no sequence has a "-".
#
# Column 0 is never inspected: the scan stops as soon as the cursor reaches
# 0, so the smallest value returned for a multi-column alignment is 1.
#
# @param seqs [Hash] name => aligned sequence; the length is taken from the
#   first value
# @return [Integer] column index
def self.find_end(seqs)
  names = seqs.keys
  cursor = seqs.values[0].size - 1
  gaps_in_column = 3 # any positive value, just to enter the loop
  while cursor > 0 && gaps_in_column > 0
    gaps_in_column = names.count { |name| seqs[name][cursor] == "-" }
    cursor -= 1
  end
  cursor + 1
end
|
25
|
+
|
26
|
+
# Scans the alignment columns from the left and returns the index of the
# first column in which no sequence has a "-".
#
# When every column contains a gap the scan runs off the end and the last
# column index (size - 1) is returned.
#
# @param seqs [Hash] name => aligned sequence; the length is taken from the
#   first value
# @return [Integer] column index
def self.find_start(seqs)
  width = seqs.values[0].size
  names = seqs.keys
  cursor = 0
  gaps_in_column = 3 # any positive value, just to enter the loop
  while cursor < width && gaps_in_column > 0
    gaps_in_column = names.count { |name| seqs[name][cursor] == "-" }
    cursor += 1
  end
  cursor - 1
end
|
38
|
+
|
39
|
+
def self.get(seqs, target: nil, seq_start: 0, seq_end: 0)
|
40
|
+
names = seqs.keys
|
41
|
+
target = names[0] if target.nil?
|
42
|
+
masked_snps = seqs[target].downcase
|
43
|
+
i = 0
|
44
|
+
while i < masked_snps.size
|
45
|
+
different = 0
|
46
|
+
cov = 0
|
47
|
+
gap = false
|
48
|
+
names.each do | chr |
|
49
|
+
if seqs[chr][i] != "-" and seqs[chr][i] != "n" and seqs[chr][i] != "N"
|
50
|
+
cov += 1
|
51
|
+
end
|
52
|
+
if chr != target
|
53
|
+
different += 1 if masked_snps[i].upcase != seqs[chr][i].upcase
|
54
|
+
end
|
55
|
+
if seqs[chr][i] == "-" and chr == target
|
56
|
+
gap = true
|
57
|
+
end
|
58
|
+
end
|
59
|
+
masked_snps[i] = "." if different == 0
|
60
|
+
masked_snps[i] = "." if cov == 1
|
61
|
+
masked_snps[i] = "*" if cov == 0
|
62
|
+
expected_snps = names.size - 1
|
63
|
+
masked_snps[i] = masked_snps[i].upcase if different == expected_snps
|
64
|
+
if gap
|
65
|
+
masked_snps[i] = different == expected_snps ? "-" : "_"
|
66
|
+
end
|
67
|
+
masked_snps[i] = "|" if i < seq_start or i > seq_end
|
68
|
+
i += 1
|
69
|
+
end
|
70
|
+
masked_snps
|
71
|
+
end
|
72
|
+
|
73
|
+
# Summarises a mask string (see the symbols produced by Mask.get).
#
# Walks the mask one character at a time:
#   "n"/"N"   counted as aligned, nothing else
#   lowercase closes the current semi-specific run
#   UPPERCASE closes both the specific and the semi-specific run
#   "."       extends both runs
# Any other character is ignored and does not count as aligned.
#
# Relies on Array#mean being defined (it is added to Array in this file).
#
# @param mask [#to_s] mask string
# @param triad, gene, genome, reference passed through to the result
# @return [Hash] run lengths (:specific, :semispecific), their means and
#   counts, identity percentages and :aligned_length
def self.stats(mask, triad, gene, genome, reference)
  specific_runs = []
  semispecific_runs = []
  specific_run = 0
  semispecific_run = 0
  aligned = 0

  mask.to_s.each_char do |symbol|
    if symbol == "n" || symbol == "N"
      aligned += 1
    elsif symbol =~ /[[:lower:]]/
      semispecific_runs << semispecific_run
      semispecific_run = 0
      aligned += 1
    elsif symbol =~ /[[:upper:]]/
      specific_runs << specific_run
      semispecific_runs << semispecific_run
      specific_run = 0
      semispecific_run = 0
      aligned += 1
    elsif symbol == "."
      semispecific_run += 1
      specific_run += 1
      aligned += 1
    end
  end

  # Percentage of aligned columns that are NOT run boundaries.
  identity = ->(boundaries) { (1 - (boundaries.to_f / aligned)) * 100 }

  {
    reference: reference,
    triad: triad,
    genome: genome,
    gene: gene,
    semispecific_mean: semispecific_runs.mean,
    semispecific_bases: semispecific_runs.size,
    semispecific_identity: identity.call(semispecific_runs.size),
    specific_mean: specific_runs.mean,
    specific_bases: specific_runs.size,
    specific_identity: identity.call(specific_runs.size),
    aligned_length: aligned,
    specific: specific_runs,
    semispecific: semispecific_runs
  }
end
|
115
|
+
end
|
116
|
+
|
@@ -0,0 +1,292 @@
|
|
1
|
+
|
2
|
+
require_relative "SNP"
|
3
|
+
require 'bio-samtools-wrapper'
|
4
|
+
module Bio::PolyploidTools
|
5
|
+
# Error type used by NoSNPSequence.parse when an input line does not have
# the expected number of comma-separated fields.
class SNPSequenceException < RuntimeError; end
|
7
|
+
|
8
|
+
class NoSNPSequence < SNP
|
9
|
+
|
10
|
+
attr_accessor :sequence_original
|
11
|
+
# Builds a NoSNPSequence from one comma-separated input line.
#
# Format:
#   snp name,chromosome from contig,microarray sequence
#   snp name,microarray sequence
# Example:
#   BS00068396_51,2AS,CGAAGCGATCCTACTACATTGCGTTCCTTTCCCACTCCCAGGTCCCCCTA[T/C]ATGCAGGATCTTGATTAGTCGTGTGAACAACTGAAATTTGAGCGCCACAA
#
# The input string is not modified (a chomped copy is parsed), so frozen
# strings and lines the caller still needs are safe to pass in.
#
# @param reg_str [String] input line, with or without a trailing newline
# @return [NoSNPSequence] populated object; exon_list is a fresh empty Hash
# @raise [SNPSequenceException] when the line has neither two nor three
#   fields
def self.parse(reg_str)
  line = reg_str.chomp
  snp = NoSNPSequence.new
  fields = line.split(",")

  case fields.size
  when 3
    snp.gene, snp.chromosome, snp.sequence_original = fields
  when 2
    snp.gene, snp.sequence_original = fields
  else
    raise SNPSequenceException, "Need two or three fields to parse, and got #{fields.size} in #{line}"
  end

  # The two-field form assigns no chromosome, so only strip when one is set.
  snp.chromosome.strip! if snp.chromosome
  snp.snp_in = snp.chromosome
  snp.parse_sequence_snp
  snp.exon_list = Hash.new()
  snp
end
|
36
|
+
|
37
|
+
# Deliberately does nothing and returns nil.
# NOTE(review): presumably overrides a parser inherited from SNP, which is
# not needed for this input format - confirm against SNP.
def parse_snp; end
|
40
|
+
|
41
|
+
# Takes the middle base of sequence_original as the marker position.
#
# Sets @position to length / 2 (integer division) and both @original and
# @snp to the base found there, i.e. no alternative allele is recorded.
#
# @return [String, nil] the base at the middle position
def parse_sequence_snp
  template = sequence_original
  @position = (template.length / 2).to_i
  @snp = @original = template[@position]
end
|
46
|
+
|
47
|
+
# Short label for the marker: "<gene>:<chromosome>".
#
# @return [String]
def to_s
  format("%s:%s", gene, chromosome)
end
|
50
|
+
|
51
|
+
# Sequences that will be handed to the aligner.
#
# Computed once from surrounding_exon_sequences and cached in
# @sequences_to_align; the cached object is returned on later calls.
def sequences_to_align
  @sequences_to_align ||= surrounding_exon_sequences
end
|
55
|
+
|
56
|
+
# Builds a per-column mask of the target sequence against the other aligned,
# non-parental sequences.
#
# Symbols written for each column (later rules override earlier ones):
#   lowercase base  the target differs from some of the other sequences
#   "-"             no covering sequence differs, or only one sequence
#                   covers the column
#   "*"             no sequence covers the column
#   UPPERCASE base  every other sequence differs (different == names - 1)
#                   and the number of covering sequences whose name starts
#                   with chromosome_group equals genomes_count
#
# A sequence "covers" a column when it has something other than "-" there.
# Sequences named like +chromosome+ are not compared against the target.
#
# @param chromosome [String] chromosome used to pick the target sequence
#   (through get_target_sequence) and to skip the self-comparison
# @return [String, nil] the mask, or nil when there are no aligned sequences
def mask_aligned_chromosomal_snp(chromosome)
  return nil if aligned_sequences.values.size == 0
  names = aligned_sequences.keys
  parentals = parental_sequences.keys
  names = names - parentals

  best_target = get_target_sequence(names, chromosome)
  masked_snps = aligned_sequences[best_target].downcase if aligned_sequences[best_target]
  # No sequence for the target: start from an all-gap template.
  masked_snps = "-" * aligned_sequences.values[0].size unless aligned_sequences[best_target]

  #TODO: Make this chromosome specific, even when we have more than one alignment going to the region we want.
  i = 0
  while i < masked_snps.size
    different = 0
    cov = 0
    from_group = 0
    names.each do | chr |
      if aligned_sequences[chr] and aligned_sequences[chr][i] != "-"
        cov += 1

        from_group += 1 if chr[0] == chromosome_group
        #puts "Comparing #{chromosome_group} and #{chr[0]} as chromosomes"
        if chr != chromosome
          # Warn about a missing base before it is used. The guards test the
          # bases themselves: calling upcase on a missing base would raise
          # before any warning could be printed.
          $stderr.puts "WARN: No base for #{masked_snps} : ##{i}" unless masked_snps[i]
          $stderr.puts "WARN: No base for #{aligned_sequences[chr]} : ##{i}" unless aligned_sequences[chr][i]
          different += 1 if masked_snps[i].upcase != aligned_sequences[chr][i].upcase
        end
      end
    end
    masked_snps[i] = "-" if different == 0
    masked_snps[i] = "-" if cov == 1
    masked_snps[i] = "*" if cov == 0
    expected_snps = names.size - 1
    #puts "Diferences: #{different} to expected: #{ expected_snps } [#{i}] Genome count (#{from_group} == #{genomes_count})"

    masked_snps[i] = masked_snps[i].upcase if different == expected_snps and from_group == genomes_count

    i += 1
  end
  masked_snps
end
|
98
|
+
|
99
|
+
# Counts the alignment columns around +position+ in which at least one
# aligned sequence has a "-".
#
# The window is position - flanking_size .. position + flanking_size,
# clipped to the bounds of the target's aligned sequence.
#
# @param position [Integer] centre column of the window
# @param target_chromosome [Object] key of the sequence whose length bounds
#   the window
# @return [Integer] number of columns containing a gap
def count_deletions_around(position,target_chromosome)
  target_alignment = aligned_sequences[target_chromosome]

  window_start = [position - flanking_size, 0].max
  window_end = position + flanking_size
  window_end = target_alignment.size - 1 if window_end >= target_alignment.size

  (window_start..window_end).count do |column|
    gap_seen = false
    aligned_sequences.each_pair do |_name, bases|
      gap_seen = true if bases[column] == '-'
    end
    gap_seen
  end
end
|
119
|
+
|
120
|
+
# Builds the PrimerRegion for a target chromosome: a gap-free template
# sequence in which positions flagged by the mask are marked, plus the lists
# of those positions.
#
# For every column where the target sequence has a base:
#   * mask uppercase -> the base is taken from the mask (uppercase) and the
#     position is recorded in chromosome_specific / chromosome_specific_in_mask
#   * mask lowercase -> the base is written uppercase and the position is
#     recorded in almost_chromosome_specific / almost_chromosome_specific_in_mask
# Both only happen when Bio::NucleicAcid.is_valid accepts the pair; that
# helper is defined outside this file.
#
# NOTE(review): parental_chr is accepted but never read in this method.
#
# @param target_chromosome [String] chromosome to design for; resolved to an
#   alignment key through get_target_sequence
# @param parental_chr unused here
# @return [PrimerRegion] region with sequence (gaps removed) and positions
def primer_region(target_chromosome, parental_chr )
  # NOTE(review): this first lookup is overwritten three lines below, after
  # target_chromosome has been resolved.
  chromosome_seq = aligned_sequences[target_chromosome]
  names = aligned_sequences.keys
  target_chromosome = get_target_sequence(names, target_chromosome)
  chromosome_seq = aligned_sequences[target_chromosome]
  # Fallbacks: unaligned exon sequence when nothing was aligned, then an
  # all-gap template as long as the original sequence.
  chromosome_seq = surrounding_exon_sequences[target_chromosome ]if aligned_sequences.size == 0
  chromosome_seq = "-" * sequence_original.size unless chromosome_seq
  chromosome_seq = chromosome_seq.downcase
  #puts chromosome_seq
  # NOTE(review): mask_aligned_chromosomal_snp returns nil when there are no
  # aligned sequences; mask[i] below would then fail - confirm that case
  # cannot reach the loop with a non-gap base.
  mask = mask_aligned_chromosomal_snp(target_chromosome)

  pr = PrimerRegion.new
  pr.homoeologous = false
  # Index into the gap-free template (advances only on non-gap columns).
  position_in_region = 0
  parental = chromosome_seq.clone
  (0..chromosome_seq.size-1).each do |i|

    if chromosome_seq[i] != '-'
      case
      when mask[i] == '-'
        # The mask found no difference here, so the parental base is kept.
        # NOTE(review): parental is a clone of chromosome_seq and index i has
        # not been written yet, so this assignment does not change anything.
        parental[i] = chromosome_seq[i] unless Bio::NucleicAcid::is_unambiguous(parental[i])
      when /[[:upper:]]/.match(mask[i])
        # This is a good candidate for marking a SNP.
        # We validate that the consensus from the sam file accepts the variation from the chromosomal sequence
        # NOTE(review): parental[i] equals chromosome_seq[i] here, which is
        # not '-' inside this branch, so the intron case looks unreachable.
        if parental[i] == '-'
          parental[i] = mask[i]
          pr.crhomosome_specific_intron << position_in_region
        elsif Bio::NucleicAcid.is_valid(parental[i], mask[i])
          parental[i] = mask[i]
          pr.chromosome_specific << position_in_region #if count_deletions_around(1,target_chromosome) < 3
          pr.chromosome_specific_in_mask << i
        end

      when /[[:lower:]]/.match(mask[i])
        # Not as good a candidate, but it still gives some specificity.
        if parental[i] == '-'
          parental[i] = mask[i]
          pr.almost_crhomosome_specific_intron << position_in_region
        elsif Bio::NucleicAcid.is_valid(parental[i], mask[i])
          parental[i] = mask[i].upcase
          pr.almost_chromosome_specific << position_in_region
          pr.almost_chromosome_specific_in_mask << i
        end
      end #Case closes
      # Remember which alignment column each template position came from.
      pr.position_in_mask_from_template[position_in_region] = i
      position_in_region += 1
    end #Closes region with bases
  end
  pr.sequence=parental.gsub('-','')
  pr
end
|
172
|
+
|
173
|
+
# Formats one or two Primer3 input records for a template.
#
# With opts[:right_pos] a single record is produced. If left_pos is greater
# than right_pos both positions are mirrored and the template is reverse
# complemented (orientation "reverse"). When @variation_free_region is
# positive, that many bases after the right position must all be lowercase,
# otherwise nil is returned.
#
# Without opts[:right_pos] two records are produced: the forward one and the
# reverse-complemented one with the left position mirrored.
#
# @param opts [Hash] :name, :left_pos, :sequence and optionally :right_pos
# @return [String, nil] Primer3 records, each terminated by "=\n"
def return_primer_3_string(opts={})
  #puts "return_primer_3_string #{opts.inspect}"
  forced_left = opts[:left_pos]
  forced_right = opts[:right_pos]
  template = opts[:sequence].clone
  paired = opts[:right_pos]
  strand = "forward"

  if paired
    if forced_left > forced_right
      forced_left = template.size - forced_left - 1
      forced_right = template.size - forced_right - 1
      template = reverse_complement_string(template)
      strand = "reverse"
    end
    if @variation_free_region > 0
      downstream = template[forced_right + 1, @variation_free_region]
      return nil if downstream != downstream.downcase
    end
  end

  lines = []
  lines << "SEQUENCE_ID=#{opts[:name]} #{strand}"
  lines << "SEQUENCE_FORCE_LEFT_END=#{forced_left}"
  lines << "SEQUENCE_FORCE_RIGHT_END=#{forced_right}" if paired
  lines << "SEQUENCE_TEMPLATE=#{template}"
  lines << "="

  # In case that we don't have a right primer, we do both orientations
  unless paired
    mirrored = opts[:sequence].clone
    forced_left = mirrored.size - forced_left - 1
    strand = "reverse"
    mirrored = reverse_complement_string(mirrored)
    lines << "SEQUENCE_ID=#{opts[:name]} #{strand}"
    lines << "SEQUENCE_FORCE_LEFT_END=#{forced_left}"
    lines << "SEQUENCE_TEMPLATE=#{mirrored}"
    lines << "="
  end

  lines.map { |line| line + "\n" }.join
end
|
216
|
+
|
217
|
+
# Returns the base at +position+ in the first aligned sequence whose name is
# not +target_chromosome+.
#
# @param position [Integer] column in the alignment
# @param target_chromosome [Object] name of the sequence to skip
# @return [String, nil] the base, or nil when there is no other sequence
#   (previously the whole alignment collection leaked out as the result of
#   each_pair in that case)
def get_base_in_different_chromosome(position, target_chromosome)
  aligned_sequences.each_pair do |name, val|
    next if target_chromosome == name
    return val[position]
  end
  nil
end
|
224
|
+
|
225
|
+
# Produces the Primer3 input records for every chromosome-specific position
# of the primer region.
#
# For each position two entries are appended, both forcing the left primer
# end at that position: "A" uses the region's template as is, "B" uses a
# copy in which the base at the position is replaced by the uppercased base
# found in a different aligned sequence. Entries are whatever
# return_primer_3_string returns.
#
# @param target_chromosome [String] chromosome to design for
# @param parental passed through to primer_region
# @param max_specific_primers not read by this method
# @return [Array] records; empty when the template is shorter than
#   primer_3_min_seq_length
def primer_3_all_strings(target_chromosome, parental, max_specific_primers: nil)
  #puts "primer_3_all_strings: #{target_chromosome} #{parental}"
  region = primer_region(target_chromosome, parental )
  records = []

  template = String.new(region.sequence)
  return records if template.size < primer_3_min_seq_length

  snp_type = region.homoeologous ? "homoeologous" : "non-homoeologous"

  region.chromosome_specific.each_with_index do |pos, idx|
    resolved_target = get_target_sequence(aligned_sequences.keys, target_chromosome)
    alternative_base = get_base_in_different_chromosome(region.chromosome_specific_in_mask[idx], resolved_target)

    request = {
      :name => "#{gene} A chromosome_specific exon #{snp_type} #{chromosome}",
      :left_pos => pos,
      :sequence => template.clone
    }
    records << return_primer_3_string(request)

    # Same request, but with the other chromosome's base at the position.
    variant = template.clone
    request[:name] = "#{gene} B chromosome_specific exon #{snp_type} #{chromosome}"
    variant[pos] = alternative_base.upcase
    request[:sequence] = variant
    records << return_primer_3_string(request)
  end

  records
end
|
270
|
+
|
271
|
+
# Alignment of sequences_to_align, computed once and cached in
# @aligned_sequences.
#
# With zero or one sequence there is nothing to align and the input is
# returned as is. Otherwise the sequences are aligned through Bio::MAFFT
# (program "mafft", options --maxiterate 1000 --localpair --quiet) and the
# report's alignment is returned.
def aligned_sequences
  return @aligned_sequences if @aligned_sequences

  @aligned_sequences =
    if sequences_to_align.size <= 1
      sequences_to_align
    else
      mafft_options = ['--maxiterate', '1000', '--localpair', '--quiet']
      aligner = Bio::MAFFT.new( "mafft" , mafft_options)
      # puts "Before MAFT:#{sequences_to_align.inspect}"
      aligner.query_align(sequences_to_align).alignment
      # puts "MAFFT: #{report.alignment.inspect}"
    end
end
|
286
|
+
|
287
|
+
|
288
|
+
|
289
|
+
|
290
|
+
|
291
|
+
end
|
292
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Bio::PolyploidTools
|
2
|
+
# Container for a primer design template and the positions in it that can
# anchor a chromosome-specific primer.
#
# The "crhomosome" spelling of the intron accessors is part of the public
# interface and is kept as is.
class PrimerRegion
  # Template sequence and marker description.
  attr_accessor :snp_pos, :sequence, :homoeologous
  # Positions in the template.
  attr_accessor :chromosome_specific, :almost_chromosome_specific
  attr_accessor :crhomosome_specific_intron, :almost_crhomosome_specific_intron
  # The same positions expressed as columns of the mask (for deletions).
  attr_accessor :chromosome_specific_in_mask, :almost_chromosome_specific_in_mask
  # Template position => mask column.
  attr_accessor :position_in_mask_from_template

  # Starts with every position list empty; snp_pos, sequence and
  # homoeologous stay nil until assigned.
  def initialize
    @chromosome_specific = []
    @almost_chromosome_specific = []
    @crhomosome_specific_intron = []
    @almost_crhomosome_specific_intron = []
    #For deletions
    @chromosome_specific_in_mask = []
    @almost_chromosome_specific_in_mask = []
    @position_in_mask_from_template = {}
  end

  # Number of specific plus almost-specific positions.
  #
  # @return [Integer]
  def tail_candidates
    @chromosome_specific.length + @almost_chromosome_specific.length
  end

  # FASTA record whose header lists snp_pos and the four position lists,
  # joined with "_", followed by the sequence.
  #
  # @return [String]
  def to_fasta
    header_fields = [
      snp_pos,
      chromosome_specific,
      almost_chromosome_specific,
      crhomosome_specific_intron,
      almost_crhomosome_specific_intron
    ].map(&:to_s)
    ">Primer_#{header_fields.join('_')}\n#{sequence}\n"
  end
end
|
30
|
+
end
|